transformer_inference.py

# Copyright (c) Microsoft Corporation.
# SPDX-License-Identifier: Apache-2.0

# DeepSpeed Team

from .builder import CUDAOpBuilder, installed_cuda_version

class InferenceBuilder(CUDAOpBuilder):
    BUILD_VAR = "DS_BUILD_TRANSFORMER_INFERENCE"
    NAME = "transformer_inference"

    def __init__(self, name=None):
        name = self.NAME if name is None else name
        super().__init__(name=name)

    def absolute_name(self):
        return f'deepspeed.ops.transformer.inference.{self.NAME}_op'
    def is_compatible(self, verbose=True):
        try:
            import torch
        except ImportError:
            self.warning("Please install torch if trying to pre-compile inference kernels")
            return False

        cuda_okay = True
        if not self.is_rocm_pytorch() and torch.cuda.is_available():
            sys_cuda_major, _ = installed_cuda_version()
            torch_cuda_major = int(torch.version.cuda.split('.')[0])
            cuda_capability = torch.cuda.get_device_properties(0).major
            # The inference kernels require Pascal (SM 6.x) or newer GPUs.
            if cuda_capability < 6:
                self.warning("NVIDIA Inference is only supported on Pascal and newer architectures")
                cuda_okay = False
            # Ampere (SM 8.x) and newer additionally need CUDA 11+ in both the
            # system toolkit and the torch build.
            if cuda_capability >= 8:
                if torch_cuda_major < 11 or sys_cuda_major < 11:
                    self.warning("On Ampere and higher architectures please use CUDA 11+")
                    cuda_okay = False
        return super().is_compatible(verbose) and cuda_okay
    def filter_ccs(self, ccs):
        ccs_retained = []
        ccs_pruned = []
        for cc in ccs:
            # Keep only compute capabilities of 6.x (Pascal) and newer.
            if int(cc[0]) >= 6:
                ccs_retained.append(cc)
            else:
                ccs_pruned.append(cc)
        if len(ccs_pruned) > 0:
            self.warning(f"Filtered compute capabilities {ccs_pruned}")
        return ccs_retained
    def sources(self):
        return [
            'csrc/transformer/inference/csrc/pt_binding.cpp',
            'csrc/transformer/inference/csrc/gelu.cu',
            'csrc/transformer/inference/csrc/relu.cu',
            'csrc/transformer/inference/csrc/layer_norm.cu',
            'csrc/transformer/inference/csrc/rms_norm.cu',
            'csrc/transformer/inference/csrc/softmax.cu',
            'csrc/transformer/inference/csrc/dequantize.cu',
            'csrc/transformer/inference/csrc/apply_rotary_pos_emb.cu',
            'csrc/transformer/inference/csrc/transform.cu',
            'csrc/transformer/inference/csrc/pointwise_ops.cu',
        ]
    def extra_ldflags(self):
        # Link against cuRAND on CUDA builds; ROCm builds need no extra libraries.
        if not self.is_rocm_pytorch():
            return ['-lcurand']
        else:
            return []

    def include_paths(self):
        return ['csrc/transformer/inference/includes', 'csrc/includes']
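
# For illustration only (not part of this file): a minimal sketch of how a
# builder like this is typically exercised, assuming the standard DeepSpeed
# entry points -- the `deepspeed.ops.op_builder` package, `is_compatible()`
# defined above, and the JIT `load()` inherited from the base OpBuilder:
#
#     from deepspeed.ops.op_builder import InferenceBuilder
#
#     builder = InferenceBuilder()
#     if builder.is_compatible():
#         # JIT-compiles the .cpp/.cu sources listed in sources() and
#         # returns the loaded extension module.
#         inference_module = builder.load()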