# transformer_inference.py
  1. from .builder import CUDAOpBuilder, installed_cuda_version
  2. class InferenceBuilder(CUDAOpBuilder):
  3. BUILD_VAR = "DS_BUILD_TRANSFORMER_INFERENCE"
  4. NAME = "transformer_inference"
  5. def __init__(self, name=None):
  6. name = self.NAME if name is None else name
  7. super().__init__(name=name)
  8. def absolute_name(self):
  9. return f'deepspeed.ops.transformer.inference.{self.NAME}_op'
  10. def is_compatible(self, verbose=True):
  11. try:
  12. import torch
  13. except ImportError:
  14. self.warning(
  15. "Please install torch if trying to pre-compile inference kernels")
  16. return False
  17. cuda_okay = True
  18. if not self.is_rocm_pytorch() and torch.cuda.is_available():
  19. sys_cuda_major, _ = installed_cuda_version()
  20. torch_cuda_major = int(torch.version.cuda.split('.')[0])
  21. cuda_capability = torch.cuda.get_device_properties(0).major
  22. if cuda_capability < 6:
  23. self.warning(
  24. "NVIDIA Inference is only supported on Pascal and newer architectures"
  25. )
  26. cuda_okay = False
  27. if cuda_capability >= 8:
  28. if torch_cuda_major < 11 or sys_cuda_major < 11:
  29. self.warning(
  30. "On Ampere and higher architectures please use CUDA 11+")
  31. cuda_okay = False
  32. return super().is_compatible(verbose) and cuda_okay
  33. def filter_ccs(self, ccs):
  34. ccs_retained = []
  35. ccs_pruned = []
  36. for cc in ccs:
  37. if int(cc[0]) >= 6:
  38. ccs_retained.append(cc)
  39. else:
  40. ccs_pruned.append(cc)
  41. if len(ccs_pruned) > 0:
  42. self.warning(f"Filtered compute capabilities {ccs_pruned}")
  43. return ccs_retained
  44. def sources(self):
  45. return [
  46. 'csrc/transformer/inference/csrc/pt_binding.cpp',
  47. 'csrc/transformer/inference/csrc/gelu.cu',
  48. 'csrc/transformer/inference/csrc/relu.cu',
  49. 'csrc/transformer/inference/csrc/layer_norm.cu',
  50. 'csrc/transformer/inference/csrc/softmax.cu',
  51. 'csrc/transformer/inference/csrc/dequantize.cu',
  52. 'csrc/transformer/inference/csrc/apply_rotary_pos_emb.cu',
  53. 'csrc/transformer/inference/csrc/transform.cu',
  54. ]
  55. def extra_ldflags(self):
  56. if not self.is_rocm_pytorch():
  57. return ['-lcurand']
  58. else:
  59. return []
  60. def include_paths(self):
  61. return ['csrc/transformer/inference/includes', 'csrc/includes']