# Copyright (c) Microsoft Corporation.
# SPDX-License-Identifier: Apache-2.0

# DeepSpeed Team

import os

from .builder import CUDAOpBuilder, installed_cuda_version
  6. class InferenceCoreBuilder(CUDAOpBuilder):
  7. BUILD_VAR = "DS_BUILD_INFERENCE_CORE_OPS"
  8. NAME = "inference_core_ops"
  9. def __init__(self, name=None):
  10. name = self.NAME if name is None else name
  11. super().__init__(name=name)
  12. def absolute_name(self):
  13. return f'deepspeed.inference.v2.kernels{self.NAME}'
  14. def is_compatible(self, verbose=True):
  15. try:
  16. import torch
  17. except ImportError:
  18. self.warning("Please install torch if trying to pre-compile inference kernels")
  19. return False
  20. cuda_okay = True
  21. if not self.is_rocm_pytorch() and torch.cuda.is_available(): #ignore-cuda
  22. sys_cuda_major, _ = installed_cuda_version()
  23. torch_cuda_major = int(torch.version.cuda.split('.')[0])
  24. cuda_capability = torch.cuda.get_device_properties(0).major #ignore-cuda
  25. if cuda_capability < 6:
  26. self.warning("NVIDIA Inference is only supported on Pascal and newer architectures")
  27. cuda_okay = False
  28. if cuda_capability >= 8:
  29. if torch_cuda_major < 11 or sys_cuda_major < 11:
  30. self.warning("On Ampere and higher architectures please use CUDA 11+")
  31. cuda_okay = False
  32. return super().is_compatible(verbose) and cuda_okay
  33. def filter_ccs(self, ccs):
  34. ccs_retained = []
  35. ccs_pruned = []
  36. for cc in ccs:
  37. if int(cc[0]) >= 6:
  38. ccs_retained.append(cc)
  39. else:
  40. ccs_pruned.append(cc)
  41. if len(ccs_pruned) > 0:
  42. self.warning(f"Filtered compute capabilities {ccs_pruned}")
  43. return ccs_retained
  44. def get_prefix(self):
  45. ds_path = self.deepspeed_src_path("deepspeed")
  46. return "deepspeed" if os.path.isdir(ds_path) else ".."
  47. def sources(self):
  48. sources = [
  49. "inference/v2/kernels/core_ops/core_ops.cpp",
  50. "inference/v2/kernels/core_ops/bias_activations/bias_activation.cpp",
  51. "inference/v2/kernels/core_ops/bias_activations/bias_activation.cu",
  52. "inference/v2/kernels/core_ops/cuda_layer_norm/layer_norm.cpp",
  53. "inference/v2/kernels/core_ops/cuda_layer_norm/layer_norm.cu",
  54. "inference/v2/kernels/core_ops/cuda_rms_norm/rms_norm.cpp",
  55. "inference/v2/kernels/core_ops/cuda_rms_norm/rms_norm.cu",
  56. "inference/v2/kernels/core_ops/gated_activations/gated_activation_kernels.cpp",
  57. "inference/v2/kernels/core_ops/gated_activations/gated_activation_kernels.cu",
  58. ]
  59. prefix = self.get_prefix()
  60. sources = [os.path.join(prefix, src) for src in sources]
  61. return sources
  62. def extra_ldflags(self):
  63. return []
  64. def include_paths(self):
  65. sources = [
  66. 'inference/v2/kernels/core_ops/bias_activations',
  67. 'inference/v2/kernels/core_ops/blas_kernels',
  68. 'inference/v2/kernels/core_ops/cuda_layer_norm',
  69. 'inference/v2/kernels/core_ops/cuda_rms_norm',
  70. 'inference/v2/kernels/core_ops/gated_activations',
  71. 'inference/v2/kernels/includes',
  72. ]
  73. prefix = self.get_prefix()
  74. sources = [os.path.join(prefix, src) for src in sources]
  75. return sources