from .builder import CUDAOpBuilder, installed_cuda_version


class InferenceBuilder(CUDAOpBuilder):
    BUILD_VAR = "DS_BUILD_TRANSFORMER_INFERENCE"
    NAME = "transformer_inference"

    def __init__(self, name=None):
        name = self.NAME if name is None else name
        super().__init__(name=name)

    def absolute_name(self):
        return f'deepspeed.ops.transformer.inference.{self.NAME}_op'

    def is_compatible(self, verbose=True):
        try:
            import torch
        except ImportError:
            self.warning("Please install torch if trying to pre-compile inference kernels")
            return False

        cuda_okay = True
        if not self.is_rocm_pytorch() and torch.cuda.is_available():
            sys_cuda_major, _ = installed_cuda_version()
            torch_cuda_major = int(torch.version.cuda.split('.')[0])
            cuda_capability = torch.cuda.get_device_properties(0).major
            if cuda_capability < 6:
                self.warning("NVIDIA Inference is only supported on Pascal and newer architectures")
                cuda_okay = False
            if cuda_capability >= 8:
                # Ampere (SM 8.x) and newer require CUDA 11+ in both the torch
                # build and the installed toolkit.
                if torch_cuda_major < 11 or sys_cuda_major < 11:
                    self.warning("On Ampere and higher architectures please use CUDA 11+")
                    cuda_okay = False
        return super().is_compatible(verbose) and cuda_okay

    def filter_ccs(self, ccs):
        # Keep only compute capabilities with major version >= 6 (Pascal and
        # newer); the inference kernels are not built for older architectures.
        ccs_retained = []
        ccs_pruned = []
        for cc in ccs:
            if int(cc[0]) >= 6:
                ccs_retained.append(cc)
            else:
                ccs_pruned.append(cc)
        if len(ccs_pruned) > 0:
            self.warning(f"Filtered compute capabilities {ccs_pruned}")
        return ccs_retained

    def sources(self):
        return [
            'csrc/transformer/inference/csrc/pt_binding.cpp',
            'csrc/transformer/inference/csrc/gelu.cu',
            'csrc/transformer/inference/csrc/relu.cu',
            'csrc/transformer/inference/csrc/layer_norm.cu',
            'csrc/transformer/inference/csrc/softmax.cu',
            'csrc/transformer/inference/csrc/dequantize.cu',
            'csrc/transformer/inference/csrc/apply_rotary_pos_emb.cu',
            'csrc/transformer/inference/csrc/transform.cu',
        ]

    def extra_ldflags(self):
        # cuRAND is only linked for the CUDA build; the ROCm/HIP build needs nothing extra.
        if not self.is_rocm_pytorch():
            return ['-lcurand']
        else:
            return []

    def include_paths(self):
        return ['csrc/transformer/inference/includes', 'csrc/includes']
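

# Usage sketch (added for illustration, not part of the original builder):
# how a caller might probe compatibility and JIT-load the op. It assumes the
# standard DeepSpeed OpBuilder.load() entry point is available; exact behavior
# depends on the installed DeepSpeed version. Because this module uses a
# relative import, run it as part of the package (e.g. via `python -m ...`)
# rather than as a standalone script.
if __name__ == "__main__":
    builder = InferenceBuilder()
    if builder.is_compatible(verbose=True):
        # load() either imports a pre-compiled extension or JIT-compiles the
        # sources listed above and returns the bound module.
        inference_module = builder.load()
        print(f"Loaded {builder.absolute_name()}")
    else:
        print("transformer_inference kernels are not supported on this system")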