transformer_inference.py

from .builder import CUDAOpBuilder


class InferenceBuilder(CUDAOpBuilder):
    # Environment variable that controls whether this op is pre-built at install time.
    BUILD_VAR = "DS_BUILD_TRANSFORMER_INFERENCE"
    NAME = "transformer_inference"

    def __init__(self, name=None):
        name = self.NAME if name is None else name
        super().__init__(name=name)

    def absolute_name(self):
        # Fully qualified module name of the compiled extension.
        return f'deepspeed.ops.transformer.inference.{self.NAME}_op'

    def sources(self):
        # C++ PyTorch binding plus the CUDA kernels that make up this op.
        return [
            'csrc/transformer/inference/csrc/pt_binding.cpp',
            'csrc/transformer/inference/csrc/gelu.cu',
            'csrc/transformer/inference/csrc/normalize.cu',
            'csrc/transformer/inference/csrc/softmax.cu',
            'csrc/transformer/inference/csrc/dequantize.cu',
            'csrc/transformer/inference/csrc/apply_rotary_pos_emb.cu',
            'csrc/transformer/inference/csrc/transform.cu',
        ]

    def extra_ldflags(self):
        # Link against cuRAND on CUDA builds; ROCm PyTorch builds skip it.
        if not self.is_rocm_pytorch():
            return ['-lcurand']
        else:
            return []

    def include_paths(self):
        return ['csrc/transformer/inference/includes']
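

# Minimal usage sketch (not part of this file): DeepSpeed op builders are
# typically instantiated and JIT-compiled via the load() method inherited from
# the base OpBuilder. The import path below assumes the op_builder package is
# exposed as deepspeed.ops.op_builder and that a CUDA-enabled PyTorch install
# with a working nvcc is available.
#
#     from deepspeed.ops.op_builder import InferenceBuilder
#     inference_module = InferenceBuilder().load()  # compiles the listed sources on first use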