123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172 |
- # Copyright (c) Microsoft Corporation.
- # SPDX-License-Identifier: Apache-2.0
- # DeepSpeed Team
- from .builder import CUDAOpBuilder, installed_cuda_version
- import os
class EvoformerAttnBuilder(CUDAOpBuilder):
    """Op builder for the DeepSpeed4Science Evoformer attention extension.

    The extension is compiled against CUTLASS headers. The location of the
    CUTLASS checkout is read from the ``CUTLASS_PATH`` environment variable
    when the builder is constructed.
    """

    BUILD_VAR = "DS_BUILD_EVOFORMER_ATTN"
    NAME = "evoformer_attn"

    def __init__(self, name=None):
        """Create the builder.

        Args:
            name: Op name handed to the base builder; ``NAME`` is used when
                this is ``None``.
        """
        name = self.NAME if name is None else name
        super().__init__(name=name)
        # None when the variable is unset; is_compatible() rejects that case.
        self.cutlass_path = os.environ.get('CUTLASS_PATH')

    def absolute_name(self):
        """Return the dotted module path of the compiled op."""
        return f'deepspeed.ops.{self.NAME}_op'

    def extra_ldflags(self):
        """Return extra linker flags: ``-lcurand`` unless on ROCm PyTorch."""
        if not self.is_rocm_pytorch():
            return ['-lcurand']
        else:
            return []

    def sources(self):
        """Return the C++/CUDA source files that make up the extension."""
        src_dir = 'csrc/deepspeed4science/evoformer_attn'
        return [f'{src_dir}/attention.cpp', f'{src_dir}/attention_back.cu', f'{src_dir}/attention_cu.cu']

    def nvcc_args(self):
        """Return the base nvcc arguments plus a ``-DGPU_ARCH`` define.

        ``GPU_ARCH`` is the concatenated major/minor compute capability of
        device 0. When torch cannot be imported, a warning is emitted and the
        base arguments are returned unchanged.
        """
        args = super().nvcc_args()
        try:
            import torch
        except ImportError:
            self.warning("Please install torch if trying to pre-compile kernels")
            return args
        # Query the device once; major and minor come from the same object.
        # NOTE(review): this requires device 0 to be visible at build time.
        device_props = torch.cuda.get_device_properties(0)  #ignore-cuda
        args.append(f"-DGPU_ARCH={device_props.major}{device_props.minor}")
        return args

    def is_compatible(self, verbose=True):
        """Check whether this op can be built in the current environment.

        The checks are, in order: torch is importable, ``CUTLASS_PATH`` is
        set, the CUTLASS ``CHANGELOG.md`` is readable and mentions ``3.1.0``,
        and (on non-ROCm builds with CUDA available) device 0 has compute
        capability >= 7 and both the torch and system CUDA major versions
        are >= 11. The base-class check must pass as well.

        Args:
            verbose: When true, emit a warning describing each failed check.

        Returns:
            ``True`` if every check passes, ``False`` otherwise.
        """
        try:
            import torch
        except ImportError:
            if verbose:
                self.warning("Please install torch if trying to pre-compile kernels")
            return False

        if self.cutlass_path is None:
            if verbose:
                self.warning("Please specify the CUTLASS repo directory as environment variable $CUTLASS_PATH")
            return False

        changelog_path = f'{self.cutlass_path}/CHANGELOG.md'
        try:
            # Only a substring probe is needed, so undecodable bytes are
            # replaced rather than allowed to abort the check.
            with open(changelog_path, 'r', encoding='utf-8', errors='replace') as f:
                changelog_text = f.read()
        except OSError as err:
            # A wrong or incomplete CUTLASS_PATH should fail the check, not crash it.
            if verbose:
                self.warning(f"Unable to read {changelog_path} ({err}); "
                             "please point $CUTLASS_PATH at a CUTLASS repo checkout")
            return False

        # The version test is a plain substring match on the changelog text.
        if '3.1.0' not in changelog_text:
            if verbose:
                self.warning("Please use CUTLASS version >= 3.1.0")
            return False

        cuda_okay = True
        if not self.is_rocm_pytorch() and torch.cuda.is_available():  #ignore-cuda
            sys_cuda_major, _ = installed_cuda_version()
            torch_cuda_major = int(torch.version.cuda.split('.')[0])
            cuda_capability = torch.cuda.get_device_properties(0).major  #ignore-cuda
            if cuda_capability < 7:
                if verbose:
                    self.warning("Please use a GPU with compute capability >= 7.0")
                cuda_okay = False
            if torch_cuda_major < 11 or sys_cuda_major < 11:
                if verbose:
                    self.warning("Please use CUDA 11+")
                cuda_okay = False
        return super().is_compatible(verbose) and cuda_okay

    def include_paths(self):
        """Return the CUTLASS include directories derived from ``cutlass_path``."""
        includes = [f'{self.cutlass_path}/include', f'{self.cutlass_path}/tools/util/include']
        return includes
|