setup.py 9.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276
  1. """
Copyright 2020 The Microsoft DeepSpeed Team

DeepSpeed library

To build a wheel on Windows:
    1. Install pytorch, such as pytorch 1.8 + cuda 11.1
    2. Install the Visual C++ build tools
    3. Launch a cmd console with Administrator privilege (needed to create the required symlink folders)

Create a new wheel via the following command:
    python setup.py bdist_wheel

The wheel will be located at: dist/*.whl
  11. """
  12. import os
  13. import sys
  14. import shutil
  15. import subprocess
  16. import warnings
  17. from setuptools import setup, find_packages
  18. import time
  19. torch_available = True
  20. try:
  21. import torch
  22. from torch.utils.cpp_extension import BuildExtension
  23. except ImportError:
  24. torch_available = False
  25. print('[WARNING] Unable to import torch, pre-compiling ops will be disabled. ' \
  26. 'Please visit https://pytorch.org/ to see how to properly install torch on your system.')
  27. from op_builder import ALL_OPS, get_default_compute_capatabilities, OpBuilder
  28. # fetch rocm state
  29. is_rocm_pytorch = OpBuilder.is_rocm_pytorch()
  30. rocm_version = OpBuilder.installed_rocm_version()
  31. RED_START = '\033[31m'
  32. RED_END = '\033[0m'
  33. ERROR = f"{RED_START} [ERROR] {RED_END}"
  34. def abort(msg):
  35. print(f"{ERROR} {msg}")
  36. assert False, msg
  37. def fetch_requirements(path):
  38. with open(path, 'r') as fd:
  39. return [r.strip() for r in fd.readlines()]
  40. install_requires = fetch_requirements('requirements/requirements.txt')
  41. extras_require = {
  42. '1bit': [], # add cupy based on cuda/rocm version
  43. '1bit-mpi': fetch_requirements('requirements/requirements-1bit-mpi.txt'),
  44. 'readthedocs': fetch_requirements('requirements/requirements-readthedocs.txt'),
  45. 'dev': fetch_requirements('requirements/requirements-dev.txt'),
  46. }
  47. # Add specific cupy version to 1bit extras
  48. if torch_available and torch.cuda.is_available():
  49. if is_rocm_pytorch:
  50. rocm_major, rocm_minor = rocm_version
  51. cupy = f"cupy-rocm-{rocm_major}-{rocm_minor}"
  52. else:
  53. cupy = f"cupy-cuda{torch.version.cuda.replace('.','')[:3]}"
  54. extras_require['1bit'].append(cupy)
  55. extras_require['1bit-mpi'].append(cupy)
  56. # Make an [all] extra that installs all needed dependencies
  57. all_extras = set()
  58. for extra in extras_require.items():
  59. for req in extra[1]:
  60. all_extras.add(req)
  61. extras_require['all'] = list(all_extras)
  62. cmdclass = {}
  63. # For any pre-installed ops force disable ninja
  64. if torch_available:
  65. cmdclass['build_ext'] = BuildExtension.with_options(use_ninja=False)
  66. if torch_available:
  67. TORCH_MAJOR = torch.__version__.split('.')[0]
  68. TORCH_MINOR = torch.__version__.split('.')[1]
  69. else:
  70. TORCH_MAJOR = "0"
  71. TORCH_MINOR = "0"
  72. if torch_available and not torch.cuda.is_available():
  73. # Fix to allow docker builds, similar to https://github.com/NVIDIA/apex/issues/486
  74. print(
  75. "[WARNING] Torch did not find cuda available, if cross-compiling or running with cpu only "
  76. "you can ignore this message. Adding compute capability for Pascal, Volta, and Turing "
  77. "(compute capabilities 6.0, 6.1, 6.2)")
  78. if os.environ.get("TORCH_CUDA_ARCH_LIST", None) is None:
  79. os.environ["TORCH_CUDA_ARCH_LIST"] = get_default_compute_capatabilities()
  80. ext_modules = []
  81. # Default to pre-install kernels to false so we rely on JIT on Linux, opposite on Windows.
  82. BUILD_OP_PLATFORM = 1 if sys.platform == "win32" else 0
  83. BUILD_OP_DEFAULT = int(os.environ.get('DS_BUILD_OPS', BUILD_OP_PLATFORM))
  84. print(f"DS_BUILD_OPS={BUILD_OP_DEFAULT}")
  85. if BUILD_OP_DEFAULT:
  86. assert torch_available, "Unable to pre-compile ops without torch installed. Please install torch before attempting to pre-compile ops."
  87. def command_exists(cmd):
  88. if sys.platform == "win32":
  89. result = subprocess.Popen(f'{cmd}', stdout=subprocess.PIPE, shell=True)
  90. return result.wait() == 1
  91. else:
  92. result = subprocess.Popen(f'type {cmd}', stdout=subprocess.PIPE, shell=True)
  93. return result.wait() == 0
  94. def op_envvar(op_name):
  95. assert hasattr(ALL_OPS[op_name], 'BUILD_VAR'), \
  96. f"{op_name} is missing BUILD_VAR field"
  97. return ALL_OPS[op_name].BUILD_VAR
  98. def op_enabled(op_name):
  99. env_var = op_envvar(op_name)
  100. return int(os.environ.get(env_var, BUILD_OP_DEFAULT))
  101. compatible_ops = dict.fromkeys(ALL_OPS.keys(), False)
  102. install_ops = dict.fromkeys(ALL_OPS.keys(), False)
  103. for op_name, builder in ALL_OPS.items():
  104. op_compatible = builder.is_compatible()
  105. compatible_ops[op_name] = op_compatible
  106. # If op is requested but not available, throw an error
  107. if op_enabled(op_name) and not op_compatible:
  108. env_var = op_envvar(op_name)
  109. if env_var not in os.environ:
  110. builder.warning(f"One can disable {op_name} with {env_var}=0")
  111. abort(f"Unable to pre-compile {op_name}")
  112. # If op is compatible update install reqs so it can potentially build/run later
  113. if op_compatible:
  114. reqs = builder.python_requirements()
  115. install_requires += builder.python_requirements()
  116. # If op install enabled, add builder to extensions
  117. if op_enabled(op_name) and op_compatible:
  118. assert torch_available, f"Unable to pre-compile {op_name}, please first install torch"
  119. install_ops[op_name] = op_enabled(op_name)
  120. ext_modules.append(builder.builder())
  121. print(f'Install Ops={install_ops}')
  122. # Write out version/git info
  123. git_hash_cmd = "git rev-parse --short HEAD"
  124. git_branch_cmd = "git rev-parse --abbrev-ref HEAD"
  125. if command_exists('git') and 'DS_BUILD_STRING' not in os.environ:
  126. try:
  127. result = subprocess.check_output(git_hash_cmd, shell=True)
  128. git_hash = result.decode('utf-8').strip()
  129. result = subprocess.check_output(git_branch_cmd, shell=True)
  130. git_branch = result.decode('utf-8').strip()
  131. except subprocess.CalledProcessError:
  132. git_hash = "unknown"
  133. git_branch = "unknown"
  134. else:
  135. git_hash = "unknown"
  136. git_branch = "unknown"
  137. def create_dir_symlink(src, dest):
  138. if not os.path.islink(dest):
  139. if os.path.exists(dest):
  140. os.remove(dest)
  141. assert not os.path.exists(dest)
  142. os.symlink(src, dest)
  143. if sys.platform == "win32":
  144. # This creates a symbolic links on Windows.
  145. # It needs Administrator privilege to create symlinks on Windows.
  146. create_dir_symlink('..\\..\\csrc', '.\\deepspeed\\ops\\csrc')
  147. create_dir_symlink('..\\..\\op_builder', '.\\deepspeed\\ops\\op_builder')
  148. # Parse the DeepSpeed version string from version.txt
  149. version_str = open('version.txt', 'r').read().strip()
  150. # Build specifiers like .devX can be added at install time. Otherwise, add the git hash.
  151. # example: DS_BUILD_STR=".dev20201022" python setup.py sdist bdist_wheel
  152. # Building wheel for distribution, update version file
  153. if 'DS_BUILD_STRING' in os.environ:
  154. # Build string env specified, probably building for distribution
  155. with open('build.txt', 'w') as fd:
  156. fd.write(os.environ.get('DS_BUILD_STRING'))
  157. version_str += os.environ.get('DS_BUILD_STRING')
  158. elif os.path.isfile('build.txt'):
  159. # build.txt exists, probably installing from distribution
  160. with open('build.txt', 'r') as fd:
  161. version_str += fd.read().strip()
  162. else:
  163. # None of the above, probably installing from source
  164. version_str += f'+{git_hash}'
  165. torch_version = ".".join([TORCH_MAJOR, TORCH_MINOR])
  166. # Set cuda_version to 0.0 if cpu-only
  167. cuda_version = "0.0"
  168. # Set hip_version to 0.0 if cpu-only
  169. hip_version = "0.0"
  170. if torch_available and torch.version.cuda is not None:
  171. cuda_version = ".".join(torch.version.cuda.split('.')[:2])
  172. if torch_available and hasattr(torch.version, 'hip') and torch.version.hip is not None:
  173. hip_version = ".".join(torch.version.hip.split('.')[:2])
  174. torch_info = {
  175. "version": torch_version,
  176. "cuda_version": cuda_version,
  177. "hip_version": hip_version
  178. }
  179. print(f"version={version_str}, git_hash={git_hash}, git_branch={git_branch}")
  180. with open('deepspeed/git_version_info_installed.py', 'w') as fd:
  181. fd.write(f"version='{version_str}'\n")
  182. fd.write(f"git_hash='{git_hash}'\n")
  183. fd.write(f"git_branch='{git_branch}'\n")
  184. fd.write(f"installed_ops={install_ops}\n")
  185. fd.write(f"compatible_ops={compatible_ops}\n")
  186. fd.write(f"torch_info={torch_info}\n")
  187. print(f'install_requires={install_requires}')
  188. print(f'compatible_ops={compatible_ops}')
  189. print(f'ext_modules={ext_modules}')
  190. # Parse README.md to make long_description for PyPI page.
  191. thisdir = os.path.abspath(os.path.dirname(__file__))
  192. with open(os.path.join(thisdir, 'README.md'), encoding='utf-8') as fin:
  193. readme_text = fin.read()
  194. start_time = time.time()
  195. setup(name='deepspeed',
  196. version=version_str,
  197. description='DeepSpeed library',
  198. long_description=readme_text,
  199. long_description_content_type='text/markdown',
  200. author='DeepSpeed Team',
  201. author_email='deepspeed@microsoft.com',
  202. url='http://deepspeed.ai',
  203. install_requires=install_requires,
  204. extras_require=extras_require,
  205. packages=find_packages(exclude=["docker",
  206. "third_party"]),
  207. include_package_data=True,
  208. scripts=[
  209. 'bin/deepspeed',
  210. 'bin/deepspeed.pt',
  211. 'bin/ds',
  212. 'bin/ds_ssh',
  213. 'bin/ds_report',
  214. 'bin/ds_elastic'
  215. ],
  216. classifiers=[
  217. 'Programming Language :: Python :: 3.6',
  218. 'Programming Language :: Python :: 3.7',
  219. 'Programming Language :: Python :: 3.8'
  220. ],
  221. license='MIT',
  222. ext_modules=ext_modules,
  223. cmdclass=cmdclass)
  224. end_time = time.time()
  225. print(f'deepspeed build time = {end_time - start_time} secs')