# async_io.py (3.7 KB)

# Copyright (c) Microsoft Corporation.
# SPDX-License-Identifier: Apache-2.0

# DeepSpeed Team
import distutils.spawn
import shutil
import subprocess

from .builder import NPUOpBuilder
  7. class AsyncIOBuilder(NPUOpBuilder):
  8. BUILD_VAR = "DS_BUILD_AIO"
  9. NAME = "async_io"
  10. def __init__(self):
  11. super().__init__(name=self.NAME)
  12. def absolute_name(self):
  13. return f'deepspeed.ops.aio.{self.NAME}_op'
  14. def sources(self):
  15. return [
  16. 'csrc/aio/py_lib/deepspeed_py_copy.cpp', 'csrc/aio/py_lib/py_ds_aio.cpp',
  17. 'csrc/aio/py_lib/deepspeed_py_aio.cpp', 'csrc/aio/py_lib/deepspeed_py_aio_handle.cpp',
  18. 'csrc/aio/py_lib/deepspeed_aio_thread.cpp', 'csrc/aio/common/deepspeed_aio_utils.cpp',
  19. 'csrc/aio/common/deepspeed_aio_common.cpp', 'csrc/aio/common/deepspeed_aio_types.cpp',
  20. 'csrc/aio/py_lib/deepspeed_pin_tensor.cpp'
  21. ]
  22. def include_paths(self):
  23. args = super().include_paths()
  24. args += ['csrc/aio/py_lib', 'csrc/aio/common']
  25. return args
  26. def cxx_args(self):
  27. args = super().cxx_args()
  28. # -O0 for improved debugging, since performance is bound by I/O
  29. CPU_ARCH = self.cpu_arch()
  30. SIMD_WIDTH = self.simd_width()
  31. import torch # Keep this import here to avoid errors when building DeepSpeed wheel without torch installed
  32. TORCH_MAJOR, TORCH_MINOR = map(int, torch.__version__.split('.')[0:2])
  33. if TORCH_MAJOR >= 2 and TORCH_MINOR >= 1:
  34. CPP_STD = '-std=c++17'
  35. else:
  36. CPP_STD = '-std=c++14'
  37. return args + [
  38. '-g',
  39. '-Wall',
  40. '-O0',
  41. CPP_STD,
  42. '-shared',
  43. '-fPIC',
  44. '-Wno-reorder',
  45. CPU_ARCH,
  46. '-fopenmp',
  47. SIMD_WIDTH,
  48. '-laio',
  49. ]
  50. def extra_ldflags(self):
  51. args = super().extra_ldflags()
  52. return args + ['-laio']
  53. def check_for_libaio_pkg(self):
  54. libs = dict(
  55. dpkg=["-l", "libaio-dev", "apt"],
  56. pacman=["-Q", "libaio", "pacman"],
  57. rpm=["-q", "libaio-devel", "yum"],
  58. )
  59. found = False
  60. for pkgmgr, data in libs.items():
  61. flag, lib, tool = data
  62. path = distutils.spawn.find_executable(pkgmgr)
  63. if path is not None:
  64. cmd = f"{pkgmgr} {flag} {lib}"
  65. result = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True)
  66. if result.wait() == 0:
  67. found = True
  68. else:
  69. self.warning(f"{self.NAME}: please install the {lib} package with {tool}")
  70. break
  71. return found
  72. def is_compatible(self, verbose=True):
  73. # Check for the existence of libaio by using distutils
  74. # to compile and link a test program that calls io_submit,
  75. # which is a function provided by libaio that is used in the async_io op.
  76. # If needed, one can define -I and -L entries in CFLAGS and LDFLAGS
  77. # respectively to specify the directories for libaio.h and libaio.so.
  78. aio_compatible = self.has_function('io_pgetevents', ('aio', ))
  79. if verbose and not aio_compatible:
  80. self.warning(f"{self.NAME} requires the dev libaio .so object and headers but these were not found.")
  81. # Check for the libaio package via known package managers
  82. # to print suggestions on which package to install.
  83. self.check_for_libaio_pkg()
  84. self.warning(
  85. "If libaio is already installed (perhaps from source), try setting the CFLAGS and LDFLAGS environment variables to where it can be found."
  86. )
  87. return super().is_compatible(verbose) and aio_compatible