env_report.py 5.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145
  1. '''Copyright The Microsoft DeepSpeed Team'''
  2. import torch
  3. import deepspeed
  4. import subprocess
  5. import argparse
  6. from .ops.op_builder.all_ops import ALL_OPS
  7. from .git_version_info import installed_ops, torch_info
  8. from deepspeed.accelerator import get_accelerator
  # ANSI escape sequences used to colour the report on a terminal.
  GREEN = '\033[92m'
  RED = '\033[91m'
  YELLOW = '\033[93m'
  END = '\033[0m'

  # Ready-made status tags: a colour prefix, the label, and the END suffix.
  SUCCESS = GREEN + " [SUCCESS] " + END
  OKAY = GREEN + "[OKAY]" + END
  WARNING = YELLOW + "[WARNING]" + END
  FAIL = RED + '[FAIL]' + END
  INFO = '[INFO]'

  # Characters a coloured tag carries on top of its label; op_report subtracts
  # this from len(tag) when computing column padding.
  color_len = len(GREEN) + len(END)

  # Lower-case names holding the same strings as OKAY and WARNING.
  okay = OKAY
  warning = WARNING
  def op_report(verbose=True):
      """Print a table of DeepSpeed ops with their install and compatibility status.

      The output has a header, a row reporting whether ``ninja`` can be
      imported, and one row per entry of ``ALL_OPS`` showing whether the op is
      recorded in ``installed_ops`` and whether its builder reports it as
      compatible.

      Args:
          verbose: Passed straight to each builder's ``is_compatible`` call.
      """
      max_dots = 23
      max_dots2 = 11
      h = ["op name", "installed", "compatible"]
      # Every horizontal rule in the report has the same width; build it once.
      separator = "-" * (max_dots + max_dots2 + len(h[0]) + len(h[1]))

      print(separator)
      print("DeepSpeed C++/CUDA extension op report")
      print(separator)
      print("NOTE: Ops not installed will be just-in-time (JIT) compiled at\n"
            " runtime if needed. Op compatibility means that your system\n"
            " meet the required dependencies to JIT install the op.")
      print(separator)
      print("JIT compiled ops requires ninja")
      ninja_status = OKAY if ninja_installed() else FAIL
      print('ninja', "." * (max_dots - len('ninja')), ninja_status)
      print(separator)
      print(h[0], "." * (max_dots - len(h[0])), h[1], "." * (max_dots2 - len(h[1])), h[2])
      print(separator)

      installed = f"{GREEN}[YES]{END}"
      no = f"{YELLOW}[NO]{END}"
      for op_name, builder in ALL_OPS.items():
          dots = "." * (max_dots - len(op_name))
          is_compatible = OKAY if builder.is_compatible(verbose) else no
          # An op that has no entry in installed_ops is reported as not
          # installed instead of aborting the whole report with a KeyError.
          is_installed = installed if installed_ops.get(op_name, False) else no
          # color_len removes the colour escape characters from the tag's
          # length so the padding is based on the visible label only.
          dots2 = '.' * (max_dots2 - (len(is_installed) - color_len))
          print(op_name, dots, is_installed, dots2, is_compatible)
      print(separator)
  def ninja_installed():
      """Return True when the ``ninja`` module can be imported, else False."""
      try:
          import ninja  # noqa: F401
      except ImportError:
          available = False
      else:
          available = True
      return available
  def nvcc_version():
      """Return the release reported by ``nvcc -V`` under torch's CUDA_HOME.

      Returns:
          The token that follows the word ``release`` in the ``nvcc -V``
          output, with commas removed. If CUDA_HOME is unset, nvcc is missing
          or cannot be run, or the output has no ``release`` token, a
          red-coloured ``[FAIL]`` message is returned instead so the
          surrounding report can still be printed.
      """
      import os
      import torch.utils.cpp_extension

      cuda_home = torch.utils.cpp_extension.CUDA_HOME
      if cuda_home is None:
          return f"{RED} [FAIL] cannot find CUDA_HOME via torch.utils.cpp_extension.CUDA_HOME={torch.utils.cpp_extension.CUDA_HOME} {END}"

      nvcc_path = os.path.join(cuda_home, "bin", "nvcc")
      try:
          output = subprocess.check_output([nvcc_path, "-V"], universal_newlines=True)
      except FileNotFoundError:
          return f"{RED} [FAIL] nvcc missing {END}"
      except (OSError, subprocess.CalledProcessError):
          # nvcc exists but could not be run or exited with a non-zero status.
          return f"{RED} [FAIL] nvcc could not be executed {END}"

      tokens = output.split()
      try:
          release = tokens[tokens.index("release") + 1]
      except (ValueError, IndexError):
          # No "release" word, or nothing after it, in the nvcc output.
          return f"{RED} [FAIL] cannot parse nvcc version {END}"
      return release.replace(',', '')
  def debug_report():
      """Print torch and DeepSpeed install paths, versions and build details.

      When the accelerator's device name is ``'cuda'`` the report also lists
      the torch CUDA/HIP versions, the nvcc version (``None`` when a HIP
      version is present) and the toolkit recorded in ``torch_info``.
      """
      max_dots = 33
      deepspeed_info = f"{deepspeed.__version__}, {deepspeed.__git_hash__}, {deepspeed.__git_branch__}"
      report = [
          ("torch install path", torch.__path__),
          ("torch version", torch.__version__),
          ("deepspeed install path", deepspeed.__path__),
          ("deepspeed info", deepspeed_info),
      ]

      if get_accelerator().device_name() == 'cuda':
          hip_version = getattr(torch.version, "hip", None)
          report.append(("torch cuda version", torch.version.cuda))
          report.append(("torch hip version", hip_version))
          # nvcc is only queried when torch does not report a HIP version.
          report.append(("nvcc version", None if hip_version else nvcc_version()))
          compiled_with = f"torch {torch_info['version']}, "
          if hip_version:
              compiled_with += f"hip {torch_info['hip_version']}"
          else:
              compiled_with += f"cuda {torch_info['cuda_version']}"
          report.append(("deepspeed wheel compiled w.", compiled_with))
      else:
          report.append(("deepspeed wheel compiled w.", f"torch {torch_info['version']} "))

      print("DeepSpeed general environment info:")
      for label, value in report:
          print(label, "." * (max_dots - len(label)), value)
  def parse_arguments():
      """Parse the report's command-line flags and return the namespace.

      Both flags are ``store_true`` switches, so each attribute is ``False``
      unless the flag is given on the command line.
      """
      parser = argparse.ArgumentParser()
      switches = (
          ('--hide_operator_status',
           'Suppress display of installation and compatibility statuses of DeepSpeed operators. '),
          ('--hide_errors_and_warnings',
           'Suppress warning and error messages.'),
      )
      for flag, description in switches:
          parser.add_argument(flag, action='store_true', help=description)
      return parser.parse_args()
  def main(hide_operator_status=False, hide_errors_and_warnings=False):
      """Print the op report (unless hidden) followed by the environment report.

      Args:
          hide_operator_status: When true, the op table is skipped.
          hide_errors_and_warnings: When true, ``op_report`` is called with
              ``verbose=False``.
      """
      show_operator_status = not hide_operator_status
      if show_operator_status:
          verbose = not hide_errors_and_warnings
          op_report(verbose=verbose)
      debug_report()
  def cli_main():
      """Command-line entry point: parse the flags and hand them to ``main``."""
      options = parse_arguments()
      hide_ops = options.hide_operator_status
      hide_messages = options.hide_errors_and_warnings
      main(hide_operator_status=hide_ops, hide_errors_and_warnings=hide_messages)
  if __name__ == "__main__":
      # Go through cli_main() so that --hide_operator_status and
      # --hide_errors_and_warnings are honoured when the module is executed
      # directly; calling main() here would always use its defaults.
      cli_main()