env_report.py 3.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109
  1. import torch
  2. import deepspeed
  3. import subprocess
  4. from .ops.op_builder import ALL_OPS
  5. from .git_version_info import installed_ops, torch_info
  6. from .ops import __compatible_ops__ as compatible_ops
  7. GREEN = '\033[92m'
  8. RED = '\033[91m'
  9. YELLOW = '\033[93m'
  10. END = '\033[0m'
  11. SUCCESS = f"{GREEN} [SUCCESS] {END}"
  12. OKAY = f"{GREEN}[OKAY]{END}"
  13. WARNING = f"{YELLOW}[WARNING]{END}"
  14. FAIL = f'{RED}[FAIL]{END}'
  15. INFO = '[INFO]'
  16. color_len = len(GREEN) + len(END)
  17. okay = f"{GREEN}[OKAY]{END}"
  18. warning = f"{YELLOW}[WARNING]{END}"
  19. def op_report():
  20. max_dots = 23
  21. max_dots2 = 11
  22. h = ["op name", "installed", "compatible"]
  23. print("-" * (max_dots + max_dots2 + len(h[0]) + len(h[1])))
  24. print("DeepSpeed C++/CUDA extension op report")
  25. print("-" * (max_dots + max_dots2 + len(h[0]) + len(h[1])))
  26. print("NOTE: Ops not installed will be just-in-time (JIT) compiled at\n"
  27. " runtime if needed. Op compatibility means that your system\n"
  28. " meet the required dependencies to JIT install the op.")
  29. print("-" * (max_dots + max_dots2 + len(h[0]) + len(h[1])))
  30. print("JIT compiled ops requires ninja")
  31. ninja_status = OKAY if ninja_installed() else FAIL
  32. print('ninja', "." * (max_dots - 5), ninja_status)
  33. print("-" * (max_dots + max_dots2 + len(h[0]) + len(h[1])))
  34. print(h[0], "." * (max_dots - len(h[0])), h[1], "." * (max_dots2 - len(h[1])), h[2])
  35. print("-" * (max_dots + max_dots2 + len(h[0]) + len(h[1])))
  36. installed = f"{GREEN}[YES]{END}"
  37. no = f"{YELLOW}[NO]{END}"
  38. for op_name, builder in ALL_OPS.items():
  39. dots = "." * (max_dots - len(op_name))
  40. is_compatible = OKAY if builder.is_compatible() else no
  41. is_installed = installed if installed_ops[op_name] else no
  42. dots2 = '.' * ((len(h[1]) + (max_dots2 - len(h[1]))) -
  43. (len(is_installed) - color_len))
  44. print(op_name, dots, is_installed, dots2, is_compatible)
  45. print("-" * (max_dots + max_dots2 + len(h[0]) + len(h[1])))
  46. def ninja_installed():
  47. try:
  48. import ninja
  49. except ImportError:
  50. return False
  51. return True
  52. def nvcc_version():
  53. import torch.utils.cpp_extension
  54. cuda_home = torch.utils.cpp_extension.CUDA_HOME
  55. if cuda_home is None:
  56. return f"{RED} [FAIL] cannot find CUDA_HOME via torch.utils.cpp_extension.CUDA_HOME={torch.utils.cpp_extension.CUDA_HOME} {END}"
  57. try:
  58. output = subprocess.check_output([cuda_home + "/bin/nvcc",
  59. "-V"],
  60. universal_newlines=True)
  61. except FileNotFoundError:
  62. return f"{RED} [FAIL] nvcc missing {END}"
  63. output_split = output.split()
  64. release_idx = output_split.index("release")
  65. release = output_split[release_idx + 1].replace(',', '').split(".")
  66. return ".".join(release)
  67. def debug_report():
  68. max_dots = 33
  69. report = [
  70. ("torch install path",
  71. torch.__path__),
  72. ("torch version",
  73. torch.__version__),
  74. ("torch cuda version",
  75. torch.version.cuda),
  76. ("nvcc version",
  77. nvcc_version()),
  78. ("deepspeed install path",
  79. deepspeed.__path__),
  80. ("deepspeed info",
  81. f"{deepspeed.__version__}, {deepspeed.__git_hash__}, {deepspeed.__git_branch__}"
  82. ),
  83. ("deepspeed wheel compiled w.",
  84. f"torch {torch_info['version']}, cuda {torch_info['cuda_version']}"),
  85. ]
  86. print("DeepSpeed general environment info:")
  87. for name, value in report:
  88. print(name, "." * (max_dots - len(name)), value)
  89. def main():
  90. op_report()
  91. debug_report()
  92. if __name__ == "__main__":
  93. main()