test_common.py 3.6 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798
# coding=utf-8
# Copyright (c) 2019, The Microsoft DeepSpeed Team. All rights reserved.
#
  4. import unittest
  5. import subprocess
  6. import os
  7. import time
  8. import re
  9. class BaseTestCase(unittest.TestCase):
  10. def __init__(self, methodName="DeepSpeed performance test"):
  11. super(BaseTestCase, self).__init__(methodName)
  12. self.test_dir = "./test"
  13. self.baseline_dir = "./baseline"
  14. self.timestr = time.strftime("%Y%m%d-%H%M%S")
  15. def gen_output_name(self, test_config, prefix, baseline_config=False):
  16. other_args = test_config["other_args"] if "other_args" in test_config else ""
  17. zero_args = "_zero" if "zero" in test_config and test_config["zero"] else ""
  18. other_args = other_args.strip(' -\\').replace(" ", "").replace("\"", "")
  19. if other_args:
  20. other_args = "_" + other_args
  21. if test_config["deepspeed"] and not baseline_config:
  22. file_name = "_mp{0}_gpu{1}_node{2}_bs{3}_step{4}_layer{5}_hidden{6}_seq{7}_head{8}{9}_ds{10}-{11}.log".format(
  23. test_config["mp"],
  24. test_config["gpus"],
  25. test_config["nodes"],
  26. test_config["bs"],
  27. test_config["steps"],
  28. test_config["layers"],
  29. test_config["hidden_size"],
  30. test_config["seq_length"],
  31. test_config["heads"],
  32. other_args,
  33. zero_args,
  34. self.timestr)
  35. save_dir = self.test_dir
  36. else:
  37. file_name = "_mp{0}_gpu{1}_node{2}_bs{3}_step{4}_layer{5}_hidden{6}_seq{7}_head{8}{9}.log".format(
  38. test_config["mp"],
  39. test_config["gpus"],
  40. test_config["nodes"],
  41. test_config["bs"],
  42. test_config["steps"],
  43. test_config["layers"],
  44. test_config["hidden_size"],
  45. test_config["seq_length"],
  46. test_config["heads"],
  47. other_args)
  48. save_dir = self.baseline_dir
  49. return os.path.join(save_dir, prefix + file_name)
  50. def ensure_directory_exists(self, filename):
  51. dirname = os.path.dirname(filename)
  52. if not os.path.exists(dirname):
  53. os.makedirs(dirname)
  54. def clean_test_env(self):
  55. cmd = "dlts_ssh pkill -9 -f /usr/bin/python"
  56. print(cmd)
  57. subprocess.run(cmd, shell=True, check=False, executable='/bin/bash')
  58. time.sleep(20)
  59. def run_gpt2_test(self, test_config, output):
  60. ds_flag = "-d " + test_config["json"] if test_config["deepspeed"] else ""
  61. ckpt_num = test_config[
  62. "ckpt_num_layers"] if "ckpt_num_layers" in test_config else 1
  63. other_args = "-o " + test_config[
  64. "other_args"] if "other_args" in test_config else ""
  65. cmd = "./ds_gpt2_test.sh -m {0} -g {1} -n {2} -b {3} -s {4} -l {5} -h {6} -q {7} -e {8} -c {9} {10} {11}".format(
  66. test_config["mp"],
  67. test_config["gpus"],
  68. test_config["nodes"],
  69. test_config["bs"],
  70. test_config["steps"],
  71. test_config["layers"],
  72. test_config["hidden_size"],
  73. test_config["seq_length"],
  74. test_config["heads"],
  75. ckpt_num,
  76. other_args,
  77. ds_flag)
  78. self.ensure_directory_exists(output)
  79. with open(output, "w") as f:
  80. print(cmd)
  81. subprocess.run(cmd,
  82. shell=True,
  83. check=False,
  84. executable='/bin/bash',
  85. stdout=f,
  86. stderr=f)