test_common.py 3.1 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768
# Copyright (c) Microsoft Corporation.
# SPDX-License-Identifier: Apache-2.0
# DeepSpeed Team
  4. import unittest
  5. import subprocess
  6. import os
  7. import time
  8. import shlex
  9. class BaseTestCase(unittest.TestCase):
  10. def __init__(self, methodName="DeepSpeed performance test"):
  11. super(BaseTestCase, self).__init__(methodName)
  12. self.test_dir = "./test"
  13. self.baseline_dir = "./baseline"
  14. self.timestr = time.strftime("%Y%m%d-%H%M%S")
  15. def gen_output_name(self, test_config, prefix, baseline_config=False):
  16. other_args = test_config["other_args"] if "other_args" in test_config else ""
  17. zero_args = "_zero" if "zero" in test_config and test_config["zero"] else ""
  18. other_args = other_args.strip(' -\\').replace(" ", "").replace("\"", "")
  19. if other_args:
  20. other_args = "_" + other_args
  21. if test_config["deepspeed"] and not baseline_config:
  22. file_name = "_mp{0}_gpu{1}_node{2}_bs{3}_step{4}_layer{5}_hidden{6}_seq{7}_head{8}{9}_ds{10}-{11}.log".format(
  23. test_config["mp"], test_config["gpus"], test_config["nodes"], test_config["bs"], test_config["steps"],
  24. test_config["layers"], test_config["hidden_size"], test_config["seq_length"], test_config["heads"],
  25. other_args, zero_args, self.timestr)
  26. save_dir = self.test_dir
  27. else:
  28. file_name = "_mp{0}_gpu{1}_node{2}_bs{3}_step{4}_layer{5}_hidden{6}_seq{7}_head{8}{9}.log".format(
  29. test_config["mp"], test_config["gpus"], test_config["nodes"], test_config["bs"], test_config["steps"],
  30. test_config["layers"], test_config["hidden_size"], test_config["seq_length"], test_config["heads"],
  31. other_args)
  32. save_dir = self.baseline_dir
  33. return os.path.join(save_dir, prefix + file_name)
  34. def ensure_directory_exists(self, filename):
  35. dirname = os.path.dirname(filename)
  36. if not os.path.exists(dirname):
  37. os.makedirs(dirname)
  38. def clean_test_env(self):
  39. cmd = shlex.split("dlts_ssh pkill -9 -f /usr/bin/python")
  40. print(cmd)
  41. subprocess.run(cmd, check=False, executable='/bin/bash')
  42. time.sleep(20)
  43. def run_gpt2_test(self, test_config, output):
  44. ds_flag = "-d " + test_config["json"] if test_config["deepspeed"] else ""
  45. ckpt_num = test_config["ckpt_num_layers"] if "ckpt_num_layers" in test_config else 1
  46. other_args = "-o " + test_config["other_args"] if "other_args" in test_config else ""
  47. cmd = "./ds_gpt2_test.sh -m {0} -g {1} -n {2} -b {3} -s {4} -l {5} -h {6} -q {7} -e {8} -c {9} {10} {11}".format(
  48. test_config["mp"], test_config["gpus"], test_config["nodes"], test_config["bs"], test_config["steps"],
  49. test_config["layers"], test_config["hidden_size"], test_config["seq_length"], test_config["heads"],
  50. ckpt_num, other_args, ds_flag)
  51. cmd = shlex.split(cmd)
  52. self.ensure_directory_exists(output)
  53. with open(output, "w") as f:
  54. print(cmd)
  55. subprocess.run(cmd, check=False, executable='/bin/bash', stdout=f, stderr=f)