run_perf_baseline.py 3.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130
  1. # Copyright (c) Microsoft Corporation.
  2. # SPDX-License-Identifier: Apache-2.0
  3. # DeepSpeed Team
  4. """
  5. Note: copy the webtext data into the "Megatron-LM" folder before running this script.
  6. """
  7. import unittest
  8. import re
  9. from test_common import BaseTestCase
  10. class GPT2PerfBaselineTestCase(BaseTestCase):
  11. def __init__(self, methodName="DeepSpeed performance test on GPT2 model"):
  12. super(GPT2PerfBaselineTestCase, self).__init__(methodName)
  13. def test_perf_1_5B(self):
  14. test_config = {
  15. "mp": 2,
  16. "gpus": 16,
  17. "nodes": 4,
  18. "bs": 16,
  19. "steps": 100,
  20. "layers": 48,
  21. "hidden_size": 1600,
  22. "seq_length": 1024,
  23. "heads": 16,
  24. "deepspeed": False,
  25. }
  26. self.run_test(test_config)
  27. def test_perf_4B(self):
  28. test_config = {
  29. "mp": 4,
  30. "gpus": 16,
  31. "nodes": 4,
  32. "bs": 8,
  33. "steps": 100,
  34. "layers": 64,
  35. "hidden_size": 2304,
  36. "seq_length": 1024,
  37. "heads": 16,
  38. "deepspeed": False,
  39. }
  40. self.run_test(test_config)
  41. def test_perf_8B(self):
  42. test_config = {
  43. "mp": 4,
  44. "gpus": 16,
  45. "nodes": 4,
  46. "bs": 8,
  47. "steps": 100,
  48. "layers": 72,
  49. "hidden_size": 3072,
  50. "seq_length": 1024,
  51. "heads": 24,
  52. "deepspeed": False,
  53. }
  54. self.run_test(test_config)
  55. def test_perf_20B(self):
  56. test_config = {
  57. "mp": 16,
  58. "gpus": 16,
  59. "nodes": 4,
  60. "bs": 4,
  61. "steps": 50,
  62. "layers": 111,
  63. "hidden_size": 3808,
  64. "seq_length": 1024,
  65. "heads": 32,
  66. "ckpt_num_layers": 1,
  67. "deepspeed": False,
  68. }
  69. self.run_test(test_config)
  70. def run_test(self, test_config):
  71. print("\n")
  72. print("{0}: starting......".format(self.id()))
  73. prefix = "gpt2_perf"
  74. test_file = self.gen_output_name(test_config, prefix)
  75. self.run_gpt2_test(test_config, test_file)
  76. exec_time = self.grep_latency_from_file(test_file)
  77. if exec_time == 0.0:
  78. print("{0}: no latency found in file {1}".format(self.id(), test_file))
  79. else:
  80. print("{0}: execution time per iteration is {1}ms.".format(self.id(), exec_time))
  81. def grep_latency_from_file(self, file_name):
  82. latency = 0.0
  83. count = 0
  84. with open(file_name, 'r') as f:
  85. lines = f.readlines()
  86. line_filter = "elapsed time per iteration"
  87. match_number = re.compile(r'elapsed time per iteration \(ms\): ([-+]?[0-9]+\.?[0-9]*(?:[Ee][-+]?[0-9]+)?)')
  88. for line in lines:
  89. if line_filter in line:
  90. ms_per_iter = re.findall(match_number, line)
  91. latency += float(ms_per_iter[0])
  92. count += 1
  93. if count > 0:
  94. latency /= count
  95. return latency
  96. def suite():
  97. suite = unittest.TestSuite()
  98. suite.addTest(GPT2PerfBaselineTestCase('test_perf_1_5B'))
  99. suite.addTest(GPT2PerfBaselineTestCase('test_perf_4B'))
  100. suite.addTest(GPT2PerfBaselineTestCase('test_perf_8B'))
  101. suite.addTest(GPT2PerfBaselineTestCase('test_perf_20B'))
  102. return suite
  103. if __name__ == '__main__':
  104. runner = unittest.TextTestRunner(failfast=True)
  105. runner.run(suite())