run_perf_test.py 3.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134
  1. # Copyright (c) Microsoft Corporation.
  2. # SPDX-License-Identifier: Apache-2.0
  3. # DeepSpeed Team
  4. """
  5. Note: please copy the webtext data into the "Megatron-LM" folder before running this script.
  6. """
  7. import unittest
  8. import re
  9. from test_common import BaseTestCase
  class GPT2PerfTestCase(BaseTestCase):
      """DeepSpeed performance tests for several GPT2 model configurations.

      Each ``test_perf_*`` method builds one configuration dictionary and passes
      it to :meth:`run_test`. ``run_test`` relies on ``gen_output_name`` and
      ``run_gpt2_test``, which are not defined here (presumably inherited from
      ``BaseTestCase`` -- confirm in ``test_common``).
      """

      # Captures the number that follows "elapsed time per iteration (ms): ".
      # Compiled once at class creation instead of on every parse call.
      _LATENCY_PATTERN = re.compile(
          r'elapsed time per iteration \(ms\): ([-+]?[0-9]+\.?[0-9]*(?:[Ee][-+]?[0-9]+)?)')

      def __init__(self, methodName="DeepSpeed performance test on GPT2 model"):
          """Forward ``methodName`` to the base test case.

          NOTE(review): the default is a description, not the name of a test
          method; ``suite()`` always passes a real method name. Confirm that
          ``BaseTestCase`` tolerates the default before relying on it.
          """
          super(GPT2PerfTestCase, self).__init__(methodName)

      def test_perf_1_5B(self):
          """Run the "1.5B" configuration: 48 layers, hidden size 1600."""
          test_config = {
              "mp": 1,
              "gpus": 16,
              "nodes": 4,
              "bs": 32,
              "steps": 100,
              "layers": 48,
              "hidden_size": 1600,
              "seq_length": 1024,
              "heads": 16,
              "deepspeed": True,
              "json": "ds_config_perf_bs32.json",
          }
          self.run_test(test_config)

      def test_perf_4B(self):
          """Run the "4B" configuration: 64 layers, hidden size 2304."""
          test_config = {
              "mp": 1,
              "gpus": 16,
              "nodes": 4,
              "bs": 8,
              "steps": 100,
              "layers": 64,
              "hidden_size": 2304,
              "seq_length": 1024,
              "heads": 16,
              "deepspeed": True,
              "json": "ds_config_perf_bs8.json",
          }
          self.run_test(test_config)

      def test_perf_8B(self):
          """Run the "8B" configuration: 72 layers, hidden size 3072."""
          test_config = {
              "mp": 2,
              "gpus": 16,
              "nodes": 4,
              "bs": 16,
              "steps": 100,
              "layers": 72,
              "hidden_size": 3072,
              "seq_length": 1024,
              "heads": 24,
              "deepspeed": True,
              "json": "ds_config_perf_bs16.json",
          }
          self.run_test(test_config)

      def test_perf_20B(self):
          """Run the "20B" configuration: 111 layers, hidden size 3808.

          Unlike the other configurations this one runs 50 steps and also sets
          ``ckpt_num_layers``.
          """
          test_config = {
              "mp": 4,
              "gpus": 16,
              "nodes": 4,
              "bs": 8,
              "steps": 50,
              "layers": 111,
              "hidden_size": 3808,
              "seq_length": 1024,
              "heads": 32,
              "ckpt_num_layers": 1,
              "deepspeed": True,
              "json": "ds_config_perf_bs8.json",
          }
          self.run_test(test_config)

      def run_test(self, test_config):
          """Run one configuration and print its mean time per iteration.

          Args:
              test_config: Configuration dictionary passed unchanged to
                  ``gen_output_name`` and ``run_gpt2_test``.

          The result is only printed; a run whose output contains no latency
          value is reported but does not fail the test.
          """
          print("\n")
          print("{0}: starting......".format(self.id()))

          prefix = "gpt2_perf"
          test_file = self.gen_output_name(test_config, prefix)
          self.run_gpt2_test(test_config, test_file)

          exec_time = self.grep_latency_from_file(test_file)
          # grep_latency_from_file returns exactly 0.0 when nothing was parsed.
          if exec_time == 0.0:
              print("{0}: no latency found in file {1}".format(self.id(), test_file))
          else:
              print("{0}: execution time per iteration is {1}ms.".format(self.id(), exec_time))

      def grep_latency_from_file(self, file_name):
          """Return the mean per-iteration latency reported in ``file_name``.

          Args:
              file_name: Path of the text file to scan.

          Returns:
              The arithmetic mean (float) of every value found after
              "elapsed time per iteration (ms): ", using the first such value
              on each line; ``0.0`` when no line carries one.

          Raises:
              OSError: If ``file_name`` cannot be opened for reading.
          """
          latency = 0.0
          count = 0
          with open(file_name, 'r') as f:
              # Stream the file line by line rather than loading it whole.
              for line in f:
                  found = self._LATENCY_PATTERN.search(line)
                  if found is None:
                      # A line may mention the phrase without a parsable
                      # number (e.g. "nan"); skip it instead of raising
                      # IndexError on an empty match list.
                      continue
                  latency += float(found.group(1))
                  count += 1

          if count > 0:
              latency /= count
          return latency
  def suite():
      """Assemble the GPT2 performance tests into a single suite.

      Returns:
          A ``unittest.TestSuite`` containing the 1.5B, 4B, 8B and 20B
          performance tests, added in that order.
      """
      test_names = (
          'test_perf_1_5B',
          'test_perf_4B',
          'test_perf_8B',
          'test_perf_20B',
      )
      perf_suite = unittest.TestSuite()
      for test_name in test_names:
          perf_suite.addTest(GPT2PerfTestCase(test_name))
      return perf_suite
  if __name__ == '__main__':
      # Script entry point: run the whole suite with a text runner.
      # failfast=True makes the runner stop at the first failure or error.
      unittest.TextTestRunner(failfast=True).run(suite())