run_perf_baseline.py 3.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133
# coding=utf-8
# Copyright (c) 2019, The Microsoft DeepSpeed Team. All rights reserved.
#
# Note: copy the webtext data into the "Megatron-LM" folder before running this script.
  5. import unittest
  6. import subprocess
  7. import os
  8. import time
  9. import re
  10. from test_common import BaseTestCase
  11. class GPT2PerfBaselineTestCase(BaseTestCase):
  12. def __init__(self, methodName="DeepSpeed performance test on GPT2 model"):
  13. super(GPT2PerfBaselineTestCase, self).__init__(methodName)
  14. def test_perf_1_5B(self):
  15. test_config = {
  16. "mp": 2,
  17. "gpus": 16,
  18. "nodes": 4,
  19. "bs": 16,
  20. "steps": 100,
  21. "layers": 48,
  22. "hidden_size": 1600,
  23. "seq_length": 1024,
  24. "heads": 16,
  25. "deepspeed": False,
  26. }
  27. self.run_test(test_config)
  28. def test_perf_4B(self):
  29. test_config = {
  30. "mp": 4,
  31. "gpus": 16,
  32. "nodes": 4,
  33. "bs": 8,
  34. "steps": 100,
  35. "layers": 64,
  36. "hidden_size": 2304,
  37. "seq_length": 1024,
  38. "heads": 16,
  39. "deepspeed": False,
  40. }
  41. self.run_test(test_config)
  42. def test_perf_8B(self):
  43. test_config = {
  44. "mp": 4,
  45. "gpus": 16,
  46. "nodes": 4,
  47. "bs": 8,
  48. "steps": 100,
  49. "layers": 72,
  50. "hidden_size": 3072,
  51. "seq_length": 1024,
  52. "heads": 24,
  53. "deepspeed": False,
  54. }
  55. self.run_test(test_config)
  56. def test_perf_20B(self):
  57. test_config = {
  58. "mp": 16,
  59. "gpus": 16,
  60. "nodes": 4,
  61. "bs": 4,
  62. "steps": 50,
  63. "layers": 111,
  64. "hidden_size": 3808,
  65. "seq_length": 1024,
  66. "heads": 32,
  67. "ckpt_num_layers": 1,
  68. "deepspeed": False,
  69. }
  70. self.run_test(test_config)
  71. def run_test(self, test_config):
  72. print("\n")
  73. print("{0}: starting......".format(self.id()))
  74. prefix = "gpt2_perf"
  75. test_file = self.gen_output_name(test_config, prefix)
  76. self.run_gpt2_test(test_config, test_file)
  77. exec_time = self.grep_latency_from_file(test_file)
  78. if exec_time == 0.0:
  79. print("{0}: no latency found in file {1}".format(self.id(), test_file))
  80. else:
  81. print("{0}: execution time per iteration is {1}ms.".format(
  82. self.id(),
  83. exec_time))
  84. def grep_latency_from_file(self, file_name):
  85. latency = 0.0
  86. count = 0
  87. with open(file_name, 'r') as f:
  88. lines = f.readlines()
  89. line_filter = "elapsed time per iteration"
  90. match_number = re.compile(
  91. 'elapsed time per iteration \(ms\): ([-+]?[0-9]+\.?[0-9]*(?:[Ee][-+]?[0-9]+)?)'
  92. )
  93. for line in lines:
  94. if line_filter in line:
  95. ms_per_iter = re.findall(match_number, line)
  96. latency += float(ms_per_iter[0])
  97. count += 1
  98. if count > 0:
  99. latency /= count
  100. return latency
  101. def suite():
  102. suite = unittest.TestSuite()
  103. suite.addTest(GPT2PerfBaselineTestCase('test_perf_1_5B'))
  104. suite.addTest(GPT2PerfBaselineTestCase('test_perf_4B'))
  105. suite.addTest(GPT2PerfBaselineTestCase('test_perf_8B'))
  106. suite.addTest(GPT2PerfBaselineTestCase('test_perf_20B'))
  107. return suite
  108. if __name__ == '__main__':
  109. runner = unittest.TextTestRunner(failfast=True)
  110. runner.run(suite())