# run_perf_test.py
# coding=utf-8
# Copyright (c) 2019, The Microsoft DeepSpeed Team. All rights reserved.
#
# Note: copy the webtext data into the "Megatron-LM" folder before running this script.
  5. import unittest
  6. import subprocess
  7. import os
  8. import time
  9. import re
  10. from test_common import BaseTestCase
  11. class GPT2PerfTestCase(BaseTestCase):
  12. def __init__(self, methodName="DeepSpeed performance test on GPT2 model"):
  13. super(GPT2PerfTestCase, self).__init__(methodName)
  14. def test_perf_1_5B(self):
  15. test_config = {
  16. "mp": 1,
  17. "gpus": 16,
  18. "nodes": 4,
  19. "bs": 32,
  20. "steps": 100,
  21. "layers": 48,
  22. "hidden_size": 1600,
  23. "seq_length": 1024,
  24. "heads": 16,
  25. "deepspeed": True,
  26. "json": "ds_config_perf_bs32.json",
  27. }
  28. self.run_test(test_config)
  29. def test_perf_4B(self):
  30. test_config = {
  31. "mp": 1,
  32. "gpus": 16,
  33. "nodes": 4,
  34. "bs": 8,
  35. "steps": 100,
  36. "layers": 64,
  37. "hidden_size": 2304,
  38. "seq_length": 1024,
  39. "heads": 16,
  40. "deepspeed": True,
  41. "json": "ds_config_perf_bs8.json",
  42. }
  43. self.run_test(test_config)
  44. def test_perf_8B(self):
  45. test_config = {
  46. "mp": 2,
  47. "gpus": 16,
  48. "nodes": 4,
  49. "bs": 16,
  50. "steps": 100,
  51. "layers": 72,
  52. "hidden_size": 3072,
  53. "seq_length": 1024,
  54. "heads": 24,
  55. "deepspeed": True,
  56. "json": "ds_config_perf_bs16.json",
  57. }
  58. self.run_test(test_config)
  59. def test_perf_20B(self):
  60. test_config = {
  61. "mp": 4,
  62. "gpus": 16,
  63. "nodes": 4,
  64. "bs": 8,
  65. "steps": 50,
  66. "layers": 111,
  67. "hidden_size": 3808,
  68. "seq_length": 1024,
  69. "heads": 32,
  70. "ckpt_num_layers": 1,
  71. "deepspeed": True,
  72. "json": "ds_config_perf_bs8.json",
  73. }
  74. self.run_test(test_config)
  75. def run_test(self, test_config):
  76. print("\n")
  77. print("{0}: starting......".format(self.id()))
  78. prefix = "gpt2_perf"
  79. test_file = self.gen_output_name(test_config, prefix)
  80. self.run_gpt2_test(test_config, test_file)
  81. exec_time = self.grep_latency_from_file(test_file)
  82. if exec_time == 0.0:
  83. print("{0}: no latency found in file {1}".format(self.id(), test_file))
  84. else:
  85. print("{0}: execution time per iteration is {1}ms.".format(
  86. self.id(),
  87. exec_time))
  88. def grep_latency_from_file(self, file_name):
  89. latency = 0.0
  90. count = 0
  91. with open(file_name, 'r') as f:
  92. lines = f.readlines()
  93. line_filter = "elapsed time per iteration"
  94. match_number = re.compile(
  95. 'elapsed time per iteration \(ms\): ([-+]?[0-9]+\.?[0-9]*(?:[Ee][-+]?[0-9]+)?)'
  96. )
  97. for line in lines:
  98. if line_filter in line:
  99. ms_per_iter = re.findall(match_number, line)
  100. latency += float(ms_per_iter[0])
  101. count += 1
  102. if count > 0:
  103. latency /= count
  104. return latency
  105. def suite():
  106. suite = unittest.TestSuite()
  107. suite.addTest(GPT2PerfTestCase('test_perf_1_5B'))
  108. suite.addTest(GPT2PerfTestCase('test_perf_4B'))
  109. suite.addTest(GPT2PerfTestCase('test_perf_8B'))
  110. suite.addTest(GPT2PerfTestCase('test_perf_20B'))
  111. return suite
  112. if __name__ == '__main__':
  113. runner = unittest.TextTestRunner(failfast=True)
  114. runner.run(suite())