test_e2e_squad.py 3.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125
  1. # Copyright (c) Microsoft Corporation.
  2. # SPDX-License-Identifier: Apache-2.0
  3. # DeepSpeed Team
  4. import subprocess as sp
  5. import os
  6. from math import isclose
  7. import sys
  8. import pytest
  9. import json
  10. sys.path.append("../../../DeepSpeedExamples/BingBertSquad")
  11. import evaluate as eval
# Directory holding the SQuAD assets; the checkpoint and the dev set are read
# from here, and it is also passed to the launch script as an argument.
squad_dir = "/data/BingBertSquad"
# Working directory from which the launch script is executed.
base_dir = "../../../DeepSpeedExamples/BingBertSquad"
# Shell script (run with bash) that performs the training/prediction run.
script_file_name = "run_squad_deepspeed.sh"
# Checkpoint file name, resolved relative to squad_dir.
model_file_name = "training_state_checkpoint_162.tar"
# Evaluation data file name, resolved relative to squad_dir.
eval_file_name = "dev-v1.1.json"
# Predictions file the tests expect to find in the run's output directory.
pred_file_name = "predictions.json"
# Kept as a string because it is passed as a command-line argument.
num_gpus = "4"
# Upper bound on how long a test waits for the launch script to finish.
timeout_sec = 5 * 60 * 60 # 5 hours
# Version string handed to evaluate() together with the eval and prediction files.
eval_version = "1.1"
  21. def create_config_file(tmpdir, zeroenabled=False):
  22. config_dict = {
  23. "train_batch_size": 24,
  24. "train_micro_batch_size_per_gpu": 6,
  25. "steps_per_print": 10,
  26. "optimizer": {
  27. "type": "Adam",
  28. "params": {
  29. "lr": 3e-5,
  30. "weight_decay": 0.0,
  31. "bias_correction": False
  32. }
  33. },
  34. "gradient_clipping": 1.0,
  35. "fp16": {
  36. "enabled": True
  37. }
  38. }
  39. config_dict["zero_optimization"] = zeroenabled
  40. config_path = os.path.join(tmpdir, 'temp_config.json')
  41. with open(config_path, 'w') as fd:
  42. json.dump(config_dict, fd)
  43. return config_path
  44. def test_e2e_squad_deepspeed_base(tmpdir):
  45. config_file = create_config_file(tmpdir)
  46. # base run results => {"exact_match": 83.9829706717124, "f1": 90.71138132004097}
  47. expected_exact_match = 83.98
  48. expected_f1 = 90.71
  49. model_file = os.path.join(squad_dir, model_file_name)
  50. eval_file = os.path.join(squad_dir, eval_file_name)
  51. output_dir = os.path.join(tmpdir, "output")
  52. pred_file = os.path.join(output_dir, pred_file_name)
  53. proc = sp.Popen(["bash", script_file_name, num_gpus, model_file, squad_dir, output_dir, config_file], cwd=base_dir)
  54. try:
  55. proc.communicate(timeout=timeout_sec)
  56. if os.path.exists(pred_file):
  57. eval_result = eval.evaluate(eval_version, eval_file, pred_file)
  58. print("evaluation result: ", json.dumps(eval_result))
  59. assert isclose(eval_result["exact_match"], expected_exact_match, abs_tol=1e-2)
  60. assert isclose(eval_result["f1"], expected_f1, abs_tol=1e-2)
  61. else:
  62. pytest.fail("Error: Run Failed")
  63. except sp.TimeoutExpired:
  64. proc.kill()
  65. pytest.fail("Error: Timeout")
  66. except sp.CalledProcessError:
  67. pytest.fail("Error: Run Failed")
  68. def test_e2e_squad_deepspeed_zero(tmpdir):
  69. config_file = create_config_file(tmpdir, True)
  70. # base run results => {"exact_match": 84.1438032166509, "f1": 90.89776136505441}
  71. expected_exact_match = 84.14
  72. expected_f1 = 90.89
  73. model_file = os.path.join(squad_dir, model_file_name)
  74. eval_file = os.path.join(squad_dir, eval_file_name)
  75. output_dir = os.path.join(tmpdir, "output")
  76. pred_file = os.path.join(output_dir, pred_file_name)
  77. proc = sp.Popen(["bash", script_file_name, num_gpus, model_file, squad_dir, output_dir, config_file], cwd=base_dir)
  78. try:
  79. proc.communicate(timeout=timeout_sec)
  80. if os.path.exists(pred_file):
  81. eval_result = eval.evaluate(eval_version, eval_file, pred_file)
  82. print("evaluation result: ", json.dumps(eval_result))
  83. assert isclose(eval_result["exact_match"], expected_exact_match, abs_tol=1e-2)
  84. assert isclose(eval_result["f1"], expected_f1, abs_tol=1e-2)
  85. else:
  86. pytest.fail("Error: Run Failed")
  87. except sp.TimeoutExpired:
  88. proc.kill()
  89. pytest.fail("Error: Timeout")
  90. except sp.CalledProcessError:
  91. pytest.fail("Error: Run Failed")