- import subprocess as sp
- import datetime
- import os
- from math import isclose
- import sys
- import pytest
- import json
- import argparse
- sys.path.append("../../../DeepSpeedExamples/BingBertSquad")
- import evaluate as eval
# Locations of the SQuAD data, pretrained checkpoint, and the DeepSpeed
# example scripts used by the end-to-end runs below.
squad_dir = "/data/BingBertSquad"
base_dir = "../../../DeepSpeedExamples/BingBertSquad"
script_file_name = "run_squad_deepspeed.sh"
model_file_name = "training_state_checkpoint_162.tar"
eval_file_name = "dev-v1.1.json"
pred_file_name = "predictions.json"
num_gpus = "4"  # passed to the launch script, hence a string
timeout_sec = 5 * 60 * 60  # 5 hours
eval_version = "1.1"  # SQuAD evaluation-script version
def create_config_file(tmpdir, zeroenabled=False):
    """Write a DeepSpeed JSON config into *tmpdir* and return its path.

    The settings mirror the baseline BingBertSquad fine-tuning runs;
    ``zeroenabled`` is stored under ``"zero_optimization"`` so the ZeRO
    variant of the test can switch it on.
    """
    config = {
        "train_batch_size": 24,
        "train_micro_batch_size_per_gpu": 6,
        "steps_per_print": 10,
        "optimizer": {
            "type": "Adam",
            "params": {
                "lr": 3e-5,
                "weight_decay": 0.0,
                "bias_correction": False,
            },
        },
        "gradient_clipping": 1.0,
        "fp16": {
            "enabled": True,
        },
        "zero_optimization": zeroenabled,
    }
    config_path = os.path.join(tmpdir, 'temp_config.json')
    with open(config_path, 'w') as config_fd:
        json.dump(config, config_fd)
    return config_path
def test_e2e_squad_deepspeed_base(tmpdir):
    """End-to-end SQuAD fine-tuning run without ZeRO.

    Launches the DeepSpeed training script via bash, then scores the
    produced predictions with the official SQuAD v1.1 evaluator and
    compares exact-match / F1 against previously recorded numbers.
    """
    config_file = create_config_file(tmpdir)
    # base run results => {"exact_match": 83.9829706717124, "f1": 90.71138132004097}
    expected_exact_match = 83.98
    expected_f1 = 90.71

    model_file = os.path.join(squad_dir, model_file_name)
    eval_file = os.path.join(squad_dir, eval_file_name)
    output_dir = os.path.join(tmpdir, "output")
    pred_file = os.path.join(output_dir, pred_file_name)

    proc = sp.Popen([
        "bash",
        script_file_name,
        num_gpus,
        model_file,
        squad_dir,
        output_dir,
        config_file
    ],
                    cwd=base_dir)
    try:
        proc.communicate(timeout=timeout_sec)
    except sp.TimeoutExpired:
        proc.kill()
        pytest.fail("Error: Timeout")

    # Popen.communicate() never raises CalledProcessError, so the original
    # handler for it was dead code; check the exit status explicitly and
    # only evaluate when the run actually produced predictions.
    if proc.returncode != 0 or not os.path.exists(pred_file):
        pytest.fail("Error: Run Failed")

    eval_result = eval.evaluate(eval_version, eval_file, pred_file)
    print("evaluation result: ", json.dumps(eval_result))
    assert isclose(eval_result["exact_match"],
                   expected_exact_match,
                   abs_tol=1e-2)
    assert isclose(eval_result["f1"], expected_f1, abs_tol=1e-2)
def test_e2e_squad_deepspeed_zero(tmpdir):
    """End-to-end SQuAD fine-tuning run with ZeRO enabled.

    Identical to the base test except the DeepSpeed config has
    ``zero_optimization`` switched on, and the reference exact-match /
    F1 numbers come from the recorded ZeRO run.
    """
    config_file = create_config_file(tmpdir, True)
    # base run results => {"exact_match": 84.1438032166509, "f1": 90.89776136505441}
    expected_exact_match = 84.14
    expected_f1 = 90.89

    model_file = os.path.join(squad_dir, model_file_name)
    eval_file = os.path.join(squad_dir, eval_file_name)
    output_dir = os.path.join(tmpdir, "output")
    pred_file = os.path.join(output_dir, pred_file_name)

    proc = sp.Popen([
        "bash",
        script_file_name,
        num_gpus,
        model_file,
        squad_dir,
        output_dir,
        config_file
    ],
                    cwd=base_dir)
    try:
        proc.communicate(timeout=timeout_sec)
    except sp.TimeoutExpired:
        proc.kill()
        pytest.fail("Error: Timeout")

    # Popen.communicate() never raises CalledProcessError, so the original
    # handler for it was dead code; check the exit status explicitly and
    # only evaluate when the run actually produced predictions.
    if proc.returncode != 0 or not os.path.exists(pred_file):
        pytest.fail("Error: Run Failed")

    eval_result = eval.evaluate(eval_version, eval_file, pred_file)
    print("evaluation result: ", json.dumps(eval_result))
    assert isclose(eval_result["exact_match"],
                   expected_exact_match,
                   abs_tol=1e-2)
    assert isclose(eval_result["f1"], expected_f1, abs_tol=1e-2)
|