# test_autotuning.py
  1. # Copyright (c) Microsoft Corporation.
  2. # SPDX-License-Identifier: Apache-2.0
  3. # DeepSpeed Team
  4. import os
  5. import pytest
  6. from unit.simple_model import create_config_from_dict
  7. from deepspeed.launcher import runner as dsrun
  8. from deepspeed.autotuning.autotuner import Autotuner
  9. from deepspeed.autotuning.scheduler import ResourceManager
# The two modes accepted by the ``--autotuning`` command-line flag.
RUN_OPTION = 'run'
TUNE_OPTION = 'tune'
  12. def test_command_line():
  13. '''Validate handling of command line arguments'''
  14. for opt in [RUN_OPTION, TUNE_OPTION]:
  15. dsrun.parse_args(args=f"--num_nodes 1 --num_gpus 1 --autotuning {opt} foo.py".split())
  16. for error_opts in [
  17. "--autotuning --num_nodes 1 --num_gpus 1 foo.py".split(),
  18. "--autotuning test --num_nodes 1 -- num_gpus 1 foo.py".split(), "--autotuning".split()
  19. ]:
  20. with pytest.raises(SystemExit):
  21. dsrun.parse_args(args=error_opts)
  22. @pytest.mark.parametrize("arg_mappings",
  23. [
  24. None,
  25. {
  26. },
  27. {
  28. "train_micro_batch_size_per_gpu": "--per_device_train_batch_size"
  29. },
  30. {
  31. "train_micro_batch_size_per_gpu": "--per_device_train_batch_size",
  32. "gradient_accumulation_steps": "--gradient_accumulation_steps"
  33. },
  34. {
  35. "train_batch_size": "-tbs"
  36. }
  37. ]) # yapf: disable
  38. def test_resource_manager_arg_mappings(arg_mappings):
  39. rm = ResourceManager(args=None,
  40. hosts="worker-0, worker-1",
  41. num_gpus_per_node=4,
  42. results_dir=None,
  43. exps_dir=None,
  44. arg_mappings=arg_mappings)
  45. if arg_mappings is not None:
  46. for k, v in arg_mappings.items():
  47. assert k.strip() in rm.arg_mappings.keys()
  48. assert arg_mappings[k.strip()].strip() == rm.arg_mappings[k.strip()]
  49. @pytest.mark.parametrize("active_resources",
  50. [
  51. {"worker-0": [0, 1, 2, 3]},
  52. {"worker-0": [0, 1, 2, 3], "worker-1": [0, 1, 2, 3]},
  53. {"worker-0": [0], "worker-1": [0, 1, 2], "worker-2": [0, 1, 2]},
  54. {"worker-0": [0, 1], "worker-2": [4, 5]}
  55. ]
  56. ) # yapf: disable
  57. def test_autotuner_resources(tmpdir, active_resources):
  58. config_dict = {"autotuning": {"enabled": True, "exps_dir": os.path.join(tmpdir, 'exps_dir'), "arg_mappings": {}}}
  59. config_path = create_config_from_dict(tmpdir, config_dict)
  60. args = dsrun.parse_args(args=f'--autotuning {TUNE_OPTION} foo.py --deepspeed_config {config_path}'.split())
  61. tuner = Autotuner(args=args, active_resources=active_resources)
  62. expected_num_nodes = len(list(active_resources.keys()))
  63. assert expected_num_nodes == tuner.exp_num_nodes
  64. expected_num_gpus = min([len(v) for v in active_resources.values()])
  65. assert expected_num_gpus == tuner.exp_num_gpus