test_autotuning.py 3.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384
  1. import os
  2. import pytest
  3. from .simple_model import create_config_from_dict
  4. from deepspeed.launcher import runner as dsrun
  5. from deepspeed.autotuning.autotuner import Autotuner
  6. from deepspeed.autotuning.scheduler import ResourceManager
  7. RUN_OPTION = 'run'
  8. TUNE_OPTION = 'tune'
  9. def test_command_line():
  10. '''Validate handling of command line arguments'''
  11. for opt in [RUN_OPTION, TUNE_OPTION]:
  12. dsrun.parse_args(
  13. args=f"--num_nodes 1 --num_gpus 1 --autotuning {opt} foo.py".split())
  14. for error_opts in [
  15. "--autotuning --num_nodes 1 --num_gpus 1 foo.py".split(),
  16. "--autotuning test --num_nodes 1 -- num_gpus 1 foo.py".split(),
  17. "--autotuning".split()
  18. ]:
  19. with pytest.raises(SystemExit):
  20. dsrun.parse_args(args=error_opts)
  21. @pytest.mark.parametrize("arg_mappings",
  22. [
  23. None,
  24. {
  25. },
  26. {
  27. "train_micro_batch_size_per_gpu": "--per_device_train_batch_size"
  28. },
  29. {
  30. "train_micro_batch_size_per_gpu": "--per_device_train_batch_size",
  31. "gradient_accumulation_steps": "--gradient_accumulation_steps"
  32. },
  33. {
  34. "train_batch_size": "-tbs"
  35. }
  36. ]) # yapf: disable
  37. def test_resource_manager_arg_mappings(arg_mappings):
  38. rm = ResourceManager(args=None,
  39. hosts="worker-0, worker-1",
  40. num_gpus_per_node=4,
  41. results_dir=None,
  42. exps_dir=None,
  43. arg_mappings=arg_mappings)
  44. if arg_mappings is not None:
  45. for k, v in arg_mappings.items():
  46. assert k.strip() in rm.arg_mappings.keys()
  47. assert arg_mappings[k.strip()].strip() == rm.arg_mappings[k.strip()]
  48. @pytest.mark.parametrize("active_resources",
  49. [
  50. {"worker-0": [0, 1, 2, 3]},
  51. {"worker-0": [0, 1, 2, 3], "worker-1": [0, 1, 2, 3]},
  52. {"worker-0": [0], "worker-1": [0, 1, 2], "worker-2": [0, 1, 2]},
  53. {"worker-0": [0, 1], "worker-2": [4, 5]}
  54. ]
  55. ) # yapf: disable
  56. def test_autotuner_resources(tmpdir, active_resources):
  57. config_dict = {
  58. "autotuning": {
  59. "enabled": True,
  60. "exps_dir": os.path.join(tmpdir,
  61. 'exps_dir'),
  62. "arg_mappings": {}
  63. }
  64. }
  65. config_path = create_config_from_dict(tmpdir, config_dict)
  66. args = dsrun.parse_args(
  67. args=f'--autotuning {TUNE_OPTION} foo.py --deepspeed_config {config_path}'.split(
  68. ))
  69. tuner = Autotuner(args=args, active_resources=active_resources)
  70. expected_num_nodes = len(list(active_resources.keys()))
  71. assert expected_num_nodes == tuner.exp_num_nodes
  72. expected_num_gpus = min([len(v) for v in active_resources.values()])
  73. assert expected_num_gpus == tuner.exp_num_gpus