# adam_test.py
# Copyright (c) Microsoft Corporation.
# SPDX-License-Identifier: Apache-2.0
# DeepSpeed Team
  4. import torch
  5. from deepspeed.ops.adam import DeepSpeedCPUAdam
  6. import time
  7. NUM_ITERS = 100
  8. def _test_perf(param, optimizer_func):
  9. optimizer = optimizer_func(param)
  10. avg = 0
  11. for i in range(NUM_ITERS):
  12. for i, p in enumerate(param):
  13. p.grad = torch.ones_like(p) * 2
  14. start = time.time()
  15. optimizer.step()
  16. stop = time.time()
  17. avg += (stop - start)
  18. return avg / NUM_ITERS
  19. def _main():
  20. device = 'cpu'
  21. model_size = 1 * 1024**3
  22. group_size = [model_size, 274432]
  23. param = [torch.nn.Parameter(torch.ones(size, device=device)) for size in group_size]
  24. torch_time = _test_perf(param, torch.optim.Adam)
  25. ds_time = _test_perf(param, DeepSpeedCPUAdam)
  26. print(f"Step time: {torch_time=} {ds_time=}")
  27. _main()