# adam_test.py

  1. import torch
  2. from deepspeed.ops.adam import DeepSpeedCPUAdam
  3. import time
  4. device = 'cpu'
  5. model_size = 1 * 1024**3
  6. group_size = [model_size, 274432]
  7. param = [torch.nn.Parameter(torch.ones(size, device=device)) for size in group_size]
  8. optimizer = DeepSpeedCPUAdam(param)
  9. #torch.set_num_threads(128)
  10. for i, p in enumerate(param):
  11. p.grad = torch.ones(group_size[i], device=device)
  12. #param.grad = torch.ones(model_size, device=device)
  13. avg = 0
  14. for i in range(100):
  15. start = time.time()
  16. optimizer.step()
  17. stop = time.time()
  18. avg += (stop - start)
  19. for i, p in enumerate(param):
  20. p.grad = torch.ones(group_size[i], device=device) * 2
  21. #param.grad = torch.ones(model_size, device=device) * 2
  22. print("Elapsed Time is ", avg / 100)