# Copyright (c) Microsoft Corporation.
# SPDX-License-Identifier: Apache-2.0

# DeepSpeed Team
"""Micro-benchmark for ``DeepSpeedCPUAdam.step``.

Builds one large CPU parameter plus a half-precision copy on accelerator
device 0, runs a fixed number of optimizer steps, and prints the mean
time spent inside ``optimizer.step``.
"""

import time

import torch
from deepspeed.accelerator import get_accelerator
from deepspeed.ops.adam import DeepSpeedCPUAdam

# Number of timed optimizer steps; also the divisor for the reported mean.
NUM_ITERATIONS = 100

device = 'cpu'
model_size = 1 * 1024**3  # number of elements in the benchmarked parameter

# Parameter updated by the optimizer; lives on the CPU.
param = torch.nn.Parameter(torch.ones(model_size, device=device))
# Half-precision parameter of the same length on accelerator device 0; it is
# handed to ``step`` through ``fp16_param_groups``.
param_fp16 = torch.nn.Parameter(
    torch.ones(model_size, dtype=torch.half, device=get_accelerator().device_name(0)))

optimizer = DeepSpeedCPUAdam([param])
# Optional: pin the CPU thread count for reproducible runs.
# torch.set_num_threads(128)

# Seed gradient so the first step has something to apply.
param.grad = torch.ones(model_size, device=device)

total_elapsed = 0.0
for _ in range(NUM_ITERATIONS):
    # perf_counter is monotonic and high-resolution, unlike time.time(), so
    # the measured interval cannot be skewed by wall-clock adjustments.
    start = time.perf_counter()
    optimizer.step(fp16_param_groups=[param_fp16])
    stop = time.perf_counter()
    total_elapsed += stop - start
    # Build the gradient for the next step outside the timed region.
    param.grad = torch.ones(model_size, device=device) * 2

# NOTE(review): only host-side time around step() is measured; if step()
# queues asynchronous device work, that work may not be included - confirm.
print("Elapsed Time is ", total_elapsed / NUM_ITERATIONS)