import torch
import deepspeed
import argparse
import pytest
import json
import os
from common import distributed_test
from simple_model import SimpleModel, SimpleOptimizer, random_dataloader, args_from_dict


@pytest.mark.parametrize("scheduler_type,params",
                         [("WarmupLR", {}),
                          ("OneCycle", {
                              'cycle_min_lr': 0,
                              'cycle_max_lr': 0
                          }),
                          ("LRRangeTest", {})])
def test_get_lr_before_train(tmpdir, scheduler_type, params):
    # DeepSpeed config using the parametrized LR scheduler
    config_dict = {
        "train_batch_size": 2,
        "steps_per_print": 1,
        "optimizer": {
            "type": "Adam",
            "params": {
                "lr": 0.00015
            },
        },
        "scheduler": {
            "type": scheduler_type,
            "params": params
        },
        "gradient_clipping": 1.0
    }
    args = args_from_dict(tmpdir, config_dict)
    hidden_dim = 10
    model = SimpleModel(hidden_dim, empty_grad=False)

    @distributed_test(world_size=[1])
    def _test_get_lr_before_train(args, model, hidden_dim):
        model, _, _, lr_scheduler = deepspeed.initialize(args=args,
                                                         model=model,
                                                         model_parameters=model.parameters())
        data_loader = random_dataloader(model=model,
                                        total_samples=50,
                                        hidden_dim=hidden_dim,
                                        device=model.device,
                                        dtype=torch.float)
        for n, batch in enumerate(data_loader):
            # get lr before training starts; querying the scheduler here must not raise
            lr_scheduler.get_lr()
            loss = model(batch[0], batch[1])
            model.backward(loss)
            model.step()

    _test_get_lr_before_train(args=args, model=model, hidden_dim=hidden_dim)