from deepspeed.utils import RepeatingLoader
import torch
import pytest
import deepspeed
from .common import distributed_test
from .simple_model import SimpleModel, args_from_dict, random_dataset


def test_repeating_loader():
    """RepeatingLoader should cycle over the wrapped iterable indefinitely."""
    loader = [1, 2, 3]
    loader = RepeatingLoader(loader)
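
    # Iterating well past the source length (50 cycles over three elements)
    # exercises the restart path each time the underlying iterator is exhausted.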
    for idx in range(50):
        assert next(loader) == 1
        assert next(loader) == 2
        assert next(loader) == 3


@pytest.mark.parametrize('train_batch_size, drop_last',
                         [(1, True),
                          (4, True),
                          (1, False),
                          (4, False)])
def test_dataloader_drop_last(tmpdir, train_batch_size, drop_last):
    """With 50 samples, `dataloader_drop_last` controls whether the final
    partial batch (e.g. the 2 leftover samples at batch size 4) is dropped."""
    config_dict = {
        "train_batch_size": train_batch_size,
        "dataloader_drop_last": drop_last,
        "steps_per_print": 1
    }
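    # args_from_dict (test helper) serializes config_dict to a file under
    # tmpdir and returns argparse-style args pointing at it.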
    args = args_from_dict(tmpdir, config_dict)
    hidden_dim = 10

    model = SimpleModel(hidden_dim)

    @distributed_test(world_size=[1])
    def _test_dataloader_drop_last(args, model, hidden_dim):
        optimizer = torch.optim.AdamW(params=model.parameters())
        # TODO: Figure out why this breaks with cuda device
        train_dataset = random_dataset(total_samples=50,
                                       hidden_dim=hidden_dim,
                                       device=torch.device('cpu'),
                                       dtype=torch.float32)
        # deepspeed.initialize wraps the model in an engine and builds the
        # training DataLoader from training_data according to config_dict.
        model, _, training_dataloader, _ = deepspeed.initialize(args=args,
                                                                model=model,
                                                                training_data=train_dataset,
                                                                optimizer=optimizer)
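        # The dataset lives on CPU (see the TODO above), so move each batch
        # to the local GPU before the forward pass.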
        for n, batch in enumerate(training_dataloader):
            x = batch[0].to(torch.cuda.current_device())
            y = batch[1].to(torch.cuda.current_device())
            loss = model(x, y)
            model.backward(loss)
            model.step()

    _test_dataloader_drop_last(args=args, model=model, hidden_dim=hidden_dim)