ds_config_z2.json 774 B

{
  "train_batch_size": 8,
  "steps_per_print": 2000,
  "optimizer": {
    "type": "Adam",
    "params": {
      "lr": 0.001,
      "betas": [
        0.8,
        0.999
      ],
      "eps": 1e-8,
      "weight_decay": 3e-7
    }
  },
  "scheduler": {
    "type": "WarmupLR",
    "params": {
      "warmup_min_lr": 0,
      "warmup_max_lr": 0.001,
      "warmup_num_steps": 1000
    }
  },
  "gradient_clipping": 1.0,
  "prescale_gradients": false,
  "bf16": {
    "enabled": true,
    "loss_scale": 0,
    "loss_scale_window": 500,
    "hysteresis": 2,
    "min_loss_scale": 1,
    "initial_scale_power": 15
  },
  "wall_clock_breakdown": false,
  "zero_optimization": {
    "stage": 2,
    "overlap_comm": false,
    "contiguous_gradients": false
  }
}