ds_config_func_bs8_no_zero.json 296 B

12345678910111213141516
  1. {
  2. "train_batch_size": 8,
  3. "gradient_accumulation_steps": 1,
  4. "steps_per_print": 1,
  5. "zero_optimization": {
  6. "stage": 0
  7. },
  8. "gradient_clipping": 1.0,
  9. "fp16": {
  10. "enabled": true,
  11. "loss_scale": 0,
  12. "loss_scale_window": 1000,
  13. "hysteresis": 2,
  14. "min_loss_scale": 1
  15. }
  16. }