ds_config_func_bs4_zero2_offload.json 422 B

{
  "train_batch_size": 4,
  "gradient_accumulation_steps": 1,
  "steps_per_print": 1,
  "zero_optimization": {
    "stage": 2,
    "reduce_bucket_size": 7000000,
    "allgather_bucket_size": 7000000,
    "reduce_scatter": true,
    "cpu_offload": true
  },
  "gradient_clipping": 1.0,
  "fp16": {
    "enabled": true,
    "loss_scale": 0,
    "loss_scale_window": 1000,
    "hysteresis": 2,
    "min_loss_scale": 1
  }
}