ds_config_func_bs8_zero2_gas3.json 526 B

{
  "train_micro_batch_size_per_gpu": 8,
  "gradient_accumulation_steps": 3,
  "steps_per_print": 1,
  "zero_optimization": {
    "stage": 2,
    "reduce_bucket_size": 7000000,
    "allgather_bucket_size": 7000000,
    "reduce_scatter": true
  },
  "gradient_clipping": 1.0,
  "fp16": {
    "enabled": true,
    "loss_scale": 0,
    "loss_scale_window": 1000,
    "hysteresis": 2,
    "min_loss_scale": 1
  },
  "activation_checkpointing": {
    "partition_activations": true,
    "contiguous_memory_optimization": true
  }
}