{
  "train_batch_size": 24,
  "train_micro_batch_size_per_gpu": 3,
  "steps_per_print": 1,
  "optimizer": {
    "type": "Adam",
    "params": {
      "lr": 3e-5,
      "weight_decay": 0.0,
      "bias_correction": false
    }
  },
  "gradient_clipping": 1.0,
  "fp16": {
    "enabled": true
  },
  "tensorboard": {
    "enabled": true,
    "output_path": "/tmp/eigenvalue_quantize_output",
    "job_name": "eigenvalue_quantize"
  },
  "eigenvalue": {
    "enabled": true,
    "verbose": true,
    "max_iter": 50,
    "tol": 1e-2,
    "stability": 0,
    "gas_boundary_resolution": 1,
    "model_name": "bert-large"
  },
  "quantize_training": {
    "quantize_bits": {
      "start_bits": 12,
      "target_bits": 4
    },
    "quantize_type": "symmetric",
    "quantize_schedule": {
      "quantize_period": 400,
      "schedule_offset": 400
    },
    "quantize_groups": 16,
    "fp16_mixed_quantize": {
      "enabled": true,
      "quantize_change_ratio": 0.001
    },
    "quantize_verbose": true,
    "quantize_eigenvalue": true
  }
}