{
  "train_batch_size": 2,
  "gradient_accumulation_steps": 1,
  "optimizer": {
    "type": "Adam",
    "params": {
      "lr": 0.00015
    }
  },
  "gradient_clipping": 1.0,
  "fp16": {
    "enabled": true,
    "loss_scale": 0
  }
}