RezaYazdaniAminabadi bae81311b3 fixing corner cases (#371) 4 年之前
..
common.py f2ac7eafd5 ZeRO-2 (#217) 4 年之前
ds_batch_config.json abe2204ddd Support fp32 grad clipping and fix max_grad_norm confusion (#232) 4 年之前
modeling.py 734d8991c8 Transformer kernel release (#242) 4 年之前
modelingpreln.py 734d8991c8 Transformer kernel release (#242) 4 年之前
multi_output_model.py 53c73fe3e2 Support multi-output models (#170) 4 年之前
simple_model.py 376818ef9d Empty grad fix (#291) 4 年之前
test_adam_acuracy.py bae81311b3 fixing corner cases (#371) 4 年之前
test_checkpointing.py 19aac8ad19 ZeRO-Offload: Integration code fixes (#370) 4 年之前
test_config.py e5bbc2e559 Sparse attn + ops/runtime refactor + v0.3.0 (#343) 4 年之前
test_csr.py e5bbc2e559 Sparse attn + ops/runtime refactor + v0.3.0 (#343) 4 年之前
test_cuda_backward.py bbd8cd7d70 update tests 4 年之前
test_cuda_forward.py 734d8991c8 Transformer kernel release (#242) 4 年之前
test_dist.py 438aa01773 Enables NCCL backend in @distributed_test (#13) 4 年之前
test_ds_arguments.py 8326aff279 Improve doc string for add_XXX_arguments (#32) 4 年之前
test_ds_config.py e5bbc2e559 Sparse attn + ops/runtime refactor + v0.3.0 (#343) 4 年之前
test_dynamic_loss_scale.py e45b5e4cd0 ZeRO-Offload v1 (squash) (#345) 4 年之前
test_fp16.py 253b044793 Generalize detection of ZeRO supported optimizers (#349) 4 年之前
test_lr_schedulers.py cd68e6e55a Fix+tests for get_lr from lr_scheduler before training starts (#310) 4 年之前
test_multi_output_model.py f2ac7eafd5 ZeRO-2 (#217) 4 年之前
test_run.py e5bbc2e559 Sparse attn + ops/runtime refactor + v0.3.0 (#343) 4 年之前
test_sparse_attention.py 6604a5da1f Correctness and perf fixes (#354) 4 年之前