Jeff Rasley 28592d692a reyazda/cpu adam jit (squash) (#455) 4 年之前
..
common.py 65c2f974d8 Pipeline parallel training engine. (#392) 4 年之前
ds_batch_config.json abe2204ddd Support fp32 grad clipping and fix max_grad_norm confusion (#232) 4 年之前
modeling.py 734d8991c8 Transformer kernel release (#242) 4 年之前
modelingpreln.py 734d8991c8 Transformer kernel release (#242) 4 年之前
multi_output_model.py 53c73fe3e2 Support multi-output models (#170) 4 年之前
simple_model.py 65c2f974d8 Pipeline parallel training engine. (#392) 4 年之前
test_activation_checkpointing.py a825f99688 Fix activation checkpoint unit tests for GPU systems (#421) 4 年之前
test_checkpointing.py 91b4a93db0 pytest skips for tests requiring certain ops (#411) 4 年之前
test_config.py e5bbc2e559 Sparse attn + ops/runtime refactor + v0.3.0 (#343) 4 年之前
test_cpu_adam.py 28592d692a reyazda/cpu adam jit (squash) (#455) 4 年之前
test_csr.py e5bbc2e559 Sparse attn + ops/runtime refactor + v0.3.0 (#343) 4 年之前
test_cuda_backward.py f0f2a70268 support dynamic sequence length in transformer kernels (#424) 4 年之前
test_cuda_forward.py f0f2a70268 support dynamic sequence length in transformer kernels (#424) 4 年之前
test_data.py 65c2f974d8 Pipeline parallel training engine. (#392) 4 年之前
test_dist.py 438aa01773 Enables NCCL backend in @distributed_test (#13) 4 年之前
test_ds_arguments.py 8326aff279 Improve doc string for add_XXX_arguments (#32) 4 年之前
test_ds_config.py e5bbc2e559 Sparse attn + ops/runtime refactor + v0.3.0 (#343) 4 年之前
test_dynamic_loss_scale.py 91b4a93db0 pytest skips for tests requiring certain ops (#411) 4 年之前
test_fp16.py 91b4a93db0 pytest skips for tests requiring certain ops (#411) 4 年之前
test_lr_schedulers.py 0e942df008 Add Linear warmup+decay lr schedule (#414) 4 年之前
test_multi_output_model.py f2ac7eafd5 ZeRO-2 (#217) 4 年之前
test_partition.py 65c2f974d8 Pipeline parallel training engine. (#392) 4 年之前
test_pipe.py 65c2f974d8 Pipeline parallel training engine. (#392) 4 年之前
test_pipe_module.py 65c2f974d8 Pipeline parallel training engine. (#392) 4 年之前
test_pipe_schedule.py 65c2f974d8 Pipeline parallel training engine. (#392) 4 年之前
test_run.py e5bbc2e559 Sparse attn + ops/runtime refactor + v0.3.0 (#343) 4 年之前
test_runtime_utils.py 65c2f974d8 Pipeline parallel training engine. (#392) 4 年之前
test_sparse_attention.py 91b4a93db0 pytest skips for tests requiring certain ops (#411) 4 年之前
test_topology.py 65c2f974d8 Pipeline parallel training engine. (#392) 4 年之前