title: DeepSpeed
email: deepspeed@microsoft.com
description: >-
  DeepSpeed is a deep learning optimization library that makes distributed
  training easy, efficient, and effective.
locale: "en-US"
logo: /assets/images/deepspeed-logo-uppercase-bold-white-1.15.svg
repository: microsoft/DeepSpeed
baseurl: "/" # the subpath of your site, e.g. /blog
url: "https://www.deepspeed.ai" # the base hostname & protocol for your site, e.g. http://example.com

# Build settings
remote_theme: "mmistakes/minimal-mistakes@4.19.0"
minimal_mistakes_skin: "air"
search: true

plugins:
  - jekyll-feed
  - jekyll-include-cache
  - jekyll-paginate

#paginate: 10
#paginate_path: /blog/page:num

include: ["_pages"]
exclude: ["code-docs"]

collections:
  tutorials:
    output: true
    permalink: /:collection/:path/
    order:
      - advanced-install.md
      - getting-started.md
      - azure.md
      - automatic-tensor-parallelism.md
      - bert-finetuning.md
      - bert-pretraining.md
      - cifar-10.md
      - curriculum-learning.md
      - data-efficiency.md
      - ds4sci_evoformerattention.md
      - flops-profiler.md
      - pytorch-profiler.md
      - autotuning.md
      - gan.md
      - lrrt.md
      - megatron.md
      - mixture-of-experts.md
      - mixture-of-experts-nlg.md
      - mixture-of-experts-inference.md
      - model-compression.md
      - monitor.md
      - comms-logging.md
      - one-cycle.md
      - onebit-adam.md
      - zero-one-adam.md
      - onebit-lamb.md
      - pipeline.md
      - progressive_layer_dropping.md
      - sparse-attention.md
      - transformer_kernel.md
      - zero-offload.md
      - zero.md

defaults:
  - scope:
      path: ""
    values:
      layout: single
      author_profile: false
      read_time: false
      comments: false
      share: false
      related: false
      sneak_preview: false
      toc: true
      toc_label: "Contents"
      sidebar:
        nav: "lnav"
  - scope:
      path: "_pages"
    values:
      permalink: /docs/:basename/
      toc: true
      toc_label: "Contents"
  - scope:
      path: ""
      type: posts
    values:
      layout: single-full
      author_profile: false
      read_time: false
      comments: false
      share: true
      related: false
      toc: true
      toc_label: "Contents"
      toc_sticky: true
      show_date: true
  - scope:
      path: ""
      type: tutorials
    values:
      layout: single
      toc_sticky: true

analytics:
  provider: "google-gtag"
  google:
    tracking_id: "UA-169781858-1"

timezone: America/Los_Angeles

breadcrumbs: true

press_release_v3: https://www.microsoft.com/en-us/research/blog/deepspeed-extreme-scale-model-training-for-everyone/
press_release_v5: https://www.microsoft.com/en-us/research/blog/deepspeed-powers-8x-larger-moe-model-training-with-high-performance/
press_release_v6: https://www.microsoft.com/en-us/research/blog/deepspeed-advancing-moe-inference-and-training-to-power-next-generation-ai-scale/