---
title: DeepSpeed
email: deepspeed@microsoft.com
description: >-
  DeepSpeed is a deep learning optimization library that makes distributed
  training easy, efficient, and effective.
locale: "en-US"
repository: microsoft/DeepSpeed
baseurl: "/" # the subpath of your site, e.g. /blog
url: "https://www.deepspeed.ai" # the base hostname & protocol for your site, e.g. http://example.com
# Build settings
remote_theme: "mmistakes/minimal-mistakes@4.19.0"
minimal_mistakes_skin: "air"

plugins:
  - jekyll-feed
  - jekyll-include-cache
  - jekyll-paginate

#paginate: 10
#paginate_path: /blog/page:num

include: ["_pages"]
exclude: ["code-docs"]
collections:
  tutorials:
    output: true
    permalink: /:collection/:path/
    order:
      - advanced-install.md
      - getting-started.md
      - azure.md
      - bert-finetuning.md
      - bert-pretraining.md
      - cifar-10.md
      - curriculum-learning.md
      - flops-profiler.md
      - pytorch-profiler.md
      - gan.md
      - lrrt.md
      - megatron.md
      - mixture-of-experts.md
      - one-cycle.md
      - onebit-adam.md
      - onebit-lamb.md
      - pipeline.md
      - progressive_layer_dropping.md
      - sparse-attention.md
      - transformer_kernel.md
      - zero-offload.md
      - zero.md
defaults:
  # all pages: single layout with a table of contents and the left nav sidebar
  - scope:
      path: ""
    values:
      layout: single
      author_profile: false
      read_time: false
      comments: false
      share: false
      related: false
      sneak_preview: false
      toc: true
      toc_label: "Contents"
      sidebar:
        nav: "lnav"
  # pages under _pages get /docs/ permalinks
  - scope:
      path: "_pages"
    values:
      permalink: /docs/:basename/
  # blog posts enable sharing
  - scope:
      path: ""
      type: posts
    values:
      layout: single
      share: true
analytics:
  provider: "google-gtag"
  google:
    tracking_id: "UA-169781858-1"

timezone: America/Los_Angeles

breadcrumbs: true

press_release_v3: https://www.microsoft.com/en-us/research/blog/deepspeed-extreme-scale-model-training-for-everyone/
press_release_v5: https://www.microsoft.com/en-us/research/blog/deepspeed-powers-8x-larger-moe-model-training-with-high-performance/