1234567891011121314151617181920212223242526272829303132333435363738394041424344454647 |
- # Copyright (c) Microsoft Corporation.
- # SPDX-License-Identifier: Apache-2.0
- # DeepSpeed Team
- """
- DeepSpeed Communication Backend.
- In the future, this module will use NCCL/MPI/Gloo/etc. directly, without requiring torch.distributed. For now, it simply wraps torch.distributed.
- # Custom DS Backends -- Direct C/Ops
- - NCCL -- [EXPERIMENTAL]
- - MPI -- [EXPERIMENTAL]
- - RCCL -- [EXPERIMENTAL]
- - GLOO -- [EXPERIMENTAL]
- # DS backend wrapper for torch.distributed [DEFAULT]
- - T-NCCL -- [DEFAULT]
- - T-GLOO
- - T-MPI
- # Backend is the base class
- -- NcclBackend, MpiBackend, and TorchBackend are the main subclasses. TorchBackend is the only officially supported backend for now.
- """
class Backend(object):
    """Base class holding the bookkeeping state of a communication backend.

    The instance records a backend name, the rank and size of the world
    process group, a list of process groups, and an ``initialized`` flag.
    """

    def __init__(self, name='backend', rank=0, size=1):
        """Store the backend identity and start in the uninitialized state.

        Args:
            name: Label stored on ``self.name``.
            rank: Value stored on ``self.world_rank``.
            size: Value stored on ``self.world_size``.
        """
        self.name = name
        self.initialized = False
        # World process group handle plus its rank/size; the handle is left
        # unset here.
        self.world_group = None
        self.world_rank = rank
        self.world_size = size
        # Only a single process group (pg) is implemented for now; a list is
        # kept so more can be tracked in the future.
        self.process_groups = []

    def is_initialized(self):
        """Return the current value of the ``initialized`` flag."""
        return self.initialized

    def new_group(self):
        """Placeholder for creating a new pg and adding it to the pg list.

        The base implementation does nothing and returns ``None``.
        """
        return None

    def init_process_group(self):
        """Mark the backend as initialized.

        Subclasses are expected to do the full initialization, i.e. set up a
        default world process group and add it to the pg list; the base
        implementation only flips the flag.
        """
        self.initialized = True
|