12345678910111213141516171819202122232425262728293031323334353637383940414243 |
- '''Copyright The Microsoft DeepSpeed Team'''
- ''' DeepSpeed Communication Backend.
- In the future, directly use NCCL/MPI/Gloo/etc without requiring torch.distributed. Simply wrap torch.distributed for now.
- # Custom DS Backends -- Direct C/Ops
- - NCCL -- [EXPERIMENTAL]
- - MPI -- [EXPERIMENTAL]
- - RCCL -- [EXPERIMENTAL]
- - GLOO -- [EXPERIMENTAL]
- # DS backend wrapper for torch.distributed [DEFAULT]
- - T-NCCL -- [DEFAULT]
- - T-GLOO
- - T-MPI
- '''
- ''' Backend is the base class
- -- NcclBackend, MpiBackend, and TorchBackend are the main subclasses. TorchBackend is the only officially supported backend for now.
- '''
class Backend(object):
    '''Base class for DeepSpeed communication backends.

    Holds the bookkeeping shared by every backend: a name, the rank and
    size of the world process group, the list of process groups, and an
    "initialized" flag. Subclasses are expected to perform the real
    process-group setup.
    '''

    def __init__(self, name='backend', rank=0, size=1):
        '''Record the backend identity and reset its process-group state.

        Args:
            name: Label stored on the instance as ``self.name``.
            rank: Rank of this process in the world process group.
            size: Number of processes in the world process group.
        '''
        self.name = name
        # The world size and rank of the world process group
        self.world_group = None
        # Each parameter goes to its matching attribute: `size` is the world
        # size and `rank` is this process's rank (they were previously swapped).
        self.world_size = size
        self.world_rank = rank
        # Single process group (pg) implementation for now but keep a list for future
        self.process_groups = []
        self.initialized = False

    def is_initialized(self):
        '''Return the current value of the ``initialized`` flag.'''
        return self.initialized

    def new_group(self):
        '''Placeholder for creating a new process group.

        The base implementation does nothing and returns ``None``; the
        intent is to create a new pg and add it to the pg list.
        '''
        pass

    def init_process_group(self):
        '''Mark the backend as initialized.

        The base implementation only sets the ``initialized`` flag.
        Subclasses will initialize the backend fully, i.e. initialize a
        default world process group and add it to the pg list.
        '''
        self.initialized = True
|