backend.py 1.3 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243
'''Copyright The Microsoft DeepSpeed Team'''
''' DeepSpeed Communication Backend.

In the future, directly use NCCL/MPI/Gloo/etc without requiring torch.distributed. Simply wrap torch.distributed for now.

# Custom DS Backends -- Direct C/Ops
 - NCCL -- [EXPERIMENTAL]
 - MPI -- [EXPERIMENTAL]
 - RCCL -- [EXPERIMENTAL]
 - GLOO -- [EXPERIMENTAL]

# DS backend wrapper for torch.distributed [DEFAULT]
 - T-NCCL -- [DEFAULT]
 - T-GLOO
 - T-MPI
'''
''' Backend is the base class
     -- NcclBackend, MpiBackend, and TorchBackend are the main subclasses. TorchBackend is the only officially supported backend for now.
'''
  17. class Backend(object):
  18. def __init__(self, name='backend', rank=0, size=1):
  19. self.name = name
  20. # The world size and rank of the world process group
  21. self.world_group = None
  22. self.world_size = rank
  23. self.world_rank = size
  24. # Single process group (pg) implementation for now but keep a list for future
  25. self.process_groups = []
  26. self.initialized = False
  27. def is_initialized(self):
  28. return self.initialized
  29. def new_group(self):
  30. # create a new pg and add it to pg list
  31. pass
  32. def init_process_group(self):
  33. # subclasses will initialize them fully
  34. # - initialize a default world process group and add it to pg list
  35. self.initialized = True