backend.py 1.4 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647
# Copyright (c) Microsoft Corporation.
# SPDX-License-Identifier: Apache-2.0

# DeepSpeed Team
"""
DeepSpeed Communication Backend.

In the future, directly use NCCL/MPI/Gloo/etc. without requiring torch.distributed. For now, simply wrap torch.distributed.

# Custom DS Backends -- Direct C/Ops
 - NCCL -- [EXPERIMENTAL]
 - MPI -- [EXPERIMENTAL]
 - RCCL -- [EXPERIMENTAL]
 - GLOO -- [EXPERIMENTAL]
# DS backend wrapper for torch.distributed [DEFAULT]
 - T-NCCL -- [DEFAULT]
 - T-GLOO
 - T-MPI

# Backend is the base class
 -- NcclBackend, MpiBackend, and TorchBackend are the main subclasses. TorchBackend is the only officially supported backend for now.
"""
  19. class Backend(object):
  20. def __init__(self, name='backend', rank=0, size=1):
  21. self.name = name
  22. # The world size and rank of the world process group
  23. self.world_group = None
  24. self.world_size = size
  25. self.world_rank = rank
  26. # Single process group (pg) implementation for now but keep a list for future
  27. self.process_groups = []
  28. self.initialized = False
  29. def is_initialized(self):
  30. return self.initialized
  31. def new_group(self):
  32. # create a new pg and add it to pg list
  33. pass
  34. def init_process_group(self):
  35. # subclasses will initialize them fully
  36. # - initialize a default world process group and add it to pg list
  37. self.initialized = True