config_v2.py 1.4 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243
  1. # Copyright (c) Microsoft Corporation.
  2. # SPDX-License-Identifier: Apache-2.0
  3. # DeepSpeed Team
  4. from typing import Optional
  5. from deepspeed.pydantic_v1 import Field
  6. from deepspeed.runtime.config_utils import DeepSpeedConfigModel
  7. from .ragged import DSStateManagerConfig
  8. class DeepSpeedTPConfig(DeepSpeedConfigModel):
  9. """ Configure tensor parallelism settings """
  10. tp_size: int = 1
  11. """ Number of devices to split the model across using tensor parallelism. """
  12. class QuantizationConfig(DeepSpeedConfigModel):
  13. """ Configure tensor parallelism settings """
  14. quantization_mode: Optional[str] = None
  15. """ The quantization mode in string format. The supported modes are as follows:
  16. - 'wf6af16', weight-only quantization with FP6 weight and FP16 activation.
  17. """
  18. # TODO: may reuse the constants in deepspeed/compression/constants.py
  19. class RaggedInferenceEngineConfig(DeepSpeedConfigModel):
  20. """ Sets parameters for DeepSpeed Inference Engine. """
  21. tensor_parallel: DeepSpeedTPConfig = Field({}, alias="tp")
  22. """
  23. Configuration for tensor parallelism used to split the model across several
  24. GPUs. Expects a dictionary containing values for :any:`DeepSpeedTPConfig`.
  25. """
  26. state_manager: DSStateManagerConfig = Field({}, alias="manager")
  27. """
  28. Configuration for managing persistent state
  29. """
  30. quantization: QuantizationConfig = {}