# constants.py (4.6 KB)
# Copyright (c) Microsoft Corporation.
# SPDX-License-Identifier: Apache-2.0
# DeepSpeed Team
"""
Data efficiency library
See sample config at https://www.deepspeed.ai/docs/config-json/data-efficiency
"""
  8. DATA_EFFICIENCY = "data_efficiency"
  9. DATA_EFFICIENCY_ENABLED = "enabled"
  10. DATA_EFFICIENCY_ENABLED_DEFAULT = False
  11. DATA_EFFICIENCY_SEED = "seed"
  12. DATA_EFFICIENCY_SEED_DEFAULT = 1234
  13. #########################################
  14. # Data efficiency - Data Sampling
  15. #########################################
  16. DATA_SAMPLING = "data_sampling"
  17. DATA_SAMPLING_ENABLED = "enabled"
  18. DATA_SAMPLING_ENABLED_DEFAULT = False
  19. DATA_SAMPLING_NUM_EPOCHS = "num_epochs"
  20. DATA_SAMPLING_NUM_EPOCHS_DEFAULT = 1000
  21. DATA_SAMPLING_NUM_WORKERS = "num_workers"
  22. DATA_SAMPLING_NUM_WORKERS_DEFAULT = 0
  23. #########################################
  24. # Data efficiency - Data Sampling - Curriculum Learning
  25. #########################################
  26. CURRICULUM_LEARNING = "curriculum_learning"
  27. CURRICULUM_LEARNING_ENABLED = "enabled"
  28. CURRICULUM_LEARNING_ENABLED_DEFAULT = False
  29. CURRICULUM_LEARNING_CLUSTER_PATH = "data_cluster_path"
  30. CURRICULUM_LEARNING_METRICS = "curriculum_metrics"
  31. CURRICULUM_LEARNING_SAMPLE_PATH = "index_to_sample_path"
  32. CURRICULUM_LEARNING_METRIC_PATH = "index_to_metric_path"
  33. CURRICULUM_LEARNING_CLUSTERING_TYPE = "clustering_type"
  34. CURRICULUM_LEARNING_SINGLE_CLUSTER = "single_cluster"
  35. CURRICULUM_LEARNING_CLUSTER_PREFIX = "cluster"
  36. CURRICULUM_LEARNING_DIFFICULTY_TYPE = "difficulty_type"
  37. CURRICULUM_LEARNING_VALUE_BASED = "value"
  38. CURRICULUM_LEARNING_PERCENTILE_BASED = "percentile"
  39. CURRICULUM_LEARNING_MIN_DIFFICULTY = "min_difficulty"
  40. CURRICULUM_LEARNING_MAX_DIFFICULTY = "max_difficulty"
  41. CURRICULUM_LEARNING_SCHEDULE_TYPE = "schedule_type"
  42. CURRICULUM_LEARNING_SCHEDULE_CONFIG = "schedule_config"
  43. CURRICULUM_LEARNING_SCHEDULE_DIFFICULTY = "difficulty"
  44. CURRICULUM_LEARNING_SCHEDULE_MAX_STEP = "max_step"
  45. CURRICULUM_LEARNING_SCHEDULE_TOTAL_STEP = "total_curriculum_step"
  46. CURRICULUM_LEARNING_SCHEDULE_DIFFICULTY_STEP = "difficulty_step"
  47. CURRICULUM_LEARNING_SCHEDULE_ROOT_DEGREE = "root_degree"
  48. CURRICULUM_LEARNING_SCHEDULE_FIXED_DISCRETE = "fixed_discrete"
  49. CURRICULUM_LEARNING_SCHEDULE_FIXED_ROOT = "fixed_root"
  50. CURRICULUM_LEARNING_SCHEDULE_FIXED_LINEAR = "fixed_linear"
  51. CURRICULUM_LEARNING_SCHEDULE_CUSTOM = "custom"
  52. CURRICULUM_LEARNING_CURRENT_DIFFICULTY = "current_difficulty"
  53. CURRICULUM_LEARNING_BATCH = "batch"
  54. CURRICULUM_LEARNING_CONSUMED_SAMPLES = "consumed_samples"
  55. CURRICULUM_LEARNING_STEP = "curriculum_step"
  56. CURRICULUM_LEARNING_CURRENT_DIFFICULTIES = "current_difficulties"
  57. CURRICULUM_LEARNING_DATA_CLUSTER_PATHS = "data_cluster_paths"
  58. CURRICULUM_LEARNING_DATA_CLUSTER_CURRENT_POSITION = "data_cluster_current_position"
  59. CURRICULUM_LEARNING_NP_RNG_STATE = "np_rng_state"
  60. #########################################
  61. # Curriculum Learning legacy implementation
  62. #########################################
  63. CURRICULUM_LEARNING_LEGACY = "curriculum_learning"
  64. CURRICULUM_ENABLED_LEGACY = "enabled"
  65. CURRICULUM_ENABLED_DEFAULT_LEGACY = False
  66. #########################################
  67. # Data efficiency - Data Routing
  68. #########################################
  69. DATA_ROUTING = "data_routing"
  70. DATA_ROUTING_ENABLED = "enabled"
  71. DATA_ROUTING_ENABLED_DEFAULT = False
  72. #########################################
  73. # Data efficiency - Data Routing - Random LTD
  74. #########################################
  75. RANDOM_LTD = "random_ltd"
  76. RANDOM_LTD_ENABLED = "enabled"
  77. RANDOM_LTD_ENABLED_DEFAULT = False
  78. RANDOM_LTD_MODEL_MASK_NAME = "model_mask_name"
  79. RANDOM_LTD_MODEL_TYPE = "model_type"
  80. RANDOM_LTD_MICRO_BATCH_SIZE = "micro_batch_size"
  81. RANDOM_LTD_GLOBAL_BATCH_SIZE = "global_batch_size"
  82. RANDOM_LTD_SAMPLE_INDEX = "sample_idx"
  83. RANDOM_LTD_ATTENTION_MASK = "attention_mask"
  84. RANDOM_LTD_HIDDEN_STATE_ORDER = "hidden_state_order"
  85. RANDOM_LTD_LAYER_NUM = "random_ltd_layer_num"
  86. RANDOM_LTD_LAYER_ID = "random_ltd_layer_id"
  87. RANDOM_LTD_TOTAL_LAYER_NUM = "total_layer_num"
  88. RANDOM_LTD_CONSUMED_LAYER_TOKENS = "consumed_layer_tokens"
  89. # scheduler
  90. RANDOM_LTD_SCHEDULER = "random_ltd_schedule"
  91. RANDOM_LTD_MAX_VALUE = "max_value"
  92. RANDOM_LTD_MIN_VALUE = "min_value"
  93. RANDOM_LTD_CURRENT_VALUE = "current_value"
  94. RANDOM_LTD_SCHEDULE_CONFIG = "schedule_config"
  95. RANDOM_LTD_INCREASE_STEP = "seq_per_step"
  96. RANDOM_LTD_REQUIRE_STEP = "require_steps"
  97. RANDOM_LTD_SCHEDULER_TYPE = "schedule_type"
  98. RANDOM_LTD_CURR_STEP = "current_steps"
  99. # learning rate schedulers
  100. RANDOM_LTD_LAYER_TOKEN_LR_SCHEDULE = "layer_token_lr_schedule"
  101. RANDOM_LTD_LAYER_TOKEN_LR_ENABLED = "enabled"
  102. RANDOM_LTD_LAYER_TOKEN_LR_ENABLED_DEFAULT = False
  103. RANDOM_LTD_TOTAL_LAYER_TOKENS = "total_layer_tokens"
  104. RANDOM_LTD_WARMUP_TYPE = "warmup_type"
  105. RANDOM_LTD_WARMUP_LAYER_TOKENS = "warmup_layer_tokens"