curriculum_scheduler.py 9.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158
  1. # Copyright (c) Microsoft Corporation.
  2. # SPDX-License-Identifier: Apache-2.0
  3. # DeepSpeed Team
  4. import math
  5. from deepspeed.utils import logger
  6. from .constants import *
  7. class CurriculumScheduler(object):
  8. def __init__(self, config):
  9. super().__init__()
  10. self.state = {}
  11. assert CURRICULUM_LEARNING_MIN_DIFFICULTY in config, \
  12. f"Curriculum learning requires the config '{CURRICULUM_LEARNING_MIN_DIFFICULTY}'"
  13. assert CURRICULUM_LEARNING_MAX_DIFFICULTY in config, \
  14. f"Curriculum learning requires the config '{CURRICULUM_LEARNING_MAX_DIFFICULTY}'"
  15. assert CURRICULUM_LEARNING_SCHEDULE_TYPE in config, \
  16. f"Curriculum learning requires the config '{CURRICULUM_LEARNING_SCHEDULE_TYPE}'"
  17. self.state[CURRICULUM_LEARNING_MIN_DIFFICULTY] = config[CURRICULUM_LEARNING_MIN_DIFFICULTY]
  18. self.state[CURRICULUM_LEARNING_MAX_DIFFICULTY] = config[CURRICULUM_LEARNING_MAX_DIFFICULTY]
  19. self.state[CURRICULUM_LEARNING_CURRENT_DIFFICULTY] = config[CURRICULUM_LEARNING_MIN_DIFFICULTY]
  20. self.state[CURRICULUM_LEARNING_SCHEDULE_TYPE] = config[CURRICULUM_LEARNING_SCHEDULE_TYPE]
  21. self.first_step = True
  22. if config[CURRICULUM_LEARNING_SCHEDULE_TYPE] == CURRICULUM_LEARNING_SCHEDULE_FIXED_DISCRETE:
  23. """
  24. The schedule_config is a list of difficulty and a list of max
  25. step belonging to each difficulty. Example json config:
  26. "schedule_config": {
  27. "difficulty": [1,2,3],
  28. "max_step": [5,10]
  29. }
  30. The "max_step" has one less element than "difficulty", because
  31. the last difficulty will be used for all following steps.
  32. The self.state[CURRICULUM_LEARNING_SCHEDULE_CONFIG] is a dictionary of
  33. difficulty : [max step for this difficulty, next difficulty].
  34. """
  35. assert CURRICULUM_LEARNING_SCHEDULE_DIFFICULTY in config[CURRICULUM_LEARNING_SCHEDULE_CONFIG], \
  36. f"Curriculum learning with fixed_discrete schedule requires the schedule_config '{CURRICULUM_LEARNING_SCHEDULE_DIFFICULTY}'"
  37. assert CURRICULUM_LEARNING_SCHEDULE_MAX_STEP in config[CURRICULUM_LEARNING_SCHEDULE_CONFIG], \
  38. f"Curriculum learning with fixed_discrete schedule requires the schedule_config '{CURRICULUM_LEARNING_SCHEDULE_MAX_STEP}'"
  39. assert len(config[CURRICULUM_LEARNING_SCHEDULE_CONFIG][CURRICULUM_LEARNING_SCHEDULE_MAX_STEP]) > 0
  40. assert len(config[CURRICULUM_LEARNING_SCHEDULE_CONFIG][CURRICULUM_LEARNING_SCHEDULE_DIFFICULTY]) > 0
  41. assert len(config[CURRICULUM_LEARNING_SCHEDULE_CONFIG][CURRICULUM_LEARNING_SCHEDULE_DIFFICULTY]) == len(
  42. config[CURRICULUM_LEARNING_SCHEDULE_CONFIG][CURRICULUM_LEARNING_SCHEDULE_MAX_STEP]) + 1
  43. self.state[CURRICULUM_LEARNING_SCHEDULE_CONFIG] = config[CURRICULUM_LEARNING_SCHEDULE_CONFIG]
  44. elif config[CURRICULUM_LEARNING_SCHEDULE_TYPE] == CURRICULUM_LEARNING_SCHEDULE_FIXED_ROOT:
  45. """
  46. The schedule_config includes:
  47. total_curriculum_step: how many steps the curriculum learning takes to go
  48. from min difficulty to max difficulty.
  49. difficulty_step: the difficulty level determined every time must
  50. be a multiple of this difficulty_step. This is used to determine
  51. the step of difficulty increase, and to ensure the use of NVIDIA
  52. Tensor Core acceleration (requires multiple of 8 (FP16) or
  53. 16 (INT8)).
  54. root_degree: the degree of the root function. Degree of 2 means
  55. square root and degree of 3 means cube root. Degree of 1 is
  56. equivalent to linear.
  57. "schedule_config": {
  58. "total_curriculum_step": 30000,
  59. "difficulty_step": 8,
  60. "root_degree": 2
  61. }
  62. """
  63. assert CURRICULUM_LEARNING_SCHEDULE_TOTAL_STEP in config[CURRICULUM_LEARNING_SCHEDULE_CONFIG], \
  64. f"Curriculum learning with fixed_root schedule requires the schedule_config '{CURRICULUM_LEARNING_SCHEDULE_TOTAL_STEP}'"
  65. assert CURRICULUM_LEARNING_SCHEDULE_DIFFICULTY_STEP in config[CURRICULUM_LEARNING_SCHEDULE_CONFIG], \
  66. f"Curriculum learning with fixed_root schedule requires the schedule_config '{CURRICULUM_LEARNING_SCHEDULE_DIFFICULTY_STEP}'"
  67. assert CURRICULUM_LEARNING_SCHEDULE_ROOT_DEGREE in config[CURRICULUM_LEARNING_SCHEDULE_CONFIG], \
  68. f"Curriculum learning with fixed_root schedule requires the schedule_config '{CURRICULUM_LEARNING_SCHEDULE_ROOT_DEGREE}'"
  69. if config[CURRICULUM_LEARNING_SCHEDULE_CONFIG][CURRICULUM_LEARNING_SCHEDULE_DIFFICULTY_STEP] % 8 != 0:
  70. logger.warning(
  71. f'When using seqlen metric, the difficulty_step for curriculum learning has to be multiple of 8 (for FP16 data) or 16 (for INT8 data) to enable NVIDIA Tensor Core acceleration. Disregard this warning if this is unrelated to your metric/hardware.'
  72. )
  73. self.state[CURRICULUM_LEARNING_SCHEDULE_CONFIG] = config[CURRICULUM_LEARNING_SCHEDULE_CONFIG]
  74. elif config[CURRICULUM_LEARNING_SCHEDULE_TYPE] == CURRICULUM_LEARNING_SCHEDULE_FIXED_LINEAR:
  75. """
  76. The schedule_config is the same as CURRICULUM_LEARNING_SCHEDULE_FIXED_ROOT but without the
  77. root_degree.
  78. "schedule_config": {
  79. "total_curriculum_step": 30000,
  80. "difficulty_step": 8
  81. }
  82. """
  83. assert CURRICULUM_LEARNING_SCHEDULE_TOTAL_STEP in config[CURRICULUM_LEARNING_SCHEDULE_CONFIG], \
  84. f"Curriculum learning with fixed_linear schedule requires the schedule_config '{CURRICULUM_LEARNING_SCHEDULE_TOTAL_STEP}'"
  85. assert CURRICULUM_LEARNING_SCHEDULE_DIFFICULTY_STEP in config[CURRICULUM_LEARNING_SCHEDULE_CONFIG], \
  86. f"Curriculum learning with fixed_linear schedule requires the schedule_config '{CURRICULUM_LEARNING_SCHEDULE_DIFFICULTY_STEP}'"
  87. if config[CURRICULUM_LEARNING_SCHEDULE_CONFIG][CURRICULUM_LEARNING_SCHEDULE_DIFFICULTY_STEP] % 8 != 0:
  88. logger.warning(
  89. f'When using seqlen metric, the difficulty_step for curriculum learning has to be multiple of 8 (for FP16 data) or 16 (for INT8 data) to enable NVIDIA Tensor Core acceleration. Disregard this warning if this is unrelated to your metric/hardware.'
  90. )
  91. self.state[CURRICULUM_LEARNING_SCHEDULE_CONFIG] = config[CURRICULUM_LEARNING_SCHEDULE_CONFIG]
  92. elif config[CURRICULUM_LEARNING_SCHEDULE_TYPE] == CURRICULUM_LEARNING_SCHEDULE_CUSTOM:
  93. """
  94. Fully customized schedule. User need to provide a custom schedule
  95. function by using the set_custom_curriculum_learning_schedule API
  96. in deepspeed/runtime/engine.py
  97. """
  98. self.custom_get_difficulty = None
  99. else:
  100. raise RuntimeError('Unsupported curriculum schedule type')
  101. def get_current_difficulty(self):
  102. return self.state[CURRICULUM_LEARNING_CURRENT_DIFFICULTY]
  103. def set_current_difficulty(self, difficulty):
  104. self.state[CURRICULUM_LEARNING_CURRENT_DIFFICULTY] = difficulty
  105. def set_custom_get_difficulty(self, schedule_function):
  106. self.custom_get_difficulty = schedule_function
  107. def get_state(self):
  108. return self.state
  109. def set_state(self, state):
  110. self.state = state
  111. def __fixed_discrete_get_difficulty(self, global_steps):
  112. s_state = self.state[CURRICULUM_LEARNING_SCHEDULE_CONFIG]
  113. if global_steps > s_state[CURRICULUM_LEARNING_SCHEDULE_MAX_STEP][-1]:
  114. return s_state[CURRICULUM_LEARNING_SCHEDULE_DIFFICULTY][-1]
  115. for i in range(len(s_state[CURRICULUM_LEARNING_SCHEDULE_MAX_STEP])):
  116. if global_steps <= s_state[CURRICULUM_LEARNING_SCHEDULE_MAX_STEP][i]:
  117. return s_state[CURRICULUM_LEARNING_SCHEDULE_DIFFICULTY][i]
  118. def __fixed_root_get_difficulty(self, global_steps, root_degree=None):
  119. s_state = self.state[CURRICULUM_LEARNING_SCHEDULE_CONFIG]
  120. if root_degree is None:
  121. root_degree = s_state[CURRICULUM_LEARNING_SCHEDULE_ROOT_DEGREE]
  122. next_difficulty = (float(global_steps) / s_state[CURRICULUM_LEARNING_SCHEDULE_TOTAL_STEP])**(1.0 / root_degree)
  123. next_difficulty = math.floor(
  124. next_difficulty *
  125. (self.state[CURRICULUM_LEARNING_MAX_DIFFICULTY] - self.state[CURRICULUM_LEARNING_MIN_DIFFICULTY]) +
  126. self.state[CURRICULUM_LEARNING_MIN_DIFFICULTY])
  127. next_difficulty -= (next_difficulty % s_state[CURRICULUM_LEARNING_SCHEDULE_DIFFICULTY_STEP])
  128. next_difficulty = min(next_difficulty, self.state[CURRICULUM_LEARNING_MAX_DIFFICULTY])
  129. return next_difficulty
  130. def get_difficulty(self, global_steps):
  131. if self.state[CURRICULUM_LEARNING_SCHEDULE_TYPE] == CURRICULUM_LEARNING_SCHEDULE_FIXED_DISCRETE:
  132. return self.__fixed_discrete_get_difficulty(global_steps)
  133. elif self.state[CURRICULUM_LEARNING_SCHEDULE_TYPE] == CURRICULUM_LEARNING_SCHEDULE_FIXED_LINEAR:
  134. return self.__fixed_root_get_difficulty(global_steps, 1)
  135. elif self.state[CURRICULUM_LEARNING_SCHEDULE_TYPE] == CURRICULUM_LEARNING_SCHEDULE_FIXED_ROOT:
  136. return self.__fixed_root_get_difficulty(global_steps)
  137. elif self.state[CURRICULUM_LEARNING_SCHEDULE_TYPE] == CURRICULUM_LEARNING_SCHEDULE_CUSTOM:
  138. return self.custom_get_difficulty(global_steps)
  139. else:
  140. raise RuntimeError('Unsupported curriculum schedule type')
  141. def update_difficulty(self, global_steps):
  142. if self.state[CURRICULUM_LEARNING_CURRENT_DIFFICULTY] < self.state[CURRICULUM_LEARNING_MAX_DIFFICULTY]:
  143. self.state[CURRICULUM_LEARNING_CURRENT_DIFFICULTY] = self.get_difficulty(global_steps)
  144. return self.state[CURRICULUM_LEARNING_CURRENT_DIFFICULTY]