123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116 |
- # Copyright (c) Microsoft Corporation.
- # SPDX-License-Identifier: Apache-2.0
- # DeepSpeed Team
- """
- Data efficiency library
- See sample config at https://www.deepspeed.ai/docs/config-json/data-efficiency
- """
- DATA_EFFICIENCY = "data_efficiency"  # string key; presumably the top-level section name in the JSON config referenced by the module docstring - confirm against the config parser
- DATA_EFFICIENCY_ENABLED = "enabled"  # the other *_ENABLED constants visible in this file use the same literal "enabled"
- DATA_EFFICIENCY_ENABLED_DEFAULT = False  # default paired with DATA_EFFICIENCY_ENABLED
- DATA_EFFICIENCY_SEED = "seed"
- DATA_EFFICIENCY_SEED_DEFAULT = 1234  # default paired with DATA_EFFICIENCY_SEED
- #########################################
- # Data efficiency - Data Sampling (string keys and their defaults)
- #########################################
- DATA_SAMPLING = "data_sampling"  # presumably a sub-section nested under "data_efficiency", per the banner above - confirm
- DATA_SAMPLING_ENABLED = "enabled"
- DATA_SAMPLING_ENABLED_DEFAULT = False  # default paired with DATA_SAMPLING_ENABLED
- DATA_SAMPLING_NUM_EPOCHS = "num_epochs"
- DATA_SAMPLING_NUM_EPOCHS_DEFAULT = 1000  # default paired with DATA_SAMPLING_NUM_EPOCHS
- DATA_SAMPLING_NUM_WORKERS = "num_workers"
- DATA_SAMPLING_NUM_WORKERS_DEFAULT = 0  # default paired with DATA_SAMPLING_NUM_WORKERS
- #########################################
- # Data efficiency - Data Sampling - Curriculum Learning (string keys; only the enabled flag has a default here)
- #########################################
- CURRICULUM_LEARNING = "curriculum_learning"  # same string as CURRICULUM_LEARNING_LEGACY defined later in this file
- CURRICULUM_LEARNING_ENABLED = "enabled"
- CURRICULUM_LEARNING_ENABLED_DEFAULT = False  # default paired with CURRICULUM_LEARNING_ENABLED
- CURRICULUM_LEARNING_CLUSTER_PATH = "data_cluster_path"  # singular; distinct from "data_cluster_paths" (CURRICULUM_LEARNING_DATA_CLUSTER_PATHS)
- CURRICULUM_LEARNING_METRICS = "curriculum_metrics"  # note: value is not simply the lower-cased constant suffix
- CURRICULUM_LEARNING_SAMPLE_PATH = "index_to_sample_path"
- CURRICULUM_LEARNING_METRIC_PATH = "index_to_metric_path"
- CURRICULUM_LEARNING_CLUSTERING_TYPE = "clustering_type"
- CURRICULUM_LEARNING_SINGLE_CLUSTER = "single_cluster"  # presumably an allowed value for "clustering_type" - confirm against the consumer
- CURRICULUM_LEARNING_CLUSTER_PREFIX = "cluster"
- CURRICULUM_LEARNING_DIFFICULTY_TYPE = "difficulty_type"
- CURRICULUM_LEARNING_VALUE_BASED = "value"  # presumably an allowed value for "difficulty_type" - confirm
- CURRICULUM_LEARNING_PERCENTILE_BASED = "percentile"  # presumably an allowed value for "difficulty_type" - confirm
- CURRICULUM_LEARNING_MIN_DIFFICULTY = "min_difficulty"
- CURRICULUM_LEARNING_MAX_DIFFICULTY = "max_difficulty"
- CURRICULUM_LEARNING_SCHEDULE_TYPE = "schedule_type"  # same string as RANDOM_LTD_SCHEDULER_TYPE
- CURRICULUM_LEARNING_SCHEDULE_CONFIG = "schedule_config"  # same string as RANDOM_LTD_SCHEDULE_CONFIG
- CURRICULUM_LEARNING_SCHEDULE_DIFFICULTY = "difficulty"
- CURRICULUM_LEARNING_SCHEDULE_MAX_STEP = "max_step"
- CURRICULUM_LEARNING_SCHEDULE_TOTAL_STEP = "total_curriculum_step"  # note: value differs from the constant's suffix
- CURRICULUM_LEARNING_SCHEDULE_DIFFICULTY_STEP = "difficulty_step"
- CURRICULUM_LEARNING_SCHEDULE_ROOT_DEGREE = "root_degree"
- CURRICULUM_LEARNING_SCHEDULE_FIXED_DISCRETE = "fixed_discrete"  # this and the next three are presumably allowed values for "schedule_type" - confirm
- CURRICULUM_LEARNING_SCHEDULE_FIXED_ROOT = "fixed_root"
- CURRICULUM_LEARNING_SCHEDULE_FIXED_LINEAR = "fixed_linear"
- CURRICULUM_LEARNING_SCHEDULE_CUSTOM = "custom"
- CURRICULUM_LEARNING_CURRENT_DIFFICULTY = "current_difficulty"  # singular; distinct from "current_difficulties" below
- CURRICULUM_LEARNING_BATCH = "batch"
- CURRICULUM_LEARNING_CONSUMED_SAMPLES = "consumed_samples"
- CURRICULUM_LEARNING_STEP = "curriculum_step"
- CURRICULUM_LEARNING_CURRENT_DIFFICULTIES = "current_difficulties"  # plural counterpart of "current_difficulty"
- CURRICULUM_LEARNING_DATA_CLUSTER_PATHS = "data_cluster_paths"  # plural counterpart of "data_cluster_path"
- CURRICULUM_LEARNING_DATA_CLUSTER_CURRENT_POSITION = "data_cluster_current_position"
- CURRICULUM_LEARNING_NP_RNG_STATE = "np_rng_state"  # NOTE(review): name suggests a saved NumPy RNG state; usage is not visible here
- #########################################
- # Curriculum Learning legacy implementation (separate constant names for the legacy code path)
- #########################################
- CURRICULUM_LEARNING_LEGACY = "curriculum_learning"  # identical string to CURRICULUM_LEARNING; only the constant name differs
- CURRICULUM_ENABLED_LEGACY = "enabled"
- CURRICULUM_ENABLED_DEFAULT_LEGACY = False  # default paired with CURRICULUM_ENABLED_LEGACY
- #########################################
- # Data efficiency - Data Routing (string keys and the enabled default)
- #########################################
- DATA_ROUTING = "data_routing"  # presumably a sub-section nested under "data_efficiency", per the banner above - confirm
- DATA_ROUTING_ENABLED = "enabled"
- DATA_ROUTING_ENABLED_DEFAULT = False  # default paired with DATA_ROUTING_ENABLED
- #########################################
- # Data efficiency - Data Routing - Random LTD (string keys; only the enabled flags have defaults here)
- #########################################
- RANDOM_LTD = "random_ltd"
- RANDOM_LTD_ENABLED = "enabled"
- RANDOM_LTD_ENABLED_DEFAULT = False  # default paired with RANDOM_LTD_ENABLED
- RANDOM_LTD_MODEL_MASK_NAME = "model_mask_name"
- RANDOM_LTD_MODEL_TYPE = "model_type"
- RANDOM_LTD_MICRO_BATCH_SIZE = "micro_batch_size"
- RANDOM_LTD_GLOBAL_BATCH_SIZE = "global_batch_size"
- RANDOM_LTD_SAMPLE_INDEX = "sample_idx"  # note: value abbreviates "index" to "idx"
- RANDOM_LTD_ATTENTION_MASK = "attention_mask"
- RANDOM_LTD_HIDDEN_STATE_ORDER = "hidden_state_order"
- RANDOM_LTD_LAYER_NUM = "random_ltd_layer_num"  # value keeps the "random_ltd_" prefix, unlike most keys in this group
- RANDOM_LTD_LAYER_ID = "random_ltd_layer_id"  # value keeps the "random_ltd_" prefix, unlike most keys in this group
- RANDOM_LTD_TOTAL_LAYER_NUM = "total_layer_num"
- RANDOM_LTD_CONSUMED_LAYER_TOKENS = "consumed_layer_tokens"
- # scheduler keys
- RANDOM_LTD_SCHEDULER = "random_ltd_schedule"  # note: constant says SCHEDULER, value says "schedule"
- RANDOM_LTD_MAX_VALUE = "max_value"
- RANDOM_LTD_MIN_VALUE = "min_value"
- RANDOM_LTD_CURRENT_VALUE = "current_value"
- RANDOM_LTD_SCHEDULE_CONFIG = "schedule_config"  # same string as CURRICULUM_LEARNING_SCHEDULE_CONFIG
- RANDOM_LTD_INCREASE_STEP = "seq_per_step"  # note: value does not resemble the constant name
- RANDOM_LTD_REQUIRE_STEP = "require_steps"  # note: value is plural, constant name is singular
- RANDOM_LTD_SCHEDULER_TYPE = "schedule_type"  # same string as CURRICULUM_LEARNING_SCHEDULE_TYPE
- RANDOM_LTD_CURR_STEP = "current_steps"  # note: value is plural and unabbreviated
- # learning rate scheduler keys
- RANDOM_LTD_LAYER_TOKEN_LR_SCHEDULE = "layer_token_lr_schedule"
- RANDOM_LTD_LAYER_TOKEN_LR_ENABLED = "enabled"
- RANDOM_LTD_LAYER_TOKEN_LR_ENABLED_DEFAULT = False  # default paired with RANDOM_LTD_LAYER_TOKEN_LR_ENABLED
- RANDOM_LTD_TOTAL_LAYER_TOKENS = "total_layer_tokens"
- RANDOM_LTD_WARMUP_TYPE = "warmup_type"
- RANDOM_LTD_WARMUP_LAYER_TOKENS = "warmup_layer_tokens"
|