# Copyright (c) Microsoft Corporation.
# SPDX-License-Identifier: Apache-2.0

# DeepSpeed Team

#########################################
# Compression Methods
# It has several sub-components
#########################################
# This module only binds names to literal values; it performs no computation.
# NOTE(review): the string values look like keys of a nested compression
# config and the *_DEFAULT values like fallbacks for absent keys -- confirm
# against the code that consumes these constants.

COMPRESSION_TRAINING = "compression_training"
SHARED_PARAMETERS = "shared_parameters"
DIFFERENT_GROUPS = "different_groups"

# Names shared by several techniques below; the per-technique *_ENABLED and
# *_SCHEDULE_OFFSET constants alias these so the spelling lives in one place.
TECHNIQUE_ENABLED = "enabled"
TECHNIQUE_SCHEDULE_OFFSET = "schedule_offset"
TECHNIQUE_SCHEDULE_OFFSET_END = "schedule_offset_end"

DIFFERENT_GROUPS_PARAMETERS = "params"
DIFFERENT_GROUPS_MODULE_SCOPE = "modules"
DIFFERENT_GROUPS_MODULE_SCOPE_DEFAULT = "*"
DIFFERENT_GROUPS_RELATED_MODULE_SCOPE = "related_modules"
DIFFERENT_GROUPS_RELATED_MODULE_SCOPE_DEFAULT = None

# COMPRESSION_TRAINING_ENABLED = "enabled"
# COMPRESSION_TRAINING_ENABLED_DEFAULT = False

####
# Layer Reduction
####
LAYER_REDUCTION = "layer_reduction"
LAYER_REDUCTION_ENABLED = TECHNIQUE_ENABLED
LAYER_REDUCTION_ENABLED_DEFAULT = False
KEEP_NUMBER_LAYER = "keep_number_layer"
MODULE_NAME_PREFIX = "module_name_prefix"
TEACHER_LAYER = "teacher_layer"
OTHER_MODULE_NAME = "other_module_name"

####
# Weight Quantization
####
WEIGHT_QUANTIZATION = "weight_quantization"

WEIGHT_QUANTIZATION_PERIOD = "quantization_period"
WEIGHT_QUANTIZATION_PERIOD_DEFAULT = 1

WEIGHT_QUANTIZE_IN_FORWARD_ENABLED = "quantize_weight_in_forward"
WEIGHT_QUANTIZE_IN_FORWARD_ENABLED_DEFAULT = False

WEIGHT_QUANTIZE_ENABLED = TECHNIQUE_ENABLED
WEIGHT_QUANTIZE_ENABLED_DEFAULT = False

WEIGHT_QUANTIZE_KERNEL = "quantizer_kernel"
WEIGHT_QUANTIZE_KERNEL_DEFAULT = False

WEIGHT_QUANTIZE_SCHEDULE_OFFSET = TECHNIQUE_SCHEDULE_OFFSET
WEIGHT_QUANTIZE_SCHEDULE_OFFSET_DEFAULT = 0

WEIGHT_QUANTIZE_GROUPS = "quantize_groups"
WEIGHT_QUANTIZE_GROUPS_DEFAULT = 1

WEIGHT_QUANTIZE_VERBOSE = "quantize_verbose"
WEIGHT_QUANTIZE_VERBOSE_DEFAULT = False

# Option names are declared first so the default can point at one of them
# instead of repeating the literal.
WEIGHT_QUANTIZE_TYPE = "quantization_type"
WEIGHT_QUANTIZE_SYMMETRIC = "symmetric"
WEIGHT_QUANTIZE_ASYMMETRIC = "asymmetric"
WEIGHT_QUANTIZE_TYPE_DEFAULT = WEIGHT_QUANTIZE_SYMMETRIC

WEIGHT_QUANTIZE_ROUNDING = "rounding"
WEIGHT_QUANTIZE_STOCHASTIC_ROUNDING = "stochastic"
WEIGHT_QUANTIZE_NEAREST_ROUNDING = "nearest"
WEIGHT_QUANTIZE_ROUNDING_DEFAULT = WEIGHT_QUANTIZE_NEAREST_ROUNDING

# maybe deleted for a cleaner version
WEIGHT_QUANTIZE_FP16_MIXED_QUANTIZE = "fp16_mixed_quantize"
WEIGHT_QUANTIZE_FP16_MIXED_QUANTIZE_ENABLED = TECHNIQUE_ENABLED
WEIGHT_QUANTIZE_FP16_MIXED_QUANTIZE_ENABLED_DEFAULT = False

WEIGHT_QUANTIZE_CHANGE_RATIO = "quantize_change_ratio"
WEIGHT_QUANTIZE_CHANGE_RATIO_DEFAULT = 0.001

# No defaults are declared in this module for the two bit-width names.
WEIGHT_QUANTIZE_START_BITS = "start_bits"
WEIGHT_QUANTIZE_TARGET_BITS = "target_bits"

###
# Activation Quantization
###
ACTIVATION_QUANTIZATION = "activation_quantization"

ACTIVATION_QUANTIZATION_ENABLED = TECHNIQUE_ENABLED
ACTIVATION_QUANTIZATION_ENABLED_DEFAULT = False

ACTIVATION_QUANTIZE_SCHEDULE_OFFSET = TECHNIQUE_SCHEDULE_OFFSET
ACTIVATION_QUANTIZE_SCHEDULE_OFFSET_DEFAULT = 1000

ACTIVATION_QUANTIZE_TYPE = "quantization_type"
ACTIVATION_QUANTIZE_SYMMETRIC = "symmetric"
ACTIVATION_QUANTIZE_ASYMMETRIC = "asymmetric"
ACTIVATION_QUANTIZE_TYPE_DEFAULT = ACTIVATION_QUANTIZE_SYMMETRIC

ACTIVATION_QUANTIZE_RANGE = "range_calibration"
ACTIVATION_QUANTIZE_RANGE_STATIC = "static"
ACTIVATION_QUANTIZE_RANGE_DYNAMIC = "dynamic"
ACTIVATION_QUANTIZE_RANGE_DEFAULT = ACTIVATION_QUANTIZE_RANGE_DYNAMIC

# No default is declared in this module for the bit width.
ACTIVATION_QUANTIZE_BITS = "bits"

###
# Sparse Pruning
###
SPARSE_PRUNING = "sparse_pruning"

SPARSE_PRUNING_ENABLED = TECHNIQUE_ENABLED
SPARSE_PRUNING_ENABLED_DEFAULT = False

SPARSE_PRUNING_METHOD = "method"
SPARSE_PRUNING_METHOD_L1 = "l1"
SPARSE_PRUNING_METHOD_TOPK = "topk"
SPARSE_PRUNING_METHOD_SNIP_MOMENTUM = "snip_momentum"
SPARSE_PRUNING_METHOD_DEFAULT = SPARSE_PRUNING_METHOD_L1

SPARSE_PRUNING_BLOCK_PATTERN = "block_pattern"
SPARSE_PRUNING_BLOCK_PATTERN_DEFAULT = "4x1"

SPARSE_PRUNING_SCHEDULE_OFFSET_STRIDE = "schedule_offset_stride"
SPARSE_PRUNING_SCHEDULE_OFFSET_STRIDE_DEFAULT = 1

SPARSE_PRUNING_SCHEDULE_OFFSET = TECHNIQUE_SCHEDULE_OFFSET
SPARSE_PRUNING_SCHEDULE_OFFSET_DEFAULT = 1000

# The end-offset default is deliberately tied to the start-offset default,
# so the two are equal unless one of them is edited here.
SPARSE_PRUNING_SCHEDULE_OFFSET_END = TECHNIQUE_SCHEDULE_OFFSET_END
SPARSE_PRUNING_SCHEDULE_OFFSET_END_DEFAULT = SPARSE_PRUNING_SCHEDULE_OFFSET_DEFAULT

SPARSE_PRUNING_DENSE_RATIO = "dense_ratio"
SPARSE_PRUNING_DENSE_RATIO_DEFAULT = 0.1

SPARSE_PRUNING_EXCLUDED_MODULES = "excluded_modules"
# This is one list object shared by every importer of the module: mutating it
# in place changes the default for all of them. Copy before modifying.
SPARSE_PRUNING_EXCLUDED_MODULES_DEFAULT = []

###
# Row Pruning
###
ROW_PRUNING = "row_pruning"

ROW_PRUNING_ENABLED = TECHNIQUE_ENABLED
ROW_PRUNING_ENABLED_DEFAULT = False

ROW_PRUNING_METHOD = "method"
ROW_PRUNING_METHOD_L1 = "l1"
ROW_PRUNING_METHOD_TOPK = "topk"
ROW_PRUNING_METHOD_DEFAULT = ROW_PRUNING_METHOD_L1

ROW_PRUNING_SCHEDULE_OFFSET = TECHNIQUE_SCHEDULE_OFFSET
ROW_PRUNING_SCHEDULE_OFFSET_DEFAULT = 1000

ROW_PRUNING_DENSE_RATIO = "dense_ratio"

###
# Head Pruning
###
HEAD_PRUNING = "head_pruning"

HEAD_PRUNING_ENABLED = TECHNIQUE_ENABLED
HEAD_PRUNING_ENABLED_DEFAULT = False

# Unlike the other pruning sections in this module, the default method
# here is "topk" rather than "l1".
HEAD_PRUNING_METHOD = "method"
HEAD_PRUNING_METHOD_L1 = "l1"
HEAD_PRUNING_METHOD_TOPK = "topk"
HEAD_PRUNING_METHOD_DEFAULT = HEAD_PRUNING_METHOD_TOPK

HEAD_PRUNING_SCHEDULE_OFFSET = TECHNIQUE_SCHEDULE_OFFSET
HEAD_PRUNING_SCHEDULE_OFFSET_DEFAULT = 1000

HEAD_PRUNING_NUM_HEADS = "num_heads"
HEAD_PRUNING_DENSE_RATIO = "dense_ratio"

###
# Channel Pruning
###
CHANNEL_PRUNING = "channel_pruning"

CHANNEL_PRUNING_ENABLED = TECHNIQUE_ENABLED
CHANNEL_PRUNING_ENABLED_DEFAULT = False

CHANNEL_PRUNING_METHOD = "method"
CHANNEL_PRUNING_METHOD_L1 = "l1"
CHANNEL_PRUNING_METHOD_TOPK = "topk"
CHANNEL_PRUNING_METHOD_DEFAULT = CHANNEL_PRUNING_METHOD_L1

CHANNEL_PRUNING_SCHEDULE_OFFSET = TECHNIQUE_SCHEDULE_OFFSET
CHANNEL_PRUNING_SCHEDULE_OFFSET_DEFAULT = 1000

CHANNEL_PRUNING_DENSE_RATIO = "dense_ratio"