helper.py 1.3 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546
  1. # Copyright (c) Microsoft Corporation.
  2. # SPDX-License-Identifier: Apache-2.0
  3. # DeepSpeed Team
  4. from .basic_layer import RandomLayerTokenDrop
  5. from collections import OrderedDict
  6. from deepspeed.compression.helper import recursive_getattr, recursive_setattr
  7. def convert_to_random_ltd(model, convert_type):
  8. if hasattr(model, 'module'):
  9. c_model = model.module
  10. else:
  11. c_model = model
  12. for name, module in c_model.named_modules():
  13. if isinstance(module, convert_type):
  14. old_module = recursive_getattr(c_model, name)
  15. new_module = RandomLayerTokenDrop(old_module)
  16. recursive_setattr(c_model, name, new_module)
  17. model.random_ltd_initialize()
  18. return model
  19. def save_without_random_ltd(model):
  20. if hasattr(model, 'module'):
  21. c_model = model.module
  22. else:
  23. c_model = model
  24. model_dic = c_model.state_dict()
  25. return remove_random_ltd_state_dict(model_dic)
  26. def remove_random_ltd_state_dict(state_dict):
  27. new_state_dict = OrderedDict()
  28. for key, value in state_dict.items():
  29. if '.random_ltd_layer' in key:
  30. new_key = ''.join(key.split('.random_ltd_layer'))
  31. else:
  32. new_key = key
  33. new_state_dict[new_key] = value
  34. return new_state_dict