test_reshape_checkpoint.py 2.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960
  1. # Copyright (c) Microsoft Corporation.
  2. # SPDX-License-Identifier: Apache-2.0
  3. # DeepSpeed Team
  4. from deepspeed.checkpoint import model_3d_desc
  5. def _do_reshape(src_3d, tgt_3d):
  6. assert src_3d.can_reshape(tgt_3d)
  7. new_3d_map = src_3d.reshape(tgt_3d)
  8. assert len(new_3d_map) == tgt_3d.dp_degree
  9. for new_2d_map in new_3d_map:
  10. assert new_2d_map.pp_degree == tgt_3d.pp_degree
  11. assert new_2d_map.tp_degree == tgt_3d.tp_degree
  12. return new_3d_map
# Test names encode each 3D shape as pp/tp/dp degrees (e.g. 222 is pp=2, tp=2, dp=2).
  14. def test_reshape_222_to_111():
  15. src_3d = model_3d_desc(pp_degree=2, tp_degree=2, dp_degree=2)
  16. tgt_3d = model_3d_desc(pp_degree=1, tp_degree=1, dp_degree=1)
  17. new_3d_map = _do_reshape(src_3d, tgt_3d)
  18. assert new_3d_map[0].get_data(pp_index=0, tp_index=0) == [0, 4, 1, 5, 2, 6, 3, 7]
  19. def test_reshape_222_to_121():
  20. src_3d = model_3d_desc(pp_degree=2, tp_degree=2, dp_degree=2)
  21. tgt_3d = model_3d_desc(pp_degree=1, tp_degree=2, dp_degree=1)
  22. new_3d_map = _do_reshape(src_3d, tgt_3d)
  23. assert new_3d_map[0].get_data(pp_index=0, tp_index=0) == [0, 4, 2, 6]
  24. assert new_3d_map[0].get_data(pp_index=0, tp_index=1) == [1, 5, 3, 7]
  25. def test_reshape_222_to_122():
  26. src_3d = model_3d_desc(pp_degree=2, tp_degree=2, dp_degree=2)
  27. tgt_3d = model_3d_desc(pp_degree=1, tp_degree=2, dp_degree=2)
  28. new_3d_map = _do_reshape(src_3d, tgt_3d)
  29. assert new_3d_map[0].get_data(pp_index=0, tp_index=0) == [0, 4]
  30. assert new_3d_map[0].get_data(pp_index=0, tp_index=1) == [1, 5]
  31. assert new_3d_map[1].get_data(pp_index=0, tp_index=0) == [2, 6]
  32. assert new_3d_map[1].get_data(pp_index=0, tp_index=1) == [3, 7]
  33. def test_reshape_222_to_211():
  34. src_3d = model_3d_desc(pp_degree=2, tp_degree=2, dp_degree=2)
  35. tgt_3d = model_3d_desc(pp_degree=2, tp_degree=1, dp_degree=1)
  36. new_3d_map = _do_reshape(src_3d, tgt_3d)
  37. assert new_3d_map[0].get_data(pp_index=0, tp_index=0) == [0, 4, 1, 5]
  38. assert new_3d_map[0].get_data(pp_index=1, tp_index=0) == [2, 6, 3, 7]