# simple_rpg_model.py (2.7 KB)
  1. from ray.rllib.models.tf.tf_modelv2 import TFModelV2
  2. from ray.rllib.models.tf.fcnet import FullyConnectedNetwork as TFFCNet
  3. from ray.rllib.models.torch.torch_modelv2 import TorchModelV2
  4. from ray.rllib.models.torch.fcnet import FullyConnectedNetwork as TorchFCNet
  5. from ray.rllib.utils.framework import try_import_tf, try_import_torch
  6. tf1, tf, tfv = try_import_tf()
  7. torch, nn = try_import_torch()
  8. class CustomTorchRPGModel(TorchModelV2, nn.Module):
  9. """Example of interpreting repeated observations."""
  10. def __init__(self, obs_space, action_space, num_outputs, model_config,
  11. name):
  12. super().__init__(obs_space, action_space, num_outputs, model_config,
  13. name)
  14. nn.Module.__init__(self)
  15. self.model = TorchFCNet(obs_space, action_space, num_outputs,
  16. model_config, name)
  17. def forward(self, input_dict, state, seq_lens):
  18. # The unpacked input tensors, where M=MAX_PLAYERS, N=MAX_ITEMS:
  19. # {
  20. # 'items', <torch.Tensor shape=(?, M, N, 5)>,
  21. # 'location', <torch.Tensor shape=(?, M, 2)>,
  22. # 'status', <torch.Tensor shape=(?, M, 10)>,
  23. # }
  24. print("The unpacked input tensors:", input_dict["obs"])
  25. print()
  26. print("Unbatched repeat dim", input_dict["obs"].unbatch_repeat_dim())
  27. print()
  28. print("Fully unbatched", input_dict["obs"].unbatch_all())
  29. print()
  30. return self.model.forward(input_dict, state, seq_lens)
  31. def value_function(self):
  32. return self.model.value_function()
  33. class CustomTFRPGModel(TFModelV2):
  34. """Example of interpreting repeated observations."""
  35. def __init__(self, obs_space, action_space, num_outputs, model_config,
  36. name):
  37. super().__init__(obs_space, action_space, num_outputs, model_config,
  38. name)
  39. self.model = TFFCNet(obs_space, action_space, num_outputs,
  40. model_config, name)
  41. def forward(self, input_dict, state, seq_lens):
  42. # The unpacked input tensors, where M=MAX_PLAYERS, N=MAX_ITEMS:
  43. # {
  44. # 'items', <tf.Tensor shape=(?, M, N, 5)>,
  45. # 'location', <tf.Tensor shape=(?, M, 2)>,
  46. # 'status', <tf.Tensor shape=(?, M, 10)>,
  47. # }
  48. print("The unpacked input tensors:", input_dict["obs"])
  49. print()
  50. print("Unbatched repeat dim", input_dict["obs"].unbatch_repeat_dim())
  51. print()
  52. if tf.executing_eagerly():
  53. print("Fully unbatched", input_dict["obs"].unbatch_all())
  54. print()
  55. return self.model.forward(input_dict, state, seq_lens)
  56. def value_function(self):
  57. return self.model.value_function()