123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175 |
- from gym.spaces import Box
- from ray.rllib.models.tf.fcnet import FullyConnectedNetwork
- from ray.rllib.models.tf.tf_modelv2 import TFModelV2
- from ray.rllib.models.torch.fcnet import FullyConnectedNetwork as \
- TorchFullyConnectedNetwork
- from ray.rllib.models.torch.misc import SlimFC
- from ray.rllib.models.torch.torch_modelv2 import TorchModelV2
- from ray.rllib.utils.framework import try_import_tf, try_import_torch
# Resolve the deep-learning frameworks lazily via RLlib's try_import_*
# helpers so this example can at least be imported when one of the two
# frameworks is not installed.
tf1, tf, tfv = try_import_tf()
torch, nn = try_import_torch()
# __sphinx_doc_model_api_1_begin__
class DuelingQModel(TFModelV2):  # or: TorchModelV2
    """A simple, hard-coded dueling head model."""

    def __init__(self, obs_space, action_space, num_outputs, model_config,
                 name):
        # Passing num_outputs=None into the super constructor prevents an
        # action/logits output layer from being built automatically.
        # (An alternative would be num_outputs=[last layer size of
        # config[model][fcnet_hiddens]] plus no_last_linear=True, but then
        # users of this class would have to be told that num_outputs is NOT
        # the size of the Q-output layer, which is more confusing.)
        super(DuelingQModel, self).__init__(obs_space, action_space, None,
                                            model_config, name)
        # After the super call, self.num_outputs holds the size of the
        # wrapped model's last layer; that feeds both dueling heads (see
        # the torch SlimFC variants in the comments below).

        # Advantage head: one output per action.
        self.A = tf.keras.layers.Dense(num_outputs)
        # torch:
        # self.A = SlimFC(
        #     in_size=self.num_outputs, out_size=num_outputs)

        # Value head: a single state-value output.
        self.V = tf.keras.layers.Dense(1)
        # torch:
        # self.V = SlimFC(in_size=self.num_outputs, out_size=1)

    def get_q_values(self, underlying_output):
        """Combine value- and advantage heads into dueling Q-values."""
        state_value = self.V(underlying_output)
        advantages = self.A(underlying_output)
        # Center the advantages by subtracting their mean over the action
        # axis; keepdims=True retains the axis so broadcasting works.
        centered_adv = advantages - tf.reduce_mean(
            advantages, 1, keepdims=True)
        # Dueling aggregation: Q = V + (A - mean(A)).
        return state_value + centered_adv
# __sphinx_doc_model_api_1_end__
class TorchDuelingQModel(TorchModelV2):
    """A simple, hard-coded dueling head model."""

    def __init__(self, obs_space, action_space, num_outputs, model_config,
                 name):
        # nn.Module must be initialized before the TorchModelV2 super call.
        nn.Module.__init__(self)
        # Passing num_outputs=None into the super constructor prevents an
        # action/logits output layer from being built automatically.
        # (An alternative would be num_outputs=[last layer size of
        # config[model][fcnet_hiddens]] plus no_last_linear=True, but then
        # users of this class would have to be told that num_outputs is NOT
        # the size of the Q-output layer, which is more confusing.)
        super(TorchDuelingQModel, self).__init__(obs_space, action_space,
                                                 None, model_config, name)
        # After the super call, self.num_outputs holds the size of the
        # wrapped model's last layer; that feeds both dueling heads.

        # Advantage head: one output per action.
        self.A = SlimFC(in_size=self.num_outputs, out_size=num_outputs)
        # Value head: a single state-value output.
        self.V = SlimFC(in_size=self.num_outputs, out_size=1)

    def get_q_values(self, underlying_output):
        """Combine value- and advantage heads into dueling Q-values."""
        state_value = self.V(underlying_output)
        advantages = self.A(underlying_output)
        # Center the advantages by subtracting their mean over the action
        # axis; keepdim=True retains the axis so broadcasting works.
        centered_adv = advantages - torch.mean(advantages, dim=1,
                                               keepdim=True)
        # Dueling aggregation: Q = V + (A - mean(A)).
        return state_value + centered_adv
class ContActionQModel(TFModelV2):
    """A simple, q-value-from-cont-action model (for e.g. SAC type algos)."""

    def __init__(self, obs_space, action_space, num_outputs, model_config,
                 name):
        # Passing num_outputs=None into the super constructor prevents an
        # action/logits output layer from being built automatically.
        # (An alternative would be num_outputs=[last layer size of
        # config[model][fcnet_hiddens]] plus no_last_linear=True, but then
        # users of this class would have to be told that num_outputs is NOT
        # the size of the Q-output layer, which is more confusing.)
        super(ContActionQModel, self).__init__(obs_space, action_space, None,
                                               model_config, name)
        # After the super call, self.num_outputs holds the size of the
        # wrapped model's last layer; the Q-head consumes that
        # concatenated with the (flat) action vector.
        concat_size = self.num_outputs + action_space.shape[0]
        combined_space = Box(-1.0, 1.0, (concat_size, ))
        # Nest an RLlib FullyConnectedNetwork (tf here) to compute the
        # single Q-value from the combined input.
        self.q_head = FullyConnectedNetwork(combined_space, action_space, 1,
                                            model_config, "q_head")

        # Missing here: Probably still have to provide action output layer
        # and value layer and make sure self.num_outputs is correctly set.

    def get_single_q_value(self, underlying_output, action):
        """Return the Q-value for the given features/action pair."""
        # Concatenate the wrapped model's output with the action and feed
        # it through the nested Q-head as its "obs" (ModelV2s take an
        # input_dict); state outputs are ignored.
        combined_input = tf.concat([underlying_output, action], axis=-1)
        q_values, _ = self.q_head({"obs": combined_input})
        return q_values
# __sphinx_doc_model_api_2_begin__
class TorchContActionQModel(TorchModelV2):
    """A simple, q-value-from-cont-action model (for e.g. SAC type algos)."""

    def __init__(self, obs_space, action_space, num_outputs, model_config,
                 name):
        # nn.Module must be initialized before the TorchModelV2 super call.
        nn.Module.__init__(self)
        # Passing num_outputs=None into the super constructor prevents an
        # action/logits output layer from being built automatically.
        # (An alternative would be num_outputs=[last layer size of
        # config[model][fcnet_hiddens]] plus no_last_linear=True, but then
        # users of this class would have to be told that num_outputs is NOT
        # the size of the Q-output layer, which is more confusing.)
        super(TorchContActionQModel, self).__init__(obs_space, action_space,
                                                    None, model_config, name)
        # After the super call, self.num_outputs holds the size of the
        # wrapped model's last layer; the Q-head consumes that
        # concatenated with the (flat) action vector.
        concat_size = self.num_outputs + action_space.shape[0]
        combined_space = Box(-1.0, 1.0, (concat_size, ))
        # Nest an RLlib FullyConnectedNetwork (torch here) to compute the
        # single Q-value from the combined input.
        self.q_head = TorchFullyConnectedNetwork(combined_space, action_space,
                                                 1, model_config, "q_head")

        # Missing here: Probably still have to provide action output layer
        # and value layer and make sure self.num_outputs is correctly set.

    def get_single_q_value(self, underlying_output, action):
        """Return the Q-value for the given features/action pair."""
        # Concatenate the wrapped model's output with the action and feed
        # it through the nested Q-head as its "obs" (ModelV2s take an
        # input_dict); state outputs are ignored.
        combined_input = torch.cat([underlying_output, action], dim=-1)
        q_values, _ = self.q_head({"obs": combined_input})
        return q_values
# __sphinx_doc_model_api_2_end__
|