123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101 |
- import numpy as np
- import gym
- from ray.rllib.models.modelv2 import ModelV2
- from ray.rllib.utils.annotations import DeveloperAPI
- from ray.rllib.utils.typing import TensorType, List, Union, ModelConfigDict
@DeveloperAPI
class ActionDistribution:
    """Abstract interface for an agent's policy action distribution.

    Concrete distributions subclass this and override the methods that
    raise ``NotImplementedError`` here.

    Attributes:
        inputs (Tensors): input vector to compute samples from.
        model (ModelV2): reference to the model producing the inputs.
    """

    @DeveloperAPI
    def __init__(self, inputs: List[TensorType], model: ModelV2):
        """Store the distribution inputs and the model that produced them.

        Args:
            inputs (Tensors): input vector to compute samples from.
            model (ModelV2): reference to the model producing the inputs.
                Mainly useful when model variables are needed to compute
                action outputs (e.g. auto-regressive action distributions,
                see examples/autoregressive_action_dist.py).
        """
        # Both values are kept as-is; no validation or copying happens here.
        self.inputs = inputs
        self.model = model

    @DeveloperAPI
    def sample(self) -> TensorType:
        """Draw a sample from the action distribution.

        Raises:
            NotImplementedError: always; subclasses must override.
        """
        raise NotImplementedError

    @DeveloperAPI
    def deterministic_sample(self) -> TensorType:
        """Return the deterministic "sampling" output of the distribution.

        This is usually the max-likelihood output, i.e. the mean for Normal,
        the argmax for Categorical, etc.

        Raises:
            NotImplementedError: always; subclasses must override.
        """
        raise NotImplementedError

    @DeveloperAPI
    def sampled_action_logp(self) -> TensorType:
        """Return the log probability of the last sampled action.

        Raises:
            NotImplementedError: always; subclasses must override.
        """
        raise NotImplementedError

    @DeveloperAPI
    def logp(self, x: TensorType) -> TensorType:
        """Return the log-likelihood of ``x`` under the action distribution.

        Raises:
            NotImplementedError: always; subclasses must override.
        """
        raise NotImplementedError

    @DeveloperAPI
    def kl(self, other: "ActionDistribution") -> TensorType:
        """Return the KL-divergence between this distribution and ``other``.

        Raises:
            NotImplementedError: always; subclasses must override.
        """
        raise NotImplementedError

    @DeveloperAPI
    def entropy(self) -> TensorType:
        """Return the entropy of the action distribution.

        Raises:
            NotImplementedError: always; subclasses must override.
        """
        raise NotImplementedError

    def multi_kl(self, other: "ActionDistribution") -> TensorType:
        """Return the KL-divergence between two action distributions.

        Unlike kl(), this can return an array for MultiDiscrete. The base
        implementation simply forwards to kl().
        TODO(ekl) consider removing this.
        """
        divergence = self.kl(other)
        return divergence

    def multi_entropy(self) -> TensorType:
        """Return the entropy of the action distribution.

        Unlike entropy(), this can return an array for MultiDiscrete. The
        base implementation simply forwards to entropy().
        TODO(ekl) consider removing this.
        """
        entropy_value = self.entropy()
        return entropy_value

    @DeveloperAPI
    @staticmethod
    def required_model_output_shape(
        action_space: gym.Space,
        model_config: ModelConfigDict,
    ) -> Union[int, np.ndarray]:
        """Return the required shape of the input parameter tensor.

        The shape is specific to a particular action space and an optional
        dict of distribution-specific options.

        Args:
            action_space (gym.Space): The action space this distribution will
                be used for, whose shape attributes will be used to determine
                the required shape of the input parameter tensor.
            model_config (dict): Model's config dict (as defined in
                catalog.py).

        Returns:
            model_output_shape (int or np.ndarray of ints): size of the
                required input vector (minus leading batch dimension).

        Raises:
            NotImplementedError: always; subclasses must override.
        """
        raise NotImplementedError
|