1234567891011121314151617181920212223242526272829303132333435 |
- from gym.spaces import Box
- import numpy as np
- from gym.envs.classic_control import PendulumEnv
- class StatelessPendulum(PendulumEnv):
- """Partially observable variant of the Pendulum gym environment.
- https://github.com/openai/gym/blob/master/gym/envs/classic_control/
- pendulum.py
- We delete the angular velocity component of the state, so that it
- can only be solved by a memory enhanced model (policy).
- """
- def __init__(self, config=None):
- config = config or {}
- g = config.get("g", 10.0)
- super().__init__(g=g)
- # Fix our observation-space (remove angular velocity component).
- high = np.array([1., 1.], dtype=np.float32)
- self.observation_space = Box(low=-high, high=high, dtype=np.float32)
- def step(self, action):
- next_obs, reward, done, info = super().step(action)
- # next_obs is [cos(theta), sin(theta), theta-dot (angular velocity)]
- return next_obs[:-1], reward, done, info
- def reset(self):
- init_obs = super().reset()
- # init_obs is [cos(theta), sin(theta), theta-dot (angular velocity)]
- return init_obs[:-1]
|