# cliff_walking_wall_env.py
  1. import gymnasium as gym
  2. from gymnasium import spaces
# Discrete action encodings used by CliffWalkingWallEnv.step()
# (0=up, 1=right, 2=down, 3=left — see the class docstring).
ACTION_UP = 0
ACTION_RIGHT = 1
ACTION_DOWN = 2
ACTION_LEFT = 3
  7. class CliffWalkingWallEnv(gym.Env):
  8. """Modified version of the CliffWalking environment from Farama-Foundation's
  9. Gymnasium with walls instead of a cliff.
  10. ### Description
  11. The board is a 4x12 matrix, with (using NumPy matrix indexing):
  12. - [3, 0] or obs==36 as the start at bottom-left
  13. - [3, 11] or obs==47 as the goal at bottom-right
  14. - [3, 1..10] or obs==37...46 as the cliff at bottom-center
  15. An episode terminates when the agent reaches the goal.
  16. ### Actions
  17. There are 4 discrete deterministic actions:
  18. - 0: move up
  19. - 1: move right
  20. - 2: move down
  21. - 3: move left
  22. You can also use the constants ACTION_UP, ACTION_RIGHT, ... defined above.
  23. ### Observations
  24. There are 3x12 + 2 possible states, not including the walls. If an action
  25. would move an agent into one of the walls, it simply stays in the same position.
  26. ### Reward
  27. Each time step incurs -1 reward, except reaching the goal which gives +10 reward.
  28. """
  29. def __init__(self, seed=42) -> None:
  30. self.observation_space = spaces.Discrete(48)
  31. self.action_space = spaces.Discrete(4)
  32. self.observation_space.seed(seed)
  33. self.action_space.seed(seed)
  34. def reset(self, *, seed=None, options=None):
  35. self.position = 36
  36. return self.position, {}
  37. def step(self, action):
  38. x = self.position // 12
  39. y = self.position % 12
  40. # UP
  41. if action == ACTION_UP:
  42. x = max(x - 1, 0)
  43. # RIGHT
  44. elif action == ACTION_RIGHT:
  45. if self.position != 36:
  46. y = min(y + 1, 11)
  47. # DOWN
  48. elif action == ACTION_DOWN:
  49. if self.position < 25 or self.position > 34:
  50. x = min(x + 1, 3)
  51. # LEFT
  52. elif action == ACTION_LEFT:
  53. if self.position != 47:
  54. y = max(y - 1, 0)
  55. else:
  56. raise ValueError(f"action {action} not in {self.action_space}")
  57. self.position = x * 12 + y
  58. done = self.position == 47
  59. reward = -1 if not done else 10
  60. return self.position, reward, done, False, {}