# pettingzoo_env.py

from ray.rllib.env.multi_agent_env import MultiAgentEnv


class PettingZooEnv(MultiAgentEnv):
    """An interface to the PettingZoo MARL environment library.

    See: https://github.com/Farama-Foundation/PettingZoo

    Inherits from MultiAgentEnv and exposes a given AEC
    (actor-environment-cycle) game from the PettingZoo project via the
    MultiAgentEnv public API.

    Note that the wrapper has some important limitations:

    1. All agents have the same action_spaces and observation_spaces.
       Note: If, within your AEC game, agents do not have homogeneous action /
       observation spaces, apply SuperSuit wrappers to add padding
       functionality: https://github.com/Farama-Foundation/
       SuperSuit#built-in-multi-agent-only-functions
       (a sketch of this is shown after this class).
    2. Environments are positive-sum games (-> agents are expected to cooperate
       to maximize reward). This isn't a hard restriction; it's just that
       standard algorithms aren't expected to work well in highly competitive
       games.

    Examples:
        >>> from pettingzoo.butterfly import prison_v3
        >>> env = PettingZooEnv(prison_v3.env())
        >>> obs = env.reset()
        >>> print(obs)
        # only returns the observation for the agent which should be stepping
        {
            'prisoner_0': array([[[0, 0, 0],
                [0, 0, 0],
                [0, 0, 0],
                ...,
                [0, 0, 0],
                [0, 0, 0],
                [0, 0, 0]]], dtype=uint8)
        }
        >>> obs, rewards, dones, infos = env.step({
        ...     "prisoner_0": 1
        ... })
        # only returns the observation, reward, info, etc., for
        # the agent whose turn is next.
        >>> print(obs)
        {
            'prisoner_1': array([[[0, 0, 0],
                [0, 0, 0],
                [0, 0, 0],
                ...,
                [0, 0, 0],
                [0, 0, 0],
                [0, 0, 0]]], dtype=uint8)
        }
        >>> print(rewards)
        {
            'prisoner_1': 0
        }
        >>> print(dones)
        {
            'prisoner_1': False, '__all__': False
        }
        >>> print(infos)
        {
            'prisoner_1': {'map_tuple': (1, 0)}
        }
    """

    def __init__(self, env):
        super().__init__()
        self.env = env
        env.reset()

        # Get first observation space, assuming all agents have equal space.
        self.observation_space = self.env.observation_space(self.env.agents[0])

        # Get first action space, assuming all agents have equal space.
        self.action_space = self.env.action_space(self.env.agents[0])

        assert all(
            self.env.observation_space(agent) == self.observation_space
            for agent in self.env.agents), \
            "Observation spaces for all agents must be identical. Perhaps " \
            "SuperSuit's pad_observations wrapper can help (usage: " \
            "`supersuit.aec_wrappers.pad_observations(env)`)."

        assert all(
            self.env.action_space(agent) == self.action_space
            for agent in self.env.agents), \
            "Action spaces for all agents must be identical. Perhaps " \
            "SuperSuit's pad_action_space wrapper can help (usage: " \
            "`supersuit.aec_wrappers.pad_action_space(env)`)."

    def reset(self):
        self.env.reset()
        # Return only the observation of the agent that acts next.
        return {
            self.env.agent_selection: self.env.observe(
                self.env.agent_selection)
        }

    def step(self, action):
        # Step the currently selected agent with its action.
        self.env.step(action[self.env.agent_selection])
        obs_d = {}
        rew_d = {}
        done_d = {}
        info_d = {}
        # Collect results for agents until we reach one that still has to act
        # (already-done agents are stepped with `None` and skipped over), or
        # until no agents are left in the game.
        while self.env.agents:
            obs, rew, done, info = self.env.last()
            a = self.env.agent_selection
            obs_d[a] = obs
            rew_d[a] = rew
            done_d[a] = done
            info_d[a] = info
            if self.env.dones[self.env.agent_selection]:
                self.env.step(None)
            else:
                break

        all_done = not self.env.agents
        done_d["__all__"] = all_done

        return obs_d, rew_d, done_d, info_d

    def close(self):
        self.env.close()

    def seed(self, seed=None):
        self.env.seed(seed)

    def render(self, mode="human"):
        return self.env.render(mode)

    @property
    def get_sub_environments(self):
        return self.env.unwrapped
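
# --- Example (a sketch, not part of the original module) ---------------------
# The docstring above points at SuperSuit's padding wrappers for games whose
# agents do not share identical spaces. Below is a minimal sketch of how that
# could look, assuming `pettingzoo` and `supersuit` are installed; the chosen
# env module (`pursuit_v4`) and the wrapper version suffixes (`_v0`) are
# illustrative and may differ across releases.
def _example_padded_aec_env():
    import supersuit
    from pettingzoo.sisl import pursuit_v4  # any AEC game could be used here

    env = pursuit_v4.env()
    # Pad observations / action spaces so that all agents end up with the
    # identical spaces asserted in PettingZooEnv.__init__ above.
    env = supersuit.pad_observations_v0(env)
    env = supersuit.pad_action_space_v0(env)
    return PettingZooEnv(env)
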
class ParallelPettingZooEnv(MultiAgentEnv):
    """Wraps a PettingZoo ParallelEnv (all agents step simultaneously) as an
    RLlib MultiAgentEnv."""

    def __init__(self, env):
        super().__init__()
        self.par_env = env
        self.par_env.reset()

        # Get first observation space, assuming all agents have equal space.
        self.observation_space = self.par_env.observation_space(
            self.par_env.agents[0])

        # Get first action space, assuming all agents have equal space.
        self.action_space = self.par_env.action_space(self.par_env.agents[0])

        assert all(
            self.par_env.observation_space(agent) == self.observation_space
            for agent in self.par_env.agents), \
            "Observation spaces for all agents must be identical. Perhaps " \
            "SuperSuit's pad_observations wrapper can help (usage: " \
            "`supersuit.aec_wrappers.pad_observations(env)`)."

        assert all(
            self.par_env.action_space(agent) == self.action_space
            for agent in self.par_env.agents), \
            "Action spaces for all agents must be identical. Perhaps " \
            "SuperSuit's pad_action_space wrapper can help (usage: " \
            "`supersuit.aec_wrappers.pad_action_space(env)`)."

    def reset(self):
        return self.par_env.reset()

    def step(self, action_dict):
        obss, rews, dones, infos = self.par_env.step(action_dict)
        # RLlib expects a "__all__" key signaling whether the episode is over
        # for every agent.
        dones["__all__"] = all(dones.values())
        return obss, rews, dones, infos

    def close(self):
        self.par_env.close()

    def seed(self, seed=None):
        self.par_env.seed(seed)

    def render(self, mode="human"):
        return self.par_env.render(mode)

    @property
    def unwrapped(self):
        return self.par_env.unwrapped
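

# --- Usage sketch (an assumption, not part of the original module) -----------
# Registering the parallel wrapper with RLlib so an env can be referenced by
# name in a Trainer config. `pistonball_v6` is only an illustrative PettingZoo
# parallel env; the exact version suffix depends on the installed release.
if __name__ == "__main__":
    from ray import tune
    from pettingzoo.butterfly import pistonball_v6

    tune.register_env(
        "pistonball",
        lambda config: ParallelPettingZooEnv(pistonball_v6.parallel_env()))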