# saving_experiences.py (2.0 KB)

  1. """Simple example of writing experiences to a file using JsonWriter."""
  2. # __sphinx_doc_begin__
  3. import gym
  4. import numpy as np
  5. import os
  6. import ray._private.utils
  7. from ray.rllib.models.preprocessors import get_preprocessor
  8. from ray.rllib.evaluation.sample_batch_builder import SampleBatchBuilder
  9. from ray.rllib.offline.json_writer import JsonWriter
  10. if __name__ == "__main__":
  11. batch_builder = SampleBatchBuilder() # or MultiAgentSampleBatchBuilder
  12. writer = JsonWriter(
  13. os.path.join(ray._private.utils.get_user_temp_dir(), "demo-out"))
  14. # You normally wouldn't want to manually create sample batches if a
  15. # simulator is available, but let's do it anyways for example purposes:
  16. env = gym.make("CartPole-v0")
  17. # RLlib uses preprocessors to implement transforms such as one-hot encoding
  18. # and flattening of tuple and dict observations. For CartPole a no-op
  19. # preprocessor is used, but this may be relevant for more complex envs.
  20. prep = get_preprocessor(env.observation_space)(env.observation_space)
  21. print("The preprocessor is", prep)
  22. for eps_id in range(100):
  23. obs = env.reset()
  24. prev_action = np.zeros_like(env.action_space.sample())
  25. prev_reward = 0
  26. done = False
  27. t = 0
  28. while not done:
  29. action = env.action_space.sample()
  30. new_obs, rew, done, info = env.step(action)
  31. batch_builder.add_values(
  32. t=t,
  33. eps_id=eps_id,
  34. agent_index=0,
  35. obs=prep.transform(obs),
  36. actions=action,
  37. action_prob=1.0, # put the true action probability here
  38. action_logp=0.0,
  39. rewards=rew,
  40. prev_actions=prev_action,
  41. prev_rewards=prev_reward,
  42. dones=done,
  43. infos=info,
  44. new_obs=prep.transform(new_obs))
  45. obs = new_obs
  46. prev_action = action
  47. prev_reward = rew
  48. t += 1
  49. writer.write(batch_builder.build_and_reset())
  50. # __sphinx_doc_end__