123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657 |
- """Simple example of writing experiences to a file using JsonWriter."""
- # __sphinx_doc_begin__
- import gym
- import numpy as np
- import os
- import ray._private.utils
- from ray.rllib.models.preprocessors import get_preprocessor
- from ray.rllib.evaluation.sample_batch_builder import SampleBatchBuilder
- from ray.rllib.offline.json_writer import JsonWriter
if __name__ == "__main__":
    # Collects per-timestep values; build_and_reset() below turns everything
    # added since the previous call into one batch.
    batch_builder = SampleBatchBuilder()  # or MultiAgentSampleBatchBuilder
    # Output goes to "<user temp dir>/demo-out".
    writer = JsonWriter(
        os.path.join(ray._private.utils.get_user_temp_dir(), "demo-out"))

    # You normally wouldn't want to manually create sample batches if a
    # simulator is available, but let's do it anyways for example purposes:
    env = gym.make("CartPole-v0")

    try:
        # RLlib uses preprocessors to implement transforms such as one-hot
        # encoding and flattening of tuple and dict observations. For CartPole
        # a no-op preprocessor is used, but this may be relevant for more
        # complex envs.
        prep = get_preprocessor(env.observation_space)(env.observation_space)
        print("The preprocessor is", prep)

        for eps_id in range(100):
            obs = env.reset()
            # No action/reward precedes the first step, so start from a
            # zero action of the right shape/dtype and a zero reward.
            prev_action = np.zeros_like(env.action_space.sample())
            prev_reward = 0
            done = False
            t = 0
            while not done:
                # Actions are sampled at random from the action space.
                action = env.action_space.sample()
                new_obs, rew, done, info = env.step(action)
                batch_builder.add_values(
                    t=t,
                    eps_id=eps_id,
                    agent_index=0,
                    obs=prep.transform(obs),
                    actions=action,
                    action_prob=1.0,  # put the true action probability here
                    action_logp=0.0,
                    rewards=rew,
                    prev_actions=prev_action,
                    prev_rewards=prev_reward,
                    dones=done,
                    infos=info,
                    new_obs=prep.transform(new_obs))
                # Roll the "previous" values forward for the next timestep.
                obs = new_obs
                prev_action = action
                prev_reward = rew
                t += 1
            # One write per finished episode.
            writer.write(batch_builder.build_and_reset())
    finally:
        # Release the environment even if the rollout or a write fails.
        env.close()
- # __sphinx_doc_end__
|