"""Simple example of writing experiences to a file using JsonWriter.

Steps a CartPole environment for 100 episodes with actions drawn from
``env.action_space.sample()``, records every transition in a
SampleBatchBuilder, and hands one built batch per episode to a JsonWriter
pointed at the "demo-out" directory inside Ray's user temp dir.
"""

# __sphinx_doc_begin__
import os

import gym
import numpy as np

import ray._private.utils
from ray.rllib.models.preprocessors import get_preprocessor
from ray.rllib.evaluation.sample_batch_builder import SampleBatchBuilder
from ray.rllib.offline.json_writer import JsonWriter

if __name__ == "__main__":
    # MultiAgentSampleBatchBuilder may be used here instead.
    batch_builder = SampleBatchBuilder()
    out_dir = os.path.join(ray._private.utils.get_user_temp_dir(), "demo-out")
    writer = JsonWriter(out_dir)

    # Building sample batches by hand is rarely what you want when a
    # simulator is available; it is done here purely for illustration.
    env = gym.make("CartPole-v0")

    # RLlib applies preprocessors for transforms such as one-hot encoding and
    # flattening of tuple and dict observations. CartPole gets a no-op
    # preprocessor, but more complex envs may need a real one.
    prep_cls = get_preprocessor(env.observation_space)
    prep = prep_cls(env.observation_space)
    print("The preprocessor is", prep)

    for eps_id in range(100):
        obs = env.reset()
        # Before the first step there is no previous action or reward, so
        # start from a zero action of the right shape and a zero reward.
        prev_action = np.zeros_like(env.action_space.sample())
        prev_reward = 0
        timestep = 0

        # The loop body always runs at least once; it ends on the step for
        # which the env reports `done`.
        while True:
            action = env.action_space.sample()
            next_obs, reward, done, info = env.step(action)

            step_values = dict(
                t=timestep,
                eps_id=eps_id,
                agent_index=0,
                obs=prep.transform(obs),
                actions=action,
                action_prob=1.0,  # put the true action probability here
                action_logp=0.0,
                rewards=reward,
                prev_actions=prev_action,
                prev_rewards=prev_reward,
                dones=done,
                infos=info,
                new_obs=prep.transform(next_obs),
            )
            batch_builder.add_values(**step_values)

            # Roll the current step over into the "previous" slots.
            obs, prev_action, prev_reward = next_obs, action, reward
            timestep += 1

            if done:
                break

        # Flush the finished episode to disk and start a fresh batch.
        writer.write(batch_builder.build_and_reset())
# __sphinx_doc_end__