# Experiment spec: one top-level key per experiment, whose value holds the
# environment, algorithm, stopping criteria and algorithm config.
# NOTE(review): the layout matches a Ray Tune / RLlib tuned-example file --
# confirm against the loader that consumes it.
cartpole-ppo:
  env: CartPole-v0
  run: PPO
  # Presumably the number of times the search space below is sampled; with a
  # 2 x 3 grid that would mean 18 trials in total -- TODO confirm.
  num_samples: 3
  # Stopping criteria. Presumably a trial ends as soon as either threshold is
  # reached (mean episode reward of 200, or 180 seconds of run time) --
  # verify against the consumer.
  stop:
    episode_reward_mean: 200
    time_total_s: 180
  config:
    # Works for both torch and tf.
    framework: tf
    num_workers: 1
    # Each `grid_search` lists the candidate values to sweep for its
    # parent key.
    num_sgd_iter:
      grid_search: [1, 4]
    sgd_minibatch_size:
      grid_search: [128, 256, 512]
    observation_filter: MeanStdFilter