# Experiment "cartpole-ppo": runs PPO on CartPole-v0 and sweeps two
# hyperparameters via grid_search entries (2 x 3 = 6 combinations).
# NOTE(review): layout follows the Ray Tune / RLlib experiment-file
# convention (env / run / stop / config) -- confirm against the consumer.
cartpole-ppo:
  env: CartPole-v0
  run: PPO

  # Stopping criteria. Presumably a trial ends as soon as either threshold
  # is reached (mean episode reward of 200, or 180 s of total run time) --
  # verify against the Tune version in use.
  stop:
    episode_reward_mean: 200
    time_total_s: 180

  config:
    # Works for both torch and tf.
    framework: tf
    num_workers: 2
    # Each grid_search list enumerates the candidate values to try.
    num_sgd_iter:
      grid_search: [1, 4]
    sgd_minibatch_size:
      grid_search: [128, 256, 512]