# Experiment entry "cartpole-ppo": runs PPO against the CartPole-v0 environment.
# NOTE(review): key layout looks like a Ray Tune / RLlib experiment spec —
# confirm against the loader that consumes this file.
cartpole-ppo:
  env: CartPole-v0
  run: PPO

  # Stopping thresholds for the run.
  # NOTE(review): presumably the run ends as soon as either threshold is
  # reached — verify with the consuming tool's stop semantics.
  stop:
    episode_reward_mean: 150
    timesteps_total: 100000

  # Algorithm settings handed to the trainer selected by `run`.
  config:
    # Works for both torch and tf.
    framework: tf
    gamma: 0.99
    lr: 0.0003
    num_workers: 1
    observation_filter: MeanStdFilter
    num_sgd_iter: 6
    vf_loss_coeff: 0.01

    # Network settings: one hidden layer of width 32 with a linear activation.
    model:
      fcnet_hiddens: [32]
      fcnet_activation: linear
      vf_share_layers: true