---
# Experiment spec: runs PPO on the HalfCheetah-v2 environment.
# Layout (experiment name -> env / run / stop / config) looks like a
# Ray Tune experiment file -- confirm against the loader that consumes it.
halfcheetah-ppo:
  env: HalfCheetah-v2
  run: PPO

  # Stopping thresholds. Presumably the trial ends as soon as either one is
  # reached -- verify against the runner's stop semantics.
  stop:
    episode_reward_mean: 9800
    time_total_s: 10800  # 10800 s = 3 hours

  config:
    # Works for both torch and tf.
    framework: tf
    gamma: 0.99
    lambda: 0.95
    kl_coeff: 1.0
    num_sgd_iter: 32
    lr: 0.0003
    vf_loss_coeff: 0.5
    clip_param: 0.2
    # 65536 / 4096 = 16 minibatches per train batch.
    sgd_minibatch_size: 4096
    train_batch_size: 65536
    num_workers: 16
    num_gpus: 1
    grad_clip: 0.5
    # Swept value: one variant per entry in the grid_search list.
    num_envs_per_worker:
      grid_search: [16, 32]
    batch_mode: truncate_episodes
    observation_filter: MeanStdFilter