hopper-ppo:
    env: Hopper-v1
    run: PPO
    config:
        # Works for both torch and tf.
        framework: tf
        gamma: 0.995
        kl_coeff: 1.0
        num_sgd_iter: 20
        lr: .0001
        sgd_minibatch_size: 32768
        train_batch_size: 160000
        num_workers: 64
        num_gpus: 4
        batch_mode: complete_episodes
        observation_filter: MeanStdFilter
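
# A minimal usage sketch (not part of the experiment spec above): assuming
# Ray/RLlib is installed and this file is saved locally as hopper-ppo.yaml
# (the filename is an assumption), the experiment can be launched with the
# RLlib CLI, which loads the YAML and starts training:
#
#     rllib train -f hopper-ppo.yaml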