---
# Experiment definition with a single top-level entry, `humanoid-ppo-gae`,
# which runs PPO on the Humanoid-v1 environment.
# NOTE(review): the env/run/stop/config layout looks like a Ray Tune / RLlib
# experiment spec -- confirm against the loader that consumes this file.
# NOTE(review): the nesting below was reconstructed from a copy of this file
# that had lost all line breaks and indentation. `model` is assumed to hold
# only `free_log_std`, with `num_workers` onward belonging to `config` --
# confirm.
humanoid-ppo-gae:
  env: Humanoid-v1
  run: PPO

  # Stopping criterion (presumably the run ends once the mean episode reward
  # reaches this value -- confirm).
  stop:
    episode_reward_mean: 6000

  config:
    # Works for both torch and tf.
    framework: tf

    # Algorithm hyperparameters.
    gamma: 0.995
    lambda: 0.95
    clip_param: 0.2
    kl_coeff: 1.0
    num_sgd_iter: 20
    lr: 0.0001
    sgd_minibatch_size: 32768
    horizon: 5000
    train_batch_size: 320000

    model:
      free_log_std: true

    # Resource and sampling settings.
    num_workers: 64
    num_gpus: 4
    batch_mode: complete_episodes
    observation_filter: MeanStdFilter