---
# Experiment definition: runs PPO on the Humanoid-v1 environment.
#
# NOTE(review): the nesting below was reconstructed from a copy whose
# indentation had been lost (every line was flattened to a "- " list item).
# The hierarchy follows the usual experiment layout
# (<name> -> env / run / stop / config) -- confirm against the consumer.
humanoid-ppo-gae:
  env: Humanoid-v1
  run: PPO
  # Stopping criterion, keyed by metric name.
  stop:
    episode_reward_mean: 6000
  config:
    # Works for both torch and tf.
    framework: tf
    gamma: 0.995
    lambda: 0.95
    clip_param: 0.2
    kl_coeff: 1.0
    num_sgd_iter: 20
    lr: 0.0001
    sgd_minibatch_size: 32768
    horizon: 5000
    train_batch_size: 320000
    model:
      free_log_std: true
    # NOTE(review): the keys below are assumed to sit at config level rather
    # than under `model`; the flattened source could not show this -- verify.
    num_workers: 64
    num_gpus: 4
    batch_mode: complete_episodes
    observation_filter: MeanStdFilter