---
# Experiment definition: PPO on the Hopper-v1 environment.
# Layout is one top-level experiment name mapping to `env`, `run`, and a
# nested `config` mapping holding the algorithm settings.
# NOTE(review): this looks like a Ray RLlib tuned-example spec — confirm the
# consumer expects exactly this nesting.
hopper-ppo:
  env: Hopper-v1
  run: PPO
  config:
    # Works for both torch and tf.
    framework: tf
    gamma: 0.995
    kl_coeff: 1.0
    num_sgd_iter: 20
    lr: 0.0001
    # NOTE(review): train_batch_size is not an exact multiple of
    # sgd_minibatch_size (160000 / 32768 ≈ 4.88) — confirm this is intended.
    sgd_minibatch_size: 32768
    train_batch_size: 160000
    # NOTE(review): 64 workers and 4 GPUs presume a large cluster; scale
    # these down together when running on smaller hardware.
    num_workers: 64
    num_gpus: 4
    batch_mode: complete_episodes
    observation_filter: MeanStdFilter