---
# humanoid-ppo.yaml
#
# Single experiment entry, keyed by experiment name, with its environment,
# algorithm, stop criteria and algorithm config.
#
# NOTE(review): the nesting below was reconstructed from a copy that had lost
# its indentation. In particular, confirm that only `free_log_std` belongs
# under `model` and that `use_gae` onward are direct children of `config`.
humanoid-ppo:
  env: Humanoid-v1
  run: PPO
  stop:
    episode_reward_mean: 6000
  config:
    # Works for both torch and tf.
    framework: tf
    gamma: 0.995
    kl_coeff: 1.0
    num_sgd_iter: 20
    lr: 0.0001
    sgd_minibatch_size: 32768
    train_batch_size: 320000
    model:
      free_log_std: true
    use_gae: false
    num_workers: 64
    num_gpus: 4
    batch_mode: complete_episodes
    observation_filter: MeanStdFilter