---
# humanoid-ppo-gae.yaml
#
# Single experiment definition: run PPO on the Humanoid-v1 environment.
# The experiment name suggests `lambda` below is the GAE lambda -- confirm.
#
# NOTE(review): nesting was restored from a flattened paste (line-number
# prefixes stripped, indentation rebuilt). Confirm that `stop` holds only
# `episode_reward_mean`, that `model` holds only `free_log_std`, and that
# every remaining setting belongs under `config`.
humanoid-ppo-gae:
  env: Humanoid-v1
  run: PPO

  # Stopping criterion, keyed on the mean episode reward.
  stop:
    episode_reward_mean: 6000

  config:
    # Works for both torch and tf.
    framework: tf

    # Discounting / advantage-estimation parameters.
    gamma: 0.995
    lambda: 0.95

    # PPO objective and optimizer settings.
    clip_param: 0.2
    kl_coeff: 1.0
    num_sgd_iter: 20
    lr: 0.0001
    sgd_minibatch_size: 32768

    # Rollout length and batch sizing.
    horizon: 5000
    train_batch_size: 320000

    model:
      free_log_std: true

    # Resources requested for the run.
    num_workers: 64
    num_gpus: 4

    batch_mode: complete_episodes
    observation_filter: MeanStdFilter