# halfcheetah-ppo.yaml

  1. halfcheetah-ppo:
  2. env: HalfCheetah-v2
  3. run: PPO
  4. stop:
  5. episode_reward_mean: 9800
  6. time_total_s: 10800
  7. config:
  8. # Works for both torch and tf.
  9. framework: tf
  10. gamma: 0.99
  11. lambda: 0.95
  12. kl_coeff: 1.0
  13. num_sgd_iter: 32
  14. lr: .0003
  15. vf_loss_coeff: 0.5
  16. clip_param: 0.2
  17. sgd_minibatch_size: 4096
  18. train_batch_size: 65536
  19. num_workers: 16
  20. num_gpus: 1
  21. grad_clip: 0.5
  22. num_envs_per_worker:
  23. grid_search: [16, 32]
  24. batch_mode: truncate_episodes
  25. observation_filter: MeanStdFilter