pong-ppo.yaml

# On a single GPU, this achieves maximum reward in ~15-20 minutes.
#
# $ python train.py -f tuned_configs/pong-ppo.yaml
#
pong-ppo:
    env: PongNoFrameskip-v4
    run: PPO
    config:
        # Works for both torch and tf.
        framework: tf
        lambda: 0.95
        kl_coeff: 0.5
        clip_rewards: True
        clip_param: 0.1
        vf_clip_param: 10.0
        entropy_coeff: 0.01
        train_batch_size: 5000
        rollout_fragment_length: 20
        sgd_minibatch_size: 500
        num_sgd_iter: 10
        num_workers: 32
        num_envs_per_worker: 5
        batch_mode: truncate_episodes
        observation_filter: NoFilter
        num_gpus: 1
        model:
            dim: 42
            vf_share_layers: true
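
The same tuned settings can also be launched directly from Python via Ray Tune, rather than through train.py. The sketch below is illustrative, not part of this file: it assumes a Ray/RLlib version where tune.run accepts a trainer name string (e.g. Ray 1.x) and that the Atari dependencies for PongNoFrameskip-v4 are installed; the stop criterion of 21 (Pong's maximum score) is an assumption based on the "maximum reward" comment above.

# Minimal sketch: running the tuned Pong PPO config via Ray Tune's Python
# API instead of `train.py`. The config dict mirrors the YAML above.
import ray
from ray import tune

ray.init()

tune.run(
    "PPO",
    name="pong-ppo",
    # Assumption: 21 is Pong's maximum per-episode score, matching the
    # "maximum reward" noted in the YAML comment.
    stop={"episode_reward_mean": 21.0},
    config={
        "env": "PongNoFrameskip-v4",
        "framework": "tf",
        "lambda": 0.95,
        "kl_coeff": 0.5,
        "clip_rewards": True,
        "clip_param": 0.1,
        "vf_clip_param": 10.0,
        "entropy_coeff": 0.01,
        "train_batch_size": 5000,
        "rollout_fragment_length": 20,
        "sgd_minibatch_size": 500,
        "num_sgd_iter": 10,
        "num_workers": 32,
        "num_envs_per_worker": 5,
        "batch_mode": "truncate_episodes",
        "observation_filter": "NoFilter",
        "num_gpus": 1,
        "model": {
            "dim": 42,
            "vf_share_layers": True,
        },
    },
)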