pong-appo.yaml

# This can reach 18-19 reward in ~5-7 minutes on a Titan XP GPU
# with 32 workers and 8 envs per worker. IMPALA, when run with a
# similar configuration, solves Pong in 10-12 minutes.
# APPO also solves Pong in ~2.5 million timesteps, roughly half
# the timesteps IMPALA needs (2x more sample-efficient).
pong-appo:
    env: PongNoFrameskip-v4
    run: APPO
    stop:
        episode_reward_mean: 18.0
        timesteps_total: 5000000
    config:
        # Works for both torch and tf.
        framework: tf
        vtrace: True
        use_kl_loss: False
        rollout_fragment_length: 50
        train_batch_size: 750
        num_workers: 32
        broadcast_interval: 1
        max_sample_requests_in_flight_per_worker: 1
        num_multi_gpu_tower_stacks: 1
        num_envs_per_worker: 8
        minibatch_buffer_size: 4
        num_sgd_iter: 2
        vf_loss_coeff: 1.0
        clip_param: 0.3
        num_gpus: 1
        grad_clip: 10
        model:
            dim: 42
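
# A minimal launcher sketch (an assumption about the surrounding workflow,
# not part of the tuned example itself): with Ray/RLlib and the Atari
# dependencies installed and this file saved as pong-appo.yaml, the
# experiment can be started via the RLlib CLI,
#
#   rllib train -f pong-appo.yaml
#
# or programmatically through Ray Tune:
#
#   import yaml
#   import ray
#   from ray import tune
#
#   with open("pong-appo.yaml") as f:
#       exp = yaml.safe_load(f)["pong-appo"]
#
#   ray.init()
#   tune.run(
#       exp["run"],                                   # "APPO"
#       stop=exp["stop"],                             # reward 18.0 or 5M steps
#       config={"env": exp["env"], **exp["config"]},
#   )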