# pong-apex.yaml
#
# This reaches ~20 reward in 50 minutes (6M train steps, 2M env steps) on a
# p3.2xlarge AWS instance.
# See https://app.wandb.ai/zplizzi/test/runs/ayuuhixr?workspace=user-zplizzi
# for training curves.
---
# Top-level key names the experiment; everything below is nested under it.
# NOTE(review): nesting was reconstructed from a flattened copy of this file
# (env/run/config under the experiment, settings under config) -- confirm
# against the consumer's expected schema.
pong-apex:
  env: PongNoFrameskip-v4
  run: APEX
  config:
    # Works for both torch and tf.
    framework: tf
    target_network_update_freq: 20000
    num_workers: 4
    num_envs_per_worker: 8
    # Written with a leading zero for readability; same value as `.00005`.
    lr: 0.00005
    train_batch_size: 64
    gamma: 0.99