# APPO experiment on PongNoFrameskip-v4.
#
# This can reach 18-19 reward in ~5-7 minutes on a Titan XP GPU
# with 32 workers and 8 envs per worker. IMPALA, when run with
# similar configurations, solved Pong in 10-12 minutes.
# APPO can also solve Pong in 2.5 million timesteps, which is
# 2x more efficient than IMPALA.
pong-appo:
  env: PongNoFrameskip-v4
  run: APPO
  # Stopping thresholds. The timestep budget is 2x the 2.5M figure quoted
  # in the header. NOTE(review): presumably the run ends as soon as either
  # threshold is reached -- confirm against the experiment runner.
  stop:
    episode_reward_mean: 18.0
    timesteps_total: 5000000
  config:
    # Works for both torch and tf.
    framework: tf
    vtrace: true
    use_kl_loss: false
    rollout_fragment_length: 50
    train_batch_size: 750
    # 32 workers x 8 envs per worker (see num_envs_per_worker below) is the
    # setup the header timings refer to.
    num_workers: 32
    broadcast_interval: 1
    max_sample_requests_in_flight_per_worker: 1
    num_multi_gpu_tower_stacks: 1
    num_envs_per_worker: 8
    minibatch_buffer_size: 4
    num_sgd_iter: 2
    vf_loss_coeff: 1.0
    clip_param: 0.3
    # Single GPU, matching the one-GPU (Titan XP) benchmark in the header.
    num_gpus: 1
    grad_clip: 10
    model:
      dim: 42