---
# On a single GPU, this achieves maximum reward in ~15-20 minutes.
#
# $ python train.py -f tuned_configs/pong-ppo.yaml
#
# Layout: the top-level key names the experiment; `env` and `run` select the
# environment and the algorithm, and `config` holds the algorithm settings.
pong-ppo:
  env: PongNoFrameskip-v4
  run: PPO
  config:
    # Works for both torch and tf.
    framework: tf

    # Loss / advantage-estimation settings.
    lambda: 0.95
    kl_coeff: 0.5
    clip_rewards: true
    clip_param: 0.1
    vf_clip_param: 10.0
    entropy_coeff: 0.01

    # Batch sizing: 5000 / 500 = 10 minibatches per pass over the train batch
    # (presumably repeated `num_sgd_iter` times -- confirm against the PPO docs).
    train_batch_size: 5000
    rollout_fragment_length: 20
    sgd_minibatch_size: 500
    num_sgd_iter: 10

    # Sampling parallelism: 32 workers x 5 envs per worker.
    num_workers: 32
    num_envs_per_worker: 5
    batch_mode: truncate_episodes
    observation_filter: NoFilter

    # Matches the "single GPU" note in the header.
    num_gpus: 1

    model:
      # NOTE(review): presumably the side length frames are resized to -- confirm.
      dim: 42
      vf_share_layers: true