# This file runs on a single g3.16xl or p3.16xl node. It is suggested
# to run these in a DLAMI / tensorflow_p36 env. Note that RL runs are
# inherently high variance, so you will have to check whether the
# rewards reached are reasonably in line with previous results.
#
# You can find the reference results here:
# https://github.com/ray-project/ray/tree/master/release/release_logs
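#
# A usage sketch, assuming the `rllib` CLI that ships with the matching
# Ray release (the filename is a placeholder for wherever this file is
# saved):
#
#   rllib train -f <this_file>.yaml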
atari-impala:
    env: BreakoutNoFrameskip-v4
    run: IMPALA
    num_samples: 4
    stop:
        time_total_s: 3600
    config:
        rollout_fragment_length: 50
        train_batch_size: 500
        num_workers: 10
        num_envs_per_worker: 5
        clip_rewards: True
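        # lr_schedule is a list of [timestep, learning_rate] pairs; RLlib
        # interpolates linearly between them, here annealing the learning
        # rate from 5e-4 to effectively zero over the first 20M timesteps.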
        lr_schedule: [
            [0, 0.0005],
            [20000000, 0.000000000001],
        ]
        num_gpus: 1
atari-ppo-tf:
    env: BreakoutNoFrameskip-v4
    run: PPO
    num_samples: 4
    stop:
        time_total_s: 3600
    config:
        lambda: 0.95
        kl_coeff: 0.5
        clip_rewards: True
        clip_param: 0.1
        vf_clip_param: 10.0
        entropy_coeff: 0.01
        train_batch_size: 5000
        rollout_fragment_length: 100
        sgd_minibatch_size: 500
        num_sgd_iter: 10
        num_workers: 10
        num_envs_per_worker: 5
        batch_mode: truncate_episodes
        observation_filter: NoFilter
        model:
            vf_share_layers: true
        num_gpus: 1
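# Same PPO setup as atari-ppo-tf above, only switched to the PyTorch
# framework so both backends are covered by the regression run.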
atari-ppo-torch:
    env: BreakoutNoFrameskip-v4
    run: PPO
    num_samples: 4
    stop:
        time_total_s: 3600
    config:
        framework: torch
        lambda: 0.95
        kl_coeff: 0.5
        clip_rewards: True
        clip_param: 0.1
        vf_clip_param: 10.0
        entropy_coeff: 0.01
        train_batch_size: 5000
        rollout_fragment_length: 100
        sgd_minibatch_size: 500
        num_sgd_iter: 10
        num_workers: 10
        num_envs_per_worker: 5
        batch_mode: truncate_episodes
        observation_filter: NoFilter
        model:
            vf_share_layers: true
        num_gpus: 1
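# Ape-X DQN with the Rainbow-style extensions (double Q, dueling, noisy
# nets, distributional heads) switched off, i.e. distributed prioritized
# DQN with 3-step returns.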
apex:
    env: BreakoutNoFrameskip-v4
    run: APEX
    num_samples: 4
    stop:
        time_total_s: 3600
    config:
        double_q: false
        dueling: false
        num_atoms: 1
        noisy: false
        n_step: 3
        lr: 0.0001
        adam_epsilon: 0.00015
        hiddens: [512]
        buffer_size: 1000000
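        # Epsilon-greedy exploration, annealed from the default initial
        # epsilon (1.0, unless overridden) down to 0.01 over the first
        # 200k timesteps.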
        exploration_config:
            epsilon_timesteps: 200000
            final_epsilon: 0.01
        prioritized_replay_alpha: 0.5
        final_prioritized_replay_beta: 1.0
        prioritized_replay_beta_annealing_timesteps: 2000000
        num_gpus: 1
        num_workers: 8
        num_envs_per_worker: 8
        rollout_fragment_length: 20
        train_batch_size: 512
        target_network_update_freq: 50000
        timesteps_per_iteration: 25000
atari-a2c:
    env: BreakoutNoFrameskip-v4
    run: A2C
    num_samples: 4
    stop:
        time_total_s: 3600
    config:
        rollout_fragment_length: 20
        clip_rewards: True
        num_workers: 5
        num_envs_per_worker: 5
        num_gpus: 1
        lr_schedule: [
            [0, 0.0007],
            [20000000, 0.000000000001],
        ]
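# Plain DQN baseline: double Q, dueling, noisy nets, distributional
# heads, and prioritized replay are all disabled.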
atari-basic-dqn:
    env: BreakoutNoFrameskip-v4
    run: DQN
    num_samples: 4
    stop:
        time_total_s: 3600
    config:
        double_q: false
        dueling: false
        num_atoms: 1
        noisy: false
        prioritized_replay: false
        n_step: 1
        target_network_update_freq: 8000
        lr: 0.0000625
        adam_epsilon: 0.00015
        hiddens: [512]
        learning_starts: 20000
        buffer_size: 1000000
        rollout_fragment_length: 4
        train_batch_size: 32
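        # Epsilon-greedy exploration, annealed to 0.01 over the first
        # 200k timesteps. The prioritized_replay_* settings below have
        # no effect here, since prioritized_replay is false.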
        exploration_config:
            epsilon_timesteps: 200000
            final_epsilon: 0.01
        prioritized_replay_alpha: 0.5
        final_prioritized_replay_beta: 1.0
        prioritized_replay_beta_annealing_timesteps: 2000000
        num_gpus: 0.2
        timesteps_per_iteration: 10000