# compact-regression-test.yaml

# This file runs on a single g3.16xl or p3.16xl node. It is suggested
# to run these in a DLAMI / tensorflow_p36 env. Note that RL runs are
# inherently high variance, so you'll have to check that the rewards
# reached are reasonably in line with previous results.
#
# You can find the reference results here:
# https://github.com/ray-project/ray/tree/master/release/release_logs
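#
# To launch the whole suite, something like the following should work
# (assuming Ray/RLlib is installed on the node; the exact CLI invocation
# may differ slightly between Ray versions):
#
#   rllib train -f compact-regression-test.yaml
#
# Each top-level key below is one Tune experiment; every run is stopped
# after one hour of wall-clock training time (time_total_s: 3600).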
atari-impala:
    env: BreakoutNoFrameskip-v4
    run: IMPALA
    num_samples: 4
    stop:
        time_total_s: 3600
    config:
        rollout_fragment_length: 50
        train_batch_size: 500
        num_workers: 10
        num_envs_per_worker: 5
        clip_rewards: True
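        # lr_schedule is a list of [timestep, lr] points; RLlib linearly
        # interpolates between them, so the learning rate decays from
        # 5e-4 toward ~0 over the first 20M timesteps.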
        lr_schedule: [
            [0, 0.0005],
            [20000000, 0.000000000001],
        ]
        num_gpus: 1
atari-ppo-tf:
    env: BreakoutNoFrameskip-v4
    run: PPO
    num_samples: 4
    stop:
        time_total_s: 3600
    config:
        lambda: 0.95
        kl_coeff: 0.5
        clip_rewards: True
        clip_param: 0.1
        vf_clip_param: 10.0
        entropy_coeff: 0.01
        train_batch_size: 5000
        rollout_fragment_length: 100
        sgd_minibatch_size: 500
        num_sgd_iter: 10
        num_workers: 10
        num_envs_per_worker: 5
        batch_mode: truncate_episodes
        observation_filter: NoFilter
        model:
            vf_share_layers: true
        num_gpus: 1
atari-ppo-torch:
    env: BreakoutNoFrameskip-v4
    run: PPO
    num_samples: 4
    stop:
        time_total_s: 3600
    config:
        framework: torch
        lambda: 0.95
        kl_coeff: 0.5
        clip_rewards: True
        clip_param: 0.1
        vf_clip_param: 10.0
        entropy_coeff: 0.01
        train_batch_size: 5000
        rollout_fragment_length: 100
        sgd_minibatch_size: 500
        num_sgd_iter: 10
        num_workers: 10
        num_envs_per_worker: 5
        batch_mode: truncate_episodes
        observation_filter: NoFilter
        model:
            vf_share_layers: true
        num_gpus: 1
apex:
    env: BreakoutNoFrameskip-v4
    run: APEX
    num_samples: 4
    stop:
        time_total_s: 3600
    config:
        double_q: false
        dueling: false
        num_atoms: 1
        noisy: false
        n_step: 3
        lr: .0001
        adam_epsilon: .00015
        hiddens: [512]
        buffer_size: 1000000
        exploration_config:
            epsilon_timesteps: 200000
            final_epsilon: 0.01
        prioritized_replay_alpha: 0.5
        final_prioritized_replay_beta: 1.0
        prioritized_replay_beta_annealing_timesteps: 2000000
        num_gpus: 1
        num_workers: 8
        num_envs_per_worker: 8
        rollout_fragment_length: 20
        train_batch_size: 512
        target_network_update_freq: 50000
        timesteps_per_iteration: 25000
atari-a2c:
    env: BreakoutNoFrameskip-v4
    run: A2C
    num_samples: 4
    stop:
        time_total_s: 3600
    config:
        rollout_fragment_length: 20
        clip_rewards: True
        num_workers: 5
        num_envs_per_worker: 5
        num_gpus: 1
        lr_schedule: [
            [0, 0.0007],
            [20000000, 0.000000000001],
        ]
atari-basic-dqn:
    env: BreakoutNoFrameskip-v4
    run: DQN
    num_samples: 4
    stop:
        time_total_s: 3600
    config:
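        # Disable all Rainbow-style extensions so this runs as a vanilla DQN baseline.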
        double_q: false
        dueling: false
        num_atoms: 1
        noisy: false
        prioritized_replay: false
        n_step: 1
        target_network_update_freq: 8000
        lr: .0000625
        adam_epsilon: .00015
        hiddens: [512]
        learning_starts: 20000
        buffer_size: 1000000
        rollout_fragment_length: 4
        train_batch_size: 32
        exploration_config:
            epsilon_timesteps: 200000
            final_epsilon: 0.01
        prioritized_replay_alpha: 0.5
        final_prioritized_replay_beta: 1.0
        prioritized_replay_beta_annealing_timesteps: 2000000
        num_gpus: 0.2
        timesteps_per_iteration: 10000