# pong-rainbow.yaml
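# RLlib Tune experiment: DQN with the Rainbow components configured below
# (distributional Q-learning over 51 atoms, noisy nets, 3-step returns,
# prioritized replay) on ALE/Pong-v5.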

pong-deterministic-rainbow:
    env: ALE/Pong-v5
    run: DQN
    stop:
        sampler_results/episode_reward_mean: 20
    config:
        # Make analogous to old v4 + NoFrameskip.
        env_config:
            frameskip: 1
            full_action_space: false
            repeat_action_probability: 0.0
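        # Rainbow pieces: distributional Q-learning (51 atoms) and noisy
        # layers for exploration.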
        num_atoms: 51
        noisy: True
        gamma: 0.99
        lr: .0001
        hiddens: [512]
        rollout_fragment_length: 4
        train_batch_size: 32
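        # Noisy layers take over exploration, so epsilon-greedy is annealed
        # to 0.0 after just 2 timesteps, i.e. effectively disabled.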
        exploration_config:
            epsilon_timesteps: 2
            final_epsilon: 0.0
        target_network_update_freq: 500
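        # Prioritized replay and 3-step returns round out the Rainbow setup.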
        replay_buffer_config:
            type: MultiAgentPrioritizedReplayBuffer
            prioritized_replay_alpha: 0.5
            capacity: 50000
        num_steps_sampled_before_learning_starts: 10000
        n_step: 3
        gpu: True
        model:
            # Downscale frames to 42x42 grayscale inputs.
            grayscale: True
            zero_mean: False
            dim: 42
        # Set compress_observations to True because few machines could hold
        # the uncompressed replay buffer in memory.
        compress_observations: True
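
# A minimal way to launch this experiment with the RLlib CLI; the exact flag
# spelling depends on the Ray release (a sketch, not the only option):
#
#   rllib train -f pong-rainbow.yaml       # older Ray releases
#   rllib train file pong-rainbow.yaml     # newer Ray releases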