# mountaincarcontinuous-ddpg.yaml
  1. # This configuration can expect to reach 90 reward in 10k-20k timesteps
  2. mountaincarcontinuous-ddpg:
  3. env: MountainCarContinuous-v0
  4. run: DDPG
  5. stop:
  6. episode_reward_mean: 90
  7. time_total_s: 600 # 10 minutes
  8. config:
  9. # Works for both torch and tf.
  10. framework: tf
  11. # === Model ===
  12. actor_hiddens: [32, 64]
  13. critic_hiddens: [64, 64]
  14. n_step: 3
  15. model: {}
  16. gamma: 0.99
  17. env_config: {}
  18. # === Exploration ===
  19. exploration_config:
  20. initial_scale: 1.0
  21. final_scale: 0.02
  22. scale_timesteps: 40000
  23. ou_base_scale: 0.75
  24. ou_theta: 0.15
  25. ou_sigma: 0.2
  26. timesteps_per_iteration: 1000
  27. target_network_update_freq: 0
  28. tau: 0.01
  29. # === Replay buffer ===
  30. buffer_size: 50000
  31. prioritized_replay: False
  32. prioritized_replay_alpha: 0.6
  33. prioritized_replay_beta: 0.4
  34. prioritized_replay_eps: 0.000001
  35. clip_rewards: False
  36. # === Optimization ===
  37. actor_lr: 0.001
  38. critic_lr: 0.001
  39. use_huber: False
  40. huber_threshold: 1.0
  41. l2_reg: 0.00001
  42. learning_starts: 1000
  43. rollout_fragment_length: 1
  44. train_batch_size: 64
  45. # === Parallelism ===
  46. num_workers: 0
  47. num_gpus_per_worker: 0
  48. worker_side_prioritization: False
  49. # === Evaluation ===
  50. evaluation_interval: 5
  51. evaluation_num_episodes: 10