---
# File: mountaincarcontinuous-apex-ddpg.yaml
#
# This can be expected to reach 90 reward within ~1.5-2.5m timesteps /
# ~150-250 seconds on a K40 GPU.
mountaincarcontinuous-apex-ddpg:
  env: MountainCarContinuous-v0
  run: APEX_DDPG
  # Stop criterion: the run ends once this metric reaches the given value.
  stop:
    sampler_results/episode_reward_mean: 90
  config:
    # Works for both torch and tf.
    framework: torch
    clip_rewards: false
    num_workers: 16
    # NOTE(review): only ou_base_scale is nested here; the keys that follow
    # are assumed to be algorithm-level settings -- confirm against the
    # APEX_DDPG config schema.
    exploration_config:
      ou_base_scale: 1.0
    n_step: 3
    # NOTE(review): tau of 1.0 together with an update frequency presumably
    # means periodic hard target-network updates -- confirm.
    target_network_update_freq: 50000
    tau: 1.0
    evaluation_interval: 5
    evaluation_duration: 10