---
# pendulum-apex-ddpg.yaml
#
# Experiment spec that runs APEX_DDPG on the Pendulum-v1 environment.
#
# This can be expected to reach -160 reward within 2.5 timesteps / ~250 seconds
# on a K40 GPU.
# NOTE(review): "2.5 timesteps" is almost certainly missing a unit multiplier
# (e.g. k or M) -- confirm the intended figure against the original benchmark.
pendulum-apex-ddpg:
  env: Pendulum-v1
  run: APEX_DDPG

  # Stopping criterion: the same -160 mean episode reward quoted in the header.
  stop:
    sampler_results/episode_reward_mean: -160

  # Algorithm settings handed to the APEX_DDPG run.
  config:
    # Works for both torch and tf.
    framework: torch
    use_huber: true
    clip_rewards: false
    num_workers: 16
    n_step: 1
    # NOTE(review): tau of 1.0 together with an update frequency presumably
    # means a full (hard) target-network copy every 50000 steps -- confirm.
    target_network_update_freq: 50000
    tau: 1.0
    evaluation_interval: 5
    evaluation_duration: 10