# Tuned example: APEX_DDPG on MountainCarContinuous-v0.
# This can be expected to reach 90 reward within ~1.5-2.5M timesteps /
# ~150-250 seconds on a K40 GPU.
mountaincarcontinuous-apex-ddpg:
  env: MountainCarContinuous-v0
  run: APEX_DDPG
  # Stopping criterion: mean episode reward of 90 (see header comment).
  stop:
    sampler_results/episode_reward_mean: 90
  config:
    # Works for both torch and tf.
    framework: torch
    clip_rewards: false
    num_workers: 16
    # NOTE(review): the nesting in this file was reconstructed after the
    # indentation was lost. Only `ou_base_scale` is placed under
    # `exploration_config`; the keys that follow are treated as
    # algorithm-level settings. Confirm against the RLlib config schema.
    exploration_config:
      ou_base_scale: 1.0
    n_step: 3
    # NOTE(review): tau = 1.0 together with a 50000 update interval
    # presumably means periodic hard target-network updates - confirm.
    target_network_update_freq: 50000
    tau: 1.0
    evaluation_interval: 5
    evaluation_duration: 10