# openoker/ray
# This can be expected to reach 90 reward within ~1.5-2.5M timesteps
# (~150-250 seconds on a K40 GPU).
mountaincarcontinuous-apex-ddpg:
    # Environment id and algorithm name used for this experiment.
    env: MountainCarContinuous-v0
    run: APEX_DDPG
    # Stopping criterion: a mean episode reward of 90.
    stop:
        sampler_results/episode_reward_mean: 90
    config:
        # Works for both torch and tf.
        framework: torch
        # Reward clipping is turned off (canonical lowercase boolean).
        clip_rewards: false
        num_workers: 16
        exploration_config:
            # NOTE(review): presumably the base scale of the
            # Ornstein-Uhlenbeck exploration noise; confirm against the
            # algorithm's exploration docs.
            ou_base_scale: 1.0
        n_step: 3
        # NOTE(review): tau of 1.0 combined with a 50000 update period looks
        # like a full (hard) target-network copy; confirm against the
        # algorithm docs.
        target_network_update_freq: 50000
        tau: 1.0
        # NOTE(review): the units of these two values are not stated in this
        # file; confirm against the evaluation settings docs.
        evaluation_interval: 5
        evaluation_duration: 10