# This can be expected to reach -160 reward within 2.5 timesteps / ~250 seconds on a K40 GPU
pendulum-apex-ddpg:
    env: Pendulum-v1
    run: APEX_DDPG
    stop:
        sampler_results/episode_reward_mean: -160
    config:
        # Works for both torch and tf.
        framework: torch
        # Use Huber loss for the critic (less sensitive to outlier TD errors).
        use_huber: True
        clip_rewards: False
        # Number of parallel rollout workers collecting samples.
        num_workers: 16
        # 1-step TD targets.
        n_step: 1
        # Copy the online networks into the target networks every 50k sampled timesteps.
        target_network_update_freq: 50000
        # tau=1.0 means hard target updates (a full copy) rather than soft/Polyak averaging.
        tau: 1.0
        # Evaluate every 5 training iterations, for 10 episodes each time.
        evaluation_interval: 5
        evaluation_duration: 10
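
# A minimal usage sketch, assuming this file is saved as pendulum-apex-ddpg.yaml and
# the RLlib command-line tool is installed (the exact flag spelling may vary across
# Ray versions):
#
#   rllib train -f pendulum-apex-ddpg.yaml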