# openoker/ray


			
				
					
						
						
# Runs on a p2.8xlarge single head node machine.
# Should reach ~400 reward in about 1h, after 15-20M timesteps.
# Experiment spec: IMPALA trained on the Breakout (NoFrameskip-v4) Atari env.
atari-impala:
    run: IMPALA
    env: BreakoutNoFrameskip-v4
    config:
        # Per the original note, this setup works with both torch and tf;
        # torch is the backend selected here.
        framework: torch

        # Sampling and batching.
        rollout_fragment_length: 50
        train_batch_size: 4000
        clip_rewards: True

        # Resources: 4 GPUs for the trainer, 31 rollout workers with
        # 5 environments each.
        num_gpus: 4
        num_workers: 31
        num_envs_per_worker: 5
        # Fractional values (<1.0) are also accepted here to give each
        # worker a partial GPU; 0 means workers get no GPU.
        num_gpus_per_worker: 0

        # Learning-rate schedule as [timestep, lr] pairs: starts at 0.0005
        # and reaches ~0 by 20M timesteps.
        # NOTE(review): presumably interpolated between the listed points;
        # confirm against the RLlib docs.
        lr_schedule:
            - [0, 0.0005]
            - [20000000, 0.000000000001]