# openoker/ray

# ARS was never tested/tuned on Hopper. Maybe change to Reacher-v4?
# ars-hopper-v4:
#    env: Hopper-v4
#    run: ARS
#    # Minimum reward and total timesteps (within the given time_total_s) to pass this test.
#    pass_criteria:
#        sampler_results/episode_reward_mean: 100.0
#        timesteps_total: 2000000
#    stop:
#        time_total_s: 2000
#    config:
#        noise_stdev: 0.01
#        num_rollouts: 1
#        rollouts_used: 1
#        num_workers: 1
#        sgd_stepsize: 0.02
#        noise_size: 250000000
#        eval_prob: 0.2
#        offset: 0
#        observation_filter: NoFilter
#        report_length: 3

# Basically the same as atari-ppo, but adapted for DDPPO. Note that DDPPO
# isn't actually any more efficient on Atari, since the network size is
# relatively small and the env doesn't require a GPU.
# ddppo-breakoutnoframeskip-v5:
#    env: ALE/Breakout-v5
#    run: DDPPO
#    # Minimum reward and total timesteps (within the given time_total_s) to pass this test.
#    pass_criteria:
#        sampler_results/episode_reward_mean: 50.0
#        timesteps_total: 10000000
#    stop:
#        time_total_s: 3600
#    config:
#        # DDPPO only supports PyTorch so far.
#        frameworks: [ "torch" ]
#        # Make analogous to old v4 + NoFrameskip.
#        env_config:
#            frameskip: 1
#            full_action_space: false
#            repeat_action_probability: 0.0
#        # Worker config: 16 workers, each of which requires a GPU.
#        num_workers: 16
#        # Workers require GPUs, but share 1 GPU amongst 4 workers.
#        num_gpus_per_worker: 0.25
#        # Each worker will sample 100 * 5 envs per worker steps = 500 steps
#        # per optimization round. This is 8000 steps summed across workers.
#        rollout_fragment_length: 100
#        num_envs_per_worker: 5
#        # Each worker will take a minibatch of 50. There are 16 workers total,
#        # so the effective minibatch size will be 800.
#        sgd_minibatch_size: 50
#        num_sgd_iter: 30
#        # Params from standard PPO Atari config:
#        lambda: 0.95
#        kl_coeff: 0.5
#        clip_rewards: true
#        clip_param: 0.1
#        vf_loss_coeff: 0.1
#        vf_clip_param: 10.0
#        entropy_coeff: 0.01
#        batch_mode: truncate_episodes
#        observation_filter: NoFilter