---
# Experiment: train PG on the CartPoleCrashing example env, whose
# sub-environments crash at random, and check that learning still succeeds.
cartpole-crashing-pg:
  env: ray.rllib.examples.env.cartpole_crashing.CartPoleCrashing
  run: PG
  # Stopping criteria: target evaluation reward and an env-step budget.
  stop:
    evaluation/sampler_results/episode_reward_mean: 150.0
    num_env_steps_sampled: 150000
  config:
    # Works for both torch and tf.
    framework: torch
    env_config:
      config:
        # Crash roughly every 300 ts. This should be ok to measure 180.0
        # reward (episodes are 200 ts long).
        p_crash: 0.0025  # prob to crash during step()
        p_crash_reset: 0.01  # prob to crash during reset()
        # Time for the env to initialize when newly created.
        # Every time a remote sub-environment crashes, a new env is created
        # in its place and will take this long (sleep) to "initialize".
        init_time_s: 1.0
    num_workers: 2
    num_envs_per_worker: 5

    # Disable env checking. Env checker doesn't handle Exceptions from
    # user envs, and will crash rollout worker.
    disable_env_checking: true

    # Switch on resiliency for failed sub environments (within a
    # vectorized stack).
    restart_failed_sub_environments: true

    # Evaluation setup: 20 episodes after every training iteration, on
    # dedicated eval workers, run in parallel to training.
    evaluation_num_workers: 2
    evaluation_interval: 1
    evaluation_duration: 20
    evaluation_duration_unit: episodes
    evaluation_parallel_to_training: true
    evaluation_config:
      explore: false
      env_config:
        config:
          # Make eval workers solid.
          # This test is to prove that we can learn with crashing env,
          # not eval with crashing env.
          p_crash: 0.0
          p_crash_reset: 0.0
          init_time_s: 0.0