---
# RLlib tuned-example experiment: learn CartPole with PG while sub-environments
# randomly crash, to exercise env-failure resiliency (restart_failed_sub_environments).
cartpole-crashing-pg:
  env: ray.rllib.examples.env.cartpole_crashing.CartPoleCrashing
  run: PG
  stop:
    evaluation/sampler_results/episode_reward_mean: 150.0
    num_env_steps_sampled: 150000
  config:
    # Works for both torch and tf.
    framework: torch
    env_config:
      config:
        # Crash roughly every 300 ts. This should be ok to measure 180.0
        # reward (episodes are 200 ts long).
        p_crash: 0.0025  # prob to crash during step()
        p_crash_reset: 0.01  # prob to crash during reset()
        # Time for the env to initialize when newly created.
        # Every time a remote sub-environment crashes, a new env is created
        # in its place and will take this long (sleep) to "initialize".
        init_time_s: 1.0
    num_workers: 2
    num_envs_per_worker: 5
    # Disable env checking. Env checker doesn't handle Exceptions from
    # user envs, and will crash rollout worker.
    disable_env_checking: true
    # Switch on resiliency for failed sub environments (within a vectorized stack).
    restart_failed_sub_environments: true
    evaluation_num_workers: 2
    evaluation_interval: 1
    evaluation_duration: 20
    evaluation_duration_unit: episodes
    evaluation_parallel_to_training: true
    evaluation_config:
      explore: false
      env_config:
        config:
          # Make eval workers solid.
          # This test is to prove that we can learn with crashing env,
          # not eval with crashing env.
          p_crash: 0.0
          p_crash_reset: 0.0
          init_time_s: 0.0