# cartpole-crashing-pg.yaml
  1. cartpole-crashing-pg:
  2. env: ray.rllib.examples.env.cartpole_crashing.CartPoleCrashing
  3. run: PG
  4. stop:
  5. evaluation/sampler_results/episode_reward_mean: 150.0
  6. num_env_steps_sampled: 150000
  7. config:
  8. # Works for both torch and tf.
  9. framework: torch
  10. env_config:
  11. config:
  12. # Crash roughly every 300 ts. This should be ok to measure 180.0
  13. # reward (episodes are 200 ts long).
  14. p_crash: 0.0025 # prob to crash during step()
  15. p_crash_reset: 0.01 # prob to crash during reset()
  16. # Time for the env to initialize when newly created.
  17. # Every time a remote sub-environment crashes, a new env is created
  18. # in its place and will take this long (sleep) to "initialize".
  19. init_time_s: 1.0
  20. num_workers: 2
  21. num_envs_per_worker: 5
  22. # Disable env checking. Env checker doesn't handle Exceptions from
  23. # user envs, and will crash rollout worker.
  24. disable_env_checking: true
  25. # Switch on resiliency for failed sub environments (within a vectorized stack).
  26. restart_failed_sub_environments: true
  27. evaluation_num_workers: 2
  28. evaluation_interval: 1
  29. evaluation_duration: 20
  30. evaluation_duration_unit: episodes
  31. evaluation_parallel_to_training: true
  32. evaluation_config:
  33. explore: false
  34. env_config:
  35. config:
  36. # Make eval workers solid.
  37. # This test is to prove that we can learn with crashing env,
  38. # not eval with crashing env.
  39. p_crash: 0.0
  40. p_crash_reset: 0.0
  41. init_time_s: 0.0