# cartpole-apex.yaml
#
# Note that with < 3 workers, APEX can behave a little unstably
# due to the (static) per-worker-epsilon distribution, which also makes
# evaluation without an evaluation worker set harder.
# For an epsilon-free/greedy evaluation, use:
#   evaluation_interval: 1
#   evaluation_config:
#     explore: False
cartpole-apex-dqn:
  env: CartPole-v0
  run: APEX
  # Stopping criteria for the experiment.
  stop:
    episode_reward_mean: 150.0
    timesteps_total: 250000
  config:
    # Works for both torch and tf.
    framework: tf
    # Make this work with only 5 CPUs and 0 GPUs:
    num_workers: 3
    optimizer:
      num_replay_buffer_shards: 2

    num_gpus: 0

    min_iter_time_s: 5
    target_network_update_freq: 500
    learning_starts: 1000
    timesteps_per_iteration: 1000
    buffer_size: 20000