# Note that with < 3 workers, APEX can behave somewhat unstably due to the
# (static) per-worker-epsilon distribution, which also makes evaluation
# without a dedicated evaluation worker set harder.
# For an epsilon-free/greedy evaluation, use:
#   evaluation_interval: 1
#   evaluation_config:
#     explore: false
#
# NOTE(review): the nesting below was reconstructed from a copy of this file
# whose indentation had been lost. `num_replay_buffer_shards` is placed under
# `optimizer`; `num_gpus` and the keys after it are placed directly under
# `config`. Confirm against the consumer's config schema.
cartpole-apex-dqn:
  env: CartPole-v0
  run: APEX
  # Stopping criteria for the run.
  stop:
    episode_reward_mean: 150.0
    timesteps_total: 250000
  config:
    # Works for both torch and tf.
    framework: tf
    # Make this work with only 5 CPUs and 0 GPUs:
    num_workers: 3
    optimizer:
      num_replay_buffer_shards: 2
    num_gpus: 0
    min_iter_time_s: 5
    target_network_update_freq: 500
    learning_starts: 1000
    timesteps_per_iteration: 1000
    buffer_size: 20000