---
# This configuration can expect to reach -160 reward in 10k-20k timesteps
# NOTE(review): the reward threshold under `stop` (-900) is far looser than
# the -160 quoted above -- presumably a deliberate, quick pass/fail bound;
# confirm before relying on this file as a convergence benchmark.
pendulum-td3:
  env: Pendulum-v1
  run: TD3
  # Stopping criteria for the experiment: a mean-episode-reward threshold and
  # a total-timestep budget.
  stop:
    episode_reward_mean: -900
    timesteps_total: 100000
  config:
    # Works for both torch and tf.
    framework: tf

    # === Model ===
    # Hidden-layer sizes for the actor and critic networks (two layers of 64).
    actor_hiddens: [64, 64]
    critic_hiddens: [64, 64]

    # === Exploration ===
    # `learning_starts` and `random_timesteps` are set to the same value
    # (5000); keep them in sync if either is changed.
    learning_starts: 5000
    exploration_config:
      random_timesteps: 5000

    # === Evaluation ===
    evaluation_interval: 1
    evaluation_num_episodes: 5