# pendulum-td3.yaml
#
# Experiment definition: runs the TD3 algorithm on the Pendulum-v1 environment.
# This configuration can expect to reach -160 reward in 10k-20k timesteps.
pendulum-td3:
  env: Pendulum-v1
  run: TD3
  # Stopping criteria for the run.
  # NOTE(review): the reward threshold here (-900) is much looser than the
  # -160 quoted in the header comment -- presumably a quick pass/fail bound
  # rather than a convergence target; confirm this is intended.
  stop:
    episode_reward_mean: -900
    timesteps_total: 100000
  config:
    # Works for both torch and tf.
    framework: tf

    # === Model ===
    # Two hidden layers of 64 units each for both the actor and the critic.
    actor_hiddens: [64, 64]
    critic_hiddens: [64, 64]

    # === Exploration ===
    # NOTE(review): learning_starts equals random_timesteps (both 5000) --
    # presumably so that learning begins once the initial random-action
    # phase ends; keep the two values in sync if either is changed.
    learning_starts: 5000
    exploration_config:
      random_timesteps: 5000

    # === Evaluation ===
    evaluation_interval: 1
    evaluation_num_episodes: 5