---
# cartpole-continuous-pybullet-sac.yaml
#
# Experiment spec for running SAC on CartPoleContinuousBulletEnv-v0.
# NOTE(review): the nesting below was reconstructed from a flattened
# copy of this file; the layout follows the usual Ray Tune / RLlib
# experiment format -- confirm against the consuming tool.
cartpole-sac:
  env: CartPoleContinuousBulletEnv-v0
  run: SAC

  # Stopping thresholds (presumably the run ends when either one is
  # reached -- verify against the runner's stop semantics).
  stop:
    episode_reward_mean: 40
    timesteps_total: 100000

  # Algorithm settings handed to the SAC trainer.
  config:
    # Works for both torch and tf.
    framework: tf
    gamma: 0.95
    no_done_at_end: false
    horizon: 200
    soft_horizon: true
    n_step: 3
    prioritized_replay: true
    initial_alpha: 0.2
    learning_starts: 256
    clip_actions: false
    timesteps_per_iteration: 1000

    # Separate learning rates for the actor, critic and entropy terms.
    optimization:
      actor_learning_rate: 0.005
      critic_learning_rate: 0.005
      entropy_learning_rate: 0.0001