---
# Pendulum SAC can attain -150+ reward in 6-7k timesteps.
# Configurations are similar to the original softlearning/sac codebase.
pendulum-sac:
  env: Pendulum-v1
  run: SAC
  # Stop when either criterion is met.
  stop:
    episode_reward_mean: -600
    timesteps_total: 10000
  config:
    # Works for both torch and tf.
    framework: tf
    horizon: 200
    soft_horizon: true
    # Critic network.
    Q_model:
      fcnet_activation: relu
      fcnet_hiddens: [256, 256]
    # Actor network.
    policy_model:
      fcnet_activation: relu
      fcnet_hiddens: [256, 256]
    # Target-network soft-update coefficient.
    tau: 0.005
    target_entropy: auto
    no_done_at_end: true
    n_step: 3
    rollout_fragment_length: 1
    prioritized_replay: true
    train_batch_size: 256
    target_network_update_freq: 1
    timesteps_per_iteration: 1000
    learning_starts: 256
    optimization:
      actor_learning_rate: 0.0003
      critic_learning_rate: 0.0003
      entropy_learning_rate: 0.0003
    num_workers: 0
    num_gpus: 0
    metrics_smoothing_episodes: 5