pendulum-transformed-actions-sac.yaml

# TransformedActionPendulum SAC can attain a reward of -150 or better within 6-7k timesteps.
# Configurations are similar to the original softlearning/sac codebase.
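# This file can be run with the RLlib CLI, e.g.:
#   rllib train -f pendulum-transformed-actions-sac.yaml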
pendulum-sac:
    env: ray.rllib.examples.env.transformed_action_space_env.TransformedActionPendulum
    run: SAC
    stop:
        episode_reward_mean: -500
        timesteps_total: 10000
    config:
        # Works for both torch and tf.
        framework: tf
        # Test whether SAC is able to learn in "distorted" action spaces.
        env_config:
            config:
                low: 300.0
                high: 500.0
        horizon: 200
        soft_horizon: true
        Q_model:
            fcnet_activation: relu
            fcnet_hiddens: [256, 256]
        policy_model:
            fcnet_activation: relu
            fcnet_hiddens: [256, 256]
        tau: 0.005
        target_entropy: auto
        no_done_at_end: true
        n_step: 1
        rollout_fragment_length: 1
        prioritized_replay: true
        train_batch_size: 256
        target_network_update_freq: 1
        timesteps_per_iteration: 1000
        learning_starts: 256
        optimization:
            actor_learning_rate: 0.0003
            critic_learning_rate: 0.0003
            entropy_learning_rate: 0.0003
        num_workers: 0
        num_gpus: 0
        metrics_smoothing_episodes: 5
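
# The same experiment can also be launched programmatically via Ray Tune.
# A minimal sketch (kept as comments so this file stays valid YAML; it assumes
# this file is saved locally as pendulum-transformed-actions-sac.yaml):
#
#   import yaml
#   import ray
#   from ray import tune
#
#   with open("pendulum-transformed-actions-sac.yaml") as f:
#       experiments = yaml.safe_load(f)
#
#   # tune.run_experiments() expects "env" inside the config dict, so mirror
#   # what the `rllib train` CLI does and move the top-level key there.
#   for exp in experiments.values():
#       exp["config"]["env"] = exp.pop("env")
#
#   ray.init()
#   tune.run_experiments(experiments)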