---
# TransformedActionPendulum SAC can attain -150+ reward in 6-7k
# Configurations are similar to original softlearning/sac codebase
pendulum-sac:
  env: ray.rllib.examples.env.transformed_action_space_env.TransformedActionPendulum
  run: SAC
  stop:
    episode_reward_mean: -500
    timesteps_total: 10000
  config:
    # Works for both torch and tf.
    framework: tf
    # Test, whether SAC is able to learn in "distorted" action spaces.
    env_config:
      config:
        low: 300.0
        high: 500.0
    horizon: 200
    soft_horizon: true
    Q_model:
      fcnet_activation: relu
      fcnet_hiddens: [256, 256]
    policy_model:
      fcnet_activation: relu
      fcnet_hiddens: [256, 256]
    tau: 0.005
    target_entropy: auto
    no_done_at_end: true
    n_step: 1
    rollout_fragment_length: 1
    prioritized_replay: true
    train_batch_size: 256
    target_network_update_freq: 1
    timesteps_per_iteration: 1000
    learning_starts: 256
    optimization:
      actor_learning_rate: 0.0003
      critic_learning_rate: 0.0003
      entropy_learning_rate: 0.0003
    num_workers: 0
    num_gpus: 0
    metrics_smoothing_episodes: 5