# Pendulum SAC can attain -150+ reward in 6-7k
# (NOTE(review): "6-7k" presumably means environment timesteps - confirm).
# Configurations are similar to the original softlearning/sac codebase.
pendulum-sac:
  env: Pendulum-v1
  run: SAC
  # Stopping criteria for the experiment.
  stop:
    episode_reward_mean: -600
    timesteps_total: 10000
  # Algorithm settings.
  config:
    # Works for both torch and tf.
    framework: tf
    horizon: 200
    soft_horizon: true
    # Q-function network; same layer layout as policy_model below.
    Q_model:
      fcnet_activation: relu
      fcnet_hiddens: [256, 256]
    # Policy network.
    policy_model:
      fcnet_activation: relu
      fcnet_hiddens: [256, 256]
    tau: 0.005
    target_entropy: auto
    no_done_at_end: true
    n_step: 3
    rollout_fragment_length: 1
    prioritized_replay: true
    train_batch_size: 256
    target_network_update_freq: 1
    timesteps_per_iteration: 1000
    learning_starts: 256
    # Actor, critic and entropy learning rates are all set to the same value.
    optimization:
      actor_learning_rate: 0.0003
      critic_learning_rate: 0.0003
      entropy_learning_rate: 0.0003
    num_workers: 0
    num_gpus: 0
    metrics_smoothing_episodes: 5