---
# RLlib tuned example: SAC on MuJoCo HalfCheetah.
# Our implementation of SAC can reach 9k reward in 400k timesteps.
halfcheetah_sac:
  env: HalfCheetah-v3
  run: SAC
  # Stop the trial once mean episode reward reaches 9000.
  stop:
    episode_reward_mean: 9000
  config:
    # Works for both torch and tf.
    framework: tf
    horizon: 1000
    soft_horizon: false
    # Critic (Q) network: two hidden layers of 256 ReLU units.
    Q_model:
      fcnet_activation: relu
      fcnet_hiddens: [256, 256]
    # Actor (policy) network: same architecture as the critic.
    policy_model:
      fcnet_activation: relu
      fcnet_hiddens: [256, 256]
    # Polyak averaging coefficient for the target-network update.
    tau: 0.005
    # Let SAC pick the entropy target from the action-space dimensionality.
    target_entropy: auto
    no_done_at_end: true
    n_step: 1
    rollout_fragment_length: 1
    prioritized_replay: true
    train_batch_size: 256
    target_network_update_freq: 1
    timesteps_per_iteration: 1000
    # Collect this many env steps with random actions before learning starts.
    learning_starts: 10000
    # Separate Adam learning rates for actor, critic, and entropy temperature.
    optimization:
      actor_learning_rate: 0.0003
      critic_learning_rate: 0.0003
      entropy_learning_rate: 0.0003
    # Single-process rollout collection, CPU only.
    num_workers: 0
    num_gpus: 0
    clip_actions: false
    normalize_actions: true
    evaluation_interval: 1
    metrics_smoothing_episodes: 5