# Experiment "halfcheetah-pybullet-sac": run SAC on HalfCheetahBulletEnv-v0.
#
# NOTE(review): the nesting below was restored from a flattened copy in which
# indentation was lost and every line carried a leading "- " marker. Confirm
# which keys belong under Q_model / policy_model / optimization (and which
# belong directly under config) against the RLlib SAC config schema.
halfcheetah-pybullet-sac:
  env: HalfCheetahBulletEnv-v0
  run: SAC

  # Stopping criterion for the experiment.
  # Presumably the run ends once the mean episode reward reaches this
  # value -- TODO confirm against the consumer of this file.
  stop:
    episode_reward_mean: 800.0

  config:
    # Works for both torch and tf.
    framework: tf

    horizon: 1000
    soft_horizon: false

    # Both model sections use the same activation and hidden-layer sizes.
    Q_model:
      fcnet_activation: relu
      fcnet_hiddens: [256, 256]
    policy_model:
      fcnet_activation: relu
      fcnet_hiddens: [256, 256]

    tau: 0.005
    # Left as the plain string "auto"; its interpretation is up to the
    # consumer of this config.
    target_entropy: auto
    no_done_at_end: false
    n_step: 3
    rollout_fragment_length: 1
    prioritized_replay: true
    train_batch_size: 256
    target_network_update_freq: 1
    timesteps_per_iteration: 1000
    learning_starts: 10000

    # The three learning rates are set to the same value.
    optimization:
      actor_learning_rate: 0.0003
      critic_learning_rate: 0.0003
      entropy_learning_rate: 0.0003

    num_workers: 0
    num_gpus: 1
    metrics_smoothing_episodes: 5