---
# Experiment spec: run SAC on CartPoleContinuousBulletEnv-v0.
# NOTE(review): the nesting below was reconstructed from a flattened listing
# (keys grouped under `stop`, `config`, and `optimization`); confirm it
# against the schema expected by the tool that loads this file.
cartpole-sac:
  env: CartPoleContinuousBulletEnv-v0
  run: SAC
  # Presumably the run ends once either threshold is reached; confirm.
  stop:
    episode_reward_mean: 40
    timesteps_total: 100000
  config:
    # Works for both torch and tf.
    framework: tf
    gamma: 0.95
    no_done_at_end: false
    horizon: 200
    soft_horizon: true
    n_step: 3
    prioritized_replay: true
    initial_alpha: 0.2
    learning_starts: 256
    clip_actions: false
    timesteps_per_iteration: 1000
    # Separate learning rates for the actor, critic, and entropy terms.
    optimization:
      actor_learning_rate: 0.005
      critic_learning_rate: 0.005
      entropy_learning_rate: 0.0001