# Our implementation of SAC can reach 9k reward in 400k timesteps.
halfcheetah_sac:
  env: HalfCheetah-v3
  run: SAC
  stop:
    episode_reward_mean: 9000
  config:
    # Works for both torch and tf.
    framework: tf
    horizon: 1000
    soft_horizon: false
    Q_model:
      fcnet_activation: relu
      fcnet_hiddens: [256, 256]
    policy_model:
      fcnet_activation: relu
      fcnet_hiddens: [256, 256]
    tau: 0.005
    target_entropy: auto
    no_done_at_end: true
    n_step: 1
    rollout_fragment_length: 1
    prioritized_replay: true
    train_batch_size: 256
    target_network_update_freq: 1
    timesteps_per_iteration: 1000
    learning_starts: 10000
    optimization:
      actor_learning_rate: 0.0003
      critic_learning_rate: 0.0003
      entropy_learning_rate: 0.0003
    num_workers: 0
    num_gpus: 0
    clip_actions: false
    normalize_actions: true
    evaluation_interval: 1
    metrics_smoothing_episodes: 5
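
# How to launch this tuned example (a minimal sketch, assuming Ray/RLlib and a
# MuJoCo-enabled Gym are installed; the filename halfcheetah-sac.yaml is
# illustrative, not prescribed by this repo):
#
#   rllib train -f halfcheetah-sac.yaml
#
# Or, equivalently, load the YAML yourself and hand it to Ray Tune:
#
#   import yaml
#   import ray
#   from ray import tune
#
#   # Parse the single experiment defined above.
#   with open("halfcheetah-sac.yaml") as f:
#       name, exp = yaml.safe_load(f).popitem()
#
#   ray.init()
#   # Merge the env name into the config, as RLlib expects when run via Tune.
#   tune.run(
#       exp["run"],
#       name=name,
#       stop=exp["stop"],
#       config=dict(exp["config"], env=exp["env"]),
#   )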