# halfcheetah-sac.yaml

# Our implementation of SAC can reach 9k reward in 400k timesteps.
halfcheetah_sac:
    env: HalfCheetah-v3
    run: SAC
    stop:
        episode_reward_mean: 9000
    config:
        # Works for both torch and tf.
        framework: tf
        horizon: 1000
        soft_horizon: false
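        # Q_model configures the critic network and policy_model the actor;
        # both are two-layer, 256-unit ReLU MLPs, matching the original SAC
        # paper.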
        Q_model:
            fcnet_activation: relu
            fcnet_hiddens: [256, 256]
        policy_model:
            fcnet_activation: relu
            fcnet_hiddens: [256, 256]
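        # tau is the Polyak coefficient for soft target-network updates;
        # target_entropy: auto sets the entropy target to -dim(action space).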
        tau: 0.005
        target_entropy: auto
        no_done_at_end: true
        n_step: 1
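        # Store one env step per rollout fragment in a prioritized replay
        # buffer; learning starts after a 10k-timestep warm-up.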
        rollout_fragment_length: 1
        prioritized_replay: true
        train_batch_size: 256
        target_network_update_freq: 1
        timesteps_per_iteration: 1000
        learning_starts: 10000
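        # SAC keeps three separate optimizers (Adam by default in RLlib): one
        # each for the actor, the critic(s), and the entropy temperature.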
        optimization:
            actor_learning_rate: 0.0003
            critic_learning_rate: 0.0003
            entropy_learning_rate: 0.0003
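        # Sample on the local worker only; no GPU required.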
        num_workers: 0
        num_gpus: 0
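        # With normalize_actions: true, actions are squashed into the env's
        # bounds, so hard clipping is unnecessary. Evaluate every iteration,
        # smoothing metrics over the last 5 episodes.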
        clip_actions: false
        normalize_actions: true
        evaluation_interval: 1
        metrics_smoothing_episodes: 5
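
# A minimal usage sketch, assuming a Ray/RLlib 1.x-era install where the
# `rllib` CLI and `ray.tune.run` API exist. From the shell:
#
#   rllib train -f halfcheetah-sac.yaml
#
# or, equivalently, from Python:
#
#   import yaml
#   from ray import tune
#
#   with open("halfcheetah-sac.yaml") as f:
#       exp = yaml.safe_load(f)["halfcheetah_sac"]
#   tune.run(exp["run"],
#            stop=exp["stop"],
#            config=dict(exp["config"], env=exp["env"]))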