# cql-halfcheetah-v4.yaml

cql-halfcheetah-v4:
    env: HalfCheetah-v4
    run: CQL
    pass_criteria:
        evaluation/sampler_results/episode_reward_mean: 400.0
        # Cannot check throughput for offline methods.
        timesteps_total: 5000000
    stop:
        time_total_s: 3600
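
    # Algorithm configuration. In RLlib, CQL is built on top of SAC, so the
    # SAC settings below apply alongside the CQL-specific ones.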
    config:
        # Use input produced by expert SAC algo.
        input: "dataset"
        input_config:
            format: "json"
            paths: "/home/ray/halfcheetah_1500_mean_reward_sac.json"
        actions_in_input_normalized: true
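
        # Q- and policy-network architectures: three hidden layers of 256
        # units each with ReLU activations.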
        q_model_config:
            fcnet_activation: relu
            fcnet_hiddens: [256, 256, 256]
        policy_model_config:
            fcnet_activation: relu
            fcnet_hiddens: [256, 256, 256]
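
        # SAC base hyperparameters: soft target updates (tau), an
        # automatically derived entropy target, and 3-step returns.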
        tau: 0.005
        target_entropy: auto
        n_step: 3
        rollout_fragment_length: auto
        num_workers: 8
        grad_clip: 40
        train_batch_size: 256
        target_network_update_freq: 0
        min_train_timesteps_per_iteration: 1000
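
        # Separate learning rates for the actor, the critic, and the entropy
        # coefficient.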
        optimization:
            actor_learning_rate: 0.0001
            critic_learning_rate: 0.0003
            entropy_learning_rate: 0.0001
        num_gpus: 1
        metrics_num_episodes_for_smoothing: 5
        min_time_s_per_iteration: 30

        # CQL configs.
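        # - min_q_weight: coefficient on the conservative Q-value penalty.
        # - bc_iters: number of initial gradient steps trained with a pure
        #   behavior-cloning loss before switching to the full CQL loss.
        # - temperature: temperature of the logsumexp term in the CQL loss.
        # - num_actions: number of actions sampled to estimate that logsumexp.
        # - lagrangian: whether to auto-tune the penalty weight via the
        #   Lagrangian variant of CQL.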
        min_q_weight: 5.0
        bc_iters: 20000
        temperature: 1.0
        num_actions: 10
        lagrangian: False
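
        # Evaluation overrides "input" with "sampler" (see evaluation_config
        # at the bottom), so evaluation episodes are rolled out in the live
        # HalfCheetah-v4 env rather than read back from the offline dataset.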
        # Switch on online evaluation.
        evaluation_interval: 3
        evaluation_parallel_to_training: true
        evaluation_num_workers: 1
        evaluation_duration: 10
        evaluation_duration_unit: episodes
        evaluation_config:
            input: sampler
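
# Note: this file follows RLlib's tuned-example/release-test YAML format
# (pass_criteria is consumed by the release-test harness, not by Tune). As an
# assumption, a stripped-down variant without pass_criteria could be launched
# via the RLlib CLI, e.g. `rllib train -f cql-halfcheetah-v4.yaml`.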