# Experiment definition: CQL (an offline RL method, per the notes below)
# run against HalfCheetah-v4, trained from a pre-recorded JSON dataset and
# evaluated online through a sampler.
# NOTE(review): nesting was reconstructed from a flattened listing; confirm
# against the consuming test harness's schema.
cql-halfcheetah-v4:
  env: HalfCheetah-v4
  run: CQL

  # Thresholds the run is checked against (presumably minimums; confirm
  # with the harness).
  pass_criteria:
    evaluation/sampler_results/episode_reward_mean: 400.0
    # Cannot check throughput for offline methods.
    # NOTE(review): timesteps_total is still listed as a criterion; confirm
    # how the harness interprets it for offline runs.
    timesteps_total: 5000000

  # Stop condition: total run time of 3600 s.
  stop:
    time_total_s: 3600

  config:
    # Use input produced by expert SAC algo.
    input: "dataset"
    input_config:
      format: "json"
      paths: "/home/ray/halfcheetah_1500_mean_reward_sac.json"
    actions_in_input_normalized: true

    # Q-function and policy networks share the same architecture.
    q_model_config:
      fcnet_activation: relu
      fcnet_hiddens: [256, 256, 256]
    policy_model_config:
      fcnet_activation: relu
      fcnet_hiddens: [256, 256, 256]

    tau: 0.005
    target_entropy: auto
    n_step: 3
    rollout_fragment_length: auto
    num_workers: 8
    grad_clip: 40
    train_batch_size: 256
    target_network_update_freq: 0
    min_train_timesteps_per_iteration: 1000
    optimization:
      actor_learning_rate: 0.0001
      critic_learning_rate: 0.0003
      entropy_learning_rate: 0.0001
    num_gpus: 1
    metrics_num_episodes_for_smoothing: 5
    min_time_s_per_iteration: 30

    # CQL Configs
    min_q_weight: 5.0
    bc_iters: 20000
    temperature: 1.0
    num_actions: 10
    lagrangian: false

    # Switch on online evaluation.
    evaluation_interval: 3
    evaluation_parallel_to_training: true
    evaluation_num_workers: 1
    evaluation_duration: 10
    evaluation_duration_unit: episodes
    # Evaluation overrides the offline dataset input with a live sampler.
    evaluation_config:
      input: sampler