---
# CQL experiment on HalfCheetah-v4.
# Training reads a pre-recorded JSON dataset (see `config.input_config`);
# evaluation overrides the input with `sampler` (see `evaluation_config`).
cql-halfcheetah-v4:
  env: HalfCheetah-v4
  run: CQL

  # Thresholds checked by the test harness.
  # NOTE(review): exact pass/fail semantics are defined by the consumer of
  # this file -- confirm there.
  pass_criteria:
    evaluation/sampler_results/episode_reward_mean: 400.0
    # Cannot check throughput for offline methods.
    timesteps_total: 5000000

  # Stop condition: elapsed time in seconds (per the key name).
  stop:
    time_total_s: 3600

  config:
    # Use input produced by expert SAC algo.
    input: "dataset"
    input_config:
      format: "json"
      paths: "/home/ray/halfcheetah_1500_mean_reward_sac.json"
    actions_in_input_normalized: true

    # Q-network and policy network share the same layout.
    q_model_config:
      fcnet_activation: relu
      fcnet_hiddens: [256, 256, 256]
    policy_model_config:
      fcnet_activation: relu
      fcnet_hiddens: [256, 256, 256]

    tau: 0.005
    target_entropy: auto
    n_step: 3
    rollout_fragment_length: auto
    num_workers: 8
    grad_clip: 40
    train_batch_size: 256
    target_network_update_freq: 0
    min_train_timesteps_per_iteration: 1000

    # Separate learning rates for actor, critic and entropy coefficient.
    optimization:
      actor_learning_rate: 0.0001
      critic_learning_rate: 0.0003
      entropy_learning_rate: 0.0001

    num_gpus: 1
    metrics_num_episodes_for_smoothing: 5
    min_time_s_per_iteration: 30

    # CQL configs.
    min_q_weight: 5.0
    bc_iters: 20000
    temperature: 1.0
    num_actions: 10
    lagrangian: false

    # Switch on online evaluation.
    evaluation_interval: 3
    evaluation_parallel_to_training: true
    evaluation_num_workers: 1
    evaluation_duration: 10
    evaluation_duration_unit: episodes
    evaluation_config:
      # Evaluation does not use the offline dataset input.
      input: sampler