halfcheetah_cql:
  env:
    grid_search:
      #- ray.rllib.examples.env.d4rl_env.halfcheetah_random
      #- ray.rllib.examples.env.d4rl_env.halfcheetah_medium
      - ray.rllib.examples.env.d4rl_env.halfcheetah_expert
      #- ray.rllib.examples.env.d4rl_env.halfcheetah_medium_replay
  run: CQL
  config:
    # SAC Configs
    # Offline input: pick the d4rl dataset matching the env selected above.
    #input: d4rl.halfcheetah-random-v0
    #input: d4rl.halfcheetah-medium-v0
    input: d4rl.halfcheetah-expert-v0
    #input: d4rl.halfcheetah-medium-replay-v0
    # Works for both torch and tf.
    framework: tf
    soft_horizon: false
    horizon: 1000
    Q_model:
      fcnet_activation: relu
      fcnet_hiddens: [256, 256, 256]
    policy_model:
      fcnet_activation: relu
      fcnet_hiddens: [256, 256, 256]
    tau: 0.005
    target_entropy: auto
    no_done_at_end: false
    n_step: 3
    rollout_fragment_length: 1
    prioritized_replay: false
    train_batch_size: 256
    target_network_update_freq: 0
    timesteps_per_iteration: 1000
    learning_starts: 256
    optimization:
      actor_learning_rate: 0.0001
      critic_learning_rate: 0.0003
      entropy_learning_rate: 0.0001
    num_workers: 0
    num_gpus: 1
    metrics_smoothing_episodes: 5

    # CQL Configs
    # Weight of the conservative (min-Q) penalty term.
    min_q_weight: 5.0
    # Number of initial iterations trained with a behavior-cloning loss.
    bc_iters: 20000
    # CQL loss temperature.
    temperature: 1.0
    # Number of actions sampled for the CQL loss.
    num_actions: 10
    # Whether to tune the penalty weight automatically via a Lagrangian.
    lagrangian: false
    # Evaluate on live rollouts (not the offline data) every 3 iterations.
    evaluation_interval: 3
    evaluation_config:
      input: sampler
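
# Usage: this file follows the Tune experiment-file format used by RLlib's
# tuned examples. A minimal sketch, assuming it is saved as
# halfcheetah-cql.yaml (filename is an assumption) and that d4rl plus its
# MuJoCo dependencies are installed:
#
#   rllib train -f halfcheetah-cql.yaml
#
# It can also be launched through Tune directly; note the top-level "env"
# key is an `rllib train` convention and must be moved into "config" first,
# and resolving the dotted env paths is version-dependent in RLlib:
#
#   import yaml
#   import ray
#   from ray import tune
#
#   with open("halfcheetah-cql.yaml") as f:
#       experiments = yaml.safe_load(f)
#   for spec in experiments.values():
#       spec["config"]["env"] = spec.pop("env")
#   ray.init()
#   tune.run_experiments(experiments)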