---
# CQL (Conservative Q-Learning) on HalfCheetah, trained from offline d4rl data.
# The commented alternatives let you switch dataset quality (random / medium /
# expert / medium_replay); keep `env` and `input` choices in sync.
halfcheetah_cql:
  env:
    grid_search:
      # - ray.rllib.examples.env.d4rl_env.halfcheetah_random
      # - ray.rllib.examples.env.d4rl_env.halfcheetah_medium
      - ray.rllib.examples.env.d4rl_env.halfcheetah_expert
      # - ray.rllib.examples.env.d4rl_env.halfcheetah_medium_replay
  run: CQL
  config:
    # --- SAC configs (CQL builds on SAC) ---
    # Offline input: must match the env selected above.
    # input: d4rl.halfcheetah-random-v0
    # input: d4rl.halfcheetah-medium-v0
    input: d4rl.halfcheetah-expert-v0
    # input: d4rl.halfcheetah-medium-replay-v0
    # Works for both torch and tf.
    framework: tf
    soft_horizon: false
    horizon: 1000
    Q_model:
      fcnet_activation: relu
      fcnet_hiddens: [256, 256, 256]
    policy_model:
      fcnet_activation: relu
      fcnet_hiddens: [256, 256, 256]
    tau: 0.005
    target_entropy: auto
    no_done_at_end: false
    n_step: 3
    rollout_fragment_length: 1
    prioritized_replay: false
    train_batch_size: 256
    # 0 disables periodic hard target-net syncs; soft updates via `tau` apply.
    target_network_update_freq: 0
    timesteps_per_iteration: 1000
    learning_starts: 256
    optimization:
      actor_learning_rate: 0.0001
      critic_learning_rate: 0.0003
      entropy_learning_rate: 0.0001
    # Purely offline training: no rollout workers needed.
    num_workers: 0
    num_gpus: 1
    metrics_smoothing_episodes: 5
    # --- CQL configs ---
    min_q_weight: 5.0
    bc_iters: 20000
    temperature: 1.0
    num_actions: 10
    lagrangian: false
    # Evaluate on the live simulator (not the offline dataset).
    evaluation_interval: 3
    evaluation_config:
      input: sampler