---
# Experiment `halfcheetah-ppo`: runs the PPO algorithm on the HalfCheetah-v2
# environment.
# NOTE(review): the key nesting below was reconstructed from a flattened copy
# of this file (`stop`, `config` and `num_envs_per_worker` had empty values
# followed by their apparent children) - confirm against the upstream file.
halfcheetah-ppo:
  env: HalfCheetah-v2
  run: PPO

  # Stopping thresholds. Presumably the run ends as soon as either one is
  # reached - verify against the consumer of this file.
  stop:
    episode_reward_mean: 9800
    # 10800 = 3 * 3600, i.e. three hours if the `_s` suffix means seconds.
    time_total_s: 10800

  # Settings handed to the algorithm.
  config:
    # Works for both torch and tf.
    framework: tf

    # Loss and optimisation hyperparameters.
    gamma: 0.99
    lambda: 0.95
    kl_coeff: 1.0
    num_sgd_iter: 32
    # Same value as the original `.0003`, written with a leading zero.
    lr: 0.0003
    vf_loss_coeff: 0.5
    clip_param: 0.2
    grad_clip: 0.5

    # Batch sizes: 65536 / 4096 = 16 minibatches per training batch.
    sgd_minibatch_size: 4096
    train_batch_size: 65536

    # Resources and sampling.
    num_workers: 16
    num_gpus: 1
    # Two candidate values are listed; presumably one trial is launched per
    # value - confirm against the tuning framework's `grid_search` semantics.
    num_envs_per_worker:
      grid_search: [16, 32]
    batch_mode: truncate_episodes
    observation_filter: MeanStdFilter