---
# Experiment spec: runs the MBMPO algorithm on the PendulumWrapper example
# environment.
pendulum-mbmpo:
  env: ray.rllib.examples.env.mbmpo_env.PendulumWrapper
  run: MBMPO
  # Stopping criteria for the run.
  stop:
    episode_reward_mean: -500
    training_iteration: 50
  # Algorithm settings passed to the MBMPO run.
  config:
    # Only supported in torch right now.
    framework: torch
    # horizon: 200
    num_envs_per_worker: 20
    inner_adaptation_steps: 1
    maml_optimizer_steps: 8
    gamma: 0.99
    lambda: 1.0
    lr: 0.001
    clip_param: 0.5
    kl_target: 0.003
    kl_coeff: 0.0000000001
    num_workers: 10
    num_gpus: 0
    inner_lr: 0.001
    clip_actions: false
    num_maml_steps: 15
    # Model settings: two hidden layers of 32 units each.
    model:
      fcnet_hiddens: [32, 32]
      free_log_std: true