---
# cartpole-mbmpo.yaml
#
# Experiment definition that runs the MBMPO algorithm on the CartPoleWrapper
# environment.
#
# NOTE(review): the nesting below was reconstructed from a copy of this file
# whose indentation had been flattened. Keys were grouped by meaning (stop
# criteria under `stop`, hyperparameters under `config`, network settings
# under `model`) -- confirm against the schema of the tool that loads it.
cartpole-mbmpo:
  env: ray.rllib.examples.env.mbmpo_env.CartPoleWrapper
  run: MBMPO

  # Stop criteria.
  # NOTE(review): presumably the run ends as soon as either one is met --
  # confirm against the consumer.
  stop:
    episode_reward_mean: 190
    training_iteration: 20

  config:
    # Only supported in torch right now.
    framework: torch
    # Commented-out setting, kept for reference:
    # horizon: 200
    num_envs_per_worker: 20
    inner_adaptation_steps: 1
    maml_optimizer_steps: 8
    gamma: 0.99
    lambda: 1.0
    lr: 0.001
    clip_param: 0.5
    kl_target: 0.003
    kl_coeff: 0.0000000001
    num_workers: 10
    num_gpus: 0
    inner_lr: 0.001
    clip_actions: false
    num_maml_steps: 15
    model:
      fcnet_hiddens: [32, 32]
      free_log_std: true