# cartpole-appo-vtrace-separate-losses.yaml
  1. cartpole-appo-vtrace-separate-losses:
  2. env: CartPole-v0
  3. run: APPO
  4. stop:
  5. episode_reward_mean: 150
  6. timesteps_total: 200000
  7. config:
  8. # Only works for tf|tf2 so far.
  9. framework: tf
  10. # Switch on >1 loss/optimizer API for TFPolicy and EagerTFPolicy.
  11. _tf_policy_handles_more_than_one_loss: true
  12. # APPO will produce two separate loss terms:
  13. # policy loss + value function loss.
  14. _separate_vf_optimizer: true
  15. # Separate learning rate for the value function branch.
  16. _lr_vf: 0.00075
  17. num_envs_per_worker: 5
  18. num_workers: 1
  19. num_gpus: 0
  20. observation_filter: MeanStdFilter
  21. num_sgd_iter: 6
  22. vf_loss_coeff: 0.01
  23. vtrace: true
  24. model:
  25. fcnet_hiddens: [32]
  26. fcnet_activation: linear
  27. # Make sure we really have completely separate branches.
  28. vf_share_layers: false