---
# pendulum-cql.yaml
#
# Given a SAC-generated offline file generated via:
#   rllib train -f tuned_examples/sac/pendulum-sac.yaml --no-ray-ui
# Pendulum CQL can attain ~ -300 reward in 10k from that file.
pendulum-cql:
  env: Pendulum-v1
  run: CQL
  stop:
    evaluation/episode_reward_mean: -700
    timesteps_total: 200000
  config:
    # Works for both torch and tf.
    framework: tf
    # Use one or more offline files or "input: sampler" for online learning.
    input: ["tests/data/pendulum/enormous.zip"]
    # Our input file above comes from an SAC run. Actions in there
    # are already normalized (produced by SquashedGaussian).
    actions_in_input_normalized: true
    clip_actions: true

    twin_q: true
    train_batch_size: 2000
    learning_starts: 0
    bc_iters: 100

    metrics_smoothing_episodes: 5

    # Evaluate in an actual environment.
    evaluation_interval: 1
    evaluation_num_workers: 2
    evaluation_num_episodes: 10
    evaluation_parallel_to_training: true
    evaluation_config:
      # Evaluation samples from the environment instead of the offline file.
      input: sampler
      explore: false