# recomm-sys001-slateq.yaml
  1. recomm-sys001-slateq:
  2. env: ray.rllib.examples.env.recommender_system_envs.RecommSys001
  3. run: SlateQ
  4. stop:
  5. #evaluation/sampler_results/episode_reward_mean: 48.0
  6. timesteps_total: 200000
  7. config:
  8. # SlateQ only supported for torch so far.
  9. framework: torch
  10. metrics_num_episodes_for_smoothing: 1000
  11. # Env c'tor kwargs:
  12. env_config:
  13. # Number of different categories a doc can have and a user can
  14. # have a preference for.
  15. num_categories: 5
  16. # Number of docs to choose (a slate) from each timestep.
  17. num_docs_to_select_from: 50
  18. # Slate size.
  19. slate_size: 2
  20. # Re-sample docs each timesteps.
  21. num_docs_in_db: 1000
  22. # Re-sample user each episode.
  23. num_users_in_db: 1000
  24. # User time budget (determines lengths of episodes).
  25. user_time_budget: 60.0
  26. grad_clip: 2.0
  27. # Larger networks seem to help (large obs/action spaces).
  28. hiddens: [512, 512]
  29. # Larger batch sizes seem to help (more stability, even with higher lr).
  30. train_batch_size: 32
  31. num_workers: 0
  32. num_gpus: 0
  33. lr_choice_model: 0.002
  34. lr_q_model: 0.002
  35. target_network_update_freq: 500
  36. tau: 1.0
  37. # Evaluation settings.
  38. evaluation_interval: 1
  39. evaluation_num_workers: 4
  40. evaluation_duration: 200
  41. evaluation_duration_unit: episodes
  42. evaluation_parallel_to_training: true