# interest-evolution-10-candidates-recsim-env-slateq.yaml
---
  1. interest-evolution-recsim-env-slateq:
  2. env: ray.rllib.examples.env.recommender_system_envs_with_recsim.InterestEvolutionRecSimEnv
  3. run: SlateQ
  4. stop:
  5. sampler_results/episode_reward_mean: 160.0
  6. timesteps_total: 120000
  7. config:
  8. framework: torch
  9. # RLlib/RecSim wrapper specific settings:
  10. env_config:
  11. # Env class specified above takes one `config` arg in its c'tor:
  12. config:
  13. # Each step, sample `num_candidates` documents using the env-internal
  14. # document sampler model (a logic that creates n documents to select
  15. # the slate from).
  16. resample_documents: true
  17. num_candidates: 10
  18. # How many documents to recommend (out of `num_candidates`) each
  19. # timestep?
  20. slate_size: 2
  21. # Should the action space be purely Discrete? Useful for algos that
  22. # don't support MultiDiscrete (e.g. DQN or Bandits).
  23. # SlateQ handles MultiDiscrete action spaces.
  24. convert_to_discrete_action_space: false
  25. seed: 0
  26. exploration_config:
  27. warmup_timesteps: 10000
  28. epsilon_timesteps: 25000
  29. replay_buffer_config:
  30. capacity: 100000
  31. num_steps_sampled_before_learning_starts: 10000
  32. lr: 0.001
  33. target_network_update_freq: 3200
  34. metrics_num_episodes_for_smoothing: 200