---
# parametric-item-reco-env-slateq.yaml
#
# Experiment spec: runs SlateQ on ParametricItemRecoEnv with the torch
# framework and stops after a fixed number of timesteps.
parametric-item-reco-env-slateq:
  env: ray.rllib.examples.env.bandit_envs_recommender_system.ParametricItemRecoEnv
  run: SlateQ
  stop:
    # Alternative reward-based stop criterion (disabled):
    # evaluation/sampler_results/episode_reward_mean: 48.0
    timesteps_total: 200000
  config:
    # SlateQ only supported for torch so far.
    framework: torch

    metrics_num_episodes_for_smoothing: 200

    exploration_config:
      temperature: 0.7

    # Env c'tor kwargs:
    # NOTE(review): nesting was reconstructed from a flattened copy; the
    # env kwargs are assumed to end at `feature_dim` - confirm.
    env_config:
      config:
        slate_q: true
        num_users: 50
        num_items: 1000
        num_candidates: 50
        slate_size: 1
        feature_dim: 16

    grad_clip: 10.0
    # Disabled overrides:
    # double_q: false
    # slateq_strategy: MYOP

    # Larger networks seem to help (large obs/action spaces).
    hiddens: [512, 512]

    # Larger batch sizes seem to help (more stability, even with higher lr).
    train_batch_size: 64

    num_workers: 0
    num_gpus: 0

    lr_choice_model: 0.01
    lr_q_model: 0.01

    target_network_update_freq: 500
    tau: 1.0

    # Evaluation settings.
    evaluation_interval: 1
    evaluation_num_workers: 4
    evaluation_duration: 200
    evaluation_duration_unit: episodes
    evaluation_parallel_to_training: true