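# RLlib tuned example: SlateQ on RecSim's "interest evolution" recommender env.
# A minimal sketch of one way to launch it, assuming `ray[rllib]` and `recsim`
# are installed and this file is saved locally under the (hypothetical) name
# below (Ray 2.x CLI):
#   rllib train file interest-evolution-recsim-env-slateq.yaml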
interest-evolution-recsim-env-slateq:
  env: ray.rllib.examples.env.recommender_system_envs_with_recsim.InterestEvolutionRecSimEnv
  run: SlateQ
  stop:
    sampler_results/episode_reward_mean: 160.0
    timesteps_total: 120000
  config:
    framework: torch

    # RLlib/RecSim wrapper specific settings:
    env_config:
      # The env class specified above takes one `config` arg in its c'tor:
      config:
        # Each step, sample `num_candidates` documents using the env-internal
        # document sampler model (logic that creates the n documents from
        # which to select the slate).
        resample_documents: true
        num_candidates: 10
        # How many documents to recommend (out of `num_candidates`) each
        # timestep?
        slate_size: 2
        # Should the action space be purely Discrete? Useful for algos that
        # don't support MultiDiscrete (e.g. DQN or Bandits).
        # SlateQ handles MultiDiscrete action spaces.
        convert_to_discrete_action_space: false
        seed: 0

    # Epsilon-greedy exploration: hold epsilon at its initial value for
    # `warmup_timesteps`, then anneal it over `epsilon_timesteps`.
    exploration_config:
      warmup_timesteps: 10000
      epsilon_timesteps: 25000

    replay_buffer_config:
      # Size of the replay buffer (in timesteps).
      capacity: 100000
    num_steps_sampled_before_learning_starts: 10000

    lr: 0.001

    # Update the target network every 3200 steps.
    target_network_update_freq: 3200

    # Smooth reported episode-reward metrics over this many episodes.
    metrics_num_episodes_for_smoothing: 200