123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649 |
- import argparse
- from dataclasses import dataclass
- from enum import Enum
- import os.path
- import tempfile
- import typer
- from typing import Optional
- import requests
- from ray.tune.experiment.config_parser import _make_parser
- from ray.tune.result import DEFAULT_RESULTS_DIR
class FrameworkEnum(str, Enum):
    """Deep-learning frameworks supported by RLlib; used to validate CLI arguments.

    Inherits from ``str`` so members compare equal to the raw strings users
    type on the command line.
    """

    tf = "tf"
    tf2 = "tf2"
    torch = "torch"
class SupportedFileType(str, Enum):
    """Config-file formats RLlib understands; used to validate CLI arguments.

    Inherits from ``str`` so members compare equal to plain strings.
    """

    yaml = "yaml"
    python = "python"
def get_file_type(config_file: str) -> SupportedFileType:
    """Infer the config file type from its file extension.

    Args:
        config_file: Path or URL of the config file.

    Returns:
        The matching SupportedFileType member.

    Raises:
        ValueError: If the extension is not one of .py, .yml, or .yaml.
    """
    if config_file.endswith((".yaml", ".yml")):
        return SupportedFileType.yaml
    if config_file.endswith(".py"):
        return SupportedFileType.python
    raise ValueError(
        "Unknown file type for config "
        "file: {}. Supported extensions: .py, "
        ".yml, .yaml".format(config_file)
    )
def _create_tune_parser_help():
    """Build a throwaway Tune parser just to harvest its option help texts.

    Returns:
        The parser's internal ``_option_string_actions`` mapping (option
        string -> argparse Action), or None if that attribute is absent.
    """
    dummy_parser = _make_parser(
        parser_creator=None,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    # argparse keeps one Action per option string in this private mapping;
    # we read it via vars() rather than touching the attribute directly.
    return vars(dummy_parser).get("_option_string_actions")
# Maps option strings (e.g. "--num-samples") to their argparse Action objects,
# letting `get_help` reuse Tune's help texts for the RLlib CLI.
PARSER_HELP = _create_tune_parser_help()
def download_example_file(
    example_file: str,
    base_url: Optional[str] = "https://raw.githubusercontent.com/"
    + "ray-project/ray/master/rllib/",
):
    """Download the example file (e.g. from GitHub) if it doesn't exist locally.

    If the provided example file exists locally, we return it directly.
    Not every user will have cloned our repo and cd'ed into this working directory
    when using the CLI.

    Args:
        example_file: The example file to download.
        base_url: The base URL to download the example file from. Use this if
            'example_file' is a link relative to this base URL. If set to 'None',
            'example_file' is assumed to be a complete URL (or a local file, in
            which case nothing is downloaded).

    Returns:
        Tuple of (file path to use, NamedTemporaryFile handle or None). The
        temp-file handle is returned so the caller can keep it alive — the
        underlying file is deleted when the handle is closed/garbage-collected.
    """
    temp_file = None
    if not os.path.exists(example_file):
        example_url = base_url + example_file if base_url else example_file
        print(f">>> Attempting to download example file {example_url}...")
        file_type = get_file_type(example_url)
        if file_type == SupportedFileType.yaml:
            temp_file = tempfile.NamedTemporaryFile(suffix=".yaml")
        else:
            assert (
                file_type == SupportedFileType.python
            ), f"`example_url` ({example_url}) must be a python or yaml file!"
            temp_file = tempfile.NamedTemporaryFile(suffix=".py")
        # Timeout so a stalled connection can't hang the CLI indefinitely.
        r = requests.get(example_url, timeout=30)
        print(f" Status code: {r.status_code}")
        if r.status_code == 200:
            # Only write (and only overwrite `example_file`) on success;
            # previously the body was written unconditionally, so a 404 error
            # page could end up in the temp file. Also dropped the debug
            # `print(r.content)` that dumped the raw payload to stdout.
            with open(temp_file.name, "wb") as f:
                f.write(r.content)
            print(f" Downloaded example file to {temp_file.name}")
            example_file = temp_file.name
    return example_file, temp_file
def get_help(key: str) -> str:
    """Get the help string from a parser for a given key.

    If e.g. 'resource_group' is provided, we return
    the entry for '--resource-group'.

    Raises:
        ValueError: If the converted key is not a known parser option.
    """
    key = "--" + key
    key = key.replace("_", "-")
    # Single lookup instead of the previous `in PARSER_HELP.keys()` membership
    # test followed by a second `.get()` call.
    action = PARSER_HELP.get(key)
    if action is None:
        raise ValueError(f"Key {key} not found in parser.")
    return action.help
# Help texts for the `rllib example` subcommands.
example_help = dict(
    filter="Filter examples by exact substring match. For instance,"
    " --filter=ppo will only show examples that"
    " contain the substring 'ppo' in their ID. The same way, -f=recsys"
    # Leading space added: the concatenation previously read "-f=recsyswill".
    " will return all recommender system examples.",
)
# Help texts for the `rllib train` subcommand options.
train_help = dict(
    # "an Farama-Foundation" -> "a Farama-Foundation" (grammar fix).
    env="The environment specifier to use. This could be a Farama-Foundation "
    "Gymnasium specifier (e.g. `CartPole-v1`) or a full class-path (e.g. "
    "`ray.rllib.examples.env.simple_corridor.SimpleCorridor`).",
    config_file="Use the algorithm configuration from this file.",
    filetype="The file type of the config file. Defaults to 'yaml' and can also be "
    "'python'.",
    experiment_name="Name of the subdirectory under `local_dir` to put results in.",
    # Space added after "use." — the concatenation previously read "use.Choose".
    framework="The identifier of the deep learning framework you want to use. "
    "Choose between TensorFlow 1.x ('tf'), TensorFlow 2.x ('tf2'), "
    "and PyTorch ('torch').",
    v="Whether to use INFO level logging.",
    vv="Whether to use DEBUG level logging.",
    resume="Whether to attempt to resume from previous experiments.",
    local_dir=f"Local dir to save training results to. "
    f"Defaults to '{DEFAULT_RESULTS_DIR}'.",
    local_mode="Run Ray in local mode for easier debugging.",
    ray_address="Connect to an existing Ray cluster at this address instead "
    "of starting a new one.",
    ray_ui="Whether to enable the Ray web UI.",
    ray_num_cpus="The '--num-cpus' argument to use if starting a new cluster.",
    ray_num_gpus="The '--num-gpus' argument to use if starting a new cluster.",
    ray_num_nodes="Emulate multiple cluster nodes for debugging.",
    ray_object_store_memory="--object-store-memory to use if starting a new cluster.",
    upload_dir="Optional URI to sync training results to (e.g. s3://bucket).",
    trace="Whether to attempt to enable eager-tracing for framework=tf2.",
    # Space added after "'torch'" — previously read "'torch'as backend".
    torch="Whether to use PyTorch (instead of tf) as the DL framework. "
    "This argument is deprecated, please use --framework to select 'torch' "
    "as backend.",
    wandb_key="An optional WandB API key for logging all results to your WandB "
    "account.",
    wandb_project="An optional project name under which to store the training results.",
    wandb_run_name="An optional name for the specific run under which to store the "
    "training results.",
)
# Help texts for the `rllib evaluate` subcommand options.
eval_help = dict(
    checkpoint="Optional checkpoint from which to roll out. If none provided, we will "
    "evaluate an untrained algorithm.",
    algo="The algorithm or model to train. This may refer to the name of a built-in "
    "Algorithm (e.g. RLlib's `DQN` or `PPO`), or a user-defined trainable "
    "function or class registered in the Tune registry.",
    # "an Farama-Foundation" -> "a Farama-Foundation" (grammar fix).
    env="The environment specifier to use. This could be a Farama-Foundation gymnasium"
    " specifier (e.g. `CartPole-v1`) or a full class-path (e.g. "
    "`ray.rllib.examples.env.simple_corridor.SimpleCorridor`).",
    local_mode="Run Ray in local mode for easier debugging.",
    render="Render the environment while evaluating. Off by default",
    # Trailing space added: the concatenation previously read "episoderollouts".
    video_dir="Specifies the directory into which videos of all episode "
    "rollouts will be stored.",
    steps="Number of time-steps to roll out. The evaluation will also stop if "
    "`--episodes` limit is reached first. A value of 0 means no "
    "limitation on the number of time-steps run.",
    episodes="Number of complete episodes to roll out. The evaluation will also stop "
    "if `--steps` (time-steps) limit is reached first. A value of 0 means "
    "no limitation on the number of episodes run.",
    out="Output filename",
    config="Algorithm-specific configuration (e.g. `env`, `framework` etc.). "
    "Gets merged with loaded configuration from checkpoint file and "
    "`evaluation_config` settings therein.",
    save_info="Save the info field generated by the step() method, "
    "as well as the action, observations, rewards and done fields.",
    use_shelve="Save rollouts into a Python shelf file (will save each episode "
    "as it is generated). An output filename must be set using --out.",
    track_progress="Write progress to a temporary file (updated "
    "after each episode). An output filename must be set using --out; "
    "the progress file will live in the same folder.",
)
@dataclass
class CLIArguments:
    """Dataclass for CLI arguments and options. We use this class to keep track
    of common arguments, like "run" or "env" that would otherwise be duplicated."""

    # Common arguments
    # __cli_common_start__
    Algo = typer.Option(None, "--algo", "--run", "-a", "-r", help=get_help("run"))
    AlgoRequired = typer.Option(
        ..., "--algo", "--run", "-a", "-r", help=get_help("run")
    )
    Env = typer.Option(None, "--env", "-e", help=train_help.get("env"))
    EnvRequired = typer.Option(..., "--env", "-e", help=train_help.get("env"))
    Config = typer.Option("{}", "--config", "-c", help=get_help("config"))
    ConfigRequired = typer.Option(..., "--config", "-c", help=get_help("config"))
    # __cli_common_end__

    # Train file arguments
    # __cli_file_start__
    ConfigFile = typer.Argument(  # config file is now mandatory for "file" subcommand
        ..., help=train_help.get("config_file")
    )
    FileType = typer.Option(
        SupportedFileType.yaml, "--type", "-t", help=train_help.get("filetype")
    )
    # __cli_file_end__

    # Train arguments
    # __cli_train_start__
    Stop = typer.Option("{}", "--stop", "-s", help=get_help("stop"))
    ExperimentName = typer.Option(
        "default", "--experiment-name", "-n", help=train_help.get("experiment_name")
    )
    V = typer.Option(False, "--log-info", "-v", help=train_help.get("v"))
    VV = typer.Option(False, "--log-debug", "-vv", help=train_help.get("vv"))
    Resume = typer.Option(False, help=train_help.get("resume"))
    NumSamples = typer.Option(1, help=get_help("num_samples"))
    CheckpointFreq = typer.Option(0, help=get_help("checkpoint_freq"))
    CheckpointAtEnd = typer.Option(True, help=get_help("checkpoint_at_end"))
    LocalDir = typer.Option(DEFAULT_RESULTS_DIR, help=train_help.get("local_dir"))
    Restore = typer.Option(None, help=get_help("restore"))
    Framework = typer.Option(None, help=train_help.get("framework"))
    ResourcesPerTrial = typer.Option(None, help=get_help("resources_per_trial"))
    KeepCheckpointsNum = typer.Option(None, help=get_help("keep_checkpoints_num"))
    # NOTE(review): the help key ("sync_on_checkpoint") doesn't match the
    # option name — confirm whether "checkpoint_score_attr" was intended.
    CheckpointScoreAttr = typer.Option(
        "training_iteration", help=get_help("sync_on_checkpoint")
    )
    UploadDir = typer.Option("", help=train_help.get("upload_dir"))
    Trace = typer.Option(False, help=train_help.get("trace"))
    LocalMode = typer.Option(False, help=train_help.get("local_mode"))
    Scheduler = typer.Option("FIFO", help=get_help("scheduler"))
    SchedulerConfig = typer.Option("{}", help=get_help("scheduler_config"))
    RayAddress = typer.Option(None, help=train_help.get("ray_address"))
    RayUi = typer.Option(False, help=train_help.get("ray_ui"))
    RayNumCpus = typer.Option(None, help=train_help.get("ray_num_cpus"))
    RayNumGpus = typer.Option(None, help=train_help.get("ray_num_gpus"))
    RayNumNodes = typer.Option(None, help=train_help.get("ray_num_nodes"))
    RayObjectStoreMemory = typer.Option(
        None, help=train_help.get("ray_object_store_memory")
    )
    WandBKey = typer.Option(None, "--wandb-key", help=train_help.get("wandb_key"))
    # Fixed: "wandb_project"/"wandb_run_name" are defined in `train_help`;
    # looking them up in `eval_help` silently yielded `help=None`.
    WandBProject = typer.Option(
        None, "--wandb-project", help=train_help.get("wandb_project")
    )
    WandBRunName = typer.Option(
        None, "--wandb-run-name", help=train_help.get("wandb_run_name")
    )
    # __cli_train_end__

    # Eval arguments
    # __cli_eval_start__
    Checkpoint = typer.Argument(None, help=eval_help.get("checkpoint"))
    Render = typer.Option(False, help=eval_help.get("render"))
    Steps = typer.Option(10000, help=eval_help.get("steps"))
    Episodes = typer.Option(0, help=eval_help.get("episodes"))
    Out = typer.Option(None, help=eval_help.get("out"))
    SaveInfo = typer.Option(False, help=eval_help.get("save_info"))
    UseShelve = typer.Option(False, help=eval_help.get("use_shelve"))
    TrackProgress = typer.Option(False, help=eval_help.get("track_progress"))
    # __cli_eval_end__
# Note that the IDs of these examples are lexicographically sorted by environment,
# not by algorithm. This should be more natural for users, but could be changed easily.
# Each entry maps an example ID to a dict with:
#   "file": config path, relative to the rllib/ folder (or to the download
#       base URL used by `download_example_file`),
#   "stop" (optional): stopping criteria, encoded as a Python-dict string,
#   "description": one-line summary shown when listing examples.
EXAMPLES = {
    # A2C
    "atari-a2c": {
        "file": "tuned_examples/a2c/atari-a2c.yaml",
        "description": "Runs grid search over several Atari games on A2C.",
    },
    "cartpole-a2c": {
        "file": "tuned_examples/a2c/cartpole_a2c.py",
        "stop": "{'timesteps_total': 50000, 'episode_reward_mean': 200}",
        "description": "Runs A2C on the CartPole-v1 environment.",
    },
    "cartpole-a2c-micro": {
        "file": "tuned_examples/a2c/cartpole-a2c-microbatch.yaml",
        "description": "Runs A2C on the CartPole-v1 environment, using micro-batches.",
    },
    # A3C
    "cartpole-a3c": {
        "file": "tuned_examples/a3c/cartpole_a3c.py",
        "stop": "{'timesteps_total': 20000, 'episode_reward_mean': 150}",
        "description": "Runs A3C on the CartPole-v1 environment.",
    },
    "pong-a3c": {
        "file": "tuned_examples/a3c/pong-a3c.yaml",
        "description": "Runs A3C on the ALE/Pong-v5 (deterministic) environment.",
    },
    # AlphaStar
    "multi-agent-cartpole-alpha-star": {
        "file": "tuned_examples/alpha_star/multi-agent-cartpole-alpha-star.yaml",
        "description": "Runs AlphaStar on 4 CartPole agents.",
    },
    # AlphaZero
    "cartpole-alpha-zero": {
        "file": "tuned_examples/alpha_zero/cartpole-sparse-rewards-alpha-zero.yaml",
        "description": "Runs AlphaZero on a Cartpole with sparse rewards.",
    },
    # Apex DDPG
    "mountaincar-apex-ddpg": {
        "file": "tuned_examples/apex_ddpg/mountaincarcontinuous-apex-ddpg.yaml",
        "description": "Runs Apex DDPG on MountainCarContinuous-v0.",
    },
    "pendulum-apex-ddpg": {
        "file": "tuned_examples/apex_ddpg/pendulum-apex-ddpg.yaml",
        "description": "Runs Apex DDPG on Pendulum-v1.",
    },
    # Apex DQN
    "breakout-apex-dqn": {
        "file": "tuned_examples/apex_dqn/atari-apex-dqn.yaml",
        "description": "Runs Apex DQN on ALE/Breakout-v5 (no frameskip).",
    },
    "cartpole-apex-dqn": {
        "file": "tuned_examples/apex_dqn/cartpole-apex-dqn.yaml",
        "description": "Runs Apex DQN on CartPole-v1.",
    },
    "pong-apex-dqn": {
        "file": "tuned_examples/apex_dqn/pong-apex-dqn.yaml",
        "description": "Runs Apex DQN on ALE/Pong-v5 (no frameskip).",
    },
    # APPO
    "cartpole-appo": {
        "file": "tuned_examples/appo/cartpole-appo.yaml",
        "description": "Runs APPO on CartPole-v1.",
    },
    "frozenlake-appo": {
        "file": "tuned_examples/appo/frozenlake-appo-vtrace.yaml",
        "description": "Runs APPO on FrozenLake-v1.",
    },
    "halfcheetah-appo": {
        "file": "tuned_examples/appo/halfcheetah-appo.yaml",
        "description": "Runs APPO on HalfCheetah-v2.",
    },
    "multi-agent-cartpole-appo": {
        "file": "tuned_examples/appo/multi-agent-cartpole-appo.yaml",
        "description": "Runs APPO on RLlib's MultiAgentCartPole",
    },
    "pendulum-appo": {
        "file": "tuned_examples/appo/pendulum-appo.yaml",
        "description": "Runs APPO on Pendulum-v1.",
    },
    "pong-appo": {
        "file": "tuned_examples/appo/pong-appo.yaml",
        "description": "Runs APPO on ALE/Pong-v5 (no frameskip).",
    },
    # ARS
    "cartpole-ars": {
        "file": "tuned_examples/ars/cartpole-ars.yaml",
        "description": "Runs ARS on CartPole-v1.",
    },
    "swimmer-ars": {
        "file": "tuned_examples/ars/swimmer-ars.yaml",
        "description": "Runs ARS on Swimmer-v2.",
    },
    # Bandits
    "recsys-bandits": {
        "file": "tuned_examples/bandits/"
        + "interest-evolution-recsim-env-bandit-linucb.yaml",
        "description": "Runs BanditLinUCB on a Recommendation Simulation environment.",
    },
    # BC
    "cartpole-bc": {
        "file": "tuned_examples/bc/cartpole-bc.yaml",
        "description": "Runs BC on CartPole-v1.",
    },
    # CQL
    "halfcheetah-cql": {
        "file": "tuned_examples/cql/halfcheetah-cql.yaml",
        "description": "Runs grid search on HalfCheetah environments with CQL.",
    },
    "hopper-cql": {
        "file": "tuned_examples/cql/hopper-cql.yaml",
        "description": "Runs grid search on Hopper environments with CQL.",
    },
    "pendulum-cql": {
        "file": "tuned_examples/cql/pendulum-cql.yaml",
        "description": "Runs CQL on Pendulum-v1.",
    },
    # CRR
    "cartpole-crr": {
        "file": "tuned_examples/crr/CartPole-v1-crr.yaml",
        "description": "Run CRR on CartPole-v1.",
    },
    "pendulum-crr": {
        "file": "tuned_examples/crr/pendulum-v1-crr.yaml",
        "description": "Run CRR on Pendulum-v1.",
    },
    # DDPG
    "halfcheetah-ddpg": {
        "file": "tuned_examples/ddpg/halfcheetah-ddpg.yaml",
        "description": "Runs DDPG on HalfCheetah-v2.",
    },
    "halfcheetah-bullet-ddpg": {
        "file": "tuned_examples/ddpg/halfcheetah-pybullet-ddpg.yaml",
        "description": "Runs DDPG on HalfCheetahBulletEnv-v0.",
    },
    "hopper-bullet-ddpg": {
        "file": "tuned_examples/ddpg/hopper-pybullet-ddpg.yaml",
        "description": "Runs DDPG on HopperBulletEnv-v0.",
    },
    "mountaincar-ddpg": {
        "file": "tuned_examples/ddpg/mountaincarcontinuous-ddpg.yaml",
        "description": "Runs DDPG on MountainCarContinuous-v0.",
    },
    "pendulum-ddpg": {
        "file": "tuned_examples/ddpg/pendulum-ddpg.yaml",
        "description": "Runs DDPG on Pendulum-v1.",
    },
    # DDPPO
    "breakout-ddppo": {
        "file": "tuned_examples/ddppo/atari-ddppo.yaml",
        "description": "Runs DDPPO on ALE/Breakout-v5 (no frameskip).",
    },
    "cartpole-ddppo": {
        "file": "tuned_examples/ddppo/cartpole-ddppo.yaml",
        "description": "Runs DDPPO on CartPole-v1",
    },
    "pendulum-ddppo": {
        "file": "tuned_examples/ddppo/pendulum-ddppo.yaml",
        "description": "Runs DDPPO on Pendulum-v1.",
    },
    # DQN
    "atari-dqn": {
        "file": "tuned_examples/dqn/atari-dqn.yaml",
        "description": "Run grid search on Atari environments with DQN.",
    },
    "atari-duel-ddqn": {
        "file": "tuned_examples/dqn/atari-duel-ddqn.yaml",
        "description": "Run grid search on Atari environments "
        "with duelling double DQN.",
    },
    "cartpole-dqn": {
        "file": "tuned_examples/dqn/cartpole-dqn.yaml",
        "description": "Run DQN on CartPole-v1.",
    },
    "pong-dqn": {
        "file": "tuned_examples/dqn/pong-dqn.yaml",
        "description": "Run DQN on ALE/Pong-v5 (deterministic).",
    },
    "pong-rainbow": {
        "file": "tuned_examples/dqn/pong-rainbow.yaml",
        "description": "Run Rainbow on ALE/Pong-v5 (deterministic).",
    },
    # DREAMER
    "dm-control-dreamer": {
        "file": "tuned_examples/dreamer/dreamer-deepmind-control.yaml",
        "description": "Run DREAMER on a suite of control problems by Deepmind.",
    },
    # DT
    "cartpole-dt": {
        "file": "tuned_examples/dt/CartPole-v1-dt.yaml",
        "description": "Run DT on CartPole-v1.",
    },
    "pendulum-dt": {
        "file": "tuned_examples/dt/pendulum-v1-dt.yaml",
        "description": "Run DT on Pendulum-v1.",
    },
    # ES
    "cartpole-es": {
        "file": "tuned_examples/es/cartpole-es.yaml",
        "description": "Run ES on CartPole-v1.",
    },
    "humanoid-es": {
        "file": "tuned_examples/es/humanoid-es.yaml",
        "description": "Run ES on Humanoid-v2.",
    },
    # IMPALA
    "atari-impala": {
        "file": "tuned_examples/impala/atari-impala.yaml",
        "description": "Run grid search over several atari games with IMPALA.",
    },
    "cartpole-impala": {
        "file": "tuned_examples/impala/cartpole-impala.yaml",
        "description": "Run IMPALA on CartPole-v1.",
    },
    "multi-agent-cartpole-impala": {
        "file": "tuned_examples/impala/multi-agent-cartpole-impala.yaml",
        "description": "Run IMPALA on RLlib's MultiAgentCartPole",
    },
    "pendulum-impala": {
        "file": "tuned_examples/impala/pendulum-impala.yaml",
        "description": "Run IMPALA on Pendulum-v1.",
    },
    "pong-impala": {
        "file": "tuned_examples/impala/pong-impala-fast.yaml",
        "description": "Run IMPALA on ALE/Pong-v5 (no frameskip).",
    },
    # MADDPG
    "two-step-game-maddpg": {
        "file": "tuned_examples/maddpg/two-step-game-maddpg.yaml",
        "description": "Run RLlib's Two-step game with multi-agent DDPG.",
    },
    # MAML
    "cartpole-maml": {
        "file": "tuned_examples/maml/cartpole-maml.yaml",
        "description": "Run MAML on CartPole-v1.",
    },
    "halfcheetah-maml": {
        "file": "tuned_examples/maml/halfcheetah-rand-direc-maml.yaml",
        "description": "Run MAML on a custom HalfCheetah environment.",
    },
    "pendulum-maml": {
        "file": "tuned_examples/maml/pendulum-mass-maml.yaml",
        "description": "Run MAML on a custom Pendulum environment.",
    },
    # MARWIL
    "cartpole-marwil": {
        "file": "tuned_examples/marwil/cartpole-marwil.yaml",
        "description": "Run MARWIL on CartPole-v1.",
    },
    # MBMPO
    "cartpole-mbmpo": {
        "file": "tuned_examples/mbmpo/cartpole-mbmpo.yaml",
        "description": "Run MBMPO on a CartPole environment wrapper.",
    },
    "halfcheetah-mbmpo": {
        "file": "tuned_examples/mbmpo/halfcheetah-mbmpo.yaml",
        "description": "Run MBMPO on a HalfCheetah environment wrapper.",
    },
    "hopper-mbmpo": {
        "file": "tuned_examples/mbmpo/hopper-mbmpo.yaml",
        "description": "Run MBMPO on a Hopper environment wrapper.",
    },
    "pendulum-mbmpo": {
        "file": "tuned_examples/mbmpo/pendulum-mbmpo.yaml",
        "description": "Run MBMPO on a Pendulum environment wrapper.",
    },
    # PG
    "cartpole-pg": {
        "file": "tuned_examples/pg/cartpole-pg.yaml",
        "description": "Run PG on CartPole-v1",
    },
    # PPO
    "atari-ppo": {
        "file": "tuned_examples/ppo/atari-ppo.yaml",
        "description": "Run grid search over several atari games with PPO.",
    },
    "cartpole-ppo": {
        "file": "tuned_examples/ppo/cartpole-ppo.yaml",
        "description": "Run PPO on CartPole-v1.",
    },
    "halfcheetah-ppo": {
        "file": "tuned_examples/ppo/halfcheetah-ppo.yaml",
        "description": "Run PPO on HalfCheetah-v2.",
    },
    "hopper-ppo": {
        "file": "tuned_examples/ppo/hopper-ppo.yaml",
        "description": "Run PPO on Hopper-v1.",
    },
    "humanoid-ppo": {
        "file": "tuned_examples/ppo/humanoid-ppo.yaml",
        "description": "Run PPO on Humanoid-v1.",
    },
    "pendulum-ppo": {
        "file": "tuned_examples/ppo/pendulum-ppo.yaml",
        "description": "Run PPO on Pendulum-v1.",
    },
    "pong-ppo": {
        "file": "tuned_examples/ppo/pong-ppo.yaml",
        "description": "Run PPO on ALE/Pong-v5 (no frameskip).",
    },
    "recsys-ppo": {
        "file": "tuned_examples/ppo/recomm-sys001-ppo.yaml",
        "description": "Run PPO on a recommender system example from RLlib.",
    },
    "repeatafterme-ppo": {
        "file": "tuned_examples/ppo/repeatafterme-ppo-lstm.yaml",
        "description": "Run PPO on RLlib's RepeatAfterMe environment.",
    },
    "walker2d-ppo": {
        "file": "tuned_examples/ppo/walker2d-ppo.yaml",
        "description": "Run PPO on the Walker2d-v1 environment.",
    },
    # QMIX
    "two-step-game-qmix": {
        "file": "tuned_examples/qmix/two-step-game-qmix.yaml",
        "description": "Run QMIX on RLlib's two-step game.",
    },
    # R2D2
    "stateless-cartpole-r2d2": {
        "file": "tuned_examples/r2d2/stateless-cartpole-r2d2.yaml",
        "description": "Run R2D2 on a stateless cart pole environment.",
    },
    # SAC
    "atari-sac": {
        "file": "tuned_examples/sac/atari-sac.yaml",
        "description": "Run grid search on several atari games with SAC.",
    },
    "cartpole-sac": {
        "file": "tuned_examples/sac/cartpole-sac.yaml",
        "description": "Run SAC on CartPole-v1",
    },
    "halfcheetah-sac": {
        "file": "tuned_examples/sac/halfcheetah-sac.yaml",
        "description": "Run SAC on HalfCheetah-v3.",
    },
    "pacman-sac": {
        "file": "tuned_examples/sac/mspacman-sac.yaml",
        "description": "Run SAC on ALE/MsPacman-v5 (no frameskip).",
    },
    "pendulum-sac": {
        "file": "tuned_examples/sac/pendulum-sac.yaml",
        "description": "Run SAC on Pendulum-v1.",
    },
    # SimpleQ
    "cartpole-simpleq": {
        "file": "tuned_examples/simple_q/cartpole-simpleq.yaml",
        "description": "Run SimpleQ on CartPole-v1",
    },
    # SlateQ
    "recsys-long-term-slateq": {
        "file": "tuned_examples/slateq/long-term-satisfaction-recsim-env-slateq.yaml",
        "description": "Run SlateQ on a recommendation system aimed at "
        "long-term satisfaction.",
    },
    "recsys-parametric-slateq": {
        "file": "tuned_examples/slateq/parametric-item-reco-env-slateq.yaml",
        "description": "SlateQ run on a recommendation system.",
    },
    "recsys-slateq": {
        "file": "tuned_examples/slateq/recomm-sys001-slateq.yaml",
        "description": "SlateQ run on a recommendation system.",
    },
    # TD3
    "inverted-pendulum-td3": {
        "file": "tuned_examples/td3/invertedpendulum-td3.yaml",
        "description": "Run TD3 on InvertedPendulum-v2.",
    },
    "mujoco-td3": {
        "file": "tuned_examples/td3/mujoco-td3.yaml",
        "description": "Run TD3 against four of the hardest MuJoCo tasks.",
    },
    "pendulum-td3": {
        "file": "tuned_examples/td3/pendulum-td3.yaml",
        "description": "Run TD3 on Pendulum-v1.",
    },
}
|