"""
Example for using _disable_preprocessor_api=True to disable all preprocessing.

This example shows:
  - How a complex observation space from the env is handled directly by the
    model.
  - Complex observations are flattened into lists of tensors and as such
    stored by the SampleCollectors.
  - This has the advantage that preprocessing happens in batched fashion
    (in the model).
"""
import argparse
import os

from gym.spaces import Box, Dict, Discrete, MultiDiscrete, Tuple
import numpy as np
import ray
from ray import tune


def get_cli_args():
    """Create the CLI parser, parse the command line and return the result.

    The parsed namespace is also printed to stdout.

    Note: `--as-test` and `--no-tune` are accepted for command-line
    compatibility, but the script body below does not read them.

    Returns:
        argparse.Namespace: The parsed command line arguments.
    """
    parser = argparse.ArgumentParser()

    # general args
    parser.add_argument(
        "--run",
        default="PPO",
        help="The RLlib-registered algorithm to use.")
    parser.add_argument("--num-cpus", type=int, default=3)
    parser.add_argument(
        "--framework",
        choices=["tf", "tf2", "tfe", "torch"],
        default="tf",
        help="The DL framework specifier.")
    parser.add_argument(
        "--stop-iters",
        type=int,
        default=200,
        help="Number of iterations to train.")
    parser.add_argument(
        "--stop-timesteps",
        type=int,
        default=500000,
        help="Number of timesteps to train.")
    parser.add_argument(
        "--stop-reward",
        type=float,
        default=80.0,
        help="Reward at which we stop training.")
    parser.add_argument(
        "--as-test",
        action="store_true",
        help="Whether this script should be run as a test: --stop-reward must "
        "be achieved within --stop-timesteps AND --stop-iters.")
    parser.add_argument(
        "--no-tune",
        action="store_true",
        # The trailing space after "Here," matters: the two literals are
        # concatenated and previously rendered as "Here,there is ...".
        help="Run without Tune using a manual train loop instead. Here, "
        "there is no TensorBoard support.")
    parser.add_argument(
        "--local-mode",
        action="store_true",
        help="Init Ray in local mode for easier debugging.")

    args = parser.parse_args()
    print(f"Running with following CLI args: {args}")
    return args


if __name__ == "__main__":
    args = get_cli_args()

    # Honor --num-cpus (it used to be parsed but never forwarded). A value of
    # 0 is mapped to None so that Ray picks the CPU count itself.
    ray.init(num_cpus=args.num_cpus or None, local_mode=args.local_mode)

    # Make sure Ray is shut down even if setting up or running the
    # experiment raises.
    try:
        config = {
            "env": "ray.rllib.examples.env.random_env.RandomEnv",
            "env_config": {
                "config": {
                    "observation_space": Dict({
                        "a": Discrete(2),
                        "b": Dict({
                            "ba": Discrete(3),
                            "bb": Box(-1.0, 1.0, (2, 3), dtype=np.float32)
                        }),
                        "c": Tuple((MultiDiscrete([2, 3]), Discrete(2))),
                        "d": Box(-1.0, 1.0, (2, ), dtype=np.int32),
                    }),
                },
            },
            # Set this to True to enforce no preprocessors being used.
            # Complex observations now arrive directly in the model as
            # structures of batches, e.g. {"a": tensor, "b": [tensor, tensor]}
            # for obs-space=Dict(a=..., b=Tuple(..., ...)).
            "_disable_preprocessor_api": True,
            # Use GPUs iff `RLLIB_NUM_GPUS` env var set to > 0.
            "num_gpus": int(os.environ.get("RLLIB_NUM_GPUS", 0)),
            "framework": args.framework,
        }

        # Training stops as soon as any one of these criteria is met.
        stop = {
            "training_iteration": args.stop_iters,
            "timesteps_total": args.stop_timesteps,
            "episode_reward_mean": args.stop_reward,
        }

        tune.run(args.run, config=config, stop=stop, verbose=2)
    finally:
        ray.shutdown()