# preprocessing_disabled.py
"""
Example for using _disable_preprocessor_api=True to disable all preprocessing.

This example shows:
  - How a complex observation space from the env is handled directly by the
    model.
  - Complex observations are flattened into lists of tensors and as such
    stored by the SampleCollectors.
  - This has the advantage that preprocessing happens in batched fashion
    (in the model).
"""
  11. import argparse
  12. from gym.spaces import Box, Dict, Discrete, MultiDiscrete, Tuple
  13. import numpy as np
  14. import os
  15. import ray
  16. from ray import tune
  17. def get_cli_args():
  18. """Create CLI parser and return parsed arguments"""
  19. parser = argparse.ArgumentParser()
  20. # general args
  21. parser.add_argument(
  22. "--run", default="PPO", help="The RLlib-registered algorithm to use.")
  23. parser.add_argument("--num-cpus", type=int, default=3)
  24. parser.add_argument(
  25. "--framework",
  26. choices=["tf", "tf2", "tfe", "torch"],
  27. default="tf",
  28. help="The DL framework specifier.")
  29. parser.add_argument(
  30. "--stop-iters",
  31. type=int,
  32. default=200,
  33. help="Number of iterations to train.")
  34. parser.add_argument(
  35. "--stop-timesteps",
  36. type=int,
  37. default=500000,
  38. help="Number of timesteps to train.")
  39. parser.add_argument(
  40. "--stop-reward",
  41. type=float,
  42. default=80.0,
  43. help="Reward at which we stop training.")
  44. parser.add_argument(
  45. "--as-test",
  46. action="store_true",
  47. help="Whether this script should be run as a test: --stop-reward must "
  48. "be achieved within --stop-timesteps AND --stop-iters.")
  49. parser.add_argument(
  50. "--no-tune",
  51. action="store_true",
  52. help="Run without Tune using a manual train loop instead. Here,"
  53. "there is no TensorBoard support.")
  54. parser.add_argument(
  55. "--local-mode",
  56. action="store_true",
  57. help="Init Ray in local mode for easier debugging.")
  58. args = parser.parse_args()
  59. print(f"Running with following CLI args: {args}")
  60. return args
  61. if __name__ == "__main__":
  62. args = get_cli_args()
  63. ray.init(local_mode=args.local_mode)
  64. config = {
  65. "env": "ray.rllib.examples.env.random_env.RandomEnv",
  66. "env_config": {
  67. "config": {
  68. "observation_space": Dict({
  69. "a": Discrete(2),
  70. "b": Dict({
  71. "ba": Discrete(3),
  72. "bb": Box(-1.0, 1.0, (2, 3), dtype=np.float32)
  73. }),
  74. "c": Tuple((MultiDiscrete([2, 3]), Discrete(2))),
  75. "d": Box(-1.0, 1.0, (2, ), dtype=np.int32),
  76. }),
  77. },
  78. },
  79. # Set this to True to enforce no preprocessors being used.
  80. # Complex observations now arrive directly in the model as
  81. # structures of batches, e.g. {"a": tensor, "b": [tensor, tensor]}
  82. # for obs-space=Dict(a=..., b=Tuple(..., ...)).
  83. "_disable_preprocessor_api": True,
  84. # Use GPUs iff `RLLIB_NUM_GPUS` env var set to > 0.
  85. "num_gpus": int(os.environ.get("RLLIB_NUM_GPUS", 0)),
  86. "framework": args.framework,
  87. }
  88. stop = {
  89. "training_iteration": args.stop_iters,
  90. "timesteps_total": args.stop_timesteps,
  91. "episode_reward_mean": args.stop_reward,
  92. }
  93. results = tune.run(args.run, config=config, stop=stop, verbose=2)
  94. ray.shutdown()