# flake8: noqa

# Minimal RLlib example: configure PPO on the "Taxi-v3" environment, build the
# algorithm, run five training iterations (printing each result), and then
# evaluate once after the loop. The begin/end marker comments below delimit
# the snippet; keep them unchanged.

# __rllib-in-60s-begin__
from ray.rllib.algorithms.ppo import PPOConfig

config = (  # 1. Configure the algorithm,
    PPOConfig()
    .environment("Taxi-v3")
    .rollouts(num_rollout_workers=2)
    .framework("torch")
    .training(model={"fcnet_hiddens": [64, 64]})
    .evaluation(evaluation_num_workers=1)
)

algo = config.build()  # 2. build the algorithm,

for _ in range(5):
    print(algo.train())  # 3. train it,

algo.evaluate()  # 4. and evaluate it.
# __rllib-in-60s-end__