# openoker / ray
# __rllib-custom-gym-env-begin__
import gymnasium as gym

import ray
from ray.rllib.algorithms.ppo import PPOConfig


class SimpleCorridor(gym.Env):
    """One-dimensional corridor the agent must traverse to reach the goal.

    The agent starts at position 0. An episode terminates as soon as the
    position reaches ``config["corridor_length"]``.

    Actions (``Discrete(2)``):
        0: step towards the start (position - 1; ignored at position 0).
        1: step towards the goal (position + 1).

    Observations (``Discrete(corridor_length)``):
        The current position of the agent.

    Rewards:
        -0.1 for every step that does not reach the goal, +1.0 for the step
        that does.
    """

    def __init__(self, config):
        """Create the environment.

        Args:
            config: Mapping that must contain the key ``"corridor_length"``,
                the position at which the episode terminates.
        """
        self.end_pos = config["corridor_length"]
        self.cur_pos = 0
        self.action_space = gym.spaces.Discrete(2)  # 0: left, 1: right
        self.observation_space = gym.spaces.Discrete(self.end_pos)

    def reset(self, *, seed=None, options=None):
        """Move the agent back to the start of the corridor.

        Args:
            seed: Optional seed, forwarded to ``gym.Env.reset``.
            options: Unused.

        Returns:
            A tuple ``(observation, info)`` with the start position and an
            empty info dict.
        """
        # Follow the Gymnasium convention of letting the base class handle
        # the seed, even though this environment's dynamics are deterministic.
        super().reset(seed=seed)
        self.cur_pos = 0
        return self.cur_pos, {}

    def step(self, action):
        """Apply ``action`` and advance the environment by one step.

        Args:
            action: 0 to move towards the start, 1 to move towards the goal.

        Returns:
            A tuple ``(observation, reward, terminated, truncated, info)``.
        """
        if action == 0 and self.cur_pos > 0:  # move left (towards start)
            self.cur_pos -= 1
        elif action == 1:  # move right (towards goal)
            self.cur_pos += 1

        if self.cur_pos >= self.end_pos:
            # Goal reached: this is a natural end of the episode, so it is
            # reported as terminated and not as truncated. `end_pos` itself is
            # not a member of Discrete(end_pos), hence 0 is returned as the
            # (in-space) final observation.
            return 0, 1.0, True, False, {}
        return self.cur_pos, -0.1, False, False, {}


ray.init()

# Train PPO on a corridor of length 5.
config = PPOConfig().environment(SimpleCorridor, env_config={"corridor_length": 5})
algo = config.build()

try:
    # Run three training iterations and print each iteration's result.
    for _ in range(3):
        print(algo.train())
finally:
    # Always stop the algorithm, even if a training iteration raises.
    algo.stop()
# __rllib-custom-gym-env-end__