# openoker/ray
import logging
import unittest

import ray
from ray.rllib.algorithms.a3c import A3CConfig
from ray.rllib.algorithms.appo import APPOConfig
from ray.rllib.algorithms.ars import ARSConfig
from ray.rllib.algorithms.ddpg import DDPGConfig
from ray.rllib.algorithms.dqn import DQNConfig
from ray.rllib.algorithms.es import ESConfig
from ray.rllib.algorithms.impala import ImpalaConfig
from ray.rllib.algorithms.ppo import PPOConfig
from ray.rllib.algorithms.sac import SACConfig
from ray.rllib.utils.test_utils import check_supported_spaces


# Module-level logger named after this module.
logger = logging.getLogger(__name__)


class TestSupportedSpacesIMPALA(unittest.TestCase):
    """Runs `check_supported_spaces` against an IMPALA config."""

    @classmethod
    def setUpClass(cls) -> None:
        # Initialize Ray before the tests of this class run.
        ray.init()

    @classmethod
    def tearDownClass(cls) -> None:
        # Shut Ray down again after the tests of this class.
        ray.shutdown()

    def test_impala(self):
        # IMPALA with `num_gpus=0` and a model config of `fcnet_hiddens=[10]`.
        impala_config = ImpalaConfig().resources(num_gpus=0)
        impala_config = impala_config.training(model={"fcnet_hiddens": [10]})
        check_supported_spaces("IMPALA", impala_config)


class TestSupportedSpacesAPPO(unittest.TestCase):
    """Runs `check_supported_spaces` against an APPO config."""

    @classmethod
    def setUpClass(cls) -> None:
        # Initialize Ray before the tests of this class run.
        ray.init()

    @classmethod
    def tearDownClass(cls) -> None:
        # Shut Ray down again after the tests of this class.
        ray.shutdown()

    def test_appo(self):
        appo_config = APPOConfig().resources(num_gpus=0)
        appo_config = appo_config.training(
            vtrace=False,
            model={"fcnet_hiddens": [10]},
        )
        # NOTE(review): `vtrace` is set to False above and switched to True here,
        # before the check runs, so only the `vtrace=True` setting reaches
        # `check_supported_spaces`. Confirm whether `vtrace=False` was meant to
        # be checked as well.
        appo_config.training(vtrace=True)
        check_supported_spaces("APPO", appo_config)


class TestSupportedSpacesA3C(unittest.TestCase):
    """Runs `check_supported_spaces` (with bounds checking) against A3C."""

    @classmethod
    def setUpClass(cls) -> None:
        # Initialize Ray before the tests of this class run.
        ray.init()

    @classmethod
    def tearDownClass(cls) -> None:
        # Shut Ray down again after the tests of this class.
        ray.shutdown()

    def test_a3c(self):
        # Keyword arguments forwarded to `A3CConfig.training()`.
        training_kwargs = {
            "optimizer": {"grads_per_step": 1},
            "model": {"fcnet_hiddens": [10]},
        }
        a3c_config = A3CConfig().rollouts(num_rollout_workers=1)
        a3c_config = a3c_config.training(**training_kwargs)
        check_supported_spaces("A3C", a3c_config, check_bounds=True)


class TestSupportedSpacesPPO(unittest.TestCase):
    """Runs `check_supported_spaces` (with bounds checking) against PPO."""

    @classmethod
    def setUpClass(cls) -> None:
        # Initialize Ray before the tests of this class run.
        ray.init()

    @classmethod
    def tearDownClass(cls) -> None:
        # Shut Ray down again after the tests of this class.
        ray.shutdown()

    def test_ppo(self):
        model_settings = {"fcnet_hiddens": [10]}
        ppo_config = PPOConfig().rollouts(
            num_rollout_workers=2, rollout_fragment_length=50
        )
        ppo_config = ppo_config.training(
            train_batch_size=100,
            num_sgd_iter=1,
            sgd_minibatch_size=50,
            model=model_settings,
        )
        check_supported_spaces("PPO", ppo_config, check_bounds=True)


class TestSupportedSpacesPPONoPreprocessorGPU(unittest.TestCase):
    """Supported-spaces check for PPO on GPU with the preprocessor API disabled."""

    @classmethod
    def setUpClass(cls) -> None:
        # Initialize Ray with one GPU for the tests of this class.
        ray.init(num_gpus=1)

    @classmethod
    def tearDownClass(cls) -> None:
        # Shut Ray down again after the tests of this class.
        ray.shutdown()

    def test_ppo_no_preprocessors_gpu(self):
        # Uses the same PPO settings as test_ppo, but with preprocessors disabled
        # and one GPU requested, to check that models and tensors can be moved to
        # the same device in that setup.
        # (Artur) This combination of edge cases can lead to obscure errors.
        rollout_settings = {"num_rollout_workers": 2, "rollout_fragment_length": 50}
        training_settings = {
            "train_batch_size": 100,
            "num_sgd_iter": 1,
            "sgd_minibatch_size": 50,
            "model": {"fcnet_hiddens": [10]},
        }
        config = PPOConfig().rollouts(**rollout_settings)
        config = config.training(**training_settings)
        config = config.experimental(_disable_preprocessor_api=True)
        config = config.resources(num_gpus=1)

        # (Artur): Only the old ModelV2 API supports this test, because the
        # RLModules API does not offer arbitrarily complex Models without
        # preprocessors; such input spaces need custom implementations of the
        # input space.
        # TODO (Artur): Delete this test once we remove ModelV2 API.
        config.rl_module(_enable_rl_module_api=False).training(
            _enable_learner_api=False
        )

        check_supported_spaces(
            "PPO",
            config,
            frameworks=["torch", "tf"],
            check_bounds=True,
            use_gpu=True,
        )


class TestSupportedSpacesDQN(unittest.TestCase):
    """Runs `check_supported_spaces` against DQN for tf2, torch and tf."""

    @classmethod
    def setUpClass(cls) -> None:
        # Initialize Ray before the tests of this class run.
        ray.init()

    @classmethod
    def tearDownClass(cls) -> None:
        # Shut Ray down again after the tests of this class.
        ray.shutdown()

    def test_dqn(self):
        replay_buffer_settings = {"capacity": 1000}
        dqn_config = DQNConfig().reporting(min_sample_timesteps_per_iteration=1)
        dqn_config = dqn_config.training(replay_buffer_config=replay_buffer_settings)
        check_supported_spaces(
            "DQN",
            dqn_config,
            frameworks=["tf2", "torch", "tf"],
        )


class TestSupportedSpacesOffPolicy(unittest.TestCase):
    """Runs `check_supported_spaces` (with bounds checking) for DDPG and SAC."""

    @classmethod
    def setUpClass(cls) -> None:
        # Initialize Ray with four CPUs for the tests of this class.
        ray.init(num_cpus=4)

    @classmethod
    def tearDownClass(cls) -> None:
        # Shut Ray down again after the tests of this class.
        ray.shutdown()

    def test_ddpg(self):
        ddpg_config = (
            DDPGConfig()
            .exploration(exploration_config={"ou_base_scale": 100.0})
            .reporting(min_sample_timesteps_per_iteration=1)
            .training(
                replay_buffer_config={"capacity": 1000},
                use_state_preprocessor=True,
            )
        )
        check_supported_spaces("DDPG", ddpg_config, check_bounds=True)

    def test_sac(self):
        sac_config = SACConfig().training(replay_buffer_config={"capacity": 1000})
        check_supported_spaces("SAC", sac_config, check_bounds=True)


class TestSupportedSpacesEvolutionAlgos(unittest.TestCase):
    """Runs `check_supported_spaces` for ARS and ES."""

    @classmethod
    def setUpClass(cls) -> None:
        # Initialize Ray with four CPUs for the tests of this class.
        ray.init(num_cpus=4)

    @classmethod
    def tearDownClass(cls) -> None:
        # Shut Ray down again after the tests of this class.
        ray.shutdown()

    def test_ars(self):
        ars_config = ARSConfig().rollouts(num_rollout_workers=1)
        ars_config = ars_config.training(
            noise_size=1500000,
            num_rollouts=1,
            rollouts_used=1,
        )
        # ARS uses a numpy policy; framework=None corresponds to numpy.
        check_supported_spaces("ARS", ars_config, frameworks=[None])

    def test_es(self):
        es_config = ESConfig().rollouts(num_rollout_workers=1)
        es_config = es_config.training(
            noise_size=1500000,
            episodes_per_batch=1,
            train_batch_size=1,
        )
        # ES uses a numpy policy; framework=None corresponds to numpy.
        check_supported_spaces("ES", es_config, frameworks=[None])


if __name__ == "__main__":
    import sys

    import pytest

    # An optional first command-line argument names a single TestCase class to
    # run; without it, all unittest.TestCase classes in this file are run.
    pytest_target = __file__
    if len(sys.argv) > 1:
        pytest_target += "::" + sys.argv[1]
    sys.exit(pytest.main(["-v", pytest_target]))