123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173 |
- from copy import deepcopy
- import numpy as np
- import unittest
- import ray
- import ray.rllib.algorithms.dqn as dqn
- from ray.rllib.utils.test_utils import (
- check,
- check_compute_single_action,
- check_train_results,
- framework_iterator,
- )
class TestDQN(unittest.TestCase):
    """Compilation and exploration tests for RLlib's DQN algorithm.

    A single Ray instance is started for the whole class and shut down once
    all tests have run. Every algorithm built by a test is stopped again,
    even when one of the checks fails.
    """

    @classmethod
    def setUpClass(cls) -> None:
        """Start Ray once for all tests in this class."""
        ray.init()

    @classmethod
    def tearDownClass(cls) -> None:
        """Shut Ray down after the last test of this class."""
        ray.shutdown()

    @staticmethod
    def _build_train_and_check(config, num_iterations=1):
        """Build an algorithm from `config`, train it, and validate it.

        Runs `num_iterations` training iterations, validating and printing
        each result dict, then runs `check_compute_single_action` on the
        trained algorithm.

        Args:
            config: The algorithm config to build from.
            num_iterations: Number of `train()` calls to perform.
        """
        algo = config.build()
        try:
            for _ in range(num_iterations):
                results = algo.train()
                check_train_results(results)
                print(results)
            check_compute_single_action(algo)
        finally:
            # Always release the algorithm, even if a check above raised.
            algo.stop()

    def _check_compilation(self, env, num_iterations=1):
        """Build and train double-dueling DQN and Rainbow on `env`.

        Args:
            env: Name of the environment to train on.
            num_iterations: Number of training iterations per algorithm.
        """
        config = (
            dqn.dqn.DQNConfig()
            .environment(env)
            .rollouts(num_rollout_workers=2)
            .training(num_steps_sampled_before_learning_starts=0)
        )

        for _ in framework_iterator(config):
            # Double-dueling DQN.
            print("Double-dueling")
            self._build_train_and_check(config, num_iterations)

            # Rainbow: configured on a copy so that `config` stays untouched
            # for the next framework.
            print("Rainbow")
            rainbow_config = deepcopy(config).training(
                num_atoms=10, noisy=True, double_q=True, dueling=True, n_step=5
            )
            self._build_train_and_check(rainbow_config, num_iterations)

    @staticmethod
    def _sample_actions(algo, obs, num_samples):
        """Return `num_samples` actions computed with default exploration."""
        return [algo.compute_single_action(obs) for _ in range(num_samples)]

    @staticmethod
    def _check_deterministic_without_exploration(algo, obs, num_samples=50):
        """Check that `explore=False` yields the same action on every call."""
        expected = algo.compute_single_action(obs, explore=False)
        for _ in range(num_samples):
            check(algo.compute_single_action(obs, explore=False), expected)

    def test_dqn_compilation(self):
        """Test whether DQN can be built on all frameworks."""
        self._check_compilation("CartPole-v1")

    def test_dqn_compilation_integer_rewards(self):
        """Test whether DQN can be built on all frameworks.

        Unlike the previous test, this uses an environment with integer
        rewards in order to test that type conversions are working correctly.
        """
        self._check_compilation("Taxi-v3")

    def test_dqn_exploration_and_soft_q_config(self):
        """Tests, whether a DQN Agent outputs exploration/softmaxed actions."""
        config = (
            dqn.dqn.DQNConfig()
            .environment(
                "FrozenLake-v1",
                env_config={"is_slippery": False, "map_name": "4x4"},
            )
            .rollouts(num_rollout_workers=0)
            .training(num_steps_sampled_before_learning_starts=0)
        )

        # The same fixed observation is used for every action query.
        obs = np.array(0)

        # Test against all frameworks.
        # Every exploration variant below is derived from a *copy* of
        # `config`. Mutating `config` itself would leak the last variant
        # into the next framework's "default" section.
        for _ in framework_iterator(config):
            # Default EpsilonGreedy setup.
            algo = config.build()
            try:
                # Setting explore=False should always return the same action.
                self._check_deterministic_without_exploration(algo, obs)
                # explore=None (default: explore) should return different
                # actions.
                actions = self._sample_actions(algo, obs, 50)
                check(np.std(actions), 0.0, false=True)
            finally:
                algo.stop()

            # Low softmax temperature. Behaves like argmax
            # (but no epsilon exploration).
            low_temp_config = deepcopy(config).exploration(
                exploration_config={"type": "SoftQ", "temperature": 0.000001}
            )
            algo = low_temp_config.build()
            try:
                # Due to the low temp, always expect the same action.
                actions = self._sample_actions(algo, obs, 51)
                check(np.std(actions), 0.0, decimals=3)
            finally:
                algo.stop()

            # Higher softmax temperature.
            high_temp_config = deepcopy(config).exploration(
                exploration_config={"type": "SoftQ", "temperature": 1.0}
            )
            algo = high_temp_config.build()
            try:
                # Even with the higher temperature, if we set explore=False,
                # we should expect the same actions always.
                self._check_deterministic_without_exploration(algo, obs)
                # Due to the higher temp, expect the sampled actions to vary
                # (only a non-zero spread is asserted here).
                actions = self._sample_actions(algo, obs, 300)
                check(np.std(actions), 0.0, false=True)
            finally:
                algo.stop()

            # With Random exploration.
            random_config = deepcopy(config).exploration(
                exploration_config={"type": "Random"}, explore=True
            )
            algo = random_config.build()
            try:
                actions = self._sample_actions(algo, obs, 300)
                check(np.std(actions), 0.0, false=True)
            finally:
                algo.stop()
if __name__ == "__main__":
    import sys

    import pytest

    # Run this file's tests verbosely and hand pytest's return value to
    # `sys.exit` so it becomes the process exit status.
    exit_code = pytest.main(["-v", __file__])
    sys.exit(exit_code)
|