123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130 |
- from abc import ABCMeta, abstractmethod
- import logging
- import numpy as np
- import threading
- from ray.rllib.policy.sample_batch import MultiAgentBatch
- from ray.rllib.utils.annotations import PublicAPI
- from ray.rllib.utils.framework import try_import_tf
- from typing import Dict, List
- from ray.rllib.utils.typing import TensorType, SampleBatchType
- tf1, tf, tfv = try_import_tf()
- logger = logging.getLogger(__name__)
- @PublicAPI
- class InputReader(metaclass=ABCMeta):
- """API for collecting and returning experiences during policy evaluation."""
- @abstractmethod
- @PublicAPI
- def next(self) -> SampleBatchType:
- """Returns the next batch of read experiences.
- Returns:
- The experience read (SampleBatch or MultiAgentBatch).
- """
- raise NotImplementedError
- @PublicAPI
- def tf_input_ops(self, queue_size: int = 1) -> Dict[str, TensorType]:
- """Returns TensorFlow queue ops for reading inputs from this reader.
- The main use of these ops is for integration into custom model losses.
- For example, you can use tf_input_ops() to read from files of external
- experiences to add an imitation learning loss to your model.
- This method creates a queue runner thread that will call next() on this
- reader repeatedly to feed the TensorFlow queue.
- Args:
- queue_size: Max elements to allow in the TF queue.
- Example:
- >>> from ray.rllib.models.modelv2 import ModelV2
- >>> from ray.rllib.offline.json_reader import JsonReader
- >>> imitation_loss = ... # doctest +SKIP
- >>> class MyModel(ModelV2): # doctest +SKIP
- ... def custom_loss(self, policy_loss, loss_inputs):
- ... reader = JsonReader(...)
- ... input_ops = reader.tf_input_ops()
- ... logits, _ = self._build_layers_v2(
- ... {"obs": input_ops["obs"]},
- ... self.num_outputs, self.options)
- ... il_loss = imitation_loss(logits, input_ops["action"])
- ... return policy_loss + il_loss
- You can find a runnable version of this in examples/custom_loss.py.
- Returns:
- Dict of Tensors, one for each column of the read SampleBatch.
- """
- if hasattr(self, "_queue_runner"):
- raise ValueError(
- "A queue runner already exists for this input reader. "
- "You can only call tf_input_ops() once per reader."
- )
- logger.info("Reading initial batch of data from input reader.")
- batch = self.next()
- if isinstance(batch, MultiAgentBatch):
- raise NotImplementedError(
- "tf_input_ops() is not implemented for multi agent batches"
- )
- # Note on casting to `np.array(batch[k])`: In order to get all keys that
- # are numbers, we need to convert to numpy everything that is not a numpy array.
- # This is because SampleBatches used to only hold numpy arrays, but since our
- # RNN efforts under RLModules, we also allow lists.
- keys = [
- k
- for k in sorted(batch.keys())
- if np.issubdtype(np.array(batch[k]).dtype, np.number)
- ]
- dtypes = [batch[k].dtype for k in keys]
- shapes = {k: (-1,) + s[1:] for (k, s) in [(k, batch[k].shape) for k in keys]}
- queue = tf1.FIFOQueue(capacity=queue_size, dtypes=dtypes, names=keys)
- tensors = queue.dequeue()
- logger.info("Creating TF queue runner for {}".format(self))
- self._queue_runner = _QueueRunner(self, queue, keys, dtypes)
- self._queue_runner.enqueue(batch)
- self._queue_runner.start()
- out = {k: tf.reshape(t, shapes[k]) for k, t in tensors.items()}
- return out
- class _QueueRunner(threading.Thread):
- """Thread that feeds a TF queue from a InputReader."""
- def __init__(
- self,
- input_reader: InputReader,
- queue: "tf1.FIFOQueue",
- keys: List[str],
- dtypes: "tf.dtypes.DType",
- ):
- threading.Thread.__init__(self)
- self.sess = tf1.get_default_session()
- self.daemon = True
- self.input_reader = input_reader
- self.keys = keys
- self.queue = queue
- self.placeholders = [tf1.placeholder(dtype) for dtype in dtypes]
- self.enqueue_op = queue.enqueue(dict(zip(keys, self.placeholders)))
- def enqueue(self, batch: SampleBatchType):
- data = {self.placeholders[i]: batch[key] for i, key in enumerate(self.keys)}
- self.sess.run(self.enqueue_op, feed_dict=data)
- def run(self):
- while True:
- try:
- batch = self.input_reader.next()
- self.enqueue(batch)
- except Exception:
- logger.exception("Error reading from input")
|