numpy.py 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487
  1. from gym.spaces import Discrete, MultiDiscrete
  2. import numpy as np
  3. import tree # pip install dm_tree
  4. from typing import List, Optional
  5. from ray.rllib.utils.deprecation import DEPRECATED_VALUE, deprecation_warning
  6. from ray.rllib.utils.framework import try_import_tf, try_import_torch
  7. from ray.rllib.utils.typing import SpaceStruct, TensorType, TensorStructType, \
  8. Union
  9. tf1, tf, tfv = try_import_tf()
  10. torch, _ = try_import_torch()
  11. SMALL_NUMBER = 1e-6
  12. # Some large int number. May be increased here, if needed.
  13. LARGE_INTEGER = 100000000
  14. # Min and Max outputs (clipped) from an NN-output layer interpreted as the
  15. # log(x) of some x (e.g. a stddev of a normal
  16. # distribution).
  17. MIN_LOG_NN_OUTPUT = -5
  18. MAX_LOG_NN_OUTPUT = 2
  19. def aligned_array(size: int, dtype, align: int = 64) -> np.ndarray:
  20. """Returns an array of a given size that is 64-byte aligned.
  21. The returned array can be efficiently copied into GPU memory by TensorFlow.
  22. Args:
  23. size: The size (total number of items) of the array. For example,
  24. array([[0.0, 1.0], [2.0, 3.0]]) would have size=4.
  25. dtype: The numpy dtype of the array.
  26. align: The alignment to use.
  27. Returns:
  28. A np.ndarray with the given specifications.
  29. """
  30. n = size * dtype.itemsize
  31. empty = np.empty(n + (align - 1), dtype=np.uint8)
  32. data_align = empty.ctypes.data % align
  33. offset = 0 if data_align == 0 else (align - data_align)
  34. if n == 0:
  35. # stop np from optimising out empty slice reference
  36. output = empty[offset:offset + 1][0:0].view(dtype)
  37. else:
  38. output = empty[offset:offset + n].view(dtype)
  39. assert len(output) == size, len(output)
  40. assert output.ctypes.data % align == 0, output.ctypes.data
  41. return output
  42. def concat_aligned(items: List[np.ndarray],
  43. time_major: Optional[bool] = None) -> np.ndarray:
  44. """Concatenate arrays, ensuring the output is 64-byte aligned.
  45. We only align float arrays; other arrays are concatenated as normal.
  46. This should be used instead of np.concatenate() to improve performance
  47. when the output array is likely to be fed into TensorFlow.
  48. Args:
  49. items: The list of items to concatenate and align.
  50. time_major: Whether the data in items is time-major, in which
  51. case, we will concatenate along axis=1.
  52. Returns:
  53. The concat'd and aligned array.
  54. """
  55. if len(items) == 0:
  56. return []
  57. elif len(items) == 1:
  58. # we assume the input is aligned. In any case, it doesn't help
  59. # performance to force align it since that incurs a needless copy.
  60. return items[0]
  61. elif (isinstance(items[0], np.ndarray)
  62. and items[0].dtype in [np.float32, np.float64, np.uint8]):
  63. dtype = items[0].dtype
  64. flat = aligned_array(sum(s.size for s in items), dtype)
  65. if time_major is not None:
  66. if time_major is True:
  67. batch_dim = sum(s.shape[1] for s in items)
  68. new_shape = (
  69. items[0].shape[0],
  70. batch_dim,
  71. ) + items[0].shape[2:]
  72. else:
  73. batch_dim = sum(s.shape[0] for s in items)
  74. new_shape = (
  75. batch_dim,
  76. items[0].shape[1],
  77. ) + items[0].shape[2:]
  78. else:
  79. batch_dim = sum(s.shape[0] for s in items)
  80. new_shape = (batch_dim, ) + items[0].shape[1:]
  81. output = flat.reshape(new_shape)
  82. assert output.ctypes.data % 64 == 0, output.ctypes.data
  83. np.concatenate(items, out=output, axis=1 if time_major else 0)
  84. return output
  85. else:
  86. return np.concatenate(items, axis=1 if time_major else 0)
  87. def convert_to_numpy(x: TensorStructType,
  88. reduce_type: bool = True,
  89. reduce_floats=DEPRECATED_VALUE):
  90. """Converts values in `stats` to non-Tensor numpy or python types.
  91. Args:
  92. x: Any (possibly nested) struct, the values in which will be
  93. converted and returned as a new struct with all torch/tf tensors
  94. being converted to numpy types.
  95. reduce_type: Whether to automatically reduce all float64 and int64 data
  96. into float32 and int32 data, respectively.
  97. Returns:
  98. A new struct with the same structure as `x`, but with all
  99. values converted to numpy arrays (on CPU).
  100. """
  101. if reduce_floats != DEPRECATED_VALUE:
  102. deprecation_warning(
  103. old="reduce_floats", new="reduce_types", error=False)
  104. reduce_type = reduce_floats
  105. # The mapping function used to numpyize torch/tf Tensors (and move them
  106. # to the CPU beforehand).
  107. def mapping(item):
  108. if torch and isinstance(item, torch.Tensor):
  109. ret = item.cpu().item() if len(item.size()) == 0 else \
  110. item.detach().cpu().numpy()
  111. elif tf and isinstance(item, (tf.Tensor, tf.Variable)) and \
  112. hasattr(item, "numpy"):
  113. assert tf.executing_eagerly()
  114. ret = item.numpy()
  115. else:
  116. ret = item
  117. if reduce_type and isinstance(ret, np.ndarray):
  118. if np.issubdtype(ret.dtype, np.floating):
  119. ret = ret.astype(np.float32)
  120. elif np.issubdtype(ret.dtype, int):
  121. ret = ret.astype(np.int32)
  122. return ret
  123. return ret
  124. return tree.map_structure(mapping, x)
  125. def fc(x: np.ndarray,
  126. weights: np.ndarray,
  127. biases: Optional[np.ndarray] = None,
  128. framework: Optional[str] = None) -> np.ndarray:
  129. """Calculates FC (dense) layer outputs given weights/biases and input.
  130. Args:
  131. x: The input to the dense layer.
  132. weights: The weights matrix.
  133. biases: The biases vector. All 0s if None.
  134. framework: An optional framework hint (to figure out,
  135. e.g. whether to transpose torch weight matrices).
  136. Returns:
  137. The dense layer's output.
  138. """
  139. def map_(data, transpose=False):
  140. if torch:
  141. if isinstance(data, torch.Tensor):
  142. data = data.cpu().detach().numpy()
  143. if tf and tf.executing_eagerly():
  144. if isinstance(data, tf.Variable):
  145. data = data.numpy()
  146. if transpose:
  147. data = np.transpose(data)
  148. return data
  149. x = map_(x)
  150. # Torch stores matrices in transpose (faster for backprop).
  151. transpose = (framework == "torch" and (x.shape[1] != weights.shape[0]
  152. and x.shape[1] == weights.shape[1]))
  153. weights = map_(weights, transpose=transpose)
  154. biases = map_(biases)
  155. return np.matmul(x, weights) + (0.0 if biases is None else biases)
  156. def flatten_inputs_to_1d_tensor(inputs: TensorStructType,
  157. spaces_struct: Optional[SpaceStruct] = None,
  158. time_axis: bool = False) -> TensorType:
  159. """Flattens arbitrary input structs according to the given spaces struct.
  160. Returns a single 1D tensor resulting from the different input
  161. components' values.
  162. Thereby:
  163. - Boxes (any shape) get flattened to (B, [T]?, -1). Note that image boxes
  164. are not treated differently from other types of Boxes and get
  165. flattened as well.
  166. - Discrete (int) values are one-hot'd, e.g. a batch of [1, 0, 3] (B=3 with
  167. Discrete(4) space) results in [[0, 1, 0, 0], [1, 0, 0, 0], [0, 0, 0, 1]].
  168. - MultiDiscrete values are multi-one-hot'd, e.g. a batch of
  169. [[0, 2], [1, 4]] (B=2 with MultiDiscrete([2, 5]) space) results in
  170. [[1, 0, 0, 0, 1, 0, 0], [0, 1, 0, 0, 0, 0, 1]].
  171. Args:
  172. inputs: The inputs to be flattened.
  173. spaces_struct: The structure of the spaces that behind the input
  174. time_axis: Whether all inputs have a time-axis (after the batch axis).
  175. If True, will keep not only the batch axis (0th), but the time axis
  176. (1st) as-is and flatten everything from the 2nd axis up.
  177. Returns:
  178. A single 1D tensor resulting from concatenating all
  179. flattened/one-hot'd input components. Depending on the time_axis flag,
  180. the shape is (B, n) or (B, T, n).
  181. Examples:
  182. >>> # B=2
  183. >>> out = flatten_inputs_to_1d_tensor(
  184. ... {"a": [1, 0], "b": [[[0.0], [0.1]], [1.0], [1.1]]},
  185. ... spaces_struct=dict(a=Discrete(2), b=Box(shape=(2, 1)))
  186. ... )
  187. >>> print(out)
  188. ... [[0.0, 1.0, 0.0, 0.1], [1.0, 0.0, 1.0, 1.1]] # B=2 n=4
  189. >>> # B=2; T=2
  190. >>> out = flatten_inputs_to_1d_tensor(
  191. ... ([[1, 0], [0, 1]],
  192. ... [[[0.0, 0.1], [1.0, 1.1]], [[2.0, 2.1], [3.0, 3.1]]]),
  193. ... spaces_struct=tuple([Discrete(2), Box(shape=(2, ))]),
  194. ... time_axis=True
  195. ... )
  196. >>> print(out)
  197. ... [[[0.0, 1.0, 0.0, 0.1], [1.0, 0.0, 1.0, 1.1]],
  198. ... [[1.0, 0.0, 2.0, 2.1], [0.0, 1.0, 3.0, 3.1]]] # B=2 T=2 n=4
  199. """
  200. flat_inputs = tree.flatten(inputs)
  201. flat_spaces = tree.flatten(spaces_struct) if spaces_struct is not None \
  202. else [None] * len(flat_inputs)
  203. B = None
  204. T = None
  205. out = []
  206. for input_, space in zip(flat_inputs, flat_spaces):
  207. assert isinstance(input_, np.ndarray)
  208. # Store batch and (if applicable) time dimension.
  209. if B is None:
  210. B = input_.shape[0]
  211. if time_axis:
  212. T = input_.shape[1]
  213. # One-hot encoding.
  214. if isinstance(space, Discrete):
  215. if time_axis:
  216. input_ = np.reshape(input_, [B * T])
  217. out.append(one_hot(input_, depth=space.n).astype(np.float32))
  218. # Multi one-hot encoding.
  219. elif isinstance(space, MultiDiscrete):
  220. if time_axis:
  221. input_ = np.reshape(input_, [B * T, -1])
  222. out.append(
  223. np.concatenate(
  224. [
  225. one_hot(input_[:, i], depth=n).astype(np.float32)
  226. for i, n in enumerate(space.nvec)
  227. ],
  228. axis=-1))
  229. # Box: Flatten.
  230. else:
  231. if time_axis:
  232. input_ = np.reshape(input_, [B * T, -1])
  233. else:
  234. input_ = np.reshape(input_, [B, -1])
  235. out.append(input_.astype(np.float32))
  236. merged = np.concatenate(out, axis=-1)
  237. # Restore the time-dimension, if applicable.
  238. if time_axis:
  239. merged = np.reshape(merged, [B, T, -1])
  240. return merged
  241. def huber_loss(x: np.ndarray, delta: float = 1.0) -> np.ndarray:
  242. """Reference: https://en.wikipedia.org/wiki/Huber_loss."""
  243. return np.where(
  244. np.abs(x) < delta,
  245. np.power(x, 2.0) * 0.5, delta * (np.abs(x) - 0.5 * delta))
  246. def l2_loss(x: np.ndarray) -> np.ndarray:
  247. """Computes half the L2 norm of a tensor (w/o the sqrt): sum(x**2) / 2.
  248. Args:
  249. x: The input tensor.
  250. Returns:
  251. The l2-loss output according to the above formula given `x`.
  252. """
  253. return np.sum(np.square(x)) / 2.0
  254. def lstm(x,
  255. weights: np.ndarray,
  256. biases: Optional[np.ndarray] = None,
  257. initial_internal_states: Optional[np.ndarray] = None,
  258. time_major: bool = False,
  259. forget_bias: float = 1.0):
  260. """Calculates LSTM layer output given weights/biases, states, and input.
  261. Args:
  262. x: The inputs to the LSTM layer including time-rank
  263. (0th if time-major, else 1st) and the batch-rank
  264. (1st if time-major, else 0th).
  265. weights: The weights matrix.
  266. biases: The biases vector. All 0s if None.
  267. initial_internal_states: The initial internal
  268. states to pass into the layer. All 0s if None.
  269. time_major: Whether to use time-major or not. Default: False.
  270. forget_bias: Gets added to first sigmoid (forget gate) output.
  271. Default: 1.0.
  272. Returns:
  273. Tuple consisting of 1) The LSTM layer's output and
  274. 2) Tuple: Last (c-state, h-state).
  275. """
  276. sequence_length = x.shape[0 if time_major else 1]
  277. batch_size = x.shape[1 if time_major else 0]
  278. units = weights.shape[1] // 4 # 4 internal layers (3x sigmoid, 1x tanh)
  279. if initial_internal_states is None:
  280. c_states = np.zeros(shape=(batch_size, units))
  281. h_states = np.zeros(shape=(batch_size, units))
  282. else:
  283. c_states = initial_internal_states[0]
  284. h_states = initial_internal_states[1]
  285. # Create a placeholder for all n-time step outputs.
  286. if time_major:
  287. unrolled_outputs = np.zeros(shape=(sequence_length, batch_size, units))
  288. else:
  289. unrolled_outputs = np.zeros(shape=(batch_size, sequence_length, units))
  290. # Push the batch 4 times through the LSTM cell and capture the outputs plus
  291. # the final h- and c-states.
  292. for t in range(sequence_length):
  293. input_matrix = x[t, :, :] if time_major else x[:, t, :]
  294. input_matrix = np.concatenate((input_matrix, h_states), axis=1)
  295. input_matmul_matrix = np.matmul(input_matrix, weights) + biases
  296. # Forget gate (3rd slot in tf output matrix). Add static forget bias.
  297. sigmoid_1 = sigmoid(input_matmul_matrix[:, units * 2:units * 3] +
  298. forget_bias)
  299. c_states = np.multiply(c_states, sigmoid_1)
  300. # Add gate (1st and 2nd slots in tf output matrix).
  301. sigmoid_2 = sigmoid(input_matmul_matrix[:, 0:units])
  302. tanh_3 = np.tanh(input_matmul_matrix[:, units:units * 2])
  303. c_states = np.add(c_states, np.multiply(sigmoid_2, tanh_3))
  304. # Output gate (last slot in tf output matrix).
  305. sigmoid_4 = sigmoid(input_matmul_matrix[:, units * 3:units * 4])
  306. h_states = np.multiply(sigmoid_4, np.tanh(c_states))
  307. # Store this output time-slice.
  308. if time_major:
  309. unrolled_outputs[t, :, :] = h_states
  310. else:
  311. unrolled_outputs[:, t, :] = h_states
  312. return unrolled_outputs, (c_states, h_states)
  313. def one_hot(x: Union[TensorType, int],
  314. depth: int = 0,
  315. on_value: float = 1.0,
  316. off_value: float = 0.0) -> np.ndarray:
  317. """One-hot utility function for numpy.
  318. Thanks to qianyizhang:
  319. https://gist.github.com/qianyizhang/07ee1c15cad08afb03f5de69349efc30.
  320. Args:
  321. x: The input to be one-hot encoded.
  322. depth: The max. number to be one-hot encoded (size of last rank).
  323. on_value: The value to use for on. Default: 1.0.
  324. off_value: The value to use for off. Default: 0.0.
  325. Returns:
  326. The one-hot encoded equivalent of the input array.
  327. """
  328. # Handle simple ints properly.
  329. if isinstance(x, int):
  330. x = np.array(x, dtype=np.int32)
  331. # Handle torch arrays properly.
  332. elif torch and isinstance(x, torch.Tensor):
  333. x = x.numpy()
  334. # Handle bool arrays correctly.
  335. if x.dtype == np.bool_:
  336. x = x.astype(np.int)
  337. depth = 2
  338. # If depth is not given, try to infer it from the values in the array.
  339. if depth == 0:
  340. depth = np.max(x) + 1
  341. assert np.max(x) < depth, \
  342. "ERROR: The max. index of `x` ({}) is larger than depth ({})!".\
  343. format(np.max(x), depth)
  344. shape = x.shape
  345. # Python 2.7 compatibility, (*shape, depth) is not allowed.
  346. shape_list = list(shape[:])
  347. shape_list.append(depth)
  348. out = np.ones(shape_list) * off_value
  349. indices = []
  350. for i in range(x.ndim):
  351. tiles = [1] * x.ndim
  352. s = [1] * x.ndim
  353. s[i] = -1
  354. r = np.arange(shape[i]).reshape(s)
  355. if i > 0:
  356. tiles[i - 1] = shape[i - 1]
  357. r = np.tile(r, tiles)
  358. indices.append(r)
  359. indices.append(x)
  360. out[tuple(indices)] = on_value
  361. return out
  362. def relu(x: np.ndarray, alpha: float = 0.0) -> np.ndarray:
  363. """Implementation of the leaky ReLU function.
  364. y = x * alpha if x < 0 else x
  365. Args:
  366. x: The input values.
  367. alpha: A scaling ("leak") factor to use for negative x.
  368. Returns:
  369. The leaky ReLU output for x.
  370. """
  371. return np.maximum(x, x * alpha, x)
  372. def sigmoid(x: np.ndarray, derivative: bool = False) -> np.ndarray:
  373. """
  374. Returns the sigmoid function applied to x.
  375. Alternatively, can return the derivative or the sigmoid function.
  376. Args:
  377. x: The input to the sigmoid function.
  378. derivative: Whether to return the derivative or not.
  379. Default: False.
  380. Returns:
  381. The sigmoid function (or its derivative) applied to x.
  382. """
  383. if derivative:
  384. return x * (1 - x)
  385. else:
  386. return 1 / (1 + np.exp(-x))
  387. def softmax(x: np.ndarray, axis: int = -1,
  388. epsilon: Optional[float] = None) -> np.ndarray:
  389. """Returns the softmax values for x.
  390. The exact formula used is:
  391. S(xi) = e^xi / SUMj(e^xj), where j goes over all elements in x.
  392. Args:
  393. x: The input to the softmax function.
  394. axis: The axis along which to softmax.
  395. epsilon: Optional epsilon as a minimum value. If None, use
  396. `SMALL_NUMBER`.
  397. Returns:
  398. The softmax over x.
  399. """
  400. epsilon = epsilon or SMALL_NUMBER
  401. # x_exp = np.maximum(np.exp(x), SMALL_NUMBER)
  402. x_exp = np.exp(x)
  403. # return x_exp /
  404. # np.maximum(np.sum(x_exp, axis, keepdims=True), SMALL_NUMBER)
  405. return np.maximum(x_exp / np.sum(x_exp, axis, keepdims=True), epsilon)