numpy.py
import numpy as np
import tree  # pip install dm_tree
from typing import List, Optional, Union

from ray.rllib.utils.deprecation import DEPRECATED_VALUE, deprecation_warning
from ray.rllib.utils.framework import try_import_tf, try_import_torch
from ray.rllib.utils.typing import TensorType, TensorStructType

tf1, tf, tfv = try_import_tf()
torch, _ = try_import_torch()

SMALL_NUMBER = 1e-6
# Some large int number. May be increased here, if needed.
LARGE_INTEGER = 100000000

# Min and Max outputs (clipped) from an NN-output layer interpreted as the
# log(x) of some x (e.g. a stddev of a normal distribution).
MIN_LOG_NN_OUTPUT = -5
MAX_LOG_NN_OUTPUT = 2


def aligned_array(size: int, dtype, align: int = 64) -> np.ndarray:
    """Returns an array of a given size that is 64-byte aligned.

    The returned array can be efficiently copied into GPU memory by TensorFlow.

    Args:
        size: The size (total number of items) of the array. For example,
            array([[0.0, 1.0], [2.0, 3.0]]) would have size=4.
        dtype: The numpy dtype of the array.
        align: The alignment to use.

    Returns:
        A np.ndarray with the given specifications.
    """
    n = size * dtype.itemsize
    empty = np.empty(n + (align - 1), dtype=np.uint8)
    data_align = empty.ctypes.data % align
    offset = 0 if data_align == 0 else (align - data_align)
    if n == 0:
        # stop np from optimising out empty slice reference
        output = empty[offset:offset + 1][0:0].view(dtype)
    else:
        output = empty[offset:offset + n].view(dtype)

    assert len(output) == size, len(output)
    assert output.ctypes.data % align == 0, output.ctypes.data
    return output
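

# Example usage (illustrative sketch; the size and dtype below are arbitrary):
#
#     arr = aligned_array(4, np.dtype(np.float32))
#     assert arr.ctypes.data % 64 == 0   # start address is 64-byte aligned
#     arr[:] = [0.0, 1.0, 2.0, 3.0]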


def concat_aligned(items: List[np.ndarray],
                   time_major: Optional[bool] = None) -> np.ndarray:
    """Concatenate arrays, ensuring the output is 64-byte aligned.

    We only align float arrays; other arrays are concatenated as normal.

    This should be used instead of np.concatenate() to improve performance
    when the output array is likely to be fed into TensorFlow.

    Args:
        items: The list of items to concatenate and align.
        time_major: Whether the data in items is time-major, in which
            case, we will concatenate along axis=1.

    Returns:
        The concat'd and aligned array.
    """
    if len(items) == 0:
        return []
    elif len(items) == 1:
        # We assume the input is aligned. In any case, it doesn't help
        # performance to force-align it since that incurs a needless copy.
        return items[0]
    elif (isinstance(items[0], np.ndarray)
          and items[0].dtype in [np.float32, np.float64, np.uint8]):
        dtype = items[0].dtype
        flat = aligned_array(sum(s.size for s in items), dtype)
        if time_major is not None:
            if time_major is True:
                batch_dim = sum(s.shape[1] for s in items)
                new_shape = (
                    items[0].shape[0],
                    batch_dim,
                ) + items[0].shape[2:]
            else:
                batch_dim = sum(s.shape[0] for s in items)
                new_shape = (
                    batch_dim,
                    items[0].shape[1],
                ) + items[0].shape[2:]
        else:
            batch_dim = sum(s.shape[0] for s in items)
            new_shape = (batch_dim, ) + items[0].shape[1:]
        output = flat.reshape(new_shape)
        assert output.ctypes.data % 64 == 0, output.ctypes.data
        np.concatenate(items, out=output, axis=1 if time_major else 0)
        return output
    else:
        return np.concatenate(items, axis=1 if time_major else 0)
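

# Example usage (illustrative sketch; shapes are arbitrary): concatenating two
# float32 arrays along the batch axis yields a 64-byte-aligned result.
#
#     a = np.zeros((2, 3), dtype=np.float32)
#     b = np.ones((4, 3), dtype=np.float32)
#     out = concat_aligned([a, b])       # shape (6, 3)
#     assert out.ctypes.data % 64 == 0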


def convert_to_numpy(x: TensorStructType,
                     reduce_type: bool = True,
                     reduce_floats=DEPRECATED_VALUE):
    """Converts values in `x` to non-Tensor numpy or python types.

    Args:
        x: Any (possibly nested) struct, the values in which will be
            converted and returned as a new struct with all torch/tf tensors
            being converted to numpy types.
        reduce_type: Whether to automatically reduce all float64 and int64 data
            into float32 and int32 data, respectively.

    Returns:
        A new struct with the same structure as `x`, but with all
        values converted to numpy arrays (on CPU).
    """
    if reduce_floats != DEPRECATED_VALUE:
        deprecation_warning(
            old="reduce_floats", new="reduce_type", error=False)
        reduce_type = reduce_floats

    # The mapping function used to numpyize torch/tf Tensors (and move them
    # to the CPU beforehand).
    def mapping(item):
        if torch and isinstance(item, torch.Tensor):
            ret = item.cpu().item() if len(item.size()) == 0 else \
                item.detach().cpu().numpy()
        elif tf and isinstance(item, (tf.Tensor, tf.Variable)) and \
                hasattr(item, "numpy"):
            assert tf.executing_eagerly()
            ret = item.numpy()
        else:
            ret = item
        if reduce_type and isinstance(ret, np.ndarray):
            if np.issubdtype(ret.dtype, np.floating):
                ret = ret.astype(np.float32)
            elif np.issubdtype(ret.dtype, int):
                ret = ret.astype(np.int32)
        return ret

    return tree.map_structure(mapping, x)
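

# Example usage (illustrative sketch; assumes torch is installed and the dict
# keys below are made up):
#
#     batch = {"obs": torch.ones(2, 3), "rewards": [1.0, 2.0]}
#     out = convert_to_numpy(batch)
#     # out["obs"] is a np.float32 ndarray on the CPU; the nested dict/list
#     # structure is preserved by tree.map_structure.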


def fc(x: np.ndarray,
       weights: np.ndarray,
       biases: Optional[np.ndarray] = None,
       framework: Optional[str] = None) -> np.ndarray:
    """Calculates FC (dense) layer outputs given weights/biases and input.

    Args:
        x: The input to the dense layer.
        weights: The weights matrix.
        biases: The biases vector. All 0s if None.
        framework: An optional framework hint (to figure out,
            e.g. whether to transpose torch weight matrices).

    Returns:
        The dense layer's output.
    """

    def map_(data, transpose=False):
        if torch:
            if isinstance(data, torch.Tensor):
                data = data.cpu().detach().numpy()
        if tf and tf.executing_eagerly():
            if isinstance(data, tf.Variable):
                data = data.numpy()
        if transpose:
            data = np.transpose(data)
        return data

    x = map_(x)
    # Torch stores matrices in transpose (faster for backprop).
    transpose = (framework == "torch" and (x.shape[1] != weights.shape[0]
                                           and x.shape[1] == weights.shape[1]))
    weights = map_(weights, transpose=transpose)
    biases = map_(biases)

    return np.matmul(x, weights) + (0.0 if biases is None else biases)
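

# Example usage (illustrative sketch; shapes chosen arbitrarily):
#
#     x = np.ones((2, 3), dtype=np.float32)        # batch of 2, 3 features
#     w = np.full((3, 4), 0.5, dtype=np.float32)   # 3 inputs -> 4 units
#     b = np.zeros(4, dtype=np.float32)
#     out = fc(x, w, b)                            # shape (2, 4), all 1.5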


def huber_loss(x: np.ndarray, delta: float = 1.0) -> np.ndarray:
    """Reference: https://en.wikipedia.org/wiki/Huber_loss."""
    return np.where(
        np.abs(x) < delta,
        np.power(x, 2.0) * 0.5, delta * (np.abs(x) - 0.5 * delta))
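

# Example (illustrative): with the default delta=1.0, errors below the delta
# are squared while larger errors only grow linearly:
#
#     huber_loss(np.array([0.5, 2.0]))   # -> array([0.125, 1.5])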


def l2_loss(x: np.ndarray) -> np.ndarray:
    """Computes half the L2 norm of a tensor (w/o the sqrt): sum(x**2) / 2.

    Args:
        x: The input tensor.

    Returns:
        The l2-loss output according to the above formula given `x`.
    """
    return np.sum(np.square(x)) / 2.0


def lstm(x,
         weights: np.ndarray,
         biases: Optional[np.ndarray] = None,
         initial_internal_states: Optional[np.ndarray] = None,
         time_major: bool = False,
         forget_bias: float = 1.0):
    """Calculates LSTM layer output given weights/biases, states, and input.

    Args:
        x: The inputs to the LSTM layer including time-rank
            (0th if time-major, else 1st) and the batch-rank
            (1st if time-major, else 0th).
        weights: The weights matrix.
        biases: The biases vector. All 0s if None.
        initial_internal_states: The initial internal
            states to pass into the layer. All 0s if None.
        time_major: Whether to use time-major or not. Default: False.
        forget_bias: Gets added to the forget gate's input (before the
            sigmoid is applied). Default: 1.0.

    Returns:
        Tuple consisting of 1) The LSTM layer's output and
        2) Tuple: Last (c-state, h-state).
    """
    sequence_length = x.shape[0 if time_major else 1]
    batch_size = x.shape[1 if time_major else 0]
    units = weights.shape[1] // 4  # 4 internal layers (3x sigmoid, 1x tanh)

    # Biases default to all 0s (see docstring).
    if biases is None:
        biases = np.zeros(shape=(units * 4, ))
    if initial_internal_states is None:
        c_states = np.zeros(shape=(batch_size, units))
        h_states = np.zeros(shape=(batch_size, units))
    else:
        c_states = initial_internal_states[0]
        h_states = initial_internal_states[1]

    # Create a placeholder for all n-time step outputs.
    if time_major:
        unrolled_outputs = np.zeros(shape=(sequence_length, batch_size, units))
    else:
        unrolled_outputs = np.zeros(shape=(batch_size, sequence_length, units))

    # Push the batch through the LSTM cell once per time step and capture the
    # outputs plus the final h- and c-states.
    for t in range(sequence_length):
        input_matrix = x[t, :, :] if time_major else x[:, t, :]
        input_matrix = np.concatenate((input_matrix, h_states), axis=1)
        input_matmul_matrix = np.matmul(input_matrix, weights) + biases
        # Forget gate (3rd slot in tf output matrix). Add static forget bias.
        sigmoid_1 = sigmoid(input_matmul_matrix[:, units * 2:units * 3] +
                            forget_bias)
        c_states = np.multiply(c_states, sigmoid_1)
        # Add gate (1st and 2nd slots in tf output matrix).
        sigmoid_2 = sigmoid(input_matmul_matrix[:, 0:units])
        tanh_3 = np.tanh(input_matmul_matrix[:, units:units * 2])
        c_states = np.add(c_states, np.multiply(sigmoid_2, tanh_3))
        # Output gate (last slot in tf output matrix).
        sigmoid_4 = sigmoid(input_matmul_matrix[:, units * 3:units * 4])
        h_states = np.multiply(sigmoid_4, np.tanh(c_states))

        # Store this output time-slice.
        if time_major:
            unrolled_outputs[t, :, :] = h_states
        else:
            unrolled_outputs[:, t, :] = h_states

    return unrolled_outputs, (c_states, h_states)
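

# Example usage (illustrative sketch; all shapes below are arbitrary):
#
#     B, T, input_dim, units = 2, 5, 3, 8
#     x = np.random.random((B, T, input_dim))              # batch-major input
#     weights = np.random.random((input_dim + units, 4 * units))
#     biases = np.zeros(4 * units)
#     outputs, (c, h) = lstm(x, weights, biases)
#     # outputs: (B, T, units); c and h: (B, units) each.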


def one_hot(x: Union[TensorType, int],
            depth: int = 0,
            on_value: float = 1.0,
            off_value: float = 0.0) -> np.ndarray:
    """One-hot utility function for numpy.

    Thanks to qianyizhang:
    https://gist.github.com/qianyizhang/07ee1c15cad08afb03f5de69349efc30.

    Args:
        x: The input to be one-hot encoded.
        depth: The max. number to be one-hot encoded (size of last rank).
        on_value: The value to use for on. Default: 1.0.
        off_value: The value to use for off. Default: 0.0.

    Returns:
        The one-hot encoded equivalent of the input array.
    """
    # Handle simple ints properly.
    if isinstance(x, int):
        x = np.array(x, dtype=np.int32)
    # Handle torch arrays properly.
    elif torch and isinstance(x, torch.Tensor):
        x = x.numpy()

    # Handle bool arrays correctly.
    if x.dtype == np.bool_:
        x = x.astype(np.int32)
        depth = 2

    # If depth is not given, try to infer it from the values in the array.
    if depth == 0:
        depth = np.max(x) + 1
    assert np.max(x) < depth, \
        "ERROR: The max. index of `x` ({}) is larger than depth ({})!".\
        format(np.max(x), depth)

    shape = x.shape

    # Python 2.7 compatibility, (*shape, depth) is not allowed.
    shape_list = list(shape[:])
    shape_list.append(depth)
    out = np.ones(shape_list) * off_value

    indices = []
    for i in range(x.ndim):
        tiles = [1] * x.ndim
        s = [1] * x.ndim
        s[i] = -1
        r = np.arange(shape[i]).reshape(s)
        if i > 0:
            tiles[i - 1] = shape[i - 1]
            r = np.tile(r, tiles)
        indices.append(r)
    indices.append(x)
    out[tuple(indices)] = on_value
    return out
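

# Example usage (illustrative):
#
#     one_hot(np.array([0, 2, 1]), depth=3)
#     # -> array([[1., 0., 0.],
#     #           [0., 0., 1.],
#     #           [0., 1., 0.]])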


def relu(x: np.ndarray, alpha: float = 0.0) -> np.ndarray:
    """Implementation of the leaky ReLU function.

    y = x * alpha if x < 0 else x

    Args:
        x: The input values.
        alpha: A scaling ("leak") factor to use for negative x.

    Returns:
        The leaky ReLU output for x.
    """
    # Note: the 3rd positional arg is `out`, so the result is written back
    # into `x` in-place.
    return np.maximum(x, x * alpha, x)


def sigmoid(x: np.ndarray, derivative: bool = False) -> np.ndarray:
    """Returns the sigmoid function applied to x.

    Alternatively, can return the derivative of the sigmoid function.

    Args:
        x: The input to the sigmoid function.
        derivative: Whether to return the derivative or not.
            Default: False.

    Returns:
        The sigmoid function (or its derivative) applied to x.
    """
    if derivative:
        # d/dz sigmoid(z) = sigmoid(z) * (1 - sigmoid(z)); this form assumes
        # `x` already holds sigmoid outputs.
        return x * (1 - x)
    else:
        return 1 / (1 + np.exp(-x))


def softmax(x: np.ndarray, axis: int = -1,
            epsilon: Optional[float] = None) -> np.ndarray:
    """Returns the softmax values for x.

    The exact formula used is:
    S(xi) = e^xi / SUMj(e^xj), where j goes over all elements in x.

    Args:
        x: The input to the softmax function.
        axis: The axis along which to softmax.
        epsilon: Optional epsilon as a minimum value. If None, use
            `SMALL_NUMBER`.

    Returns:
        The softmax over x.
    """
    epsilon = epsilon or SMALL_NUMBER
    # x_exp = np.maximum(np.exp(x), SMALL_NUMBER)
    x_exp = np.exp(x)
    # return x_exp / \
    #     np.maximum(np.sum(x_exp, axis, keepdims=True), SMALL_NUMBER)
    return np.maximum(x_exp / np.sum(x_exp, axis, keepdims=True), epsilon)
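

# Example usage (illustrative):
#
#     softmax(np.array([[1.0, 2.0, 3.0]]))
#     # -> approx. array([[0.090, 0.245, 0.665]]); each row sums to ~1.0.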