# abstract_accelerator.py
# Copyright (c) Microsoft Corporation.
# SPDX-License-Identifier: Apache-2.0

# DeepSpeed Team
import abc
from abc import ABC
  6. class DeepSpeedAccelerator(ABC):
  7. def __init__(self):
  8. self._name = None
  9. self._communication_backend_name = None
  10. @abc.abstractmethod
  11. def is_synchronized_device(self):
  12. ...
  13. @abc.abstractmethod
  14. def use_host_timers(self):
  15. ...
  16. @abc.abstractmethod
  17. def resolves_data_dependency(self):
  18. ...
  19. @abc.abstractmethod
  20. def handles_memory_backpressure(self):
  21. ...
  22. # Device APIs
  23. @abc.abstractmethod
  24. def device_name(self, device_index):
  25. ...
  26. @abc.abstractmethod
  27. def device(self, device_index):
  28. ...
  29. @abc.abstractmethod
  30. def set_device(self, device_index):
  31. ...
  32. @abc.abstractmethod
  33. def current_device(self):
  34. ...
  35. @abc.abstractmethod
  36. def current_device_name(self):
  37. ...
  38. @abc.abstractmethod
  39. def device_count(self):
  40. ...
  41. @abc.abstractmethod
  42. def synchronize(self, device_index=None):
  43. ...
  44. # RNG APIs
  45. @abc.abstractmethod
  46. def random(self):
  47. ...
  48. @abc.abstractmethod
  49. def set_rng_state(self, new_state, device_index=None):
  50. ...
  51. @abc.abstractmethod
  52. def get_rng_state(self, device_index=None):
  53. ...
  54. @abc.abstractmethod
  55. def manual_seed(self, seed):
  56. ...
  57. @abc.abstractmethod
  58. def manual_seed_all(self, seed):
  59. ...
  60. @abc.abstractmethod
  61. def initial_seed(self, seed):
  62. ...
  63. @abc.abstractmethod
  64. def default_generator(self, device_index):
  65. ...
  66. # Streams/Events
  67. @property
  68. @abc.abstractmethod
  69. def Stream(self):
  70. ...
  71. @abc.abstractmethod
  72. def stream(self, stream):
  73. ...
  74. @abc.abstractmethod
  75. def current_stream(self, device_index=None):
  76. ...
  77. @abc.abstractmethod
  78. def default_stream(self, device_index=None):
  79. ...
  80. @property
  81. @abc.abstractmethod
  82. def Event(self):
  83. ...
  84. # Memory management
  85. @abc.abstractmethod
  86. def empty_cache(self):
  87. ...
  88. @abc.abstractmethod
  89. def memory_allocated(self, device_index=None):
  90. ...
  91. @abc.abstractmethod
  92. def max_memory_allocated(self, device_index=None):
  93. ...
  94. @abc.abstractmethod
  95. def reset_max_memory_allocated(self, device_index=None):
  96. ...
  97. @abc.abstractmethod
  98. def memory_cached(self, device_index=None):
  99. ...
  100. @abc.abstractmethod
  101. def max_memory_cached(self, device_index=None):
  102. ...
  103. @abc.abstractmethod
  104. def reset_max_memory_cached(self, device_index=None):
  105. ...
  106. @abc.abstractmethod
  107. def memory_stats(self, device_index=None):
  108. ...
  109. @abc.abstractmethod
  110. def reset_peak_memory_stats(self, device_index=None):
  111. ...
  112. @abc.abstractmethod
  113. def memory_reserved(self, device_index=None):
  114. ...
  115. @abc.abstractmethod
  116. def max_memory_reserved(self, device_index=None):
  117. ...
  118. @abc.abstractmethod
  119. def total_memory(self, device_index=None):
  120. ...
  121. @abc.abstractmethod
  122. def available_memory(self, device_index=None):
  123. ...
  124. # Data types
  125. @abc.abstractmethod
  126. def is_bf16_supported(self):
  127. ...
  128. @abc.abstractmethod
  129. def is_fp16_supported(self):
  130. ...
  131. @abc.abstractmethod
  132. def supported_dtypes(self):
  133. ...
  134. # Misc
  135. @abc.abstractmethod
  136. def amp(self):
  137. ...
  138. @abc.abstractmethod
  139. def is_available(self):
  140. ...
  141. @abc.abstractmethod
  142. def range_push(self, msg):
  143. ...
  144. @abc.abstractmethod
  145. def range_pop(self):
  146. ...
  147. @abc.abstractmethod
  148. def lazy_call(self, callback):
  149. ...
  150. @abc.abstractmethod
  151. def communication_backend_name(self):
  152. ...
  153. @abc.abstractmethod
  154. def is_triton_supported(self):
  155. ...
  156. # Graph operations
  157. @abc.abstractmethod
  158. def create_graph(self):
  159. ...
  160. @abc.abstractmethod
  161. def capture_to_graph(self, graph, pool=None, stream=None):
  162. ...
  163. @abc.abstractmethod
  164. def replay_graph(self, graph):
  165. ...
  166. # Tensor operations
  167. @property
  168. @abc.abstractmethod
  169. def BFloat16Tensor(self):
  170. ...
  171. @property
  172. @abc.abstractmethod
  173. def ByteTensor(self):
  174. ...
  175. @property
  176. @abc.abstractmethod
  177. def DoubleTensor(self):
  178. ...
  179. @property
  180. @abc.abstractmethod
  181. def FloatTensor(self):
  182. ...
  183. @property
  184. @abc.abstractmethod
  185. def HalfTensor(self):
  186. ...
  187. @property
  188. @abc.abstractmethod
  189. def IntTensor(self):
  190. ...
  191. @property
  192. @abc.abstractmethod
  193. def LongTensor(self):
  194. ...
  195. @abc.abstractmethod
  196. def pin_memory(self, tensor, align_bytes=1):
  197. ...
  198. @abc.abstractmethod
  199. def is_pinned(self, tensor):
  200. ...
  201. @abc.abstractmethod
  202. def on_accelerator(self, tensor):
  203. ...
  204. @abc.abstractmethod
  205. def op_builder_dir(self):
  206. ...
  207. # create an instance of op builder, specified by class_name
  208. @abc.abstractmethod
  209. def create_op_builder(self, class_name):
  210. ...
  211. # return an op builder class, specified by class_name
  212. @abc.abstractmethod
  213. def get_op_builder(self, class_name):
  214. ...
  215. @abc.abstractmethod
  216. def build_extension(self):
  217. ...
  218. @abc.abstractmethod
  219. def export_envs(self):
  220. ...