
/*-
 * Copyright (c) 2015-2024 Ruslan Bukin <br@bsdpad.com>
 * All rights reserved.
 *
 * Portions of this software were developed by SRI International and the
 * University of Cambridge Computer Laboratory under DARPA/AFRL contract
 * FA8750-10-C-0237 ("CTSRD"), as part of the DARPA CRASH research programme.
 *
 * Portions of this software were developed by the University of Cambridge
 * Computer Laboratory as part of the CTSRD Project, with support from the
 * UK Higher Education Innovation Fund (HEIF).
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
#ifndef _MACHINE_ATOMIC_H_
#define _MACHINE_ATOMIC_H_

#include <sys/atomic_common.h>

#define fence() __asm __volatile("fence" ::: "memory");
#define mb()    fence()
#define rmb()   fence()
#define wmb()   fence()
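
/*
 * The bare RISC-V "fence" instruction (an alias for "fence iorw, iorw")
 * is a full barrier, so mb(), rmb() and wmb() all conservatively map to
 * the same full fence.
 */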

static __inline int atomic_cmpset_8(__volatile uint8_t *, uint8_t, uint8_t);
static __inline int atomic_fcmpset_8(__volatile uint8_t *, uint8_t *, uint8_t);
static __inline int atomic_cmpset_16(__volatile uint16_t *, uint16_t, uint16_t);
static __inline int atomic_fcmpset_16(__volatile uint16_t *, uint16_t *,
    uint16_t);
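
/*
 * Acquire and release variants are synthesized by bracketing the plain
 * operation with a full fence: a fence after the access gives acquire
 * semantics, a fence before it gives release semantics.  This is
 * stronger than strictly necessary (the AMO instructions also carry
 * .aq/.rl ordering bits), but it is correct.
 */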
#define ATOMIC_ACQ_REL(NAME, WIDTH)                                     \
static __inline void                                                    \
atomic_##NAME##_acq_##WIDTH(__volatile uint##WIDTH##_t *p, uint##WIDTH##_t v)\
{                                                                       \
        atomic_##NAME##_##WIDTH(p, v);                                  \
        fence();                                                        \
}                                                                       \
                                                                        \
static __inline void                                                    \
atomic_##NAME##_rel_##WIDTH(__volatile uint##WIDTH##_t *p, uint##WIDTH##_t v)\
{                                                                       \
        fence();                                                        \
        atomic_##NAME##_##WIDTH(p, v);                                  \
}

#define ATOMIC_CMPSET_ACQ_REL(WIDTH)                                    \
static __inline int                                                     \
atomic_cmpset_acq_##WIDTH(__volatile uint##WIDTH##_t *p,                \
    uint##WIDTH##_t cmpval, uint##WIDTH##_t newval)                     \
{                                                                       \
        int retval;                                                     \
                                                                        \
        retval = atomic_cmpset_##WIDTH(p, cmpval, newval);              \
        fence();                                                        \
        return (retval);                                                \
}                                                                       \
                                                                        \
static __inline int                                                     \
atomic_cmpset_rel_##WIDTH(__volatile uint##WIDTH##_t *p,                \
    uint##WIDTH##_t cmpval, uint##WIDTH##_t newval)                     \
{                                                                       \
        fence();                                                        \
        return (atomic_cmpset_##WIDTH(p, cmpval, newval));              \
}

#define ATOMIC_FCMPSET_ACQ_REL(WIDTH)                                   \
static __inline int                                                     \
atomic_fcmpset_acq_##WIDTH(__volatile uint##WIDTH##_t *p,               \
    uint##WIDTH##_t *cmpval, uint##WIDTH##_t newval)                    \
{                                                                       \
        int retval;                                                     \
                                                                        \
        retval = atomic_fcmpset_##WIDTH(p, cmpval, newval);             \
        fence();                                                        \
        return (retval);                                                \
}                                                                       \
                                                                        \
static __inline int                                                     \
atomic_fcmpset_rel_##WIDTH(__volatile uint##WIDTH##_t *p,               \
    uint##WIDTH##_t *cmpval, uint##WIDTH##_t newval)                    \
{                                                                       \
        fence();                                                        \
        return (atomic_fcmpset_##WIDTH(p, cmpval, newval));             \
}

ATOMIC_CMPSET_ACQ_REL(8);
ATOMIC_FCMPSET_ACQ_REL(8);

#define atomic_cmpset_char              atomic_cmpset_8
#define atomic_cmpset_acq_char          atomic_cmpset_acq_8
#define atomic_cmpset_rel_char          atomic_cmpset_rel_8
#define atomic_fcmpset_char             atomic_fcmpset_8
#define atomic_fcmpset_acq_char         atomic_fcmpset_acq_8
#define atomic_fcmpset_rel_char         atomic_fcmpset_rel_8

#define atomic_cmpset_short             atomic_cmpset_16
#define atomic_fcmpset_short            atomic_fcmpset_16

ATOMIC_CMPSET_ACQ_REL(16);
ATOMIC_FCMPSET_ACQ_REL(16);

/* Defined as itself so later headers can detect a native version exists. */
#define atomic_load_acq_16      atomic_load_acq_16
static __inline uint16_t
atomic_load_acq_16(volatile uint16_t *p)
{
        uint16_t ret;

        ret = *p;

        fence();

        return (ret);
}

static __inline void
atomic_store_rel_16(volatile uint16_t *p, uint16_t val)
{

        fence();

        *p = val;
}

#define atomic_cmpset_acq_short         atomic_cmpset_acq_16
#define atomic_fcmpset_acq_short        atomic_fcmpset_acq_16
#define atomic_load_acq_short           atomic_load_acq_16

#define atomic_cmpset_rel_short         atomic_cmpset_rel_16
#define atomic_fcmpset_rel_short        atomic_fcmpset_rel_16
#define atomic_store_rel_short          atomic_store_rel_16

static __inline void
atomic_add_32(volatile uint32_t *p, uint32_t val)
{

        __asm __volatile("amoadd.w zero, %1, %0"
                        : "+A" (*p)
                        : "r" (val)
                        : "memory");
}

/* Subtraction is an atomic add of the negated value. */
static __inline void
atomic_subtract_32(volatile uint32_t *p, uint32_t val)
{

        __asm __volatile("amoadd.w zero, %1, %0"
                        : "+A" (*p)
                        : "r" (-val)
                        : "memory");
}

static __inline void
atomic_set_32(volatile uint32_t *p, uint32_t val)
{

        __asm __volatile("amoor.w zero, %1, %0"
                        : "+A" (*p)
                        : "r" (val)
                        : "memory");
}

/* Bits are cleared with an atomic AND of the complemented mask. */
static __inline void
atomic_clear_32(volatile uint32_t *p, uint32_t val)
{

        __asm __volatile("amoand.w zero, %1, %0"
                        : "+A" (*p)
                        : "r" (~val)
                        : "memory");
}
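
/*
 * Compare-and-set is built from an LR/SC (load-reserved /
 * store-conditional) pair.  sc.w writes zero to %1 on success, so "res"
 * is preset to 1 and the function returns non-zero only if the store
 * took effect.  cmpval is sign-extended to match the sign-extended
 * result of lr.w on RV64.
 */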
static __inline int
atomic_cmpset_32(volatile uint32_t *p, uint32_t cmpval, uint32_t newval)
{
        uint32_t tmp;
        int res;

        res = 0;

        __asm __volatile(
                "0:"
                        "li   %1, 1\n"          /* Preset to fail */
                        "lr.w %0, %2\n"         /* Load old value */
                        "bne  %0, %z3, 1f\n"    /* Compare */
                        "sc.w %1, %z4, %2\n"    /* Try to store new value */
                        "bnez %1, 0b\n"         /* Retry if the SC failed */
                "1:"
                        : "=&r" (tmp), "=&r" (res), "+A" (*p)
                        : "rJ" ((long)(int32_t)cmpval), "rJ" (newval)
                        : "memory");

        return (!res);
}
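
/*
 * Illustrative use only, not part of this header: advance a counter
 * with compare-and-set, retrying on contention.  "counter" and
 * "update" are hypothetical names.
 *
 *      uint32_t old;
 *      do {
 *              old = counter;
 *      } while (atomic_cmpset_32(&counter, old, update(old)) == 0);
 */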

static __inline int
atomic_fcmpset_32(volatile uint32_t *p, uint32_t *cmpval, uint32_t newval)
{
        uint32_t tmp;
        int res;

        res = 0;

        __asm __volatile(
                "0:"
                        "li   %1, 1\n"          /* Preset to fail */
                        "lr.w %0, %2\n"         /* Load old value */
                        "bne  %0, %z4, 1f\n"    /* Compare */
                        "sc.w %1, %z5, %2\n"    /* Try to store new value */
                        "j 2f\n"
                "1:"
                        "sw   %0, %3\n"         /* Save old value */
                "2:"
                        : "=&r" (tmp), "=&r" (res), "+A" (*p), "+A" (*cmpval)
                        : "rJ" ((long)(int32_t)*cmpval), "rJ" (newval)
                        : "memory");

        return (!res);
}
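
/*
 * Illustrative use only, not part of this header: fcmpset writes the
 * value it observed back through "cmpval" on failure, so a retry loop
 * does not need to re-read the variable itself.  "counter" and
 * "update" are hypothetical names.
 *
 *      uint32_t old = counter;
 *      while (atomic_fcmpset_32(&counter, &old, update(old)) == 0)
 *              ;       // "old" was refreshed by the failed attempt
 */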

static __inline uint32_t
atomic_fetchadd_32(volatile uint32_t *p, uint32_t val)
{
        uint32_t ret;

        __asm __volatile("amoadd.w %0, %2, %1"
                        : "=&r" (ret), "+A" (*p)
                        : "r" (val)
                        : "memory");

        return (ret);
}
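
/*
 * Illustrative use only, not part of this header: amoadd returns the
 * previous value, which makes fetchadd suitable for ticket-style
 * allocation.  "next_id" is a hypothetical name.
 *
 *      uint32_t id = atomic_fetchadd_32(&next_id, 1);
 */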

static __inline uint32_t
atomic_readandclear_32(volatile uint32_t *p)
{
        uint32_t ret;
        uint32_t val;

        val = 0;

        __asm __volatile("amoswap.w %0, %2, %1"
                        : "=&r" (ret), "+A" (*p)
                        : "r" (val)
                        : "memory");

        return (ret);
}

#define atomic_add_int                  atomic_add_32
#define atomic_clear_int                atomic_clear_32
#define atomic_cmpset_int               atomic_cmpset_32
#define atomic_fcmpset_int              atomic_fcmpset_32
#define atomic_fetchadd_int             atomic_fetchadd_32
#define atomic_readandclear_int         atomic_readandclear_32
#define atomic_set_int                  atomic_set_32
#define atomic_subtract_int             atomic_subtract_32

ATOMIC_ACQ_REL(set, 32)
ATOMIC_ACQ_REL(clear, 32)
ATOMIC_ACQ_REL(add, 32)
ATOMIC_ACQ_REL(subtract, 32)
ATOMIC_CMPSET_ACQ_REL(32);
ATOMIC_FCMPSET_ACQ_REL(32);

static __inline uint32_t
atomic_load_acq_32(volatile uint32_t *p)
{
        uint32_t ret;

        ret = *p;

        fence();

        return (ret);
}

static __inline void
atomic_store_rel_32(volatile uint32_t *p, uint32_t val)
{

        fence();

        *p = val;
}

#define atomic_add_acq_int              atomic_add_acq_32
#define atomic_clear_acq_int            atomic_clear_acq_32
#define atomic_cmpset_acq_int           atomic_cmpset_acq_32
#define atomic_fcmpset_acq_int          atomic_fcmpset_acq_32
#define atomic_load_acq_int             atomic_load_acq_32
#define atomic_set_acq_int              atomic_set_acq_32
#define atomic_subtract_acq_int         atomic_subtract_acq_32

#define atomic_add_rel_int              atomic_add_rel_32
#define atomic_clear_rel_int            atomic_clear_rel_32
#define atomic_cmpset_rel_int           atomic_cmpset_rel_32
#define atomic_fcmpset_rel_int          atomic_fcmpset_rel_32
#define atomic_set_rel_int              atomic_set_rel_32
#define atomic_subtract_rel_int         atomic_subtract_rel_32
#define atomic_store_rel_int            atomic_store_rel_32

static __inline void
atomic_add_64(volatile uint64_t *p, uint64_t val)
{

        __asm __volatile("amoadd.d zero, %1, %0"
                        : "+A" (*p)
                        : "r" (val)
                        : "memory");
}

static __inline void
atomic_subtract_64(volatile uint64_t *p, uint64_t val)
{

        __asm __volatile("amoadd.d zero, %1, %0"
                        : "+A" (*p)
                        : "r" (-val)
                        : "memory");
}

static __inline void
atomic_set_64(volatile uint64_t *p, uint64_t val)
{

        __asm __volatile("amoor.d zero, %1, %0"
                        : "+A" (*p)
                        : "r" (val)
                        : "memory");
}

static __inline void
atomic_clear_64(volatile uint64_t *p, uint64_t val)
{

        __asm __volatile("amoand.d zero, %1, %0"
                        : "+A" (*p)
                        : "r" (~val)
                        : "memory");
}

static __inline int
atomic_cmpset_64(volatile uint64_t *p, uint64_t cmpval, uint64_t newval)
{
        uint64_t tmp;
        int res;

        res = 0;

        __asm __volatile(
                "0:"
                        "li   %1, 1\n"          /* Preset to fail */
                        "lr.d %0, %2\n"         /* Load old value */
                        "bne  %0, %z3, 1f\n"    /* Compare */
                        "sc.d %1, %z4, %2\n"    /* Try to store new value */
                        "bnez %1, 0b\n"         /* Retry if the SC failed */
                "1:"
                        : "=&r" (tmp), "=&r" (res), "+A" (*p)
                        : "rJ" (cmpval), "rJ" (newval)
                        : "memory");

        return (!res);
}

static __inline int
atomic_fcmpset_64(volatile uint64_t *p, uint64_t *cmpval, uint64_t newval)
{
        uint64_t tmp;
        int res;

        res = 0;

        __asm __volatile(
                "0:"
                        "li   %1, 1\n"          /* Preset to fail */
                        "lr.d %0, %2\n"         /* Load old value */
                        "bne  %0, %z4, 1f\n"    /* Compare */
                        "sc.d %1, %z5, %2\n"    /* Try to store new value */
                        "j 2f\n"
                "1:"
                        "sd   %0, %3\n"         /* Save old value */
                "2:"
                        : "=&r" (tmp), "=&r" (res), "+A" (*p), "+A" (*cmpval)
                        : "rJ" (*cmpval), "rJ" (newval)
                        : "memory");

        return (!res);
}

static __inline uint64_t
atomic_fetchadd_64(volatile uint64_t *p, uint64_t val)
{
        uint64_t ret;

        __asm __volatile("amoadd.d %0, %2, %1"
                        : "=&r" (ret), "+A" (*p)
                        : "r" (val)
                        : "memory");

        return (ret);
}

static __inline uint64_t
atomic_readandclear_64(volatile uint64_t *p)
{
        uint64_t ret;
        uint64_t val;

        val = 0;

        __asm __volatile("amoswap.d %0, %2, %1"
                        : "=&r" (ret), "+A" (*p)
                        : "r" (val)
                        : "memory");

        return (ret);
}

static __inline uint32_t
atomic_swap_32(volatile uint32_t *p, uint32_t val)
{
        uint32_t old;

        __asm __volatile("amoswap.w %0, %2, %1"
                        : "=&r" (old), "+A" (*p)
                        : "r" (val)
                        : "memory");

        return (old);
}

static __inline uint64_t
atomic_swap_64(volatile uint64_t *p, uint64_t val)
{
        uint64_t old;

        __asm __volatile("amoswap.d %0, %2, %1"
                        : "=&r" (old), "+A" (*p)
                        : "r" (val)
                        : "memory");

        return (old);
}
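
/*
 * Illustrative use only, not part of this header: atomic_swap returns
 * the previous contents, e.g. to grab-and-reset a pending-work token.
 * Note that the plain swap carries no acquire/release ordering here.
 * "pending" and "new_token" are hypothetical names.
 *
 *      uint64_t old = atomic_swap_64(&pending, new_token);
 */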

#define atomic_swap_int                 atomic_swap_32

#define atomic_add_long                 atomic_add_64
#define atomic_clear_long               atomic_clear_64
#define atomic_cmpset_long              atomic_cmpset_64
#define atomic_fcmpset_long             atomic_fcmpset_64
#define atomic_fetchadd_long            atomic_fetchadd_64
#define atomic_readandclear_long        atomic_readandclear_64
#define atomic_set_long                 atomic_set_64
#define atomic_subtract_long            atomic_subtract_64
#define atomic_swap_long                atomic_swap_64

#define atomic_add_ptr                  atomic_add_64
#define atomic_clear_ptr                atomic_clear_64
#define atomic_cmpset_ptr               atomic_cmpset_64
#define atomic_fcmpset_ptr              atomic_fcmpset_64
#define atomic_fetchadd_ptr             atomic_fetchadd_64
#define atomic_readandclear_ptr         atomic_readandclear_64
#define atomic_set_ptr                  atomic_set_64
#define atomic_subtract_ptr             atomic_subtract_64
#define atomic_swap_ptr                 atomic_swap_64

ATOMIC_ACQ_REL(set, 64)
ATOMIC_ACQ_REL(clear, 64)
ATOMIC_ACQ_REL(add, 64)
ATOMIC_ACQ_REL(subtract, 64)
ATOMIC_CMPSET_ACQ_REL(64);
ATOMIC_FCMPSET_ACQ_REL(64);

static __inline uint64_t
atomic_load_acq_64(volatile uint64_t *p)
{
        uint64_t ret;

        ret = *p;

        fence();

        return (ret);
}

static __inline void
atomic_store_rel_64(volatile uint64_t *p, uint64_t val)
{

        fence();

        *p = val;
}

#define atomic_add_acq_long             atomic_add_acq_64
#define atomic_clear_acq_long           atomic_clear_acq_64
#define atomic_cmpset_acq_long          atomic_cmpset_acq_64
#define atomic_fcmpset_acq_long         atomic_fcmpset_acq_64
#define atomic_load_acq_long            atomic_load_acq_64
#define atomic_set_acq_long             atomic_set_acq_64
#define atomic_subtract_acq_long        atomic_subtract_acq_64

#define atomic_add_acq_ptr              atomic_add_acq_64
#define atomic_clear_acq_ptr            atomic_clear_acq_64
#define atomic_cmpset_acq_ptr           atomic_cmpset_acq_64
#define atomic_fcmpset_acq_ptr          atomic_fcmpset_acq_64
#define atomic_load_acq_ptr             atomic_load_acq_64
#define atomic_set_acq_ptr              atomic_set_acq_64
#define atomic_subtract_acq_ptr         atomic_subtract_acq_64

#undef ATOMIC_ACQ_REL
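
/*
 * All four thread fences conservatively emit the same full fence.
 * RISC-V permits weaker forms (e.g. "fence r, rw" for acquire,
 * "fence rw, w" for release), but they are not distinguished here.
 */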
static __inline void
atomic_thread_fence_acq(void)
{

        fence();
}

static __inline void
atomic_thread_fence_rel(void)
{

        fence();
}

static __inline void
atomic_thread_fence_acq_rel(void)
{

        fence();
}

static __inline void
atomic_thread_fence_seq_cst(void)
{

        fence();
}

#define atomic_add_rel_long             atomic_add_rel_64
#define atomic_clear_rel_long           atomic_clear_rel_64
#define atomic_cmpset_rel_long          atomic_cmpset_rel_64
#define atomic_fcmpset_rel_long         atomic_fcmpset_rel_64
#define atomic_set_rel_long             atomic_set_rel_64
#define atomic_subtract_rel_long        atomic_subtract_rel_64
#define atomic_store_rel_long           atomic_store_rel_64

#define atomic_add_rel_ptr              atomic_add_rel_64
#define atomic_clear_rel_ptr            atomic_clear_rel_64
#define atomic_cmpset_rel_ptr           atomic_cmpset_rel_64
#define atomic_fcmpset_rel_ptr          atomic_fcmpset_rel_64
#define atomic_set_rel_ptr              atomic_set_rel_64
#define atomic_subtract_rel_ptr         atomic_subtract_rel_64
#define atomic_store_rel_ptr            atomic_store_rel_64

#include <sys/_atomic_subword.h>

#endif /* _MACHINE_ATOMIC_H_ */