25#if !defined(__HIPCC_RTC__)
26#include "amd_device_functions.h"
29#if __has_builtin(__hip_atomic_compare_exchange_strong)
// Minimal std::conditional replacement, usable under __HIPCC_RTC__ where the
// C++ standard library headers may be unavailable. Selects T when B is true,
// F otherwise, via partial specialization.
template<
    bool B,
    typename T,
    typename F>
struct Cond_t;

template<
    typename T,
    typename F>
struct Cond_t<true, T, F> { using type = T; };

template<
    typename T,
    typename F>
struct Cond_t<false, T, F> { using type = F; };
#if !__HIP_DEVICE_COMPILE__
// Memory-scope tokens matching the values the HIP device compiler predefines;
// re-declared here so host-side compilation of this header also sees them.
#define __HIP_MEMORY_SCOPE_SINGLETHREAD 1
#define __HIP_MEMORY_SCOPE_WAVEFRONT 2
#define __HIP_MEMORY_SCOPE_WORKGROUP 3
#define __HIP_MEMORY_SCOPE_AGENT 4
#define __HIP_MEMORY_SCOPE_SYSTEM 5
#endif
45#if !defined(__HIPCC_RTC__)
46#include "amd_hip_unsafe_atomics.h"
51 int mem_order = __ATOMIC_SEQ_CST,
52 int mem_scope= __HIP_MEMORY_SCOPE_SYSTEM,
57__attribute__((always_inline, device))
58T hip_cas_expander(T* p, T x, Op op, F f)
noexcept
60 using FP = __attribute__((address_space(0))) const
void*;
63 extern
bool is_shared_workaround(FP) asm("llvm.amdgcn.is.shared");
65 if (is_shared_workaround((FP)p))
68 using U = typename Cond_t<
69 sizeof(T) == sizeof(
unsigned int),
unsigned int,
unsigned long long>::type;
71 auto q = reinterpret_cast<U*>(p);
73 U tmp0{__hip_atomic_load(q, mem_order, mem_scope)};
78 op(
reinterpret_cast<T&
>(tmp1), x);
79 }
while (!__hip_atomic_compare_exchange_strong(q, &tmp0, tmp1, mem_order,
80 mem_order, mem_scope));
82 return reinterpret_cast<const T&
>(tmp0);
86 int mem_order = __ATOMIC_SEQ_CST,
87 int mem_scope= __HIP_MEMORY_SCOPE_SYSTEM,
92__attribute__((always_inline, device))
93T hip_cas_extrema_expander(T* p, T x, Cmp cmp, F f)
noexcept
95 using FP = __attribute__((address_space(0))) const
void*;
98 extern
bool is_shared_workaround(FP) asm("llvm.amdgcn.is.shared");
100 if (is_shared_workaround((FP)p))
103 using U = typename Cond_t<
104 sizeof(T) == sizeof(
unsigned int),
unsigned int,
unsigned long long>::type;
106 auto q = reinterpret_cast<U*>(p);
108 U tmp{__hip_atomic_load(q, mem_order, mem_scope)};
109 while (cmp(x,
reinterpret_cast<const T&
>(tmp)) &&
110 !__hip_atomic_compare_exchange_strong(q, &tmp, x, mem_order, mem_order,
113 return reinterpret_cast<const T&
>(tmp);
118int atomicCAS(
int* address,
int compare,
int val) {
119 __hip_atomic_compare_exchange_strong(address, &compare, val, __ATOMIC_RELAXED, __ATOMIC_RELAXED,
120 __HIP_MEMORY_SCOPE_AGENT);
126int atomicCAS_system(
int* address,
int compare,
int val) {
127 __hip_atomic_compare_exchange_strong(address, &compare, val, __ATOMIC_RELAXED, __ATOMIC_RELAXED,
128 __HIP_MEMORY_SCOPE_SYSTEM);
134unsigned int atomicCAS(
unsigned int* address,
unsigned int compare,
unsigned int val) {
135 __hip_atomic_compare_exchange_strong(address, &compare, val, __ATOMIC_RELAXED, __ATOMIC_RELAXED,
136 __HIP_MEMORY_SCOPE_AGENT);
142unsigned int atomicCAS_system(
unsigned int* address,
unsigned int compare,
unsigned int val) {
143 __hip_atomic_compare_exchange_strong(address, &compare, val, __ATOMIC_RELAXED, __ATOMIC_RELAXED,
144 __HIP_MEMORY_SCOPE_SYSTEM);
150unsigned long atomicCAS(
unsigned long* address,
unsigned long compare,
unsigned long val) {
151 __hip_atomic_compare_exchange_strong(address, &compare, val, __ATOMIC_RELAXED, __ATOMIC_RELAXED,
152 __HIP_MEMORY_SCOPE_AGENT);
158unsigned long atomicCAS_system(
unsigned long* address,
unsigned long compare,
unsigned long val) {
159 __hip_atomic_compare_exchange_strong(address, &compare, val, __ATOMIC_RELAXED, __ATOMIC_RELAXED,
160 __HIP_MEMORY_SCOPE_SYSTEM);
166unsigned long long atomicCAS(
unsigned long long* address,
unsigned long long compare,
167 unsigned long long val) {
168 __hip_atomic_compare_exchange_strong(address, &compare, val, __ATOMIC_RELAXED, __ATOMIC_RELAXED,
169 __HIP_MEMORY_SCOPE_AGENT);
175unsigned long long atomicCAS_system(
unsigned long long* address,
unsigned long long compare,
176 unsigned long long val) {
177 __hip_atomic_compare_exchange_strong(address, &compare, val, __ATOMIC_RELAXED, __ATOMIC_RELAXED,
178 __HIP_MEMORY_SCOPE_SYSTEM);
184float atomicCAS(
float* address,
float compare,
float val) {
185 __hip_atomic_compare_exchange_strong(address, &compare, val, __ATOMIC_RELAXED, __ATOMIC_RELAXED,
186 __HIP_MEMORY_SCOPE_AGENT);
192float atomicCAS_system(
float* address,
float compare,
float val) {
193 __hip_atomic_compare_exchange_strong(address, &compare, val, __ATOMIC_RELAXED, __ATOMIC_RELAXED,
194 __HIP_MEMORY_SCOPE_SYSTEM);
200double atomicCAS(
double* address,
double compare,
double val) {
201 __hip_atomic_compare_exchange_strong(address, &compare, val, __ATOMIC_RELAXED, __ATOMIC_RELAXED,
202 __HIP_MEMORY_SCOPE_AGENT);
208double atomicCAS_system(
double* address,
double compare,
double val) {
209 __hip_atomic_compare_exchange_strong(address, &compare, val, __ATOMIC_RELAXED, __ATOMIC_RELAXED,
210 __HIP_MEMORY_SCOPE_SYSTEM);
216int atomicAdd(
int* address,
int val) {
217 return __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
222int atomicAdd_system(
int* address,
int val) {
223 return __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
228unsigned int atomicAdd(
unsigned int* address,
unsigned int val) {
229 return __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
234unsigned int atomicAdd_system(
unsigned int* address,
unsigned int val) {
235 return __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
240unsigned long atomicAdd(
unsigned long* address,
unsigned long val) {
241 return __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
246unsigned long atomicAdd_system(
unsigned long* address,
unsigned long val) {
247 return __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
252unsigned long long atomicAdd(
unsigned long long* address,
unsigned long long val) {
253 return __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
258unsigned long long atomicAdd_system(
unsigned long long* address,
unsigned long long val) {
259 return __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
264float atomicAdd(
float* address,
float val) {
265#if defined(__AMDGCN_UNSAFE_FP_ATOMICS__)
266 return unsafeAtomicAdd(address, val);
268 return __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
274float atomicAdd_system(
float* address,
float val) {
275 return __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
278#if !defined(__HIPCC_RTC__)
279DEPRECATED(
"use atomicAdd instead")
283void atomicAddNoRet(
float* address,
float val)
285 __ockl_atomic_add_noret_f32(address, val);
290double atomicAdd(
double* address,
double val) {
291#if defined(__AMDGCN_UNSAFE_FP_ATOMICS__)
292 return unsafeAtomicAdd(address, val);
294 return __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
300double atomicAdd_system(
double* address,
double val) {
301 return __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
306int atomicSub(
int* address,
int val) {
307 return __hip_atomic_fetch_add(address, -val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
312int atomicSub_system(
int* address,
int val) {
313 return __hip_atomic_fetch_add(address, -val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
318unsigned int atomicSub(
unsigned int* address,
unsigned int val) {
319 return __hip_atomic_fetch_add(address, -val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
324unsigned int atomicSub_system(
unsigned int* address,
unsigned int val) {
325 return __hip_atomic_fetch_add(address, -val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
330unsigned long atomicSub(
unsigned long* address,
unsigned long val) {
331 return __hip_atomic_fetch_add(address, -val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
336unsigned long atomicSub_system(
unsigned long* address,
unsigned long val) {
337 return __hip_atomic_fetch_add(address, -val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
342unsigned long long atomicSub(
unsigned long long* address,
unsigned long long val) {
343 return __hip_atomic_fetch_add(address, -val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
348unsigned long long atomicSub_system(
unsigned long long* address,
unsigned long long val) {
349 return __hip_atomic_fetch_add(address, -val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
354float atomicSub(
float* address,
float val) {
355#if defined(__AMDGCN_UNSAFE_FP_ATOMICS__)
356 return unsafeAtomicAdd(address, -val);
358 return __hip_atomic_fetch_add(address, -val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
364float atomicSub_system(
float* address,
float val) {
365 return __hip_atomic_fetch_add(address, -val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
370double atomicSub(
double* address,
double val) {
371#if defined(__AMDGCN_UNSAFE_FP_ATOMICS__)
372 return unsafeAtomicAdd(address, -val);
374 return __hip_atomic_fetch_add(address, -val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
380double atomicSub_system(
double* address,
double val) {
381 return __hip_atomic_fetch_add(address, -val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
386int atomicExch(
int* address,
int val) {
387 return __hip_atomic_exchange(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
392int atomicExch_system(
int* address,
int val) {
393 return __hip_atomic_exchange(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
398unsigned int atomicExch(
unsigned int* address,
unsigned int val) {
399 return __hip_atomic_exchange(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
404unsigned int atomicExch_system(
unsigned int* address,
unsigned int val) {
405 return __hip_atomic_exchange(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
410unsigned long atomicExch(
unsigned long* address,
unsigned long val) {
411 return __hip_atomic_exchange(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
416unsigned long atomicExch_system(
unsigned long* address,
unsigned long val) {
417 return __hip_atomic_exchange(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
422unsigned long long atomicExch(
unsigned long long* address,
unsigned long long val) {
423 return __hip_atomic_exchange(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
428unsigned long long atomicExch_system(
unsigned long long* address,
unsigned long long val) {
429 return __hip_atomic_exchange(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
434float atomicExch(
float* address,
float val) {
435 return __hip_atomic_exchange(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
440float atomicExch_system(
float* address,
float val) {
441 return __hip_atomic_exchange(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
446double atomicExch(
double* address,
double val) {
447 return __hip_atomic_exchange(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
452double atomicExch_system(
double* address,
double val) {
453 return __hip_atomic_exchange(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
458int atomicMin(
int* address,
int val) {
459#if defined(__gfx941__)
460 return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
461 address, val, [](
int x,
int y) {
return x < y; }, [=]() {
462 return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED,
463 __HIP_MEMORY_SCOPE_AGENT);
466 return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
472int atomicMin_system(
int* address,
int val) {
473#if defined(__gfx941__)
474 return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
475 address, val, [](
int x,
int y) {
return x < y; }, [=]() {
476 return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED,
477 __HIP_MEMORY_SCOPE_SYSTEM);
480 return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
486unsigned int atomicMin(
unsigned int* address,
unsigned int val) {
487#if defined(__gfx941__)
488 return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
489 address, val, [](
unsigned int x,
unsigned int y) {
return x < y; }, [=]() {
490 return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED,
491 __HIP_MEMORY_SCOPE_AGENT);
494 return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
501unsigned int atomicMin_system(
unsigned int* address,
unsigned int val) {
502#if defined(__gfx941__)
503 return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
504 address, val, [](
unsigned int x,
unsigned int y) {
return x < y; }, [=]() {
505 return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED,
506 __HIP_MEMORY_SCOPE_SYSTEM);
509 return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
515unsigned long long atomicMin(
unsigned long* address,
unsigned long val) {
516#if defined(__gfx941__)
517 return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
520 [](
unsigned long x,
unsigned long y) {
return x < y; },
522 return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED,
523 __HIP_MEMORY_SCOPE_AGENT);
526 return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
532unsigned long atomicMin_system(
unsigned long* address,
unsigned long val) {
533#if defined(__gfx941__)
534 return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
537 [](
unsigned long x,
unsigned long y) {
return x < y; },
539 return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED,
540 __HIP_MEMORY_SCOPE_SYSTEM);
543 return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
549unsigned long long atomicMin(
unsigned long long* address,
unsigned long long val) {
550#if defined(__gfx941__)
551 return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
554 [](
unsigned long long x,
unsigned long long y) {
return x < y; },
556 return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED,
557 __HIP_MEMORY_SCOPE_AGENT);
560 return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
566unsigned long long atomicMin_system(
unsigned long long* address,
unsigned long long val) {
567#if defined(__gfx941__)
568 return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
571 [](
unsigned long long x,
unsigned long long y) {
return x < y; },
573 return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED,
574 __HIP_MEMORY_SCOPE_SYSTEM);
577 return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
583long long atomicMin(
long long* address,
long long val) {
584#if defined(__gfx941__)
585 return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
586 address, val, [](
long long x,
long long y) {
return x < y; },
588 return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
591 return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
597long long atomicMin_system(
long long* address,
long long val) {
598#if defined(__gfx941__)
599 return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
600 address, val, [](
long long x,
long long y) {
return x < y; },
602 return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
605 return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
611float atomicMin(
float* addr,
float val) {
612#if defined(__AMDGCN_UNSAFE_FP_ATOMICS__)
613 return unsafeAtomicMin(addr, val);
615 #if __has_builtin(__hip_atomic_load) && \
616 __has_builtin(__hip_atomic_compare_exchange_strong)
617 float value = __hip_atomic_load(addr, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
619 while (!done && value > val) {
620 done = __hip_atomic_compare_exchange_strong(addr, &value, val,
621 __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
625 unsigned int *uaddr = (
unsigned int *)addr;
626 unsigned int value = __atomic_load_n(uaddr, __ATOMIC_RELAXED);
628 while (!done && __uint_as_float(value) > val) {
629 done = __atomic_compare_exchange_n(uaddr, &value, __float_as_uint(val),
false,
630 __ATOMIC_RELAXED, __ATOMIC_RELAXED);
632 return __uint_as_float(value);
639float atomicMin_system(
float* address,
float val) {
640 unsigned int* uaddr {
reinterpret_cast<unsigned int*
>(address) };
641 #if __has_builtin(__hip_atomic_load)
642 unsigned int tmp {__hip_atomic_load(uaddr, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM)};
644 unsigned int tmp {__atomic_load_n(uaddr, __ATOMIC_RELAXED)};
646 float value = __uint_as_float(tmp);
648 while (val < value) {
649 value = atomicCAS_system(address, value, val);
657double atomicMin(
double* addr,
double val) {
658#if defined(__AMDGCN_UNSAFE_FP_ATOMICS__)
659 return unsafeAtomicMin(addr, val);
661 #if __has_builtin(__hip_atomic_load) && \
662 __has_builtin(__hip_atomic_compare_exchange_strong)
663 double value = __hip_atomic_load(addr, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
665 while (!done && value > val) {
666 done = __hip_atomic_compare_exchange_strong(addr, &value, val,
667 __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
671 unsigned long long *uaddr = (
unsigned long long *)addr;
672 unsigned long long value = __atomic_load_n(uaddr, __ATOMIC_RELAXED);
674 while (!done && __longlong_as_double(value) > val) {
675 done = __atomic_compare_exchange_n(uaddr, &value, __double_as_longlong(val),
false,
676 __ATOMIC_RELAXED, __ATOMIC_RELAXED);
678 return __longlong_as_double(value);
685double atomicMin_system(
double* address,
double val) {
686 unsigned long long* uaddr {
reinterpret_cast<unsigned long long*
>(address) };
687 #if __has_builtin(__hip_atomic_load)
688 unsigned long long tmp {__hip_atomic_load(uaddr, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM)};
690 unsigned long long tmp {__atomic_load_n(uaddr, __ATOMIC_RELAXED)};
692 double value = __longlong_as_double(tmp);
694 while (val < value) {
695 value = atomicCAS_system(address, value, val);
703int atomicMax(
int* address,
int val) {
704#if defined(__gfx941__)
705 return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
706 address, val, [](
int x,
int y) {
return y < x; }, [=]() {
707 return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED,
708 __HIP_MEMORY_SCOPE_AGENT);
711 return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
717int atomicMax_system(
int* address,
int val) {
718#if defined(__gfx941__)
719 return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
720 address, val, [](
int x,
int y) {
return y < x; }, [=]() {
721 return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED,
722 __HIP_MEMORY_SCOPE_SYSTEM);
725 return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
731unsigned int atomicMax(
unsigned int* address,
unsigned int val) {
732#if defined(__gfx941__)
733 return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
734 address, val, [](
unsigned int x,
unsigned int y) {
return y < x; }, [=]() {
735 return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED,
736 __HIP_MEMORY_SCOPE_AGENT);
739 return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
745unsigned int atomicMax_system(
unsigned int* address,
unsigned int val) {
746#if defined(__gfx941__)
747 return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
748 address, val, [](
unsigned int x,
unsigned int y) {
return y < x; }, [=]() {
749 return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED,
750 __HIP_MEMORY_SCOPE_SYSTEM);
753 return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
759unsigned long atomicMax(
unsigned long* address,
unsigned long val) {
760#if defined(__gfx941__)
761 return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
764 [](
unsigned long x,
unsigned long y) {
return y < x; },
766 return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED,
767 __HIP_MEMORY_SCOPE_AGENT);
770 return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
776unsigned long atomicMax_system(
unsigned long* address,
unsigned long val) {
777#if defined(__gfx941__)
778 return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
781 [](
unsigned long x,
unsigned long y) {
return y < x; },
783 return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED,
784 __HIP_MEMORY_SCOPE_SYSTEM);
787 return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
793unsigned long long atomicMax(
unsigned long long* address,
unsigned long long val) {
794#if defined(__gfx941__)
795 return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
798 [](
unsigned long long x,
unsigned long long y) {
return y < x; },
800 return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED,
801 __HIP_MEMORY_SCOPE_AGENT);
804 return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
810unsigned long long atomicMax_system(
unsigned long long* address,
unsigned long long val) {
811#if defined(__gfx941__)
812 return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
815 [](
unsigned long long x,
unsigned long long y) {
return y < x; },
817 return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED,
818 __HIP_MEMORY_SCOPE_SYSTEM);
821 return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
827long long atomicMax(
long long* address,
long long val) {
828 #if defined(__gfx941__)
829 return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
830 address, val, [](
long long x,
long long y) {
return y < x; },
832 return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
835 return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
841long long atomicMax_system(
long long* address,
long long val) {
842#if defined(__gfx941__)
843 return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
844 address, val, [](
long long x,
long long y) {
return y < x; },
846 return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
849 return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
855float atomicMax(
float* addr,
float val) {
856#if defined(__AMDGCN_UNSAFE_FP_ATOMICS__)
857 return unsafeAtomicMax(addr, val);
859 #if __has_builtin(__hip_atomic_load) && \
860 __has_builtin(__hip_atomic_compare_exchange_strong)
861 float value = __hip_atomic_load(addr, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
863 while (!done && value < val) {
864 done = __hip_atomic_compare_exchange_strong(addr, &value, val,
865 __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
869 unsigned int *uaddr = (
unsigned int *)addr;
870 unsigned int value = __atomic_load_n(uaddr, __ATOMIC_RELAXED);
872 while (!done && __uint_as_float(value) < val) {
873 done = __atomic_compare_exchange_n(uaddr, &value, __float_as_uint(val),
false,
874 __ATOMIC_RELAXED, __ATOMIC_RELAXED);
876 return __uint_as_float(value);
883float atomicMax_system(
float* address,
float val) {
884 unsigned int* uaddr {
reinterpret_cast<unsigned int*
>(address) };
885 #if __has_builtin(__hip_atomic_load)
886 unsigned int tmp {__hip_atomic_load(uaddr, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM)};
888 unsigned int tmp {__atomic_load_n(uaddr, __ATOMIC_RELAXED)};
890 float value = __uint_as_float(tmp);
892 while (value < val) {
893 value = atomicCAS_system(address, value, val);
901double atomicMax(
double* addr,
double val) {
902#if defined(__AMDGCN_UNSAFE_FP_ATOMICS__)
903 return unsafeAtomicMax(addr, val);
905 #if __has_builtin(__hip_atomic_load) && \
906 __has_builtin(__hip_atomic_compare_exchange_strong)
907 double value = __hip_atomic_load(addr, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
909 while (!done && value < val) {
910 done = __hip_atomic_compare_exchange_strong(addr, &value, val,
911 __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
915 unsigned long long *uaddr = (
unsigned long long *)addr;
916 unsigned long long value = __atomic_load_n(uaddr, __ATOMIC_RELAXED);
918 while (!done && __longlong_as_double(value) < val) {
919 done = __atomic_compare_exchange_n(uaddr, &value, __double_as_longlong(val),
false,
920 __ATOMIC_RELAXED, __ATOMIC_RELAXED);
922 return __longlong_as_double(value);
929double atomicMax_system(
double* address,
double val) {
930 unsigned long long* uaddr {
reinterpret_cast<unsigned long long*
>(address) };
931 #if __has_builtin(__hip_atomic_load)
932 unsigned long long tmp {__hip_atomic_load(uaddr, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM)};
934 unsigned long long tmp {__atomic_load_n(uaddr, __ATOMIC_RELAXED)};
936 double value = __longlong_as_double(tmp);
938 while (value < val) {
939 value = atomicCAS_system(address, value, val);
947unsigned int atomicInc(
unsigned int* address,
unsigned int val)
949#if defined(__gfx941__)
952 unsigned int __builtin_amdgcn_atomic_inc(
957 bool) __asm(
"llvm.amdgcn.atomic.inc.i32.p0i32");
959 return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
962 [](
unsigned int& x,
unsigned int y) { x = (x >= y) ? 0 : (x + 1); },
965 __builtin_amdgcn_atomic_inc(address, val, __ATOMIC_RELAXED, 1,
false);
968 return __builtin_amdgcn_atomic_inc32(address, val, __ATOMIC_RELAXED,
"agent");
975unsigned int atomicDec(
unsigned int* address,
unsigned int val)
977#if defined(__gfx941__)
980 unsigned int __builtin_amdgcn_atomic_dec(
985 bool) __asm(
"llvm.amdgcn.atomic.dec.i32.p0i32");
987 return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
990 [](
unsigned int& x,
unsigned int y) { x = (!x || x > y) ? y : (x - 1); },
993 __builtin_amdgcn_atomic_dec(address, val, __ATOMIC_RELAXED, 1,
false);
996 return __builtin_amdgcn_atomic_dec32(address, val, __ATOMIC_RELAXED,
"agent");
1003int atomicAnd(
int* address,
int val) {
1004#if defined(__gfx941__)
1005 return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
1006 address, val, [](
int& x,
int y) { x &= y; }, [=]() {
1007 return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED,
1008 __HIP_MEMORY_SCOPE_AGENT);
1011 return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
1017int atomicAnd_system(
int* address,
int val) {
1018#if defined(__gfx941__)
1019 return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
1020 address, val, [](
int& x,
int y) { x &= y; }, [=]() {
1021 return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED,
1022 __HIP_MEMORY_SCOPE_SYSTEM);
1025 return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
1031unsigned int atomicAnd(
unsigned int* address,
unsigned int val) {
1032#if defined(__gfx941__)
1033 return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
1034 address, val, [](
unsigned int& x,
unsigned int y) { x &= y; }, [=]() {
1035 return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED,
1036 __HIP_MEMORY_SCOPE_AGENT);
1039 return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
1045unsigned int atomicAnd_system(
unsigned int* address,
unsigned int val) {
1046#if defined(__gfx941__)
1047 return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
1048 address, val, [](
unsigned int& x,
unsigned int y) { x &= y; }, [=]() {
1049 return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED,
1050 __HIP_MEMORY_SCOPE_SYSTEM);
1053 return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
1059unsigned long atomicAnd(
unsigned long* address,
unsigned long val) {
1060#if defined(__gfx941__)
1061 return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
1062 address, val, [](
unsigned long& x,
unsigned long y) { x &= y; }, [=]() {
1063 return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED,
1064 __HIP_MEMORY_SCOPE_AGENT);
1067 return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
1073unsigned long atomicAnd_system(
unsigned long* address,
unsigned long val) {
1074#if defined(__gfx941__)
1075 return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
1076 address, val, [](
unsigned long& x,
unsigned long y) { x &= y; }, [=]() {
1077 return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED,
1078 __HIP_MEMORY_SCOPE_SYSTEM);
1081 return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
1087unsigned long long atomicAnd(
unsigned long long* address,
unsigned long long val) {
1088#if defined(__gfx941__)
1089 return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
1092 [](
unsigned long long& x,
unsigned long long y) { x &= y; },
1094 return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED,
1095 __HIP_MEMORY_SCOPE_AGENT);
1098 return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
1104unsigned long long atomicAnd_system(
unsigned long long* address,
unsigned long long val) {
1105#if defined(__gfx941__)
1106 return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
1109 [](
unsigned long long& x,
unsigned long long y) { x &= y; },
1111 return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED,
1112 __HIP_MEMORY_SCOPE_SYSTEM);
1115 return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
1121int atomicOr(
int* address,
int val) {
1122#if defined(__gfx941__)
1123 return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
1124 address, val, [](
int& x,
int y) { x |= y; }, [=]() {
1125 return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED,
1126 __HIP_MEMORY_SCOPE_AGENT);
1129 return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
1135int atomicOr_system(
int* address,
int val) {
1136#if defined(__gfx941__)
1137 return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
1138 address, val, [](
int& x,
int y) { x |= y; }, [=]() {
1139 return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED,
1140 __HIP_MEMORY_SCOPE_SYSTEM);
1143 return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
1149unsigned int atomicOr(
unsigned int* address,
unsigned int val) {
1150#if defined(__gfx941__)
1151 return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
1152 address, val, [](
unsigned int& x,
unsigned int y) { x |= y; }, [=]() {
1153 return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED,
1154 __HIP_MEMORY_SCOPE_AGENT);
1157 return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
1163unsigned int atomicOr_system(
unsigned int* address,
unsigned int val) {
1164#if defined(__gfx941__)
1165 return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
1166 address, val, [](
unsigned int& x,
unsigned int y) { x |= y; }, [=]() {
1167 return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED,
1168 __HIP_MEMORY_SCOPE_SYSTEM);
1171 return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
1177unsigned long atomicOr(
unsigned long* address,
unsigned long val) {
1178#if defined(__gfx941__)
1179 return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
1180 address, val, [](
unsigned long& x,
unsigned long y) { x |= y; }, [=]() {
1181 return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED,
1182 __HIP_MEMORY_SCOPE_AGENT);
1185 return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
1191unsigned long atomicOr_system(
unsigned long* address,
unsigned long val) {
1192#if defined(__gfx941__)
1193 return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
1194 address, val, [](
unsigned long& x,
unsigned long y) { x |= y; }, [=]() {
1195 return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED,
1196 __HIP_MEMORY_SCOPE_SYSTEM);
1199 return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
// Atomic bitwise OR at agent (device) scope; returns the pre-OR value.
__device__ inline unsigned long long atomicOr(unsigned long long* address,
                                              unsigned long long val) {
#if defined(__gfx941__)
    // gfx941 workaround: expand the RMW into a CAS loop; hip_cas_expander
    // takes the direct path (trailing lambda) for shared-memory pointers.
    return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
        address, val,
        [](unsigned long long& x, unsigned long long y) { x |= y; },
        [=]() {
            return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED,
                                         __HIP_MEMORY_SCOPE_AGENT);
        });
#else
    return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED,
                                 __HIP_MEMORY_SCOPE_AGENT);
#endif
}
// Atomic bitwise OR at system scope; returns the pre-OR value.
__device__ inline unsigned long long atomicOr_system(unsigned long long* address,
                                                     unsigned long long val) {
#if defined(__gfx941__)
    // gfx941 workaround: expand the RMW into a CAS loop; hip_cas_expander
    // takes the direct path (trailing lambda) for shared-memory pointers.
    return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
        address, val,
        [](unsigned long long& x, unsigned long long y) { x |= y; },
        [=]() {
            return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED,
                                         __HIP_MEMORY_SCOPE_SYSTEM);
        });
#else
    return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED,
                                 __HIP_MEMORY_SCOPE_SYSTEM);
#endif
}
// Atomic bitwise XOR at agent (device) scope; returns the pre-XOR value.
__device__ inline int atomicXor(int* address, int val) {
#if defined(__gfx941__)
    // gfx941 workaround: expand the RMW into a CAS loop; hip_cas_expander
    // takes the direct path (trailing lambda) for shared-memory pointers.
    return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
        address, val,
        [](int& x, int y) { x ^= y; },
        [=]() {
            return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED,
                                          __HIP_MEMORY_SCOPE_AGENT);
        });
#else
    return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED,
                                  __HIP_MEMORY_SCOPE_AGENT);
#endif
}
// Atomic bitwise XOR at system scope; returns the pre-XOR value.
__device__ inline int atomicXor_system(int* address, int val) {
#if defined(__gfx941__)
    // gfx941 workaround: expand the RMW into a CAS loop; hip_cas_expander
    // takes the direct path (trailing lambda) for shared-memory pointers.
    return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
        address, val,
        [](int& x, int y) { x ^= y; },
        [=]() {
            return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED,
                                          __HIP_MEMORY_SCOPE_SYSTEM);
        });
#else
    return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED,
                                  __HIP_MEMORY_SCOPE_SYSTEM);
#endif
}
// Atomic bitwise XOR at agent (device) scope; returns the pre-XOR value.
__device__ inline unsigned int atomicXor(unsigned int* address, unsigned int val) {
#if defined(__gfx941__)
    // gfx941 workaround: expand the RMW into a CAS loop; hip_cas_expander
    // takes the direct path (trailing lambda) for shared-memory pointers.
    return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
        address, val,
        [](unsigned int& x, unsigned int y) { x ^= y; },
        [=]() {
            return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED,
                                          __HIP_MEMORY_SCOPE_AGENT);
        });
#else
    return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED,
                                  __HIP_MEMORY_SCOPE_AGENT);
#endif
}
// Atomic bitwise XOR at system scope; returns the pre-XOR value.
__device__ inline unsigned int atomicXor_system(unsigned int* address, unsigned int val) {
#if defined(__gfx941__)
    // gfx941 workaround: expand the RMW into a CAS loop; hip_cas_expander
    // takes the direct path (trailing lambda) for shared-memory pointers.
    return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
        address, val,
        [](unsigned int& x, unsigned int y) { x ^= y; },
        [=]() {
            return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED,
                                          __HIP_MEMORY_SCOPE_SYSTEM);
        });
#else
    return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED,
                                  __HIP_MEMORY_SCOPE_SYSTEM);
#endif
}
// Atomic bitwise XOR at agent (device) scope; returns the pre-XOR value.
__device__ inline unsigned long atomicXor(unsigned long* address, unsigned long val) {
#if defined(__gfx941__)
    // gfx941 workaround: expand the RMW into a CAS loop; hip_cas_expander
    // takes the direct path (trailing lambda) for shared-memory pointers.
    return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
        address, val,
        [](unsigned long& x, unsigned long y) { x ^= y; },
        [=]() {
            return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED,
                                          __HIP_MEMORY_SCOPE_AGENT);
        });
#else
    return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED,
                                  __HIP_MEMORY_SCOPE_AGENT);
#endif
}
// Atomic bitwise XOR at system scope; returns the pre-XOR value.
__device__ inline unsigned long atomicXor_system(unsigned long* address, unsigned long val) {
#if defined(__gfx941__)
    // gfx941 workaround: expand the RMW into a CAS loop; hip_cas_expander
    // takes the direct path (trailing lambda) for shared-memory pointers.
    return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
        address, val,
        [](unsigned long& x, unsigned long y) { x ^= y; },
        [=]() {
            return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED,
                                          __HIP_MEMORY_SCOPE_SYSTEM);
        });
#else
    return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED,
                                  __HIP_MEMORY_SCOPE_SYSTEM);
#endif
}
// Atomic bitwise XOR at agent (device) scope; returns the pre-XOR value.
__device__ inline unsigned long long atomicXor(unsigned long long* address,
                                               unsigned long long val) {
#if defined(__gfx941__)
    // gfx941 workaround: expand the RMW into a CAS loop; hip_cas_expander
    // takes the direct path (trailing lambda) for shared-memory pointers.
    return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
        address, val,
        [](unsigned long long& x, unsigned long long y) { x ^= y; },
        [=]() {
            return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED,
                                          __HIP_MEMORY_SCOPE_AGENT);
        });
#else
    return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED,
                                  __HIP_MEMORY_SCOPE_AGENT);
#endif
}
// Atomic bitwise XOR at system scope; returns the pre-XOR value.
// NOTE(review): this overload was the only *_system RMW without the gfx941
// CAS-expander guard; added here to match every sibling overload.
__device__ inline unsigned long long atomicXor_system(unsigned long long* address,
                                                      unsigned long long val) {
#if defined(__gfx941__)
    // gfx941 workaround: expand the RMW into a CAS loop; hip_cas_expander
    // takes the direct path (trailing lambda) for shared-memory pointers.
    return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
        address, val,
        [](unsigned long long& x, unsigned long long y) { x ^= y; },
        [=]() {
            return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED,
                                          __HIP_MEMORY_SCOPE_SYSTEM);
        });
#else
    return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED,
                                  __HIP_MEMORY_SCOPE_SYSTEM);
#endif
}
// Atomic compare-and-swap; returns the value observed at *address.
__device__ inline int atomicCAS(int* address, int compare, int val)
{
    // On failure __atomic_compare_exchange_n writes the observed value into
    // `compare`, so `compare` always ends up holding the old *address value.
    __atomic_compare_exchange_n(
        address, &compare, val, false, __ATOMIC_RELAXED, __ATOMIC_RELAXED);

    return compare;
}
// Atomic compare-and-swap; returns the value observed at *address.
__device__ inline unsigned int atomicCAS(
    unsigned int* address, unsigned int compare, unsigned int val)
{
    // On failure the observed value is written back into `compare`, so
    // `compare` always ends up holding the old *address value.
    __atomic_compare_exchange_n(
        address, &compare, val, false, __ATOMIC_RELAXED, __ATOMIC_RELAXED);

    return compare;
}
// Atomic compare-and-swap; returns the value observed at *address.
__device__ inline unsigned long long atomicCAS(
    unsigned long long* address,
    unsigned long long compare,
    unsigned long long val)
{
    // On failure the observed value is written back into `compare`, so
    // `compare` always ends up holding the old *address value.
    __atomic_compare_exchange_n(
        address, &compare, val, false, __ATOMIC_RELAXED, __ATOMIC_RELAXED);

    return compare;
}
// Atomic add; returns the pre-add value (relaxed ordering).
__device__ inline int atomicAdd(int* address, int val)
{
    return __atomic_fetch_add(address, val, __ATOMIC_RELAXED);
}
// Atomic add; returns the pre-add value (relaxed ordering).
__device__ inline unsigned int atomicAdd(unsigned int* address, unsigned int val)
{
    return __atomic_fetch_add(address, val, __ATOMIC_RELAXED);
}
// Atomic add; returns the pre-add value (relaxed ordering).
__device__ inline unsigned long long atomicAdd(
    unsigned long long* address, unsigned long long val)
{
    return __atomic_fetch_add(address, val, __ATOMIC_RELAXED);
}
// Atomic float add; returns the pre-add value.
__device__ inline float atomicAdd(float* address, float val)
{
#if defined(__AMDGCN_UNSAFE_FP_ATOMICS__)
    // Opt-in fast path: hardware FP atomics with relaxed FP semantics
    // (see amd_hip_unsafe_atomics.h).
    return unsafeAtomicAdd(address, val);
#else
    return __atomic_fetch_add(address, val, __ATOMIC_RELAXED);
#endif
}
// Deprecated fire-and-forget float add (no return value), lowered via OCKL.
// The DEPRECATED attribute is only emitted outside hipRTC, where the macro
// is available.
#if !defined(__HIPCC_RTC__)
DEPRECATED("use atomicAdd instead")
#endif
__device__ inline void atomicAddNoRet(float* address, float val)
{
    __ockl_atomic_add_noret_f32(address, val);
}
// Atomic double add; returns the pre-add value.
__device__ inline double atomicAdd(double* address, double val)
{
#if defined(__AMDGCN_UNSAFE_FP_ATOMICS__)
    // Opt-in fast path: hardware FP atomics with relaxed FP semantics
    // (see amd_hip_unsafe_atomics.h).
    return unsafeAtomicAdd(address, val);
#else
    return __atomic_fetch_add(address, val, __ATOMIC_RELAXED);
#endif
}
// Atomic subtract; returns the pre-subtract value (relaxed ordering).
__device__ inline int atomicSub(int* address, int val)
{
    return __atomic_fetch_sub(address, val, __ATOMIC_RELAXED);
}
// Atomic subtract; returns the pre-subtract value (relaxed ordering).
__device__ inline unsigned int atomicSub(unsigned int* address, unsigned int val)
{
    return __atomic_fetch_sub(address, val, __ATOMIC_RELAXED);
}
// Atomic exchange; stores val and returns the previous value.
__device__ inline int atomicExch(int* address, int val)
{
    return __atomic_exchange_n(address, val, __ATOMIC_RELAXED);
}
// Atomic exchange; stores val and returns the previous value.
__device__ inline unsigned int atomicExch(unsigned int* address, unsigned int val)
{
    return __atomic_exchange_n(address, val, __ATOMIC_RELAXED);
}
// Atomic exchange; stores val and returns the previous value.
__device__ inline unsigned long long atomicExch(
    unsigned long long* address, unsigned long long val)
{
    return __atomic_exchange_n(address, val, __ATOMIC_RELAXED);
}
// Atomic float exchange; stores val and returns the previous value.
__device__ inline float atomicExch(float* address, float val)
{
    // __atomic_exchange_n has no float overload: exchange the raw bit
    // pattern through an unsigned int view and convert back.
    return __uint_as_float(__atomic_exchange_n(
        reinterpret_cast<unsigned int*>(address),
        __float_as_uint(val),
        __ATOMIC_RELAXED));
}
// Atomic minimum; returns the pre-operation value (relaxed ordering).
__device__ inline int atomicMin(int* address, int val)
{
    return __atomic_fetch_min(address, val, __ATOMIC_RELAXED);
}
// Atomic minimum; returns the pre-operation value (relaxed ordering).
__device__ inline unsigned int atomicMin(unsigned int* address, unsigned int val)
{
    return __atomic_fetch_min(address, val, __ATOMIC_RELAXED);
}
// Atomic minimum for unsigned long long, built from a CAS loop.
// Returns the pre-operation value.
__device__ inline unsigned long long atomicMin(
    unsigned long long* address, unsigned long long val)
{
    unsigned long long tmp{__atomic_load_n(address, __ATOMIC_RELAXED)};
    while (val < tmp) {
        const auto tmp1 = __atomic_load_n(address, __ATOMIC_RELAXED);

        // Another thread updated *address since we sampled it: resample
        // instead of issuing a CAS that is guaranteed to fail.
        if (tmp1 != tmp) { tmp = tmp1; continue; }

        tmp = atomicCAS(address, tmp, val);
    }

    return tmp;
}
// Atomic minimum for long long, built from a CAS loop.
// Returns the pre-operation value.
__device__ inline long long atomicMin(long long* address, long long val)
{
    long long tmp{__atomic_load_n(address, __ATOMIC_RELAXED)};
    while (val < tmp) {
        const auto tmp1 = __atomic_load_n(address, __ATOMIC_RELAXED);

        // Another thread updated *address since we sampled it: resample
        // instead of issuing a CAS that is guaranteed to fail.
        if (tmp1 != tmp) { tmp = tmp1; continue; }

        tmp = atomicCAS(address, tmp, val);
    }

    return tmp;
}
// Atomic maximum; returns the pre-operation value (relaxed ordering).
__device__ inline int atomicMax(int* address, int val)
{
    return __atomic_fetch_max(address, val, __ATOMIC_RELAXED);
}
// Atomic maximum; returns the pre-operation value (relaxed ordering).
__device__ inline unsigned int atomicMax(unsigned int* address, unsigned int val)
{
    return __atomic_fetch_max(address, val, __ATOMIC_RELAXED);
}
// Atomic maximum for unsigned long long, built from a CAS loop.
// Returns the pre-operation value.
__device__ inline unsigned long long atomicMax(
    unsigned long long* address, unsigned long long val)
{
    unsigned long long tmp{__atomic_load_n(address, __ATOMIC_RELAXED)};
    while (tmp < val) {
        const auto tmp1 = __atomic_load_n(address, __ATOMIC_RELAXED);

        // Another thread updated *address since we sampled it: resample
        // instead of issuing a CAS that is guaranteed to fail.
        if (tmp1 != tmp) { tmp = tmp1; continue; }

        tmp = atomicCAS(address, tmp, val);
    }

    return tmp;
}
// Atomic maximum for long long, built from a CAS loop.
// Returns the pre-operation value.
__device__ inline long long atomicMax(long long* address, long long val)
{
    long long tmp{__atomic_load_n(address, __ATOMIC_RELAXED)};
    while (tmp < val) {
        const auto tmp1 = __atomic_load_n(address, __ATOMIC_RELAXED);

        // Another thread updated *address since we sampled it: resample
        // instead of issuing a CAS that is guaranteed to fail.
        if (tmp1 != tmp) { tmp = tmp1; continue; }

        tmp = atomicCAS(address, tmp, val);
    }

    return tmp;
}
// CUDA-style atomic increment-with-wrap, via the AMDGCN hardware builtin
// at agent ("agent") scope; returns the pre-operation value.
__device__ inline unsigned int atomicInc(unsigned int* address, unsigned int val)
{
    return __builtin_amdgcn_atomic_inc32(address, val, __ATOMIC_RELAXED, "agent");
}
// CUDA-style atomic decrement-with-wrap, via the AMDGCN hardware builtin
// at agent ("agent") scope; returns the pre-operation value.
__device__ inline unsigned int atomicDec(unsigned int* address, unsigned int val)
{
    return __builtin_amdgcn_atomic_dec32(address, val, __ATOMIC_RELAXED, "agent");
}
// Atomic bitwise AND; returns the pre-AND value (relaxed ordering).
__device__ inline int atomicAnd(int* address, int val)
{
    return __atomic_fetch_and(address, val, __ATOMIC_RELAXED);
}
// Atomic bitwise AND; returns the pre-AND value (relaxed ordering).
__device__ inline unsigned int atomicAnd(unsigned int* address, unsigned int val)
{
    return __atomic_fetch_and(address, val, __ATOMIC_RELAXED);
}
// Atomic bitwise AND; returns the pre-AND value (relaxed ordering).
__device__ inline unsigned long long atomicAnd(
    unsigned long long* address, unsigned long long val)
{
    return __atomic_fetch_and(address, val, __ATOMIC_RELAXED);
}
// Atomic bitwise OR; returns the pre-OR value (relaxed ordering).
__device__ inline int atomicOr(int* address, int val)
{
    return __atomic_fetch_or(address, val, __ATOMIC_RELAXED);
}
// Atomic bitwise OR; returns the pre-OR value (relaxed ordering).
__device__ inline unsigned int atomicOr(unsigned int* address, unsigned int val)
{
    return __atomic_fetch_or(address, val, __ATOMIC_RELAXED);
}
// Atomic bitwise OR; returns the pre-OR value (relaxed ordering).
__device__ inline unsigned long long atomicOr(
    unsigned long long* address, unsigned long long val)
{
    return __atomic_fetch_or(address, val, __ATOMIC_RELAXED);
}
// Atomic bitwise XOR; returns the pre-XOR value (relaxed ordering).
__device__ inline int atomicXor(int* address, int val)
{
    return __atomic_fetch_xor(address, val, __ATOMIC_RELAXED);
}
// Atomic bitwise XOR; returns the pre-XOR value (relaxed ordering).
__device__ inline unsigned int atomicXor(unsigned int* address, unsigned int val)
{
    return __atomic_fetch_xor(address, val, __ATOMIC_RELAXED);
}
// Atomic bitwise XOR; returns the pre-XOR value (relaxed ordering).
__device__ inline unsigned long long atomicXor(
    unsigned long long* address, unsigned long long val)
{
    return __atomic_fetch_xor(address, val, __ATOMIC_RELAXED);
}