1/*========================== begin_copyright_notice ============================
2
3Copyright (C) 2017-2021 Intel Corporation
4
5SPDX-License-Identifier: MIT
6
7============================= end_copyright_notice ===========================*/
8
9// Atomic Instructions
10
11#include "../Headers/spirv.h"
12
13#define ATOMIC_FLAG_TRUE 1
14#define ATOMIC_FLAG_FALSE 0
15
16#define SEMANTICS_PRE_OP_NEED_FENCE ( Release | AcquireRelease | SequentiallyConsistent)
17
18#define SEMANTICS_POST_OP_NEEDS_FENCE ( Acquire | AcquireRelease | SequentiallyConsistent)
19
20extern __constant int __UseNativeFP32GlobalAtomicAdd;
21extern __constant int __UseNativeFP16AtomicMinMax;
22
23
24  __local int* __builtin_IB_get_local_lock();
25  __global int* __builtin_IB_get_global_lock();
26  void __builtin_IB_eu_thread_pause(uint value);
27  void __intel_memfence_handler(bool flushRW, bool isGlobal, bool invalidateL1);
28
29#define LOCAL_SPINLOCK_START() \
30  { \
31  volatile bool done = false; \
32  while(!done) { \
33       __builtin_IB_eu_thread_pause(32); \
34       if(SPIRV_BUILTIN(AtomicCompareExchange, _p3i32_i32_i32_i32_i32_i32, )(__builtin_IB_get_local_lock(), Device, Relaxed, Relaxed, 1, 0) == 0) {
35
36#define LOCAL_SPINLOCK_END() \
37            done = true; \
38            SPIRV_BUILTIN(AtomicStore, _p3i32_i32_i32_i32, )(__builtin_IB_get_local_lock(), Device, SequentiallyConsistent | WorkgroupMemory, 0); \
39  }}}
40
41#define GLOBAL_SPINLOCK_START() \
42  { \
43  volatile bool done = false; \
44  while(!done) { \
45       __builtin_IB_eu_thread_pause(32); \
46       if(SPIRV_BUILTIN(AtomicCompareExchange, _p1i32_i32_i32_i32_i32_i32, )(__builtin_IB_get_global_lock(), Device, Relaxed, Relaxed, 1, 0) == 0) {
47
48#define GLOBAL_SPINLOCK_END() \
49            done = true; \
50            SPIRV_BUILTIN(AtomicStore, _p1i32_i32_i32_i32, )(__builtin_IB_get_global_lock(), Device, SequentiallyConsistent | CrossWorkgroupMemory, 0); \
51  }}}
52
53#define FENCE_PRE_OP(Scope, Semantics, isGlobal)                                      \
54  if( ( (Semantics) & ( SEMANTICS_PRE_OP_NEED_FENCE ) ) > 0 )                         \
55  {                                                                                   \
56      bool flushL3 = (isGlobal) && ((Scope) == Device || (Scope) == CrossDevice);     \
57      __intel_memfence_handler(flushL3, isGlobal, false);                             \
58  }
59
60#define FENCE_POST_OP(Scope, Semantics, isGlobal)                                     \
61  if( ( (Semantics) & ( SEMANTICS_POST_OP_NEEDS_FENCE ) ) > 0 )                       \
62  {                                                                                   \
63      bool flushL3 = (isGlobal) && ((Scope) == Device || (Scope) == CrossDevice);     \
64      __intel_memfence_handler(flushL3, isGlobal, false);                             \
65  }
66
67// This fencing scheme allows us to obey the memory model when coherency is
68// enabled or disabled.  Because the L3$ has 2 pipelines (cohereny&atomics and
69// non-coherant) the fences guarentee the memory model is followed when coherency
70// is disabled.
71//
72// When coherency is enabled, though, all HDC traffic uses the same L3$ pipe so
73// these fences would not be needed.  The compiler is agnostic to coherency
74// being enabled or disbled so we asume the worst case.
75
76
77#define atomic_operation_1op( INTRINSIC, TYPE, Pointer, Scope, Semantics, Value, isGlobal )   \
78{                                                                                             \
79    FENCE_PRE_OP((Scope), (Semantics), isGlobal)                                              \
80    TYPE result = INTRINSIC( (Pointer), (Value) );                                            \
81    FENCE_POST_OP((Scope), (Semantics), isGlobal)                                             \
82    return result;                                                                            \
83}
84
85#define atomic_operation_1op_as_float( INTRINSIC, TYPE, Pointer, Scope, Semantics, Value, isGlobal )\
86{                                                                                             \
87    FENCE_PRE_OP((Scope), (Semantics), isGlobal)                                              \
88    TYPE result = as_float(INTRINSIC( (Pointer), (Value) ));                                  \
89    FENCE_POST_OP((Scope), (Semantics), isGlobal)                                             \
90    return result;                                                                            \
91}
92
93#define atomic_operation_1op_as_double( INTRINSIC, TYPE, Pointer, Scope, Semantics, Value, isGlobal )\
94{                                                                                             \
95    FENCE_PRE_OP((Scope), (Semantics), isGlobal)                                              \
96    TYPE result = as_double(INTRINSIC( (Pointer), (Value) ));                                  \
97    FENCE_POST_OP((Scope), (Semantics), isGlobal)                                             \
98    return result;                                                                            \
99}
100
101#define atomic_operation_1op_as_half( INTRINSIC, TYPE, Pointer, Scope, Semantics, Value, isGlobal )\
102{                                                                                             \
103    FENCE_PRE_OP((Scope), (Semantics), isGlobal)                                              \
104    TYPE result = as_half(INTRINSIC( (Pointer), (Value) ));                                  \
105    FENCE_POST_OP((Scope), (Semantics), isGlobal)                                             \
106    return result;                                                                            \
107}
108
109#define atomic_operation_0op( INTRINSIC, TYPE, Pointer, Scope, Semantics, isGlobal )          \
110{                                                                                             \
111    FENCE_PRE_OP((Scope), (Semantics), isGlobal)                                              \
112    TYPE result = INTRINSIC( (Pointer) );                                                     \
113    FENCE_POST_OP((Scope), (Semantics), isGlobal)                                             \
114    return result;                                                                            \
115}
116
117#define atomic_cmpxhg( INTRINSIC, TYPE, Pointer, Scope, Semantics, Value, Comp, isGlobal )\
118{                                                                                         \
119    FENCE_PRE_OP((Scope), (Semantics), isGlobal)                                          \
120    TYPE result = INTRINSIC( (Pointer), (Comp), (Value) );                                \
121    FENCE_POST_OP((Scope), (Semantics), isGlobal)                                         \
122    return result;                                                                        \
123}
124
125#define atomic_cmpxhg_as_float( INTRINSIC, TYPE, Pointer, Scope, Semantics, Value, Comp, isGlobal )\
126{                                                                                         \
127    FENCE_PRE_OP((Scope), (Semantics), isGlobal)                                          \
128    TYPE result = as_float(INTRINSIC( (Pointer), (Comp), (Value) ));                      \
129    FENCE_POST_OP((Scope), (Semantics), isGlobal)                                         \
130    return result;                                                                        \
131}
132
133
134// Atomic loads/stores must be implemented with an atomic operation - While our HDC has an in-order
135// pipeline the L3$ has 2 pipelines - coherant and non-coherant.  Even when coherency is disabled atomics
136// will still go down the coherant pipeline.  The 2 L3$ pipes do not guarentee order of operations between
137// themselves.
138
139// Since we dont have specialized atomic load/store HDC message we're using atomic_or( a, 0x0 ) to emulate
140// an atomic load since it does not modify the in memory value and returns the 'old' value. atomic store
141// can be implemented with an atomic_exchance with the return value ignored.
142
143int SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicLoad, _p0i32_i32_i32, )( __private int *Pointer, int Scope, int Semantics )
144{
145    return *Pointer;
146}
147
148int SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicLoad, _p1i32_i32_i32, )( __global int *Pointer, int Scope, int Semantics )
149{
150    return SPIRV_BUILTIN(AtomicOr, _p1i32_i32_i32_i32, )( Pointer, Scope, Semantics, 0 );
151}
152
153int SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicLoad, _p3i32_i32_i32, )( __local int *Pointer, int Scope, int Semantics )
154{
155    return SPIRV_BUILTIN(AtomicOr, _p3i32_i32_i32_i32, )( Pointer, Scope, Semantics, 0 );
156}
157
158#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
159
160int SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicLoad, _p4i32_i32_i32, )( __generic int *Pointer, int Scope, int Semantics )
161{
162    return SPIRV_BUILTIN(AtomicOr, _p4i32_i32_i32_i32, )( Pointer, Scope, Semantics, 0 );
163}
164
165#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
166
167#if defined(cl_khr_int64_base_atomics) || defined(cl_khr_int64_extended_atomics)
168
169long SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicLoad, _p0i64_i32_i32, )( __private long *Pointer, int Scope, int Semantics )
170{
171    return *Pointer;
172}
173
174long SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicLoad, _p1i64_i32_i32, )( __global long *Pointer, int Scope, int Semantics )
175{
176    return SPIRV_BUILTIN(AtomicOr, _p1i64_i32_i32_i64, )( Pointer, Scope, Semantics, 0 );
177}
178
179long SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicLoad, _p3i64_i32_i32, )( __local long *Pointer, int Scope, int Semantics )
180{
181    return SPIRV_BUILTIN(AtomicOr, _p3i64_i32_i32_i64, )( Pointer, Scope, Semantics, 0 );
182}
183
184#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
185
186long SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicLoad, _p4i64_i32_i32, )( __generic long *Pointer, int Scope, int Semantics )
187{
188    return SPIRV_BUILTIN(AtomicOr, _p4i64_i32_i32_i64, )( Pointer, Scope, Semantics, 0 );
189}
190
191#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
192
193#endif // defined(cl_khr_int64_base_atomics) || defined(cl_khr_int64_extended_atomics)
194
195
196float SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicLoad, _p0f32_i32_i32, )( __private float *Pointer, int Scope, int Semantics )
197{
198    return *Pointer;
199}
200
201
202float SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicLoad, _p1f32_i32_i32, )( __global float *Pointer, int Scope, int Semantics )
203{
204    return as_float( SPIRV_BUILTIN(AtomicOr, _p1i32_i32_i32_i32, )( (__global int*)Pointer, Scope, Semantics, 0 ) );
205}
206
207float SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicLoad, _p3f32_i32_i32, )( __local float *Pointer, int Scope, int Semantics )
208{
209    return as_float( SPIRV_BUILTIN(AtomicOr, _p3i32_i32_i32_i32, )( (__local int*)Pointer, Scope, Semantics, 0 ) );
210}
211
212#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
213
214float SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicLoad, _p4f32_i32_i32, )( __generic float *Pointer, int Scope, int Semantics )
215{
216    return as_float( SPIRV_BUILTIN(AtomicOr, _p4i32_i32_i32_i32, )( (volatile __generic int*)Pointer, Scope, Semantics, 0 ) );
217}
218
219#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
220
221#if defined(cl_khr_fp64)
222#if defined(cl_khr_int64_base_atomics) || defined(cl_khr_int64_extended_atomics)
223double SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicLoad, _p0f64_i32_i32, )( __private double *Pointer, int Scope, int Semantics )
224{
225    return *Pointer;
226}
227
228
229double SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicLoad, _p1f64_i32_i32, )( __global double *Pointer, int Scope, int Semantics )
230{
231    return as_double( SPIRV_BUILTIN(AtomicOr, _p1i64_i32_i32_i64, )( (__global long*)Pointer, Scope, Semantics, 0 ) );
232}
233
234double SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicLoad, _p3f64_i32_i32, )( __local double *Pointer, int Scope, int Semantics )
235{
236    return as_double( SPIRV_BUILTIN(AtomicOr, _p3i64_i32_i32_i64, )( (__local long*)Pointer, Scope, Semantics, 0 ) );
237}
238
239#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
240
241double SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicLoad, _p4f64_i32_i32, )( __generic double *Pointer, int Scope, int Semantics )
242{
243    return as_double( SPIRV_BUILTIN(AtomicOr, _p4i64_i32_i32_i64, )( (__generic long*)Pointer, Scope, Semantics, 0 ) );
244}
245
246#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
247#endif // defined(cl_khr_int64_base_atomics) || defined(cl_khr_int64_extended_atomics)
248#endif // defined(cl_khr_fp64)
249
250
251// Atomic Stores
252
253
254void SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicStore, _p0i32_i32_i32_i32, )( __private int *Pointer, int Scope, int Semantics, int Value )
255{
256    *Pointer = Value;
257}
258
259
260void SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicStore, _p1i32_i32_i32_i32, )( __global int *Pointer, int Scope, int Semantics, int Value )
261{
262    SPIRV_BUILTIN(AtomicExchange, _p1i32_i32_i32_i32, )( Pointer, Scope, Semantics, Value );
263}
264
265
266void SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicStore, _p3i32_i32_i32_i32, )( __local int *Pointer, int Scope, int Semantics, int Value )
267{
268    SPIRV_BUILTIN(AtomicExchange, _p3i32_i32_i32_i32, )( Pointer, Scope, Semantics, Value );
269}
270
271#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
272
273void SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicStore, _p4i32_i32_i32_i32, )( __generic int *Pointer, int Scope, int Semantics, int Value )
274{
275    SPIRV_BUILTIN(AtomicExchange, _p4i32_i32_i32_i32, )( Pointer, Scope, Semantics, Value );
276}
277
278#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
279
280
281#if defined(cl_khr_int64_base_atomics) || defined(cl_khr_int64_extended_atomics)
282
283void SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicStore, _p0i64_i32_i32_i64, )( __private long *Pointer, int Scope, int Semantics, long Value )
284{
285    *Pointer = Value;
286}
287
288
289void SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicStore, _p1i64_i32_i32_i64, )( __global long *Pointer, int Scope, int Semantics, long Value )
290{
291    SPIRV_BUILTIN(AtomicExchange, _p1i64_i32_i32_i64, )( Pointer, Scope, Semantics, Value );
292}
293
294
295void SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicStore, _p3i64_i32_i32_i64, )( __local long *Pointer, int Scope, int Semantics, long Value )
296{
297    SPIRV_BUILTIN(AtomicExchange, _p3i64_i32_i32_i64, )( Pointer, Scope, Semantics, Value );
298}
299
300#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
301
302void SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicStore, _p4i64_i32_i32_i64, )( __generic long *Pointer, int Scope, int Semantics, long Value )
303{
304    SPIRV_BUILTIN(AtomicExchange, _p4i64_i32_i32_i64, )( Pointer, Scope, Semantics, Value );
305}
306
307#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
308
309#endif // defined(cl_khr_int64_base_atomics) || defined(cl_khr_int64_extended_atomics)
310
311
312void SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicStore, _p0f32_i32_i32_f32, )( __private float *Pointer, int Scope, int Semantics, float Value )
313{
314    SPIRV_BUILTIN(AtomicExchange, _p0f32_i32_i32_f32, )( Pointer, Scope, Semantics, Value );
315}
316
317
318void SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicStore, _p1f32_i32_i32_f32, )( __global float *Pointer, int Scope, int Semantics, float Value )
319{
320    SPIRV_BUILTIN(AtomicExchange, _p1f32_i32_i32_f32, )( Pointer, Scope, Semantics, Value );
321}
322
323
324void SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicStore, _p3f32_i32_i32_f32, )( __local float *Pointer, int Scope, int Semantics, float Value )
325{
326    SPIRV_BUILTIN(AtomicExchange, _p3f32_i32_i32_f32, )( Pointer, Scope, Semantics, Value );
327}
328
329#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
330
331void SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicStore, _p4f32_i32_i32_f32, )( __generic float *Pointer, int Scope, int Semantics, float Value )
332{
333    SPIRV_BUILTIN(AtomicExchange, _p4f32_i32_i32_f32, )( Pointer, Scope, Semantics, Value );
334}
335
336#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
337
338#if defined(cl_khr_fp64)
339#if defined(cl_khr_int64_base_atomics) || defined(cl_khr_int64_extended_atomics)
340
341void SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicStore, _p0f64_i32_i32_f64, )( __private double *Pointer, int Scope, int Semantics, double Value )
342{
343    SPIRV_BUILTIN(AtomicExchange, _p0f64_i32_i32_f64, )( Pointer, Scope, Semantics, Value );
344}
345
346
347void SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicStore, _p1f64_i32_i32_f64, )( __global double *Pointer, int Scope, int Semantics, double Value )
348{
349    SPIRV_BUILTIN(AtomicExchange, _p1f64_i32_i32_f64, )( Pointer, Scope, Semantics, Value );
350}
351
352
353void SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicStore, _p3f64_i32_i32_f64, )( __local double *Pointer, int Scope, int Semantics, double Value )
354{
355    SPIRV_BUILTIN(AtomicExchange, _p3f64_i32_i32_f64, )( Pointer, Scope, Semantics, Value );
356}
357
358#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
359
360void SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicStore, _p4f64_i32_i32_f64, )( __generic double *Pointer, int Scope, int Semantics, double Value )
361{
362    SPIRV_BUILTIN(AtomicExchange, _p4f64_i32_i32_f64, )( Pointer, Scope, Semantics, Value );
363}
364
365#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
366
367#endif // defined(cl_khr_int64_base_atomics) || defined(cl_khr_int64_extended_atomics)
368#endif // defined(cl_khr_fp64)
369
370
371// Atomic Exchange
372
373
374int SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicExchange, _p0i32_i32_i32_i32, )( __private int *Pointer, int Scope, int Semantics, int Value )
375{
376    uint orig = *Pointer;
377    *Pointer = Value;
378    return orig;
379}
380
381
382int SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicExchange, _p1i32_i32_i32_i32, )( __global int *Pointer, int Scope, int Semantics, int Value )
383{
384    atomic_operation_1op( __builtin_IB_atomic_xchg_global_i32, uint, (global int*)Pointer, Scope, Semantics, Value, true );
385}
386
387
388int SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicExchange, _p3i32_i32_i32_i32, )( __local int *Pointer, int Scope, int Semantics, int Value )
389{
390    atomic_operation_1op( __builtin_IB_atomic_xchg_local_i32, uint, (local int*)Pointer, Scope, Semantics, Value, false );
391}
392
393#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
394
395int SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicExchange, _p4i32_i32_i32_i32, )( __generic int *Pointer, int Scope, int Semantics, int Value )
396{
397    if(SPIRV_BUILTIN(GenericCastToPtrExplicit, _p3i8_p4i8_i32, _ToLocal)(__builtin_astype((Pointer), __generic char*), StorageWorkgroup))
398    {
399        atomic_operation_1op( __builtin_IB_atomic_xchg_local_i32, uint, (__local int*)Pointer, Scope, Semantics, Value, false );
400    }
401    else
402    {
403        atomic_operation_1op( __builtin_IB_atomic_xchg_global_i32, uint, (__global int*)Pointer, Scope, Semantics, Value, true );
404    }
405
406}
407
408#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
409
410#if defined(cl_khr_int64_base_atomics)
411long SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicExchange, _p0i64_i32_i32_i64, )( __private long *Pointer, int Scope, int Semantics, long Value )
412{
413    ulong orig = *Pointer;
414    *Pointer = Value;
415    return orig;
416}
417
418
419long SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicExchange, _p1i64_i32_i32_i64, )( __global long *Pointer, int Scope, int Semantics, long Value )
420{
421    atomic_operation_1op( __builtin_IB_atomic_xchg_global_i64, ulong, (global long*)Pointer, Scope, Semantics, Value, true );
422}
423
424enum IntAtomicOp
425{
426    ATOMIC_IADD64,
427    ATOMIC_SUB64,
428    ATOMIC_XCHG64,
429    ATOMIC_AND64,
430    ATOMIC_OR64,
431    ATOMIC_XOR64,
432    ATOMIC_IMIN64,
433    ATOMIC_IMAX64,
434    ATOMIC_UMAX64,
435    ATOMIC_UMIN64
436};
437
438// handle int64 SLM atomic add/sub/xchg/and/or/xor/umax/umin
439ulong OVERLOADABLE __intel_atomic_binary( enum IntAtomicOp atomicOp, volatile __local ulong *Pointer,
440    uint Scope, uint Semantics, ulong Value )
441{
442
443    ulong orig;
444    FENCE_PRE_OP(Scope, Semantics, false)
445    LOCAL_SPINLOCK_START();
446    orig = *Pointer;
447    switch (atomicOp)
448    {
449        case ATOMIC_UMIN64: *Pointer = ( orig < Value ) ? orig : Value; break;
450        case ATOMIC_UMAX64: *Pointer = ( orig > Value ) ? orig : Value; break;
451        default: break; // What should we do here? OCL doesn't have assert
452    }
453    LOCAL_SPINLOCK_END();
454    FENCE_POST_OP(Scope, Semantics, false)
455    return orig;
456}
457
458// handle int64 SLM atomic IMin and IMax
459long OVERLOADABLE __intel_atomic_binary( enum IntAtomicOp atomicOp, volatile __local long *Pointer,
460    uint Scope, uint Semantics, long Value )
461{
462
463    long orig;
464    FENCE_PRE_OP(Scope, Semantics, false)
465    LOCAL_SPINLOCK_START()
466    orig = *Pointer;
467    switch (atomicOp)
468    {
469        case ATOMIC_IADD64: *Pointer += Value; break;
470        case ATOMIC_SUB64:  *Pointer -= Value; break;
471        case ATOMIC_AND64:  *Pointer &= Value; break;
472        case ATOMIC_OR64:   *Pointer |= Value; break;
473        case ATOMIC_XOR64:  *Pointer ^= Value; break;
474        case ATOMIC_XCHG64: *Pointer = Value; break;
475        case ATOMIC_IMIN64: *Pointer = ( orig < Value ) ? orig : Value; break;
476        case ATOMIC_IMAX64: *Pointer = ( orig > Value ) ? orig : Value; break;
477        default: break; // What should we do here? OCL doesn't have assert
478    }
479    LOCAL_SPINLOCK_END()
480    FENCE_POST_OP(Scope, Semantics, false)
481    return orig;
482}
483
484// handle uint64 SLM atomic inc/dec
485ulong OVERLOADABLE __intel_atomic_unary( bool isInc, volatile __local ulong *Pointer, uint Scope, uint Semantics )
486{
487
488    ulong orig;
489    FENCE_PRE_OP(Scope, Semantics, false)
490    LOCAL_SPINLOCK_START()
491    orig = *Pointer;
492    *Pointer = isInc ? orig + 1 : orig - 1;
493    LOCAL_SPINLOCK_END()
494    FENCE_POST_OP(Scope, Semantics, false)
495    return orig;
496}
497
498long SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicExchange, _p3i64_i32_i32_i64, )( __local long *Pointer, int Scope, int Semantics, long Value )
499{
500    return __intel_atomic_binary(ATOMIC_XCHG64, Pointer, Scope, Semantics, Value);
501}
502
503
504#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
505
506long SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicExchange, _p4i64_i32_i32_i64, )( __generic long *Pointer, int Scope, int Semantics, long Value )
507{
508    if(SPIRV_BUILTIN(GenericCastToPtrExplicit, _p3i8_p4i8_i32, _ToLocal)(__builtin_astype((Pointer), __generic char*), StorageWorkgroup))
509    {
510        return SPIRV_BUILTIN(AtomicExchange, _p3i64_i32_i32_i64, )((__local long*)Pointer, Scope, Semantics, Value);
511    }
512    else
513    {
514        return SPIRV_BUILTIN(AtomicExchange, _p1i64_i32_i32_i64, )((__global long*)Pointer, Scope, Semantics, Value);
515    }
516}
517
518#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
519
520#endif // defined(cl_khr_int64_base_atomics)
521
522float SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicExchange, _p0f32_i32_i32_f32, )( __private float *Pointer, int Scope, int Semantics, float Value)
523{
524    float orig = *Pointer;
525
526    *Pointer = Value;
527
528    return orig;
529}
530
531float SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicExchange, _p1f32_i32_i32_f32, )( __global float *Pointer, int Scope, int Semantics, float Value)
532{
533    atomic_operation_1op_as_float( __builtin_IB_atomic_xchg_global_i32, float, (global int*)Pointer, Scope, Semantics, as_int(Value), true );
534}
535
536
537float SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicExchange, _p3f32_i32_i32_f32, )( __local float *Pointer, int Scope, int Semantics, float Value)
538{
539    atomic_operation_1op_as_float( __builtin_IB_atomic_xchg_local_i32, float, (local int*)Pointer, Scope, Semantics, as_int(Value), false );
540}
541
542#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
543
544float SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicExchange, _p4f32_i32_i32_f32, )( __generic float *Pointer, int Scope, int Semantics, float Value)
545{
546    if(SPIRV_BUILTIN(GenericCastToPtrExplicit, _p3i8_p4i8_i32, _ToLocal)(__builtin_astype((Pointer), __generic char*), StorageWorkgroup))
547    {
548        atomic_operation_1op_as_float( __builtin_IB_atomic_xchg_local_i32, float, (local int*)Pointer, Scope, Semantics, as_int(Value), false );
549    }
550    else
551    {
552        atomic_operation_1op_as_float( __builtin_IB_atomic_xchg_global_i32, float, (global int*)Pointer, Scope, Semantics, as_int(Value), true );
553    }
554}
555
556#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
557
558#if defined(cl_khr_fp64)
559#if defined(cl_khr_int64_base_atomics)
560
561double SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicExchange, _p0f64_i32_i32_f64, )( __private double *Pointer, int Scope, int Semantics, double Value)
562{
563    return as_double(SPIRV_BUILTIN(AtomicExchange, _p0i64_i32_i32_i64, )((__private long*) Pointer, Scope, Semantics, as_long(Value)));
564}
565
566double SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicExchange, _p1f64_i32_i32_f64, )( __global double *Pointer, int Scope, int Semantics, double Value)
567{
568    return as_double(SPIRV_BUILTIN(AtomicExchange, _p1i64_i32_i32_i64, )((__global long*) Pointer, Scope, Semantics, as_long(Value)));
569}
570
571
572double SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicExchange, _p3f64_i32_i32_f64, )( __local double *Pointer, int Scope, int Semantics, double Value)
573{
574    return as_double(SPIRV_BUILTIN(AtomicExchange, _p3i64_i32_i32_i64, )((__local long*) Pointer, Scope, Semantics, as_long(Value)));
575}
576
577#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
578
579double SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicExchange, _p4f64_i32_i32_f64, )( __generic double *Pointer, int Scope, int Semantics, double Value)
580{
581    if(SPIRV_BUILTIN(GenericCastToPtrExplicit, _p3i8_p4i8_i32, _ToLocal)(__builtin_astype((Pointer), __generic char*), StorageWorkgroup))
582    {
583        return SPIRV_BUILTIN(AtomicExchange, _p3f64_i32_i32_f64, )((__local double*) Pointer, Scope, Semantics, Value);
584    }
585    else
586    {
587        return SPIRV_BUILTIN(AtomicExchange, _p1f64_i32_i32_f64, )((__global double*) Pointer, Scope, Semantics, Value);
588    }
589}
590
591#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
592
593#endif // defined(cl_khr_int64_base_atomics)
594#endif // defined(cl_khr_fp64)
595
596
597// Atomic Compare Exchange
598
599
600int SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicCompareExchange, _p0i32_i32_i32_i32_i32_i32, )( __private int *Pointer, int Scope, int Equal, int Unequal, int Value, int Comparator)
601{
602    uint orig = *Pointer;
603    if( orig == Comparator )
604    {
605        *Pointer = Value;
606    }
607    return orig;
608}
609
610
611int SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicCompareExchange, _p1i32_i32_i32_i32_i32_i32, )( __global int *Pointer, int Scope, int Equal, int Unequal, int Value, int Comparator)
612{
613    atomic_cmpxhg( __builtin_IB_atomic_cmpxchg_global_i32, uint, (global int*)Pointer, Scope, Equal, Value, Comparator, true );
614}
615
616
617int SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicCompareExchange, _p3i32_i32_i32_i32_i32_i32, )( __local int *Pointer, int Scope, int Equal, int Unequal, int Value, int Comparator)
618{
619    atomic_cmpxhg( __builtin_IB_atomic_cmpxchg_local_i32, uint, (local int*)Pointer, Scope, Equal, Value, Comparator, false );
620}
621
622#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
623
624int SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicCompareExchange, _p4i32_i32_i32_i32_i32_i32, )( __generic int *Pointer, int Scope, int Equal, int Unequal, int Value, int Comparator)
625{
626    if(SPIRV_BUILTIN(GenericCastToPtrExplicit, _p3i8_p4i8_i32, _ToLocal)(__builtin_astype((Pointer), __generic char*), StorageWorkgroup))
627    {
628        atomic_cmpxhg( __builtin_IB_atomic_cmpxchg_local_i32, uint, (__local int*)Pointer, Scope, Equal, Value, Comparator, false );
629    }
630    else
631    {
632        atomic_cmpxhg( __builtin_IB_atomic_cmpxchg_global_i32, uint, (__global int*)Pointer, Scope, Equal, Value, Comparator, true );
633    }
634}
635
636#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
637
638
639#if defined(cl_khr_int64_base_atomics)
640long SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicCompareExchange, _p0i64_i32_i32_i32_i64_i64, )( __private long *Pointer, int Scope, int Equal, int Unequal, long Value, long Comparator)
641{
642    ulong orig = *Pointer;
643    if( orig == Comparator )
644    {
645        *Pointer = Value;
646    }
647    return orig;
648}
649
650
651long SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicCompareExchange, _p1i64_i32_i32_i32_i64_i64, )( __global long *Pointer, int Scope, int Equal, int Unequal, long Value, long Comparator)
652{
653    atomic_cmpxhg( __builtin_IB_atomic_cmpxchg_global_i64, ulong, (global long*)Pointer, Scope, Equal, Value, Comparator, true );
654}
655
656
657long SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicCompareExchange, _p3i64_i32_i32_i32_i64_i64, )( __local long *Pointer, int Scope, int Equal, int Unequal, long Value, long Comparator)
658{
659    ulong orig;
660    FENCE_PRE_OP(Scope, Equal, false)
661    LOCAL_SPINLOCK_START()
662    orig = *Pointer;
663    if( orig == Comparator )
664    {
665        *Pointer = Value;
666    }
667    LOCAL_SPINLOCK_END()
668    FENCE_POST_OP(Scope, Equal, false)
669    return orig;
670}
671
672#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
673
674long SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicCompareExchange, _p4i64_i32_i32_i32_i64_i64, )( __generic long *Pointer, int Scope, int Equal, int Unequal, long Value, long Comparator)
675{
676    if(SPIRV_BUILTIN(GenericCastToPtrExplicit, _p3i8_p4i8_i32, _ToLocal)(__builtin_astype((Pointer), __generic char*), StorageWorkgroup))
677    {
678        return SPIRV_BUILTIN(AtomicCompareExchange, _p3i64_i32_i32_i32_i64_i64, )( (__local long*)Pointer, Scope, Equal, Unequal, Value, Comparator );
679    }
680    else
681    {
682        return SPIRV_BUILTIN(AtomicCompareExchange, _p1i64_i32_i32_i32_i64_i64, )( (__global long*)Pointer, Scope, Equal, Unequal, Value, Comparator );
683    }
684}
685
686#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
687
688#endif // defined(cl_khr_int64_base_atomics)
689
690float SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicCompareExchange, _p0f32_i32_i32_i32_f32_f32, )( __private float *Pointer, int Scope, int Equal, int Unequal, float Value, float Comparator)
691{
692    float orig = *Pointer;
693
694    if( orig == Comparator )
695    {
696        *Pointer = Value;
697    }
698
699    return orig;
700}
701
702// Float compare-and-exchange builtins are handled as integer builtins, because OpenCL C specification says that the float atomics are
703// doing bitwise comparisons, not float comparisons
704
705float SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicCompareExchange, _p1f32_i32_i32_i32_f32_f32, )( __global float *Pointer, int Scope, int Equal, int Unequal, float Value, float Comparator)
706{
707    atomic_cmpxhg_as_float( __builtin_IB_atomic_cmpxchg_global_i32, float, (global int*)Pointer, Scope, Equal, as_uint(Value), as_uint(Comparator), true );
708}
709
710
711float SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicCompareExchange, _p3f32_i32_i32_i32_f32_f32, )( __local float *Pointer, int Scope, int Equal, int Unequal, float Value, float Comparator)
712{
713    atomic_cmpxhg_as_float( __builtin_IB_atomic_cmpxchg_local_i32, float, (local int*)Pointer, Scope, Equal, as_uint(Value), as_uint(Comparator), false );
714}
715
716#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
717
718float SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicCompareExchange, _p4f32_i32_i32_i32_f32_f32, )( __generic float *Pointer, int Scope, int Equal, int Unequal, float Value, float Comparator)
719{
720    if(SPIRV_BUILTIN(GenericCastToPtrExplicit, _p3i8_p4i8_i32, _ToLocal)(__builtin_astype((Pointer), __generic char*), StorageWorkgroup))
721    {
722        atomic_cmpxhg_as_float( __builtin_IB_atomic_cmpxchg_local_i32, float, (__local int*)Pointer, Scope, Equal, as_uint(Value), as_uint(Comparator), false );
723    }
724    else
725    {
726        atomic_cmpxhg_as_float( __builtin_IB_atomic_cmpxchg_global_i32, float, (__global int*)Pointer, Scope, Equal, as_uint(Value), as_uint(Comparator), true );
727    }
728}
729
730#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
731
732int SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicCompareExchangeWeak, _p0i32_i32_i32_i32_i32_i32, )( __private int *Pointer, int Scope, int Equal, int Unequal, int Value, int Comparator)
733{
734    return SPIRV_BUILTIN(AtomicCompareExchange, _p0i32_i32_i32_i32_i32_i32, )( Pointer, Scope, Equal, Unequal, Value, Comparator );
735}
736
737
738int SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicCompareExchangeWeak, _p1i32_i32_i32_i32_i32_i32, )( __global int *Pointer, int Scope, int Equal, int Unequal, int Value, int Comparator)
739{
740    return SPIRV_BUILTIN(AtomicCompareExchange, _p1i32_i32_i32_i32_i32_i32, )( Pointer, Scope, Equal, Unequal, Value, Comparator );
741}
742
743
744int SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicCompareExchangeWeak, _p3i32_i32_i32_i32_i32_i32, )( __local int *Pointer, int Scope, int Equal, int Unequal, int Value, int Comparator)
745{
746    return SPIRV_BUILTIN(AtomicCompareExchange, _p3i32_i32_i32_i32_i32_i32, )( Pointer, Scope, Equal, Unequal, Value, Comparator );
747}
748
749#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
750
751int SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicCompareExchangeWeak, _p4i32_i32_i32_i32_i32_i32, )( __generic int *Pointer, int Scope, int Equal, int Unequal, int Value, int Comparator)
752{
753    return SPIRV_BUILTIN(AtomicCompareExchange, _p4i32_i32_i32_i32_i32_i32, )( Pointer, Scope, Equal, Unequal, Value, Comparator );
754}
755
756#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
757
758#if defined(cl_khr_int64_base_atomics)
759long SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicCompareExchangeWeak, _p0i64_i32_i32_i32_i64_i64, )( __private long *Pointer, int Scope, int Equal, int Unequal, long Value, long Comparator)
760{
761    return SPIRV_BUILTIN(AtomicCompareExchange, _p0i64_i32_i32_i32_i64_i64, )( Pointer, Scope, Equal, Unequal, Value, Comparator );
762}
763
764
765long SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicCompareExchangeWeak, _p1i64_i32_i32_i32_i64_i64, )( __global long *Pointer, int Scope, int Equal, int Unequal, long Value, long Comparator)
766{
767    return SPIRV_BUILTIN(AtomicCompareExchange, _p1i64_i32_i32_i32_i64_i64, )( Pointer, Scope, Equal, Unequal, Value, Comparator );
768}
769
770
771long SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicCompareExchangeWeak, _p3i64_i32_i32_i32_i64_i64, )( __local long *Pointer, int Scope, int Equal, int Unequal, long Value, long Comparator)
772{
773    return SPIRV_BUILTIN(AtomicCompareExchange, _p3i64_i32_i32_i32_i64_i64, )( Pointer, Scope, Equal, Unequal, Value, Comparator );
774}
775
776#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
777
778long SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicCompareExchangeWeak, _p4i64_i32_i32_i32_i64_i64, )( __generic long *Pointer, int Scope, int Equal, int Unequal, long Value, long Comparator)
779{
780    return SPIRV_BUILTIN(AtomicCompareExchange, _p4i64_i32_i32_i32_i64_i64, )( Pointer, Scope, Equal, Unequal, Value, Comparator );
781}
782
783#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
784#endif // defined(cl_khr_int64_base_atomics)
785
786// Atomic Increment
787
788
789int SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicIIncrement, _p0i32_i32_i32, )( __private int *Pointer, int Scope, int Semantics )
790{
791    uint orig = *Pointer;
792    *Pointer += 1;
793    return orig;
794}
795
796
797int SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicIIncrement, _p1i32_i32_i32, )( __global int *Pointer, int Scope, int Semantics )
798{
799    atomic_operation_0op( __builtin_IB_atomic_inc_global_i32, uint, (global int*)Pointer, Scope, Semantics, true );
800}
801
802
803int SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicIIncrement, _p3i32_i32_i32, )( __local int *Pointer, int Scope, int Semantics )
804{
805    atomic_operation_0op( __builtin_IB_atomic_inc_local_i32, uint, (local int*)Pointer, Scope, Semantics, false );
806}
807
808#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
809
810int SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicIIncrement, _p4i32_i32_i32, )( __generic int *Pointer, int Scope, int Semantics )
811{
812    if(SPIRV_BUILTIN(GenericCastToPtrExplicit, _p3i8_p4i8_i32, _ToLocal)(__builtin_astype((Pointer), __generic char*), StorageWorkgroup))
813    {
814        atomic_operation_0op( __builtin_IB_atomic_inc_local_i32, uint, (__local int*)Pointer, Scope, Semantics, false );
815    }
816    else
817    {
818        atomic_operation_0op( __builtin_IB_atomic_inc_global_i32, uint, (__global int*)Pointer, Scope, Semantics, true );
819    }
820}
821
822#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
823
824#if defined(cl_khr_int64_base_atomics)
825long SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicIIncrement, _p0i64_i32_i32, )( __private long *Pointer, int Scope, int Semantics )
826{
827    ulong orig = *Pointer;
828    *Pointer += 1;
829    return orig;
830}
831
832
833long SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicIIncrement, _p1i64_i32_i32, )( __global long *Pointer, int Scope, int Semantics )
834{
835    atomic_operation_0op( __builtin_IB_atomic_inc_global_i64, ulong, (global int*)Pointer, Scope, Semantics, true );
836}
837
838
839long SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicIIncrement, _p3i64_i32_i32, )( __local long *Pointer, int Scope, int Semantics )
840{
841    return __intel_atomic_unary(true, Pointer, Scope, Semantics);
842}
843
844#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
845
846long SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicIIncrement, _p4i64_i32_i32, )( __generic long *Pointer, int Scope, int Semantics )
847{
848    if(SPIRV_BUILTIN(GenericCastToPtrExplicit, _p3i8_p4i8_i32, _ToLocal)(__builtin_astype((Pointer), __generic char*), StorageWorkgroup))
849    {
850        return SPIRV_BUILTIN(AtomicIIncrement, _p3i64_i32_i32, )((__local long*)Pointer, Scope, Semantics );
851    }
852    else
853    {
854        return SPIRV_BUILTIN(AtomicIIncrement, _p1i64_i32_i32, )((__global long*)Pointer, Scope, Semantics );
855    }
856}
857
858#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
859#endif // defined(cl_khr_int64_base_atomics)
860
861// Atomic Decrement
862
863
864int SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicIDecrement, _p0i32_i32_i32, )( __private int *Pointer, int Scope, int Semantics )
865{
866    uint orig = *Pointer;
867
868    *Pointer -= 1;
869
870    return orig;
871}
872
873int SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicIDecrement, _p1i32_i32_i32, )( __global int *Pointer, int Scope, int Semantics )
874{
875    atomic_operation_0op( __builtin_IB_atomic_dec_global_i32, uint, (global int*)Pointer, Scope, Semantics, true );
876}
877
878int SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicIDecrement, _p3i32_i32_i32, )( __local int *Pointer, int Scope, int Semantics )
879{
880    atomic_operation_0op( __builtin_IB_atomic_dec_local_i32, uint, (local int*)Pointer, Scope, Semantics, false );
881}
882
883#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
884
885int SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicIDecrement, _p4i32_i32_i32, )( __generic int *Pointer, int Scope, int Semantics )
886{
887    if(SPIRV_BUILTIN(GenericCastToPtrExplicit, _p3i8_p4i8_i32, _ToLocal)(__builtin_astype((Pointer), __generic char*), StorageWorkgroup))
888    {
889        atomic_operation_0op( __builtin_IB_atomic_dec_local_i32, uint, (__local int*)Pointer, Scope, Semantics, false );
890    }
891    else
892    {
893        atomic_operation_0op( __builtin_IB_atomic_dec_global_i32, uint, (__global int*)Pointer, Scope, Semantics, true );
894    }
895}
896
897#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
898
899#if defined(cl_khr_int64_base_atomics)
900long SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicIDecrement, _p0i64_i32_i32, )( __private long *Pointer, int Scope, int Semantics )
901{
902    ulong orig = *Pointer;
903    *Pointer -= 1;
904    return orig;
905}
906
907long SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicIDecrement, _p1i64_i32_i32, )( __global long *Pointer, int Scope, int Semantics )
908{
909    atomic_operation_0op( __builtin_IB_atomic_dec_global_i64, ulong, (global long*)Pointer, Scope, Semantics, true );
910}
911
912long SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicIDecrement, _p3i64_i32_i32, )( __local long *Pointer, int Scope, int Semantics )
913{
914    return __intel_atomic_unary(false, Pointer, Scope, Semantics);
915}
916
917#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
918
919long SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicIDecrement, _p4i64_i32_i32, )( __generic long *Pointer, int Scope, int Semantics )
920{
921    if(SPIRV_BUILTIN(GenericCastToPtrExplicit, _p3i8_p4i8_i32, _ToLocal)(__builtin_astype((Pointer), __generic char*), StorageWorkgroup))
922    {
923        return SPIRV_BUILTIN(AtomicIDecrement, _p3i64_i32_i32, )( (__local long*)Pointer, Scope, Semantics );
924    }
925    else
926    {
927        return SPIRV_BUILTIN(AtomicIDecrement, _p1i64_i32_i32, )( (__global long*)Pointer, Scope, Semantics );
928    }
929}
930
931#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
932#endif // defined(cl_khr_int64_base_atomics)
933
934
935// Atomic IAdd
936
937
938int SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicIAdd, _p0i32_i32_i32_i32, )( __private int *Pointer, int Scope, int Semantics, int Value )
939{
940    uint orig = *Pointer;
941
942    *Pointer += Value;
943
944    return orig;
945}
946
947
948int SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicIAdd, _p1i32_i32_i32_i32, )( __global int *Pointer, int Scope, int Semantics, int Value )
949{
950    atomic_operation_1op( __builtin_IB_atomic_add_global_i32, uint, (global int*)Pointer, Scope, Semantics, Value, true );
951}
952
953int SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicIAdd, _p3i32_i32_i32_i32, )( __local int *Pointer, int Scope, int Semantics, int Value )
954{
955    atomic_operation_1op( __builtin_IB_atomic_add_local_i32, uint, (local int*)Pointer, Scope, Semantics, Value, false );
956}
957
958#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
959
960int SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicIAdd, _p4i32_i32_i32_i32, )( __generic int *Pointer, int Scope, int Semantics, int Value )
961{
962    if(SPIRV_BUILTIN(GenericCastToPtrExplicit, _p3i8_p4i8_i32, _ToLocal)(__builtin_astype((Pointer), __generic char*), StorageWorkgroup))
963    {
964        atomic_operation_1op( __builtin_IB_atomic_add_local_i32, uint, (__local int*)Pointer, Scope, Semantics, Value, false );
965    }
966    else
967    {
968        atomic_operation_1op( __builtin_IB_atomic_add_global_i32, uint, (__global int*)Pointer, Scope, Semantics, Value, true );
969    }
970}
971
972#if defined(cl_khr_int64_base_atomics)
973long SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicIAdd, _p0i64_i32_i32_i64, )( __private long *Pointer, int Scope, int Semantics, long Value )
974{
975    ulong orig = *Pointer;
976    *Pointer += Value;
977    return orig;
978}
979
980long SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicIAdd, _p1i64_i32_i32_i64, )( __global long *Pointer, int Scope, int Semantics, long Value )
981{
982    atomic_operation_1op( __builtin_IB_atomic_add_global_i64, ulong, (__global ulong*)Pointer, Scope, Semantics, Value, true );
983}
984
985long SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicIAdd, _p3i64_i32_i32_i64, )( __local long *Pointer, int Scope, int Semantics, long Value )
986{
987    return __intel_atomic_binary(ATOMIC_IADD64, Pointer, Scope, Semantics, Value);
988}
989
990
991long SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicIAdd, _p4i64_i32_i32_i64, )( __generic long *Pointer, int Scope, int Semantics, long Value )
992{
993    if(SPIRV_BUILTIN(GenericCastToPtrExplicit, _p3i8_p4i8_i32, _ToLocal)(__builtin_astype((Pointer), __generic char*), StorageWorkgroup))
994    {
995        return SPIRV_BUILTIN(AtomicIAdd, _p3i64_i32_i32_i64, )((__local long*)Pointer, Scope, Semantics, Value);
996    }
997    else
998    {
999        return SPIRV_BUILTIN(AtomicIAdd, _p1i64_i32_i32_i64, )((__global long*)Pointer, Scope, Semantics, Value);
1000    }
1001}
1002
1003#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
1004#endif // defined(cl_khr_int64_base_atomics)
1005
1006// Atomic ISub
1007
1008int SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicISub, _p0i32_i32_i32_i32, )( __private int *Pointer, int Scope, int Semantics, int Value )
1009{
1010    uint orig = *Pointer;
1011
1012    *Pointer -= Value;
1013
1014    return orig;
1015}
1016
1017
1018int SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicISub, _p1i32_i32_i32_i32, )( __global int *Pointer, int Scope, int Semantics, int Value )
1019{
1020    atomic_operation_1op( __builtin_IB_atomic_sub_global_i32, uint, (global int*)Pointer, Scope, Semantics, Value, true );
1021}
1022
1023
1024int SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicISub, _p3i32_i32_i32_i32, )( __local int *Pointer, int Scope, int Semantics, int Value )
1025{
1026    atomic_operation_1op( __builtin_IB_atomic_sub_local_i32, uint, (local int*)Pointer, Scope, Semantics, Value, false );
1027}
1028
1029#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
1030
1031int SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicISub, _p4i32_i32_i32_i32, )( __generic int *Pointer, int Scope, int Semantics, int Value )
1032{
1033    if(SPIRV_BUILTIN(GenericCastToPtrExplicit, _p3i8_p4i8_i32, _ToLocal)(__builtin_astype((Pointer), __generic char*), StorageWorkgroup))
1034    {
1035        atomic_operation_1op( __builtin_IB_atomic_sub_local_i32, uint, (__local int*)Pointer, Scope, Semantics, Value, false );
1036    }
1037    else
1038    {
1039        atomic_operation_1op( __builtin_IB_atomic_sub_global_i32, uint, (__global int*)Pointer, Scope, Semantics, Value, true );
1040    }
1041}
1042
1043#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
1044
1045#if defined(cl_khr_int64_base_atomics)
1046long SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicISub, _p0i64_i32_i32_i64, )( __private long *Pointer, int Scope, int Semantics, long Value )
1047{
1048    ulong orig = *Pointer;
1049    *Pointer -= Value;
1050    return orig;
1051}
1052
1053
1054long SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicISub, _p1i64_i32_i32_i64, )( __global long *Pointer, int Scope, int Semantics, long Value )
1055{
1056    atomic_operation_1op( __builtin_IB_atomic_sub_global_i64, ulong, (global long*)Pointer, Scope, Semantics, Value, true );
1057}
1058
1059
1060long SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicISub, _p3i64_i32_i32_i64, )( __local long *Pointer, int Scope, int Semantics, long Value )
1061{
1062    return __intel_atomic_binary(ATOMIC_SUB64, Pointer, Scope, Semantics, Value);
1063}
1064
1065#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
1066
1067long SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicISub, _p4i64_i32_i32_i64, )( __generic long *Pointer, int Scope, int Semantics, long Value )
1068{
1069    if(SPIRV_BUILTIN(GenericCastToPtrExplicit, _p3i8_p4i8_i32, _ToLocal)(__builtin_astype((Pointer), __generic char*), StorageWorkgroup))
1070    {
1071        return SPIRV_BUILTIN(AtomicISub, _p3i64_i32_i32_i64, )((__local long*)Pointer, Scope, Semantics, Value );
1072    }
1073    else
1074    {
1075        return SPIRV_BUILTIN(AtomicISub, _p1i64_i32_i32_i64, )((__global long*)Pointer, Scope, Semantics, Value );
1076    }
1077}
1078
1079#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
1080
1081#endif // defined(cl_khr_int64_base_atomics)
1082
1083
1084// Atomic SMin
1085
1086
1087int SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicSMin, _p0i32_i32_i32_i32, )( __private int *Pointer, int Scope, int Semantics, int Value)
1088{
1089    int orig = *Pointer;
1090    *Pointer = ( orig < Value ) ? orig : Value;
1091    return orig;
1092}
1093
1094int SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicSMin, _p1i32_i32_i32_i32, )( __global int *Pointer, int Scope, int Semantics, int Value)
1095{
1096    atomic_operation_1op( __builtin_IB_atomic_min_global_i32, uint, (__global int*)Pointer, Scope, Semantics, Value, true );
1097}
1098
1099int SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicSMin, _p3i32_i32_i32_i32, )( __local int *Pointer, int Scope, int Semantics, int Value)
1100{
1101    atomic_operation_1op( __builtin_IB_atomic_min_local_i32, uint, (local int*)Pointer, Scope, Semantics, Value, false );
1102}
1103
1104#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
1105
1106int SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicSMin, _p4i32_i32_i32_i32, )( __generic int *Pointer, int Scope, int Semantics, int Value)
1107{
1108    if(SPIRV_BUILTIN(GenericCastToPtrExplicit, _p3i8_p4i8_i32, _ToLocal)(__builtin_astype((Pointer), __generic char*), StorageWorkgroup))
1109    {
1110        atomic_operation_1op( __builtin_IB_atomic_min_local_i32, uint, (__local int*)Pointer, Scope, Semantics, Value, false );
1111    }
1112    else
1113    {
1114        atomic_operation_1op( __builtin_IB_atomic_min_global_i32, uint, (__global int*)Pointer, Scope, Semantics, Value, true );
1115    }
1116}
1117
1118#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
1119
1120#if defined(cl_khr_int64_extended_atomics)
1121
1122long SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicSMin, _p0i64_i32_i32_i64, )( __private long *Pointer, int Scope, int Semantics, long Value)
1123{
1124    long orig = *Pointer;
1125    *Pointer = ( orig < Value ) ? orig : Value;
1126    return orig;
1127}
1128
1129long SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicSMin, _p1i64_i32_i32_i64, )( __global long *Pointer, int Scope, int Semantics, long Value)
1130{
1131    atomic_operation_1op( __builtin_IB_atomic_min_global_i64, ulong, (__global long*)Pointer, Scope, Semantics, Value, true );
1132}
1133
1134long SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicSMin, _p3i64_i32_i32_i64, )( __local long *Pointer, int Scope, int Semantics, long Value)
1135{
1136    return __intel_atomic_binary(ATOMIC_IMIN64, (volatile __local long *)Pointer, Scope, Semantics, Value);
1137}
1138
1139#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
1140
1141long SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicSMin, _p4i64_i32_i32_i64, )( __generic long *Pointer, int Scope, int Semantics, long Value)
1142{
1143    if(SPIRV_BUILTIN(GenericCastToPtrExplicit, _p3i8_p4i8_i32, _ToLocal)(__builtin_astype((Pointer), __generic char*), StorageWorkgroup))
1144    {
1145        return SPIRV_BUILTIN(AtomicSMin, _p3i64_i32_i32_i64, )((__local int*)Pointer, Scope, Semantics, Value );
1146    }
1147    else
1148    {
1149        return SPIRV_BUILTIN(AtomicSMin, _p1i64_i32_i32_i64, )((__global int*)Pointer, Scope, Semantics, Value );
1150    }
1151}
1152
1153#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
1154
1155#endif // defined(cl_khr_int64_extended_atomics)
1156
1157uint SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicUMin, _p0i32_i32_i32_i32, )( __private uint *Pointer, int Scope, int Semantics, uint Value )
1158{
1159    uint orig = *Pointer;
1160
1161    *Pointer = ( orig < Value ) ? orig : Value;
1162
1163    return orig;
1164}
1165
1166uint SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicUMin, _p1i32_i32_i32_i32, )( __global uint *Pointer, int Scope, int Semantics, uint Value )
1167{
1168    atomic_operation_1op( __builtin_IB_atomic_min_global_u32, uint, Pointer, Scope, Semantics, Value, true );
1169}
1170
1171uint SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicUMin, _p3i32_i32_i32_i32, )( __local uint *Pointer, int Scope, int Semantics, uint Value )
1172{
1173    atomic_operation_1op( __builtin_IB_atomic_min_local_u32, uint, Pointer, Scope, Semantics, Value, false );
1174}
1175
1176#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
1177
1178uint SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicUMin, _p4i32_i32_i32_i32, )( __generic uint *Pointer, int Scope, int Semantics, uint Value )
1179{
1180    if(SPIRV_BUILTIN(GenericCastToPtrExplicit, _p3i8_p4i8_i32, _ToLocal)(__builtin_astype((Pointer), __generic char*), StorageWorkgroup))
1181    {
1182        atomic_operation_1op( __builtin_IB_atomic_min_local_u32, uint, (__local uint*)Pointer, Scope, Semantics, Value, false );
1183    }
1184    else
1185    {
1186        atomic_operation_1op( __builtin_IB_atomic_min_global_u32, uint, (__global uint*)Pointer, Scope, Semantics, Value, true );
1187    }
1188}
1189
1190#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
1191
1192#if defined(cl_khr_int64_extended_atomics)
1193
1194ulong SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicUMin, _p0i64_i32_i32_i64, )( __private ulong *Pointer, int Scope, int Semantics, ulong Value )
1195{
1196    ulong orig = *Pointer;
1197    *Pointer = ( orig < Value ) ? orig : Value;
1198    return orig;
1199}
1200
1201ulong SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicUMin, _p1i64_i32_i32_i64, )( __global ulong *Pointer, int Scope, int Semantics, ulong Value )
1202{
1203    atomic_operation_1op( __builtin_IB_atomic_min_global_u64, ulong, Pointer, Scope, Semantics, Value, true );
1204}
1205
1206ulong SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicUMin, _p3i64_i32_i32_i64, )( __local ulong *Pointer, int Scope, int Semantics, ulong Value )
1207{
1208    return __intel_atomic_binary(ATOMIC_UMIN64, Pointer, Scope, Semantics, Value);
1209}
1210
1211#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
1212
1213ulong SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicUMin, _p4i64_i32_i32_i64, )( __generic ulong *Pointer, int Scope, int Semantics, ulong Value )
1214{
1215    if(SPIRV_BUILTIN(GenericCastToPtrExplicit, _p3i8_p4i8_i32, _ToLocal)(__builtin_astype((Pointer), __generic char*), StorageWorkgroup))
1216    {
1217        return SPIRV_BUILTIN(AtomicUMin, _p3i64_i32_i32_i64, )( (__local ulong*)Pointer, Scope, Semantics, Value );
1218    }
1219    else
1220    {
1221        return SPIRV_BUILTIN(AtomicUMin, _p1i64_i32_i32_i64, )( (__global ulong*)Pointer, Scope, Semantics, Value );
1222    }
1223}
1224
1225#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
1226
1227#endif // defined(cl_khr_int64_extended_atomics)
1228
1229// Atomic SMax
1230
1231
1232int SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicSMax, _p0i32_i32_i32_i32, )( __private int *Pointer, int Scope, int Semantics, int Value)
1233{
1234    int orig = *Pointer;
1235    *Pointer = ( orig > Value ) ? orig : Value;
1236    return orig;
1237}
1238
1239int SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicSMax, _p1i32_i32_i32_i32, )( __global int *Pointer, int Scope, int Semantics, int Value)
1240{
1241    atomic_operation_1op( __builtin_IB_atomic_max_global_i32, uint, (global int*)Pointer, Scope, Semantics, Value, true );
1242}
1243
1244int SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicSMax, _p3i32_i32_i32_i32, )( __local int *Pointer, int Scope, int Semantics, int Value)
1245{
1246    atomic_operation_1op( __builtin_IB_atomic_max_local_i32, uint, (local int*)Pointer, Scope, Semantics, Value, false );
1247}
1248
1249#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
1250
1251int SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicSMax, _p4i32_i32_i32_i32, )( __generic int *Pointer, int Scope, int Semantics, int Value)
1252{
1253    if(SPIRV_BUILTIN(GenericCastToPtrExplicit, _p3i8_p4i8_i32, _ToLocal)(__builtin_astype((Pointer), __generic char*), StorageWorkgroup))
1254    {
1255        atomic_operation_1op( __builtin_IB_atomic_max_local_i32, uint, (__local int*)Pointer, Scope, Semantics, Value, false );
1256    }
1257    else
1258    {
1259        atomic_operation_1op( __builtin_IB_atomic_max_global_i32, uint, (__global int*)Pointer, Scope, Semantics, Value, true );
1260    }
1261}
1262
1263#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
1264
1265#if defined(cl_khr_int64_extended_atomics)
1266
1267long SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicSMax, _p0i64_i32_i32_i64, )( __private long *Pointer, int Scope, int Semantics, long Value)
1268{
1269    long orig = *Pointer;
1270    *Pointer = ( orig > Value ) ? orig : Value;
1271    return orig;
1272}
1273
1274long SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicSMax, _p1i64_i32_i32_i64, )( __global long *Pointer, int Scope, int Semantics, long Value)
1275{
1276    atomic_operation_1op( __builtin_IB_atomic_max_global_i64, ulong, (global long*)Pointer, Scope, Semantics, Value, true );
1277}
1278
1279long SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicSMax, _p3i64_i32_i32_i64, )( __local long *Pointer, int Scope, int Semantics, long Value)
1280{
1281    return __intel_atomic_binary(ATOMIC_IMAX64, (volatile __local long *)Pointer, Scope, Semantics, Value);
1282}
1283
1284#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
1285
1286long SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicSMax, _p4i64_i32_i32_i64, )( __generic long *Pointer, int Scope, int Semantics, long Value)
1287{
1288    if(SPIRV_BUILTIN(GenericCastToPtrExplicit, _p3i8_p4i8_i32, _ToLocal)(__builtin_astype((Pointer), __generic char*), StorageWorkgroup))
1289    {
1290        return SPIRV_BUILTIN(AtomicSMax, _p3i64_i32_i32_i64, )( (__local long*)Pointer, Scope, Semantics, Value );
1291    }
1292    else
1293    {
1294        return SPIRV_BUILTIN(AtomicSMax, _p1i64_i32_i32_i64, )( (__global long*)Pointer, Scope, Semantics, Value );
1295    }
1296}
1297
1298#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
1299
1300#endif // defined(cl_khr_int64_extended_atomics)
1301
1302// Atomic UMax
1303
1304
1305uint SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicUMax, _p0i32_i32_i32_i32, )( __private uint *Pointer, int Scope, int Semantics, uint Value )
1306{
1307    uint orig = *Pointer;
1308
1309    *Pointer = ( orig > Value ) ? orig : Value;
1310
1311    return orig;
1312}
1313
1314uint SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicUMax, _p1i32_i32_i32_i32, )( __global uint *Pointer, int Scope, int Semantics, uint Value )
1315{
1316    atomic_operation_1op( __builtin_IB_atomic_max_global_u32, uint, Pointer, Scope, Semantics, Value, true );
1317}
1318
1319uint SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicUMax, _p3i32_i32_i32_i32, )( __local uint *Pointer, int Scope, int Semantics, uint Value )
1320{
1321    atomic_operation_1op( __builtin_IB_atomic_max_local_u32, uint, Pointer, Scope, Semantics, Value, false );
1322}
1323
1324#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
1325
1326uint SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicUMax, _p4i32_i32_i32_i32, )( __generic uint *Pointer, int Scope, int Semantics, uint Value )
1327{
1328    if(SPIRV_BUILTIN(GenericCastToPtrExplicit, _p3i8_p4i8_i32, _ToLocal)(__builtin_astype((Pointer), __generic char*), StorageWorkgroup))
1329    {
1330        atomic_operation_1op( __builtin_IB_atomic_max_local_u32, uint, (__local uint*)Pointer, Scope, Semantics, Value, false );
1331    }
1332    else
1333    {
1334        atomic_operation_1op( __builtin_IB_atomic_max_global_u32, uint, (__global uint*)Pointer, Scope, Semantics, Value, true );
1335    }
1336}
1337
1338#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
1339
1340#if defined(cl_khr_int64_extended_atomics)
1341
1342ulong SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicUMax, _p0i64_i32_i32_i64, )( __private ulong *Pointer, int Scope, int Semantics, ulong Value )
1343{
1344    ulong orig = *Pointer;
1345    *Pointer = ( orig > Value ) ? orig : Value;
1346    return orig;
1347}
1348
1349ulong SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicUMax, _p1i64_i32_i32_i64, )( __global ulong *Pointer, int Scope, int Semantics, ulong Value )
1350{
1351    atomic_operation_1op( __builtin_IB_atomic_max_global_u64, ulong, Pointer, Scope, Semantics, Value, true );
1352}
1353
1354ulong SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicUMax, _p3i64_i32_i32_i64, )( __local ulong *Pointer, int Scope, int Semantics, ulong Value )
1355{
1356    return __intel_atomic_binary(ATOMIC_UMAX64, Pointer, Scope, Semantics, Value);
1357}
1358
1359#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
1360
1361ulong SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicUMax, _p4i64_i32_i32_i64, )( __generic ulong *Pointer, int Scope, int Semantics, ulong Value )
1362{
1363    if(SPIRV_BUILTIN(GenericCastToPtrExplicit, _p3i8_p4i8_i32, _ToLocal)(__builtin_astype((Pointer), __generic char*), StorageWorkgroup))
1364    {
1365        return SPIRV_BUILTIN(AtomicUMax, _p3i64_i32_i32_i64, )( (__local ulong*)Pointer, Scope, Semantics, Value );
1366    }
1367    else
1368    {
1369        return SPIRV_BUILTIN(AtomicUMax, _p1i64_i32_i32_i64, )( (__global ulong*)Pointer, Scope, Semantics, Value );
1370    }
1371}
1372
1373#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
1374
1375#endif // defined(cl_khr_int64_extended_atomics)
1376
1377// Atomic And
1378
1379
1380int SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicAnd, _p0i32_i32_i32_i32, )( __private int *Pointer, int Scope, int Semantics, int Value )
1381{
1382    uint orig = *Pointer;
1383    *Pointer &= Value;
1384    return orig;
1385}
1386
1387int SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicAnd, _p1i32_i32_i32_i32, )( __global int *Pointer, int Scope, int Semantics, int Value )
1388{
1389    atomic_operation_1op( __builtin_IB_atomic_and_global_i32, uint, (global int*)Pointer, Scope, Semantics, Value, true );
1390}
1391
1392int SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicAnd, _p3i32_i32_i32_i32, )( __local int *Pointer, int Scope, int Semantics, int Value )
1393{
1394    atomic_operation_1op( __builtin_IB_atomic_and_local_i32, uint, (local int*)Pointer, Scope, Semantics, Value, false );
1395}
1396
1397#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
1398
1399int SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicAnd, _p4i32_i32_i32_i32, )( __generic int *Pointer, int Scope, int Semantics, int Value )
1400{
1401    if(SPIRV_BUILTIN(GenericCastToPtrExplicit, _p3i8_p4i8_i32, _ToLocal)(__builtin_astype((Pointer), __generic char*), StorageWorkgroup))
1402    {
1403        atomic_operation_1op( __builtin_IB_atomic_and_local_i32, uint, (__local int*)Pointer, Scope, Semantics, Value, false );
1404    }
1405    else
1406    {
1407        atomic_operation_1op( __builtin_IB_atomic_and_global_i32, uint, (__global int*)Pointer, Scope, Semantics, Value, true );
1408    }
1409}
1410
1411#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
1412
1413#if defined(cl_khr_int64_extended_atomics)
1414
1415long SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicAnd, _p0i64_i32_i32_i64, )( __private long *Pointer, int Scope, int Semantics, long Value )
1416{
1417    ulong orig = *Pointer;
1418    *Pointer &= Value;
1419    return orig;
1420}
1421
1422long SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicAnd, _p1i64_i32_i32_i64, )( __global long *Pointer, int Scope, int Semantics, long Value )
1423{
1424    atomic_operation_1op( __builtin_IB_atomic_and_global_i64, ulong, (global long*)Pointer, Scope, Semantics, Value, true );
1425}
1426
1427long SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicAnd, _p3i64_i32_i32_i64, )( __local long *Pointer, int Scope, int Semantics, long Value )
1428{
1429    return __intel_atomic_binary(ATOMIC_AND64, Pointer, Scope, Semantics, Value);
1430}
1431
1432#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
1433
1434long SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicAnd, _p4i64_i32_i32_i64, )( __generic long *Pointer, int Scope, int Semantics, long Value )
1435{
1436    if(SPIRV_BUILTIN(GenericCastToPtrExplicit, _p3i8_p4i8_i32, _ToLocal)(__builtin_astype((Pointer), __generic char*), StorageWorkgroup))
1437    {
1438        return SPIRV_BUILTIN(AtomicAnd, _p3i64_i32_i32_i64, )( (__local long*)Pointer, Scope, Semantics, Value );
1439    }
1440    else
1441    {
1442        return SPIRV_BUILTIN(AtomicAnd, _p1i64_i32_i32_i64, )( (__global long*)Pointer, Scope, Semantics, Value );
1443    }
1444}
1445
1446#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
1447
1448#endif // defined(cl_khr_int64_extended_atomics)
1449
1450// Atomic OR
1451
1452
1453int SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicOr, _p0i32_i32_i32_i32, )( __private int *Pointer, int Scope, int Semantics, int Value )
1454{
1455    uint orig = *Pointer;
1456    *Pointer |= Value;
1457    return orig;
1458}
1459
1460int SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicOr, _p1i32_i32_i32_i32, )( __global int *Pointer, int Scope, int Semantics, int Value )
1461{
1462    atomic_operation_1op( __builtin_IB_atomic_or_global_i32, uint, (global int*)Pointer, Scope, Semantics, Value, true );
1463}
1464
1465int SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicOr, _p3i32_i32_i32_i32, )( __local int *Pointer, int Scope, int Semantics, int Value )
1466{
1467    atomic_operation_1op( __builtin_IB_atomic_or_local_i32, uint, (local int*)Pointer, Scope, Semantics, Value, false );
1468}
1469
1470#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
1471
1472int SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicOr, _p4i32_i32_i32_i32, )( __generic int *Pointer, int Scope, int Semantics, int Value )
1473{
1474    if(SPIRV_BUILTIN(GenericCastToPtrExplicit, _p3i8_p4i8_i32, _ToLocal)(__builtin_astype((Pointer), __generic char*), StorageWorkgroup))
1475    {
1476        atomic_operation_1op( __builtin_IB_atomic_or_local_i32, uint, (__local int*)Pointer, Scope, Semantics, Value, false );
1477    }
1478    else
1479    {
1480        atomic_operation_1op( __builtin_IB_atomic_or_global_i32, uint, (__global int*)Pointer, Scope, Semantics, Value, true );
1481    }
1482}
1483
1484#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
1485
1486#if defined(cl_khr_int64_extended_atomics)
1487
1488long SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicOr, _p0i64_i32_i32_i64, )( __private long *Pointer, int Scope, int Semantics, long Value )
1489{
1490    ulong orig = *Pointer;
1491    *Pointer |= Value;
1492    return orig;
1493}
1494
1495long SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicOr, _p1i64_i32_i32_i64, )( __global long *Pointer, int Scope, int Semantics, long Value )
1496{
1497    atomic_operation_1op( __builtin_IB_atomic_or_global_i64, ulong, (global long*)Pointer, Scope, Semantics, Value, true );
1498}
1499
1500long SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicOr, _p3i64_i32_i32_i64, )( __local long *Pointer, int Scope, int Semantics, long Value )
1501{
1502    return __intel_atomic_binary(ATOMIC_OR64, Pointer, Scope, Semantics, Value);
1503}
1504
1505#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
1506
1507long SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicOr, _p4i64_i32_i32_i64, )( __generic long *Pointer, int Scope, int Semantics, long Value )
1508{
1509    if(SPIRV_BUILTIN(GenericCastToPtrExplicit, _p3i8_p4i8_i32, _ToLocal)(__builtin_astype((Pointer), __generic char*), StorageWorkgroup))
1510    {
1511      return SPIRV_BUILTIN(AtomicOr, _p3i64_i32_i32_i64, )( (__local long*)Pointer, Scope, Semantics, Value );
1512    }
1513    else
1514    {
1515      return SPIRV_BUILTIN(AtomicOr, _p1i64_i32_i32_i64, )( (__global long*)Pointer, Scope, Semantics, Value );
1516    }
1517}
1518
1519#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
1520
1521#endif // defined(cl_khr_int64_extended_atomics)
1522
1523
1524// Atomic Xor
1525
1526
1527int SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicXor, _p0i32_i32_i32_i32, )( __private int *Pointer, int Scope, int Semantics, int Value )
1528{
1529    uint orig = *Pointer;
1530    *Pointer ^= Value;
1531    return orig;
1532}
1533
1534int SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicXor, _p1i32_i32_i32_i32, )( __global int *Pointer, int Scope, int Semantics, int Value )
1535{
1536    atomic_operation_1op( __builtin_IB_atomic_xor_global_i32, uint, (global int*)Pointer, Scope, Semantics, Value, true );
1537}
1538
1539int SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicXor, _p3i32_i32_i32_i32, )( __local int *Pointer, int Scope, int Semantics, int Value )
1540{
1541    atomic_operation_1op( __builtin_IB_atomic_xor_local_i32, uint, (local int*)Pointer, Scope, Semantics, Value, false );
1542}
1543
1544#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
1545
1546int SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicXor, _p4i32_i32_i32_i32, )( __generic int *Pointer, int Scope, int Semantics, int Value )
1547{
1548    if(SPIRV_BUILTIN(GenericCastToPtrExplicit, _p3i8_p4i8_i32, _ToLocal)(__builtin_astype((Pointer), __generic char*), StorageWorkgroup))
1549    {
1550        atomic_operation_1op( __builtin_IB_atomic_xor_local_i32, uint, (__local int*)Pointer, Scope, Semantics, Value, false );
1551    }
1552    else
1553    {
1554        atomic_operation_1op( __builtin_IB_atomic_xor_global_i32, uint, (__global int*)Pointer, Scope, Semantics, Value, true );
1555    }
1556}
1557
1558#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
1559
1560#if defined(cl_khr_int64_extended_atomics)
1561
1562long SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicXor, _p0i64_i32_i32_i64, )( __private long *Pointer, int Scope, int Semantics, long Value )
1563{
1564    ulong orig = *Pointer;
1565    *Pointer ^= Value;
1566    return orig;
1567}
1568
1569long SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicXor, _p1i64_i32_i32_i64, )( __global long *Pointer, int Scope, int Semantics, long Value )
1570{
1571    atomic_operation_1op( __builtin_IB_atomic_xor_global_i64, ulong, (global long*)Pointer, Scope, Semantics, Value, true );
1572}
1573
1574long SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicXor, _p3i64_i32_i32_i64, )( __local long *Pointer, int Scope, int Semantics, long Value )
1575{
1576    return __intel_atomic_binary(ATOMIC_XOR64, Pointer, Scope, Semantics, Value);
1577}
1578
1579#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
1580
1581long SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicXor, _p4i64_i32_i32_i64, )( __generic long *Pointer, int Scope, int Semantics, long Value )
1582{
1583    if(SPIRV_BUILTIN(GenericCastToPtrExplicit, _p3i8_p4i8_i32, _ToLocal)(__builtin_astype((Pointer), __generic char*), StorageWorkgroup))
1584    {
1585        return SPIRV_BUILTIN(AtomicXor, _p3i64_i32_i32_i64, )( (__local long*)Pointer, Scope, Semantics, Value );
1586    }
1587    else
1588    {
1589        return SPIRV_BUILTIN(AtomicXor, _p1i64_i32_i32_i64, )( (__global long*)Pointer, Scope, Semantics, Value );
1590    }
1591}
1592
1593#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
1594
1595#endif // defined(cl_khr_int64_extended_atomics)
1596
1597// Atomic FlagTestAndSet
1598
1599
1600bool SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicFlagTestAndSet, _p0i32_i32_i32, )( __private int *Pointer, int Scope, int Semantics )
1601{
1602    return (bool)SPIRV_BUILTIN(AtomicExchange, _p0i32_i32_i32_i32, )( Pointer, Scope, Semantics, ATOMIC_FLAG_TRUE );
1603}
1604
1605bool SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicFlagTestAndSet, _p1i32_i32_i32, )( __global int *Pointer, int Scope, int Semantics )
1606{
1607    return (bool)SPIRV_BUILTIN(AtomicExchange, _p1i32_i32_i32_i32, )( Pointer, Scope, Semantics, ATOMIC_FLAG_TRUE );
1608}
1609
1610bool SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicFlagTestAndSet, _p3i32_i32_i32, )( __local int *Pointer, int Scope, int Semantics )
1611{
1612    return (bool)SPIRV_BUILTIN(AtomicExchange, _p3i32_i32_i32_i32, )( Pointer, Scope, Semantics, ATOMIC_FLAG_TRUE );
1613}
1614
1615#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
1616
1617bool SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicFlagTestAndSet, _p4i32_i32_i32, )( __generic int *Pointer, int Scope, int Semantics )
1618{
1619    return (bool)SPIRV_BUILTIN(AtomicExchange, _p4i32_i32_i32_i32, )( Pointer, Scope, Semantics, ATOMIC_FLAG_TRUE );
1620}
1621
1622#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
1623
1624
1625// Atomic FlagClear
1626
1627
1628void SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicFlagClear, _p0i32_i32_i32, )( __private int *Pointer, int Scope, int Semantics )
1629{
1630    SPIRV_BUILTIN(AtomicStore, _p0i32_i32_i32_i32, )( Pointer, Scope, Semantics, ATOMIC_FLAG_FALSE );
1631}
1632
1633void SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicFlagClear, _p1i32_i32_i32, )( __global int *Pointer, int Scope, int Semantics )
1634{
1635    SPIRV_BUILTIN(AtomicStore, _p1i32_i32_i32_i32, )( Pointer, Scope, Semantics, ATOMIC_FLAG_FALSE );
1636}
1637
1638void SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicFlagClear, _p3i32_i32_i32, )( __local int *Pointer, int Scope, int Semantics )
1639{
1640    SPIRV_BUILTIN(AtomicStore, _p3i32_i32_i32_i32, )( Pointer, Scope, Semantics, ATOMIC_FLAG_FALSE );
1641}
1642
1643#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
1644
1645void SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicFlagClear, _p4i32_i32_i32, )( __generic int *Pointer, int Scope, int Semantics )
1646{
1647    SPIRV_BUILTIN(AtomicStore, _p4i32_i32_i32_i32, )( Pointer, Scope, Semantics, ATOMIC_FLAG_FALSE );
1648}
1649
1650#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
1651
1652float SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicFAddEXT, _p0f32_i32_i32_f32, )( __private float *Pointer, int Scope, int Semantics, float Value)
1653{
1654    float orig = *Pointer;
1655    *Pointer += Value;
1656    return orig;
1657}
1658
1659float SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicFAddEXT, _p1f32_i32_i32_f32, )( __global float *Pointer, int Scope, int Semantics, float Value)
1660{
1661    if(__UseNativeFP32GlobalAtomicAdd)
1662    {
1663        atomic_operation_1op_as_float( __builtin_IB_atomic_add_global_f32, float, Pointer, Scope, Semantics, Value, true );
1664    }
1665    // We don't use SPINLOCK_START and SPINLOCK_END emulation here, since do-while loop is more efficient for global atomics.
1666    float orig;
1667    float desired;
1668    do {
1669        orig = as_float(SPIRV_BUILTIN(AtomicLoad, _p1i32_i32_i32, )((__global int*)Pointer, Scope, Semantics));
1670        desired = orig + Value;
1671    } while(as_int(orig) != SPIRV_BUILTIN(AtomicCompareExchange, _p1i32_i32_i32_i32_i32_i32, )(
1672                                (__global int*)Pointer, Scope, Semantics, Semantics,
1673                                as_int(desired), as_int(orig)));
1674    return orig;
1675}
1676
1677float SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicFAddEXT, _p3f32_i32_i32_f32, )( __local float *Pointer, int Scope, int Semantics, float Value)
1678{
1679    float orig;
1680    FENCE_PRE_OP(Scope, Semantics, false)
1681    LOCAL_SPINLOCK_START()
1682    orig = *Pointer;
1683    *Pointer = orig + Value;
1684    LOCAL_SPINLOCK_END()
1685    FENCE_POST_OP(Scope, Semantics, false)
1686    return orig;
1687}
1688
1689#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
1690float SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicFAddEXT, _p4f32_i32_i32_f32, )( __generic float *Pointer, int Scope, int Semantics, float Value)
1691{
1692    if(SPIRV_BUILTIN(GenericCastToPtrExplicit, _p3i8_p4i8_i32, _ToLocal)(__builtin_astype((Pointer), __generic char*), StorageWorkgroup))
1693    {
1694        return SPIRV_BUILTIN(AtomicFAddEXT, _p3f32_i32_i32_f32, )((local float*)Pointer, Scope, Semantics, Value);
1695    }
1696    else
1697    {
1698        return SPIRV_BUILTIN(AtomicFAddEXT, _p1f32_i32_i32_f32, )((global float*)Pointer, Scope, Semantics, Value);
1699    }
1700}
1701#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
1702
1703double SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicFAddEXT, _p0f64_i32_i32_f64, )( __private double *Pointer, int Scope, int Semantics, double Value)
1704{
1705    double orig = *Pointer;
1706    *Pointer += Value;
1707    return orig;
1708}
1709
1710double SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicFAddEXT, _p1f64_i32_i32_f64, )( __global double *Pointer, int Scope, int Semantics, double Value)
1711{
1712    // We don't use SPINLOCK_START and SPINLOCK_END emulation here, since do-while loop is more efficient for global atomics.
1713    // Another important reason of using do-while loop emulation is to avoid HW Bug on XeHP SDV:
1714    // "NodeDSS works in fixed arbitration mode where writes are always prioritized over reads.
1715    //  This is causing the IC read request to stall behind other pending write requests.
1716    //  Since IC read is not progressing, the thread which acquired the lock is not proceeding
1717    //  further to clear the lock and thus causing hang."
1718    // do-while loop emulation doesn't expose the HW issue since it reads 'Pointer' value inside a loop.
1719    double orig;
1720    double desired;
1721    do {
1722        orig = as_double(SPIRV_BUILTIN(AtomicLoad, _p1i64_i32_i32, )((__global long*)Pointer, Scope, Semantics));
1723        desired = orig + Value;
1724    } while(as_long(orig) != SPIRV_BUILTIN(AtomicCompareExchange, _p1i64_i32_i32_i32_i64_i64, )(
1725                                (__global long*)Pointer, Scope, Semantics, Semantics,
1726                                as_long(desired), as_long(orig)));
1727    return orig;
1728}
1729
1730double SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicFAddEXT, _p3f64_i32_i32_f64, )( __local double *Pointer, int Scope, int Semantics, double Value)
1731{
1732    double orig;
1733    FENCE_PRE_OP(Scope, Semantics, false)
1734    LOCAL_SPINLOCK_START()
1735    orig = *Pointer;
1736    *Pointer = orig + Value;
1737    LOCAL_SPINLOCK_END()
1738    FENCE_POST_OP(Scope, Semantics, false)
1739    return orig;
1740}
1741
1742#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
1743double SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicFAddEXT, _p4f64_i32_i32_f64, )( __generic double *Pointer, int Scope, int Semantics, double Value)
1744{
1745    if(SPIRV_BUILTIN(GenericCastToPtrExplicit, _p3i8_p4i8_i32, _ToLocal)(__builtin_astype((Pointer), __generic char*), StorageWorkgroup))
1746    {
1747        return SPIRV_BUILTIN(AtomicFAddEXT, _p3f64_i32_i32_f64, )((local double*)Pointer, Scope, Semantics, Value);
1748    }
1749    else
1750    {
1751        return SPIRV_BUILTIN(AtomicFAddEXT, _p1f64_i32_i32_f64, )((global double*)Pointer, Scope, Semantics, Value);
1752    }
1753}
1754#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
1755
1756half SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicFMinEXT, _p0f16_i32_i32_f16, )( private half* Pointer, int Scope, int Semantics, half Value)
1757{
1758    half orig = *Pointer;
1759    *Pointer = (orig < Value) ? orig : Value;
1760    return orig;
1761}
1762
1763half SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicFMinEXT, _p1f16_i32_i32_f16, )( global half* Pointer, int Scope, int Semantics, half Value)
1764{
1765    if(__UseNativeFP16AtomicMinMax)
1766    {
1767        atomic_operation_1op_as_half( __builtin_IB_atomic_min_global_f16, half, Pointer, Scope, Semantics, Value, true );
1768    }
1769    half orig;
1770    FENCE_PRE_OP(Scope, Semantics, true)
1771    GLOBAL_SPINLOCK_START()
1772    orig = *Pointer;
1773    *Pointer = (orig < Value) ? orig : Value;
1774    GLOBAL_SPINLOCK_END()
1775    FENCE_POST_OP(Scope, Semantics, true)
1776    return orig;
1777}
1778
1779half SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicFMinEXT, _p3f16_i32_i32_f16, )( local half* Pointer, int Scope, int Semantics, half Value)
1780{
1781    if(__UseNativeFP16AtomicMinMax)
1782    {
1783        atomic_operation_1op_as_half( __builtin_IB_atomic_min_local_f16, half, Pointer, Scope, Semantics, Value, false );
1784    }
1785    half orig;
1786    FENCE_PRE_OP(Scope, Semantics, false)
1787    LOCAL_SPINLOCK_START()
1788    orig = *Pointer;
1789    *Pointer = (orig < Value) ? orig : Value;
1790    LOCAL_SPINLOCK_END()
1791    FENCE_POST_OP(Scope, Semantics, false)
1792    return orig;
1793}
1794
1795#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
1796half SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicFMinEXT, _p4f16_i32_i32_f16, )( generic half* Pointer, int Scope, int Semantics, half Value)
1797{
1798    if (SPIRV_BUILTIN(GenericCastToPtrExplicit, _p3i8_p4i8_i32, _ToLocal)(__builtin_astype((Pointer), __generic char*), StorageWorkgroup))
1799    {
1800        return SPIRV_BUILTIN(AtomicFMinEXT, _p3f16_i32_i32_f16, )((__local half*)Pointer, Scope, Semantics, Value);
1801    }
1802    else
1803    {
1804        return SPIRV_BUILTIN(AtomicFMinEXT, _p1f16_i32_i32_f16, )((__global half*)Pointer, Scope, Semantics, Value);
1805    }
1806}
1807#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
1808
1809float SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicFMinEXT, _p0f32_i32_i32_f32, )( private float* Pointer, int Scope, int Semantics, float Value)
1810{
1811    float orig = *Pointer;
1812    *Pointer = (orig < Value) ? orig : Value;
1813    return orig;
1814}
1815
1816float SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicFMinEXT, _p1f32_i32_i32_f32, )( global float* Pointer, int Scope, int Semantics, float Value)
1817{
1818    atomic_operation_1op_as_float(__builtin_IB_atomic_min_global_f32, float, Pointer, Scope, Semantics, Value, true);
1819}
1820
1821float SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicFMinEXT, _p3f32_i32_i32_f32, )( local float* Pointer, int Scope, int Semantics, float Value)
1822{
1823    atomic_operation_1op_as_float(__builtin_IB_atomic_min_local_f32, float, Pointer, Scope, Semantics, Value, false);
1824}
1825
1826#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
1827float SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicFMinEXT, _p4f32_i32_i32_f32, )( generic float* Pointer, int Scope, int Semantics, float Value)
1828{
1829    if (SPIRV_BUILTIN(GenericCastToPtrExplicit, _p3i8_p4i8_i32, _ToLocal)(__builtin_astype((Pointer), __generic char*), StorageWorkgroup))
1830    {
1831        return SPIRV_BUILTIN(AtomicFMinEXT, _p3f32_i32_i32_f32, )((__local float*)Pointer, Scope, Semantics, Value);
1832    }
1833    else
1834    {
1835        return SPIRV_BUILTIN(AtomicFMinEXT, _p1f32_i32_i32_f32, )((__global float*)Pointer, Scope, Semantics, Value);
1836    }
1837}
1838#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
1839
1840double SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicFMinEXT, _p0f64_i32_i32_f64, )( private double* Pointer, int Scope, int Semantics, double Value)
1841{
1842    double orig = *Pointer;
1843    *Pointer = (orig < Value) ? orig : Value;
1844    return orig;
1845}
1846
1847double SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicFMinEXT, _p1f64_i32_i32_f64, )( global double* Pointer, int Scope, int Semantics, double Value)
1848{
1849    // We don't use SPINLOCK_START and SPINLOCK_END emulation here, since do-while loop is more efficient for global atomics.
1850    // Another important reason of using do-while loop emulation is to avoid HW Bug on XeHP SDV:
1851    // "NodeDSS works in fixed arbitration mode where writes are always prioritized over reads.
1852    //  This is causing the IC read request to stall behind other pending write requests.
1853    //  Since IC read is not progressing, the thread which acquired the lock is not proceeding
1854    //  further to clear the lock and thus causing hang."
1855    // do-while loop emulation doesn't expose the HW issue since it reads 'Pointer' value inside a loop.
1856    double orig;
1857    double desired;
1858    do {
1859        orig = as_double(SPIRV_BUILTIN(AtomicLoad, _p1i64_i32_i32, )((__global long*)Pointer, Scope, Semantics));
1860        desired = ( orig < Value ) ? orig : Value;
1861    } while(as_long(orig) != SPIRV_BUILTIN(AtomicCompareExchange, _p1i64_i32_i32_i32_i64_i64, )(
1862                                (__global long*)Pointer, Scope, Semantics, Semantics,
1863                                as_long(desired), as_long(orig)));
1864    return orig;
1865}
1866
1867double SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicFMinEXT, _p3f64_i32_i32_f64, )( local double* Pointer, int Scope, int Semantics, double Value)
1868{
1869    double orig;
1870    FENCE_PRE_OP(Scope, Semantics, false)
1871    LOCAL_SPINLOCK_START()
1872    orig = *Pointer;
1873    *Pointer = (orig < Value) ? orig : Value;
1874    LOCAL_SPINLOCK_END()
1875    FENCE_POST_OP(Scope, Semantics, false)
1876    return orig;
1877}
1878
1879#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
1880double SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicFMinEXT, _p4f64_i32_i32_f64, )( generic double* Pointer, int Scope, int Semantics, double Value)
1881{
1882    if (SPIRV_BUILTIN(GenericCastToPtrExplicit, _p3i8_p4i8_i32, _ToLocal)(__builtin_astype((Pointer), __generic char*), StorageWorkgroup))
1883    {
1884        return SPIRV_BUILTIN(AtomicFMinEXT, _p3f64_i32_i32_f64, )((__local double*)Pointer, Scope, Semantics, Value);
1885    }
1886    else
1887    {
1888        return SPIRV_BUILTIN(AtomicFMinEXT, _p1f64_i32_i32_f64, )((__global double*)Pointer, Scope, Semantics, Value);
1889    }
1890}
1891#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
1892
1893half SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicFMaxEXT, _p0f16_i32_i32_f16, )( private half* Pointer, int Scope, int Semantics, half Value)
1894{
1895    half orig = *Pointer;
1896    *Pointer = (orig > Value) ? orig : Value;
1897    return orig;
1898}
1899
1900half SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicFMaxEXT, _p1f16_i32_i32_f16, )( global half* Pointer, int Scope, int Semantics, half Value)
1901{
1902    if(__UseNativeFP16AtomicMinMax)
1903    {
1904        atomic_operation_1op_as_half( __builtin_IB_atomic_max_global_f16, half, Pointer, Scope, Semantics, Value, true );
1905    }
1906    half orig;
1907    FENCE_PRE_OP(Scope, Semantics, true)
1908    GLOBAL_SPINLOCK_START()
1909    orig = *Pointer;
1910    *Pointer = (orig > Value) ? orig : Value;
1911    GLOBAL_SPINLOCK_END()
1912    FENCE_POST_OP(Scope, Semantics, true)
1913    return orig;
1914}
1915
1916half SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicFMaxEXT, _p3f16_i32_i32_f16, )( local half* Pointer, int Scope, int Semantics, half Value)
1917{
1918    if(__UseNativeFP16AtomicMinMax)
1919    {
1920        atomic_operation_1op_as_half( __builtin_IB_atomic_max_local_f16, half, Pointer, Scope, Semantics, Value, false );
1921    }
1922    half orig;
1923    FENCE_PRE_OP(Scope, Semantics, false)
1924    LOCAL_SPINLOCK_START()
1925    orig = *Pointer;
1926    *Pointer = (orig > Value) ? orig : Value;
1927    LOCAL_SPINLOCK_END()
1928    FENCE_POST_OP(Scope, Semantics, false)
1929    return orig;
1930}
1931
1932#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
1933half SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicFMaxEXT, _p4f16_i32_i32_f16, )( generic half* Pointer, int Scope, int Semantics, half Value)
1934{
1935    if (SPIRV_BUILTIN(GenericCastToPtrExplicit, _p3i8_p4i8_i32, _ToLocal)(__builtin_astype((Pointer), __generic char*), StorageWorkgroup))
1936    {
1937        return SPIRV_BUILTIN(AtomicFMaxEXT, _p3f16_i32_i32_f16, )((__local half*)Pointer, Scope, Semantics, Value);
1938    }
1939    else
1940    {
1941        return SPIRV_BUILTIN(AtomicFMaxEXT, _p1f16_i32_i32_f16, )((__global half*)Pointer, Scope, Semantics, Value);
1942    }
1943}
1944#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
1945
1946float SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicFMaxEXT, _p0f32_i32_i32_f32, )( private float* Pointer, int Scope, int Semantics, float Value)
1947{
1948    float orig = *Pointer;
1949    *Pointer = (orig > Value) ? orig : Value;
1950    return orig;
1951}
1952
1953float SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicFMaxEXT, _p1f32_i32_i32_f32, )( global float* Pointer, int Scope, int Semantics, float Value)
1954{
1955    atomic_operation_1op_as_float(__builtin_IB_atomic_max_global_f32, float, Pointer, Scope, Semantics, Value, true);
1956}
1957
1958float SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicFMaxEXT, _p3f32_i32_i32_f32, )( local float* Pointer, int Scope, int Semantics, float Value)
1959{
1960    atomic_operation_1op_as_float(__builtin_IB_atomic_max_local_f32, float, Pointer, Scope, Semantics, Value, false);
1961}
1962
1963#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
1964float SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicFMaxEXT, _p4f32_i32_i32_f32, )( generic float* Pointer, int Scope, int Semantics, float Value)
1965{
1966    if (SPIRV_BUILTIN(GenericCastToPtrExplicit, _p3i8_p4i8_i32, _ToLocal)(__builtin_astype((Pointer), __generic char*), StorageWorkgroup))
1967    {
1968        return SPIRV_BUILTIN(AtomicFMaxEXT, _p3f32_i32_i32_f32, )((__local float*)Pointer, Scope, Semantics, Value);
1969    }
1970    else
1971    {
1972        return SPIRV_BUILTIN(AtomicFMaxEXT, _p1f32_i32_i32_f32, )((__global float*)Pointer, Scope, Semantics, Value);
1973    }
1974}
1975#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
1976
1977double SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicFMaxEXT, _p0f64_i32_i32_f64, )( private double* Pointer, int Scope, int Semantics, double Value)
1978{
1979    double orig = *Pointer;
1980    *Pointer = (orig > Value) ? orig : Value;
1981    return orig;
1982}
1983
1984double SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicFMaxEXT, _p1f64_i32_i32_f64, )( global double* Pointer, int Scope, int Semantics, double Value)
1985{
1986    // We don't use SPINLOCK_START and SPINLOCK_END emulation here, since do-while loop is more efficient for global atomics.
1987    // Another important reason of using do-while loop emulation is to avoid HW Bug on XeHP SDV:
1988    // "NodeDSS works in fixed arbitration mode where writes are always prioritized over reads.
1989    //  This is causing the IC read request to stall behind other pending write requests.
1990    //  Since IC read is not progressing, the thread which acquired the lock is not proceeding
1991    //  further to clear the lock and thus causing hang."
1992    // do-while loop emulation doesn't expose the HW issue since it reads 'Pointer' value inside a loop.
1993    double orig;
1994    double desired;
1995    do {
1996        orig = as_double(SPIRV_BUILTIN(AtomicLoad, _p1i64_i32_i32, )((__global long*)Pointer, Scope, Semantics));
1997        desired = ( orig > Value ) ? orig : Value;
1998    } while(as_long(orig) != SPIRV_BUILTIN(AtomicCompareExchange, _p1i64_i32_i32_i32_i64_i64, )(
1999                                (__global long*)Pointer, Scope, Semantics, Semantics,
2000                                as_long(desired), as_long(orig)));
2001    return orig;
2002}
2003
2004double SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicFMaxEXT, _p3f64_i32_i32_f64, )( local double* Pointer, int Scope, int Semantics, double Value)
2005{
2006    double orig;
2007    FENCE_PRE_OP(Scope, Semantics, false)
2008    LOCAL_SPINLOCK_START()
2009    orig = *Pointer;
2010    *Pointer = (orig > Value) ? orig : Value;
2011    LOCAL_SPINLOCK_END()
2012    FENCE_POST_OP(Scope, Semantics, false)
2013    return orig;
2014}
2015
2016#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
2017double SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicFMaxEXT, _p4f64_i32_i32_f64, )( generic double* Pointer, int Scope, int Semantics, double Value)
2018{
2019    if (SPIRV_BUILTIN(GenericCastToPtrExplicit, _p3i8_p4i8_i32, _ToLocal)(__builtin_astype((Pointer), __generic char*), StorageWorkgroup))
2020    {
2021        return SPIRV_BUILTIN(AtomicFMaxEXT, _p3f64_i32_i32_f64, )((__local double*)Pointer, Scope, Semantics, Value);
2022    }
2023    else
2024    {
2025        return SPIRV_BUILTIN(AtomicFMaxEXT, _p1f64_i32_i32_f64, )((__global double*)Pointer, Scope, Semantics, Value);
2026    }
2027}
2028#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
2029
2030#undef ATOMIC_FLAG_FALSE
2031#undef ATOMIC_FLAG_TRUE
2032
2033#define KMP_LOCK_FREE 0
2034#define KMP_LOCK_BUSY 1
2035
2036void __builtin_IB_kmp_acquire_lock(int *lock)
2037{
2038  volatile atomic_uint *lck = (volatile atomic_uint *)lock;
2039  uint expected = KMP_LOCK_FREE;
2040  while (atomic_load_explicit(lck, memory_order_relaxed) != KMP_LOCK_FREE ||
2041      !atomic_compare_exchange_strong_explicit(lck, &expected, KMP_LOCK_BUSY,
2042                                               memory_order_acquire,
2043                                               memory_order_relaxed)) {
2044    expected = KMP_LOCK_FREE;
2045  }
2046}
2047
2048void __builtin_IB_kmp_release_lock(int *lock)
2049{
2050  volatile atomic_uint *lck = (volatile atomic_uint *)lock;
2051  atomic_store_explicit(lck, KMP_LOCK_FREE, memory_order_release);
2052}
2053
2054#undef KMP_LOCK_FREE
2055#undef KMP_LOCK_BUSY
2056
2057#undef SEMANTICS_NEED_FENCE
2058#undef FENCE_PRE_OP
2059#undef FENCE_POST_OP
2060#undef SPINLOCK_START
2061#undef SPINLOCK_END
2062
2063#undef atomic_operation_1op
2064#undef atomic_operation_0op
2065#undef atomic_cmpxhg
2066