/*========================== begin_copyright_notice ============================

Copyright (C) 2017-2021 Intel Corporation

SPDX-License-Identifier: MIT

============================= end_copyright_notice ===========================*/

// Atomic Instructions

#include "../Headers/spirv.h"

#define ATOMIC_FLAG_TRUE 1
#define ATOMIC_FLAG_FALSE 0

#define SEMANTICS_PRE_OP_NEED_FENCE ( Release | AcquireRelease | SequentiallyConsistent )

#define SEMANTICS_POST_OP_NEEDS_FENCE ( Acquire | AcquireRelease | SequentiallyConsistent )

extern __constant int __UseNativeFP32GlobalAtomicAdd;
extern __constant int __UseNativeFP16AtomicMinMax;

__local int* __builtin_IB_get_local_lock();
__global int* __builtin_IB_get_global_lock();
void __builtin_IB_eu_thread_pause(uint value);
void __intel_memfence_handler(bool flushRW, bool isGlobal, bool invalidateL1);

#define LOCAL_SPINLOCK_START() \
{ \
    volatile bool done = false; \
    while(!done) { \
        __builtin_IB_eu_thread_pause(32); \
        if(SPIRV_BUILTIN(AtomicCompareExchange, _p3i32_i32_i32_i32_i32_i32, )(__builtin_IB_get_local_lock(), Device, Relaxed, Relaxed, 1, 0) == 0) {

#define LOCAL_SPINLOCK_END() \
            done = true; \
            SPIRV_BUILTIN(AtomicStore, _p3i32_i32_i32_i32, )(__builtin_IB_get_local_lock(), Device, SequentiallyConsistent | WorkgroupMemory, 0); \
}}}

#define GLOBAL_SPINLOCK_START() \
{ \
    volatile bool done = false; \
    while(!done) { \
        __builtin_IB_eu_thread_pause(32); \
        if(SPIRV_BUILTIN(AtomicCompareExchange, _p1i32_i32_i32_i32_i32_i32, )(__builtin_IB_get_global_lock(), Device, Relaxed, Relaxed, 1, 0) == 0) {

#define GLOBAL_SPINLOCK_END() \
            done = true; \
            SPIRV_BUILTIN(AtomicStore, _p1i32_i32_i32_i32, )(__builtin_IB_get_global_lock(), Device, SequentiallyConsistent | CrossWorkgroupMemory, 0); \
}}}

#define FENCE_PRE_OP(Scope, Semantics, isGlobal) \
    if( ( (Semantics) & ( SEMANTICS_PRE_OP_NEED_FENCE ) ) > 0 ) \
    { \
        bool flushL3 = (isGlobal) && ((Scope) == Device || (Scope) == CrossDevice); \
        __intel_memfence_handler(flushL3, isGlobal, false); \
    }

#define FENCE_POST_OP(Scope, Semantics, isGlobal) \
    if( ( (Semantics) & ( SEMANTICS_POST_OP_NEEDS_FENCE ) ) > 0 ) \
    { \
        bool flushL3 = (isGlobal) && ((Scope) == Device || (Scope) == CrossDevice); \
        __intel_memfence_handler(flushL3, isGlobal, false); \
    }

// This fencing scheme allows us to obey the memory model whether coherency is
// enabled or disabled. Because the L3$ has two pipelines (coherent & atomics,
// and non-coherent), the fences guarantee that the memory model is followed
// when coherency is disabled.
//
// When coherency is enabled, though, all HDC traffic uses the same L3$ pipe,
// so these fences would not be needed. The compiler is agnostic to coherency
// being enabled or disabled, so we assume the worst case.
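
// Illustrative sketch (editorial example, not part of this library): how the
// FENCE_PRE_OP / FENCE_POST_OP pair above brackets an atomic for the different
// ordering semantics.
//
//   pre-op fence fires for:  Release, AcquireRelease, SequentiallyConsistent
//   post-op fence fires for: Acquire, AcquireRelease, SequentiallyConsistent
//
// So an AcquireRelease atomic on a __global pointer at Device scope behaves as:
//
//   __intel_memfence_handler(true, true, false);  // pre-op fence; flushL3 because
//                                                 // isGlobal && Scope == Device
//   <atomic HDC message>
//   __intel_memfence_handler(true, true, false);  // post-op fence
//
// while a Relaxed atomic emits no fences at all.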

#define atomic_operation_1op( INTRINSIC, TYPE, Pointer, Scope, Semantics, Value, isGlobal ) \
{ \
    FENCE_PRE_OP((Scope), (Semantics), isGlobal) \
    TYPE result = INTRINSIC( (Pointer), (Value) ); \
    FENCE_POST_OP((Scope), (Semantics), isGlobal) \
    return result; \
}

#define atomic_operation_1op_as_float( INTRINSIC, TYPE, Pointer, Scope, Semantics, Value, isGlobal )\
{ \
    FENCE_PRE_OP((Scope), (Semantics), isGlobal) \
    TYPE result = as_float(INTRINSIC( (Pointer), (Value) )); \
    FENCE_POST_OP((Scope), (Semantics), isGlobal) \
    return result; \
}

#define atomic_operation_1op_as_double( INTRINSIC, TYPE, Pointer, Scope, Semantics, Value, isGlobal )\
{ \
    FENCE_PRE_OP((Scope), (Semantics), isGlobal) \
    TYPE result = as_double(INTRINSIC( (Pointer), (Value) )); \
    FENCE_POST_OP((Scope), (Semantics), isGlobal) \
    return result; \
}

#define atomic_operation_1op_as_half( INTRINSIC, TYPE, Pointer, Scope, Semantics, Value, isGlobal )\
{ \
    FENCE_PRE_OP((Scope), (Semantics), isGlobal) \
    TYPE result = as_half(INTRINSIC( (Pointer), (Value) )); \
    FENCE_POST_OP((Scope), (Semantics), isGlobal) \
    return result; \
}

#define atomic_operation_0op( INTRINSIC, TYPE, Pointer, Scope, Semantics, isGlobal ) \
{ \
    FENCE_PRE_OP((Scope), (Semantics), isGlobal) \
    TYPE result = INTRINSIC( (Pointer) ); \
    FENCE_POST_OP((Scope), (Semantics), isGlobal) \
    return result; \
}

#define atomic_cmpxhg( INTRINSIC, TYPE, Pointer, Scope, Semantics, Value, Comp, isGlobal )\
{ \
    FENCE_PRE_OP((Scope), (Semantics), isGlobal) \
    TYPE result = INTRINSIC( (Pointer), (Comp), (Value) ); \
    FENCE_POST_OP((Scope), (Semantics), isGlobal) \
    return result; \
}

#define atomic_cmpxhg_as_float( INTRINSIC, TYPE, Pointer, Scope, Semantics, Value, Comp, isGlobal )\
{ \
    FENCE_PRE_OP((Scope), (Semantics), isGlobal) \
    TYPE result = as_float(INTRINSIC( (Pointer), (Comp), (Value) )); \
    FENCE_POST_OP((Scope), (Semantics), isGlobal) \
    return result; \
}


// Atomic loads/stores must be implemented with an atomic operation. While our HDC has an in-order
// pipeline, the L3$ has two pipelines - coherent and non-coherent. Even when coherency is disabled,
// atomics still go down the coherent pipeline, and the two L3$ pipes do not guarantee ordering of
// operations between themselves.

// Since we don't have a specialized atomic load/store HDC message, we use atomic_or( a, 0x0 ) to
// emulate an atomic load: it does not modify the in-memory value and returns the 'old' value. An
// atomic store can be implemented with an atomic_exchange whose return value is ignored.
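
// Illustrative sketch (editorial example, not part of this library): the
// emulation described above, spelled out for a __global 32-bit value. 'p' and
// 'value' are placeholder names.
//
//   // Atomic load: OR with 0 leaves memory unchanged and returns the old value.
//   int loaded = SPIRV_BUILTIN(AtomicOr, _p1i32_i32_i32_i32, )( p, Device, Acquire, 0 );
//
//   // Atomic store: exchange writes the new value; the returned old value is ignored.
//   SPIRV_BUILTIN(AtomicExchange, _p1i32_i32_i32_i32, )( p, Device, Release, value );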

int SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicLoad, _p0i32_i32_i32, )( __private int *Pointer, int Scope, int Semantics )
{
    return *Pointer;
}

int SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicLoad, _p1i32_i32_i32, )( __global int *Pointer, int Scope, int Semantics )
{
    return SPIRV_BUILTIN(AtomicOr, _p1i32_i32_i32_i32, )( Pointer, Scope, Semantics, 0 );
}

int SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicLoad, _p3i32_i32_i32, )( __local int *Pointer, int Scope, int Semantics )
{
    return SPIRV_BUILTIN(AtomicOr, _p3i32_i32_i32_i32, )( Pointer, Scope, Semantics, 0 );
}

#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)

int SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicLoad, _p4i32_i32_i32, )( __generic int *Pointer, int Scope, int Semantics )
{
    return SPIRV_BUILTIN(AtomicOr, _p4i32_i32_i32_i32, )( Pointer, Scope, Semantics, 0 );
}

#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)

#if defined(cl_khr_int64_base_atomics) || defined(cl_khr_int64_extended_atomics)

long SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicLoad, _p0i64_i32_i32, )( __private long *Pointer, int Scope, int Semantics )
{
    return *Pointer;
}

long SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicLoad, _p1i64_i32_i32, )( __global long *Pointer, int Scope, int Semantics )
{
    return SPIRV_BUILTIN(AtomicOr, _p1i64_i32_i32_i64, )( Pointer, Scope, Semantics, 0 );
}

long SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicLoad, _p3i64_i32_i32, )( __local long *Pointer, int Scope, int Semantics )
{
    return SPIRV_BUILTIN(AtomicOr, _p3i64_i32_i32_i64, )( Pointer, Scope, Semantics, 0 );
}

#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)

long SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicLoad, _p4i64_i32_i32, )( __generic long *Pointer, int Scope, int Semantics )
{
    return SPIRV_BUILTIN(AtomicOr, _p4i64_i32_i32_i64, )( Pointer, Scope, Semantics, 0 );
}

#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)

#endif // defined(cl_khr_int64_base_atomics) || defined(cl_khr_int64_extended_atomics)


float SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicLoad, _p0f32_i32_i32, )( __private float *Pointer, int Scope, int Semantics )
{
    return *Pointer;
}

float SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicLoad, _p1f32_i32_i32, )( __global float *Pointer, int Scope, int Semantics )
{
    return as_float( SPIRV_BUILTIN(AtomicOr, _p1i32_i32_i32_i32, )( (__global int*)Pointer, Scope, Semantics, 0 ) );
}

float SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicLoad, _p3f32_i32_i32, )( __local float *Pointer, int Scope, int Semantics )
{
    return as_float( SPIRV_BUILTIN(AtomicOr, _p3i32_i32_i32_i32, )( (__local int*)Pointer, Scope, Semantics, 0 ) );
}

#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)

float SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicLoad, _p4f32_i32_i32, )( __generic float *Pointer, int Scope, int Semantics )
{
    return as_float( SPIRV_BUILTIN(AtomicOr, _p4i32_i32_i32_i32, )( (__generic int*)Pointer, Scope, Semantics, 0 ) );
}

#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)

#if defined(cl_khr_fp64)
#if defined(cl_khr_int64_base_atomics) || defined(cl_khr_int64_extended_atomics)

double SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicLoad, _p0f64_i32_i32, )( __private double *Pointer, int Scope, int Semantics )
{
    return *Pointer;
}

double SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicLoad, _p1f64_i32_i32, )( __global double *Pointer, int Scope, int Semantics )
{
    return as_double( SPIRV_BUILTIN(AtomicOr, _p1i64_i32_i32_i64, )( (__global long*)Pointer, Scope, Semantics, 0 ) );
}

double SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicLoad, _p3f64_i32_i32, )( __local double *Pointer, int Scope, int Semantics )
{
    return as_double( SPIRV_BUILTIN(AtomicOr, _p3i64_i32_i32_i64, )( (__local long*)Pointer, Scope, Semantics, 0 ) );
}

#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)

double SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicLoad, _p4f64_i32_i32, )( __generic double *Pointer, int Scope, int Semantics )
{
    return as_double( SPIRV_BUILTIN(AtomicOr, _p4i64_i32_i32_i64, )( (__generic long*)Pointer, Scope, Semantics, 0 ) );
}

#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
#endif // defined(cl_khr_int64_base_atomics) || defined(cl_khr_int64_extended_atomics)
#endif // defined(cl_khr_fp64)


// Atomic Stores


void SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicStore, _p0i32_i32_i32_i32, )( __private int *Pointer, int Scope, int Semantics, int Value )
{
    *Pointer = Value;
}

void SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicStore, _p1i32_i32_i32_i32, )( __global int *Pointer, int Scope, int Semantics, int Value )
{
    SPIRV_BUILTIN(AtomicExchange, _p1i32_i32_i32_i32, )( Pointer, Scope, Semantics, Value );
}

void SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicStore, _p3i32_i32_i32_i32, )( __local int *Pointer, int Scope, int Semantics, int Value )
{
    SPIRV_BUILTIN(AtomicExchange, _p3i32_i32_i32_i32, )( Pointer, Scope, Semantics, Value );
}

#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)

void SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicStore, _p4i32_i32_i32_i32, )( __generic int *Pointer, int Scope, int Semantics, int Value )
{
    SPIRV_BUILTIN(AtomicExchange, _p4i32_i32_i32_i32, )( Pointer, Scope, Semantics, Value );
}

#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)

#if defined(cl_khr_int64_base_atomics) || defined(cl_khr_int64_extended_atomics)

void SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicStore, _p0i64_i32_i32_i64, )( __private long *Pointer, int Scope, int Semantics, long Value )
{
    *Pointer = Value;
}

void SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicStore, _p1i64_i32_i32_i64, )( __global long *Pointer, int Scope, int Semantics, long Value )
{
    SPIRV_BUILTIN(AtomicExchange, _p1i64_i32_i32_i64, )( Pointer, Scope, Semantics, Value );
}

void SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicStore, _p3i64_i32_i32_i64, )( __local long *Pointer, int Scope, int Semantics, long Value )
{
    SPIRV_BUILTIN(AtomicExchange, _p3i64_i32_i32_i64, )( Pointer, Scope, Semantics, Value );
}

#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)

void SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicStore, _p4i64_i32_i32_i64, )( __generic long *Pointer, int Scope, int Semantics, long Value )
{
    SPIRV_BUILTIN(AtomicExchange, _p4i64_i32_i32_i64, )( Pointer, Scope, Semantics, Value );
}

#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)

#endif // defined(cl_khr_int64_base_atomics) || defined(cl_khr_int64_extended_atomics)


void SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicStore, _p0f32_i32_i32_f32, )( __private float *Pointer, int Scope, int Semantics, float Value )
{
    SPIRV_BUILTIN(AtomicExchange, _p0f32_i32_i32_f32, )( Pointer, Scope, Semantics, Value );
}
void SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicStore, _p1f32_i32_i32_f32, )( __global float *Pointer, int Scope, int Semantics, float Value )
{
    SPIRV_BUILTIN(AtomicExchange, _p1f32_i32_i32_f32, )( Pointer, Scope, Semantics, Value );
}

void SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicStore, _p3f32_i32_i32_f32, )( __local float *Pointer, int Scope, int Semantics, float Value )
{
    SPIRV_BUILTIN(AtomicExchange, _p3f32_i32_i32_f32, )( Pointer, Scope, Semantics, Value );
}

#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)

void SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicStore, _p4f32_i32_i32_f32, )( __generic float *Pointer, int Scope, int Semantics, float Value )
{
    SPIRV_BUILTIN(AtomicExchange, _p4f32_i32_i32_f32, )( Pointer, Scope, Semantics, Value );
}

#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)

#if defined(cl_khr_fp64)
#if defined(cl_khr_int64_base_atomics) || defined(cl_khr_int64_extended_atomics)

void SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicStore, _p0f64_i32_i32_f64, )( __private double *Pointer, int Scope, int Semantics, double Value )
{
    SPIRV_BUILTIN(AtomicExchange, _p0f64_i32_i32_f64, )( Pointer, Scope, Semantics, Value );
}

void SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicStore, _p1f64_i32_i32_f64, )( __global double *Pointer, int Scope, int Semantics, double Value )
{
    SPIRV_BUILTIN(AtomicExchange, _p1f64_i32_i32_f64, )( Pointer, Scope, Semantics, Value );
}

void SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicStore, _p3f64_i32_i32_f64, )( __local double *Pointer, int Scope, int Semantics, double Value )
{
    SPIRV_BUILTIN(AtomicExchange, _p3f64_i32_i32_f64, )( Pointer, Scope, Semantics, Value );
}

#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)

void SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicStore, _p4f64_i32_i32_f64, )( __generic double *Pointer, int Scope, int Semantics, double Value )
{
    SPIRV_BUILTIN(AtomicExchange, _p4f64_i32_i32_f64, )( Pointer, Scope, Semantics, Value );
}

#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)

#endif // defined(cl_khr_int64_base_atomics) || defined(cl_khr_int64_extended_atomics)
#endif // defined(cl_khr_fp64)


// Atomic Exchange


int SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicExchange, _p0i32_i32_i32_i32, )( __private int *Pointer, int Scope, int Semantics, int Value )
{
    uint orig = *Pointer;
    *Pointer = Value;
    return orig;
}

int SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicExchange, _p1i32_i32_i32_i32, )( __global int *Pointer, int Scope, int Semantics, int Value )
{
    atomic_operation_1op( __builtin_IB_atomic_xchg_global_i32, uint, (global int*)Pointer, Scope, Semantics, Value, true );
}

int SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicExchange, _p3i32_i32_i32_i32, )( __local int *Pointer, int Scope, int Semantics, int Value )
{
    atomic_operation_1op( __builtin_IB_atomic_xchg_local_i32, uint, (local int*)Pointer, Scope, Semantics, Value, false );
}

#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)

int SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicExchange, _p4i32_i32_i32_i32, )( __generic int *Pointer, int Scope, int Semantics, int Value )
{
    if(SPIRV_BUILTIN(GenericCastToPtrExplicit, _p3i8_p4i8_i32, _ToLocal)(__builtin_astype((Pointer), __generic char*), StorageWorkgroup))
    {
        atomic_operation_1op( __builtin_IB_atomic_xchg_local_i32, uint, (__local int*)Pointer, Scope, Semantics, Value, false );
    }
    else
    {
        atomic_operation_1op( __builtin_IB_atomic_xchg_global_i32, uint, (__global int*)Pointer, Scope, Semantics, Value, true );
    }
}

#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
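
// Illustrative sketch (editorial note): the __generic overload above shows the
// dispatch pattern used by every __generic atomic in this file - probe whether
// the pointer refers to workgroup (SLM) memory, then forward to the __local or
// __global implementation:
//
//   if(SPIRV_BUILTIN(GenericCastToPtrExplicit, _p3i8_p4i8_i32, _ToLocal)(
//          __builtin_astype((Pointer), __generic char*), StorageWorkgroup))
//       // SLM: take the __local path (no L3 flush needed in the fences)
//   else
//       // anything else resolves to the __global path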

#if defined(cl_khr_int64_base_atomics)

long SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicExchange, _p0i64_i32_i32_i64, )( __private long *Pointer, int Scope, int Semantics, long Value )
{
    ulong orig = *Pointer;
    *Pointer = Value;
    return orig;
}

long SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicExchange, _p1i64_i32_i32_i64, )( __global long *Pointer, int Scope, int Semantics, long Value )
{
    atomic_operation_1op( __builtin_IB_atomic_xchg_global_i64, ulong, (global long*)Pointer, Scope, Semantics, Value, true );
}

enum IntAtomicOp
{
    ATOMIC_IADD64,
    ATOMIC_SUB64,
    ATOMIC_XCHG64,
    ATOMIC_AND64,
    ATOMIC_OR64,
    ATOMIC_XOR64,
    ATOMIC_IMIN64,
    ATOMIC_IMAX64,
    ATOMIC_UMAX64,
    ATOMIC_UMIN64
};

// handle uint64 SLM atomic umin/umax
ulong OVERLOADABLE __intel_atomic_binary( enum IntAtomicOp atomicOp, volatile __local ulong *Pointer,
                                          uint Scope, uint Semantics, ulong Value )
{
    ulong orig;
    FENCE_PRE_OP(Scope, Semantics, false)
    LOCAL_SPINLOCK_START()
    orig = *Pointer;
    switch (atomicOp)
    {
        case ATOMIC_UMIN64: *Pointer = ( orig < Value ) ? orig : Value; break;
        case ATOMIC_UMAX64: *Pointer = ( orig > Value ) ? orig : Value; break;
        default: break; // What should we do here? OCL doesn't have assert
    }
    LOCAL_SPINLOCK_END()
    FENCE_POST_OP(Scope, Semantics, false)
    return orig;
}

// handle int64 SLM atomic add/sub/xchg/and/or/xor/imin/imax
long OVERLOADABLE __intel_atomic_binary( enum IntAtomicOp atomicOp, volatile __local long *Pointer,
                                         uint Scope, uint Semantics, long Value )
{
    long orig;
    FENCE_PRE_OP(Scope, Semantics, false)
    LOCAL_SPINLOCK_START()
    orig = *Pointer;
    switch (atomicOp)
    {
        case ATOMIC_IADD64: *Pointer += Value; break;
        case ATOMIC_SUB64:  *Pointer -= Value; break;
        case ATOMIC_AND64:  *Pointer &= Value; break;
        case ATOMIC_OR64:   *Pointer |= Value; break;
        case ATOMIC_XOR64:  *Pointer ^= Value; break;
        case ATOMIC_XCHG64: *Pointer = Value; break;
        case ATOMIC_IMIN64: *Pointer = ( orig < Value ) ? orig : Value; break;
        case ATOMIC_IMAX64: *Pointer = ( orig > Value ) ? orig : Value; break;
        default: break; // What should we do here? OCL doesn't have assert
    }
    LOCAL_SPINLOCK_END()
    FENCE_POST_OP(Scope, Semantics, false)
    return orig;
}
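
// Illustrative sketch (editorial example): the helpers above emulate 64-bit SLM
// atomics, which have no native HDC message, by serializing through the shared
// local lock. A call such as
//
//   long old = __intel_atomic_binary(ATOMIC_IMAX64, p, Workgroup, Relaxed, v);
//
// effectively performs:
//
//   spin until CAS on __builtin_IB_get_local_lock() succeeds (0 -> 1);
//   old = *p; *p = (old > v) ? old : v;   // plain accesses, protected by the lock
//   atomically store 0 to the lock to release it;
//
// so mutual exclusion, rather than a native atomic message, provides the
// read-modify-write.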

// handle uint64 SLM atomic inc/dec
ulong OVERLOADABLE __intel_atomic_unary( bool isInc, volatile __local ulong *Pointer, uint Scope, uint Semantics )
{
    ulong orig;
    FENCE_PRE_OP(Scope, Semantics, false)
    LOCAL_SPINLOCK_START()
    orig = *Pointer;
    *Pointer = isInc ? orig + 1 : orig - 1;
    LOCAL_SPINLOCK_END()
    FENCE_POST_OP(Scope, Semantics, false)
    return orig;
}

long SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicExchange, _p3i64_i32_i32_i64, )( __local long *Pointer, int Scope, int Semantics, long Value )
{
    return __intel_atomic_binary(ATOMIC_XCHG64, Pointer, Scope, Semantics, Value);
}

#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)

long SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicExchange, _p4i64_i32_i32_i64, )( __generic long *Pointer, int Scope, int Semantics, long Value )
{
    if(SPIRV_BUILTIN(GenericCastToPtrExplicit, _p3i8_p4i8_i32, _ToLocal)(__builtin_astype((Pointer), __generic char*), StorageWorkgroup))
    {
        return SPIRV_BUILTIN(AtomicExchange, _p3i64_i32_i32_i64, )((__local long*)Pointer, Scope, Semantics, Value);
    }
    else
    {
        return SPIRV_BUILTIN(AtomicExchange, _p1i64_i32_i32_i64, )((__global long*)Pointer, Scope, Semantics, Value);
    }
}

#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)

#endif // defined(cl_khr_int64_base_atomics)

float SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicExchange, _p0f32_i32_i32_f32, )( __private float *Pointer, int Scope, int Semantics, float Value)
{
    float orig = *Pointer;
    *Pointer = Value;
    return orig;
}

float SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicExchange, _p1f32_i32_i32_f32, )( __global float *Pointer, int Scope, int Semantics, float Value)
{
    atomic_operation_1op_as_float( __builtin_IB_atomic_xchg_global_i32, float, (global int*)Pointer, Scope, Semantics, as_int(Value), true );
}

float SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicExchange, _p3f32_i32_i32_f32, )( __local float *Pointer, int Scope, int Semantics, float Value)
{
    atomic_operation_1op_as_float( __builtin_IB_atomic_xchg_local_i32, float, (local int*)Pointer, Scope, Semantics, as_int(Value), false );
}

#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)

float SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicExchange, _p4f32_i32_i32_f32, )( __generic float *Pointer, int Scope, int Semantics, float Value)
{
    if(SPIRV_BUILTIN(GenericCastToPtrExplicit, _p3i8_p4i8_i32, _ToLocal)(__builtin_astype((Pointer), __generic char*), StorageWorkgroup))
    {
        atomic_operation_1op_as_float( __builtin_IB_atomic_xchg_local_i32, float, (local int*)Pointer, Scope, Semantics, as_int(Value), false );
    }
    else
    {
        atomic_operation_1op_as_float( __builtin_IB_atomic_xchg_global_i32, float, (global int*)Pointer, Scope, Semantics, as_int(Value), true );
    }
}

#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
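
// Illustrative sketch (editorial example): there is no floating-point exchange
// message, so the float overloads above reinterpret the bits and reuse the i32
// exchange. For the __global case this amounts to ('fp' is a placeholder
// __global float* for the example):
//
//   // as_int / as_float reinterpret the bit pattern; no conversion happens.
//   int old_bits = __builtin_IB_atomic_xchg_global_i32( (__global int*)fp, as_int(Value) );
//   float old    = as_float( old_bits );
//
// The atomic_operation_1op_as_float / _as_double / _as_half macros near the top
// of this file wrap exactly this pattern, with the usual fences around it.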

#if defined(cl_khr_fp64)
#if defined(cl_khr_int64_base_atomics)

double SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicExchange, _p0f64_i32_i32_f64, )( __private double *Pointer, int Scope, int Semantics, double Value)
{
    return as_double(SPIRV_BUILTIN(AtomicExchange, _p0i64_i32_i32_i64, )((__private long*) Pointer, Scope, Semantics, as_long(Value)));
}

double SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicExchange, _p1f64_i32_i32_f64, )( __global double *Pointer, int Scope, int Semantics, double Value)
{
    return as_double(SPIRV_BUILTIN(AtomicExchange, _p1i64_i32_i32_i64, )((__global long*) Pointer, Scope, Semantics, as_long(Value)));
}

double SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicExchange, _p3f64_i32_i32_f64, )( __local double *Pointer, int Scope, int Semantics, double Value)
{
    return as_double(SPIRV_BUILTIN(AtomicExchange, _p3i64_i32_i32_i64, )((__local long*) Pointer, Scope, Semantics, as_long(Value)));
}

#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)

double SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicExchange, _p4f64_i32_i32_f64, )( __generic double *Pointer, int Scope, int Semantics, double Value)
{
    if(SPIRV_BUILTIN(GenericCastToPtrExplicit, _p3i8_p4i8_i32, _ToLocal)(__builtin_astype((Pointer), __generic char*), StorageWorkgroup))
    {
        return SPIRV_BUILTIN(AtomicExchange, _p3f64_i32_i32_f64, )((__local double*) Pointer, Scope, Semantics, Value);
    }
    else
    {
        return SPIRV_BUILTIN(AtomicExchange, _p1f64_i32_i32_f64, )((__global double*) Pointer, Scope, Semantics, Value);
    }
}

#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)

#endif // defined(cl_khr_int64_base_atomics)
#endif // defined(cl_khr_fp64)


// Atomic Compare Exchange


int SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicCompareExchange, _p0i32_i32_i32_i32_i32_i32, )( __private int *Pointer, int Scope, int Equal, int Unequal, int Value, int Comparator)
{
    uint orig = *Pointer;
    if( orig == Comparator )
    {
        *Pointer = Value;
    }
    return orig;
}

int SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicCompareExchange, _p1i32_i32_i32_i32_i32_i32, )( __global int *Pointer, int Scope, int Equal, int Unequal, int Value, int Comparator)
{
    atomic_cmpxhg( __builtin_IB_atomic_cmpxchg_global_i32, uint, (global int*)Pointer, Scope, Equal, Value, Comparator, true );
}

int SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicCompareExchange, _p3i32_i32_i32_i32_i32_i32, )( __local int *Pointer, int Scope, int Equal, int Unequal, int Value, int Comparator)
{
    atomic_cmpxhg( __builtin_IB_atomic_cmpxchg_local_i32, uint, (local int*)Pointer, Scope, Equal, Value, Comparator, false );
}

#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)

int SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicCompareExchange, _p4i32_i32_i32_i32_i32_i32, )( __generic int *Pointer, int Scope, int Equal, int Unequal, int Value, int Comparator)
{
    if(SPIRV_BUILTIN(GenericCastToPtrExplicit, _p3i8_p4i8_i32, _ToLocal)(__builtin_astype((Pointer), __generic char*), StorageWorkgroup))
    {
        atomic_cmpxhg( __builtin_IB_atomic_cmpxchg_local_i32, uint, (__local int*)Pointer, Scope, Equal, Value, Comparator, false );
    }
    else
    {
        atomic_cmpxhg( __builtin_IB_atomic_cmpxchg_global_i32, uint, (__global int*)Pointer, Scope, Equal, Value, Comparator, true );
    }
}

#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
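
// Illustrative sketch (editorial example): OpAtomicCompareExchange returns the
// *original* value, not a success flag. A caller that wants a classic CAS loop
// can compare the returned value against its comparator ('p' is a placeholder):
//
//   int expected, old = SPIRV_BUILTIN(AtomicLoad, _p1i32_i32_i32, )( p, Device, Relaxed );
//   do {
//       expected = old;
//       old = SPIRV_BUILTIN(AtomicCompareExchange, _p1i32_i32_i32_i32_i32_i32, )(
//                 p, Device, Relaxed, Relaxed, expected + 1, expected );
//   } while( old != expected );   // the store happened iff old == expected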

#if defined(cl_khr_int64_base_atomics)

long SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicCompareExchange, _p0i64_i32_i32_i32_i64_i64, )( __private long *Pointer, int Scope, int Equal, int Unequal, long Value, long Comparator)
{
    ulong orig = *Pointer;
    if( orig == Comparator )
    {
        *Pointer = Value;
    }
    return orig;
}

long SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicCompareExchange, _p1i64_i32_i32_i32_i64_i64, )( __global long *Pointer, int Scope, int Equal, int Unequal, long Value, long Comparator)
{
    atomic_cmpxhg( __builtin_IB_atomic_cmpxchg_global_i64, ulong, (global long*)Pointer, Scope, Equal, Value, Comparator, true );
}

long SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicCompareExchange, _p3i64_i32_i32_i32_i64_i64, )( __local long *Pointer, int Scope, int Equal, int Unequal, long Value, long Comparator)
{
    ulong orig;
    FENCE_PRE_OP(Scope, Equal, false)
    LOCAL_SPINLOCK_START()
    orig = *Pointer;
    if( orig == Comparator )
    {
        *Pointer = Value;
    }
    LOCAL_SPINLOCK_END()
    FENCE_POST_OP(Scope, Equal, false)
    return orig;
}

#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)

long SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicCompareExchange, _p4i64_i32_i32_i32_i64_i64, )( __generic long *Pointer, int Scope, int Equal, int Unequal, long Value, long Comparator)
{
    if(SPIRV_BUILTIN(GenericCastToPtrExplicit, _p3i8_p4i8_i32, _ToLocal)(__builtin_astype((Pointer), __generic char*), StorageWorkgroup))
    {
        return SPIRV_BUILTIN(AtomicCompareExchange, _p3i64_i32_i32_i32_i64_i64, )( (__local long*)Pointer, Scope, Equal, Unequal, Value, Comparator );
    }
    else
    {
        return SPIRV_BUILTIN(AtomicCompareExchange, _p1i64_i32_i32_i32_i64_i64, )( (__global long*)Pointer, Scope, Equal, Unequal, Value, Comparator );
    }
}

#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)

#endif // defined(cl_khr_int64_base_atomics)

float SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicCompareExchange, _p0f32_i32_i32_i32_f32_f32, )( __private float *Pointer, int Scope, int Equal, int Unequal, float Value, float Comparator)
{
    float orig = *Pointer;
    if( orig == Comparator )
    {
        *Pointer = Value;
    }
    return orig;
}

// Float compare-and-exchange builtins are handled as integer builtins, because the OpenCL C
// specification says that float atomics do bitwise comparisons, not float comparisons.

float SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicCompareExchange, _p1f32_i32_i32_i32_f32_f32, )( __global float *Pointer, int Scope, int Equal, int Unequal, float Value, float Comparator)
{
    atomic_cmpxhg_as_float( __builtin_IB_atomic_cmpxchg_global_i32, float, (global int*)Pointer, Scope, Equal, as_uint(Value), as_uint(Comparator), true );
}

float SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicCompareExchange, _p3f32_i32_i32_i32_f32_f32, )( __local float *Pointer, int Scope, int Equal, int Unequal, float Value, float Comparator)
{
    atomic_cmpxhg_as_float( __builtin_IB_atomic_cmpxchg_local_i32, float, (local int*)Pointer, Scope, Equal, as_uint(Value), as_uint(Comparator), false );
}

#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)

float SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicCompareExchange, _p4f32_i32_i32_i32_f32_f32, )( __generic float *Pointer, int Scope, int Equal, int Unequal, float Value, float Comparator)
{
    if(SPIRV_BUILTIN(GenericCastToPtrExplicit, _p3i8_p4i8_i32, _ToLocal)(__builtin_astype((Pointer), __generic char*), StorageWorkgroup))
    {
        atomic_cmpxhg_as_float( __builtin_IB_atomic_cmpxchg_local_i32, float, (__local int*)Pointer, Scope, Equal, as_uint(Value), as_uint(Comparator), false );
    }
    else
    {
        atomic_cmpxhg_as_float( __builtin_IB_atomic_cmpxchg_global_i32, float, (__global int*)Pointer, Scope, Equal, as_uint(Value), as_uint(Comparator), true );
    }
}

#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
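
// Editorial note (example, not part of this library): because the comparison is
// bitwise, floating-point equality rules do not apply to the overloads above:
//
//   // +0.0f == -0.0f as floats, but their bit patterns differ, so a Comparator
//   // of +0.0f will NOT match a stored -0.0f.
//   // NaN != NaN as floats, but a NaN Comparator WILL match a stored NaN with
//   // the identical bit pattern.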

int SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicCompareExchangeWeak, _p0i32_i32_i32_i32_i32_i32, )( __private int *Pointer, int Scope, int Equal, int Unequal, int Value, int Comparator)
{
    return SPIRV_BUILTIN(AtomicCompareExchange, _p0i32_i32_i32_i32_i32_i32, )( Pointer, Scope, Equal, Unequal, Value, Comparator );
}

int SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicCompareExchangeWeak, _p1i32_i32_i32_i32_i32_i32, )( __global int *Pointer, int Scope, int Equal, int Unequal, int Value, int Comparator)
{
    return SPIRV_BUILTIN(AtomicCompareExchange, _p1i32_i32_i32_i32_i32_i32, )( Pointer, Scope, Equal, Unequal, Value, Comparator );
}

int SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicCompareExchangeWeak, _p3i32_i32_i32_i32_i32_i32, )( __local int *Pointer, int Scope, int Equal, int Unequal, int Value, int Comparator)
{
    return SPIRV_BUILTIN(AtomicCompareExchange, _p3i32_i32_i32_i32_i32_i32, )( Pointer, Scope, Equal, Unequal, Value, Comparator );
}

#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)

int SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicCompareExchangeWeak, _p4i32_i32_i32_i32_i32_i32, )( __generic int *Pointer, int Scope, int Equal, int Unequal, int Value, int Comparator)
{
    return SPIRV_BUILTIN(AtomicCompareExchange, _p4i32_i32_i32_i32_i32_i32, )( Pointer, Scope, Equal, Unequal, Value, Comparator );
}

#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)

#if defined(cl_khr_int64_base_atomics)

long SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicCompareExchangeWeak, _p0i64_i32_i32_i32_i64_i64, )( __private long *Pointer, int Scope, int Equal, int Unequal, long Value, long Comparator)
{
    return SPIRV_BUILTIN(AtomicCompareExchange, _p0i64_i32_i32_i32_i64_i64, )( Pointer, Scope, Equal, Unequal, Value, Comparator );
}

long SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicCompareExchangeWeak, _p1i64_i32_i32_i32_i64_i64, )( __global long *Pointer, int Scope, int Equal, int Unequal, long Value, long Comparator)
{
    return SPIRV_BUILTIN(AtomicCompareExchange, _p1i64_i32_i32_i32_i64_i64, )( Pointer, Scope, Equal, Unequal, Value, Comparator );
}

long SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicCompareExchangeWeak, _p3i64_i32_i32_i32_i64_i64, )( __local long *Pointer, int Scope, int Equal, int Unequal, long Value, long Comparator)
{
    return SPIRV_BUILTIN(AtomicCompareExchange, _p3i64_i32_i32_i32_i64_i64, )( Pointer, Scope, Equal, Unequal, Value, Comparator );
}

#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)

long SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicCompareExchangeWeak, _p4i64_i32_i32_i32_i64_i64, )( __generic long *Pointer, int Scope, int Equal, int Unequal, long Value, long Comparator)
{
    return SPIRV_BUILTIN(AtomicCompareExchange, _p4i64_i32_i32_i32_i64_i64, )( Pointer, Scope, Equal, Unequal, Value, Comparator );
}

#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
#endif // defined(cl_khr_int64_base_atomics)
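
// Editorial note: SPIR-V permits OpAtomicCompareExchangeWeak to fail spuriously,
// i.e. to skip the store even when the comparison succeeded. Forwarding to the
// strong compare-exchange, as done above, is always conforming - a strong CAS
// is a valid implementation of a weak one.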

// Atomic Increment


int SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicIIncrement, _p0i32_i32_i32, )( __private int *Pointer, int Scope, int Semantics )
{
    uint orig = *Pointer;
    *Pointer += 1;
    return orig;
}

int SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicIIncrement, _p1i32_i32_i32, )( __global int *Pointer, int Scope, int Semantics )
{
    atomic_operation_0op( __builtin_IB_atomic_inc_global_i32, uint, (global int*)Pointer, Scope, Semantics, true );
}

int SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicIIncrement, _p3i32_i32_i32, )( __local int *Pointer, int Scope, int Semantics )
{
    atomic_operation_0op( __builtin_IB_atomic_inc_local_i32, uint, (local int*)Pointer, Scope, Semantics, false );
}

#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)

int SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicIIncrement, _p4i32_i32_i32, )( __generic int *Pointer, int Scope, int Semantics )
{
    if(SPIRV_BUILTIN(GenericCastToPtrExplicit, _p3i8_p4i8_i32, _ToLocal)(__builtin_astype((Pointer), __generic char*), StorageWorkgroup))
    {
        atomic_operation_0op( __builtin_IB_atomic_inc_local_i32, uint, (__local int*)Pointer, Scope, Semantics, false );
    }
    else
    {
        atomic_operation_0op( __builtin_IB_atomic_inc_global_i32, uint, (__global int*)Pointer, Scope, Semantics, true );
    }
}

#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)

#if defined(cl_khr_int64_base_atomics)

long SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicIIncrement, _p0i64_i32_i32, )( __private long *Pointer, int Scope, int Semantics )
{
    ulong orig = *Pointer;
    *Pointer += 1;
    return orig;
}

long SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicIIncrement, _p1i64_i32_i32, )( __global long *Pointer, int Scope, int Semantics )
{
    atomic_operation_0op( __builtin_IB_atomic_inc_global_i64, ulong, (global long*)Pointer, Scope, Semantics, true );
}

long SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicIIncrement, _p3i64_i32_i32, )( __local long *Pointer, int Scope, int Semantics )
{
    return __intel_atomic_unary(true, Pointer, Scope, Semantics);
}

#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)

long SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicIIncrement, _p4i64_i32_i32, )( __generic long *Pointer, int Scope, int Semantics )
{
    if(SPIRV_BUILTIN(GenericCastToPtrExplicit, _p3i8_p4i8_i32, _ToLocal)(__builtin_astype((Pointer), __generic char*), StorageWorkgroup))
    {
        return SPIRV_BUILTIN(AtomicIIncrement, _p3i64_i32_i32, )((__local long*)Pointer, Scope, Semantics );
    }
    else
    {
        return SPIRV_BUILTIN(AtomicIIncrement, _p1i64_i32_i32, )((__global long*)Pointer, Scope, Semantics );
    }
}

#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
#endif // defined(cl_khr_int64_base_atomics)

// Atomic Decrement


int SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicIDecrement, _p0i32_i32_i32, )( __private int *Pointer, int Scope, int Semantics )
{
    uint orig = *Pointer;
    *Pointer -= 1;
    return orig;
}

int SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicIDecrement, _p1i32_i32_i32, )( __global int *Pointer, int Scope, int Semantics )
{
    atomic_operation_0op( __builtin_IB_atomic_dec_global_i32, uint, (global int*)Pointer, Scope, Semantics, true );
}

int SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicIDecrement, _p3i32_i32_i32, )( __local int *Pointer, int Scope, int Semantics )
{
    atomic_operation_0op( __builtin_IB_atomic_dec_local_i32, uint, (local int*)Pointer, Scope, Semantics, false );
}

#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)

int SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicIDecrement, _p4i32_i32_i32, )( __generic int *Pointer, int Scope, int Semantics )
{
    if(SPIRV_BUILTIN(GenericCastToPtrExplicit, _p3i8_p4i8_i32, _ToLocal)(__builtin_astype((Pointer), __generic char*), StorageWorkgroup))
    {
        atomic_operation_0op( __builtin_IB_atomic_dec_local_i32, uint, (__local int*)Pointer, Scope, Semantics, false );
    }
    else
    {
        atomic_operation_0op( __builtin_IB_atomic_dec_global_i32, uint, (__global int*)Pointer, Scope, Semantics, true );
    }
}

#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)

#if defined(cl_khr_int64_base_atomics)

long SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicIDecrement, _p0i64_i32_i32, )( __private long *Pointer, int Scope, int Semantics )
{
    ulong orig = *Pointer;
    *Pointer -= 1;
    return orig;
}

long SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicIDecrement, _p1i64_i32_i32, )( __global long *Pointer, int Scope, int Semantics )
{
    atomic_operation_0op( __builtin_IB_atomic_dec_global_i64, ulong, (global long*)Pointer, Scope, Semantics, true );
}

long SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicIDecrement, _p3i64_i32_i32, )( __local long *Pointer, int Scope, int Semantics )
{
    return __intel_atomic_unary(false, Pointer, Scope, Semantics);
}

#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)

long SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicIDecrement, _p4i64_i32_i32, )( __generic long *Pointer, int Scope, int Semantics )
{
    if(SPIRV_BUILTIN(GenericCastToPtrExplicit, _p3i8_p4i8_i32, _ToLocal)(__builtin_astype((Pointer), __generic char*), StorageWorkgroup))
    {
        return SPIRV_BUILTIN(AtomicIDecrement, _p3i64_i32_i32, )( (__local long*)Pointer, Scope, Semantics );
    }
    else
    {
        return SPIRV_BUILTIN(AtomicIDecrement, _p1i64_i32_i32, )( (__global long*)Pointer, Scope, Semantics );
    }
}

#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
#endif // defined(cl_khr_int64_base_atomics)


// Atomic IAdd


int SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicIAdd, _p0i32_i32_i32_i32, )( __private int *Pointer, int Scope, int Semantics, int Value )
{
    uint orig = *Pointer;
    *Pointer += Value;
    return orig;
}

int SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicIAdd, _p1i32_i32_i32_i32, )( __global int *Pointer, int Scope, int Semantics, int Value )
{
    atomic_operation_1op( __builtin_IB_atomic_add_global_i32, uint, (global int*)Pointer, Scope, Semantics, Value, true );
}

int SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicIAdd, _p3i32_i32_i32_i32, )( __local int *Pointer, int Scope, int Semantics, int Value )
{
    atomic_operation_1op( __builtin_IB_atomic_add_local_i32, uint, (local int*)Pointer, Scope, Semantics, Value, false );
}

#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)

int SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicIAdd, _p4i32_i32_i32_i32, )( __generic int *Pointer, int Scope, int Semantics, int Value )
{
    if(SPIRV_BUILTIN(GenericCastToPtrExplicit, _p3i8_p4i8_i32, _ToLocal)(__builtin_astype((Pointer), __generic char*), StorageWorkgroup))
    {
        atomic_operation_1op( __builtin_IB_atomic_add_local_i32, uint, (__local int*)Pointer, Scope, Semantics, Value, false );
    }
    else
    {
        atomic_operation_1op( __builtin_IB_atomic_add_global_i32, uint, (__global int*)Pointer, Scope, Semantics, Value, true );
    }
}

#if defined(cl_khr_int64_base_atomics)

long SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicIAdd, _p0i64_i32_i32_i64, )( __private long *Pointer, int Scope, int Semantics, long Value )
{
    ulong orig = *Pointer;
    *Pointer += Value;
    return orig;
}

long SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicIAdd, _p1i64_i32_i32_i64, )( __global long *Pointer, int Scope, int Semantics, long Value )
{
    atomic_operation_1op( __builtin_IB_atomic_add_global_i64, ulong, (__global ulong*)Pointer, Scope, Semantics, Value, true );
}

long SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicIAdd, _p3i64_i32_i32_i64, )( __local long *Pointer, int Scope, int Semantics, long Value )
{
    return __intel_atomic_binary(ATOMIC_IADD64, Pointer, Scope, Semantics, Value);
}

long SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicIAdd, _p4i64_i32_i32_i64, )( __generic long *Pointer, int Scope, int Semantics, long Value )
{
    if(SPIRV_BUILTIN(GenericCastToPtrExplicit, _p3i8_p4i8_i32, _ToLocal)(__builtin_astype((Pointer), __generic char*), StorageWorkgroup))
    {
        return SPIRV_BUILTIN(AtomicIAdd, _p3i64_i32_i32_i64, )((__local long*)Pointer, Scope, Semantics, Value);
    }
    else
    {
        return SPIRV_BUILTIN(AtomicIAdd, _p1i64_i32_i32_i64, )((__global long*)Pointer, Scope, Semantics, Value);
    }
}

#endif // defined(cl_khr_int64_base_atomics)
#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)

// Atomic ISub

int SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicISub, _p0i32_i32_i32_i32, )( __private int *Pointer, int Scope, int Semantics, int Value )
{
    uint orig = *Pointer;
    *Pointer -= Value;
    return orig;
}

int SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicISub, _p1i32_i32_i32_i32, )( __global int *Pointer, int Scope, int Semantics, int Value )
{
    atomic_operation_1op( __builtin_IB_atomic_sub_global_i32, uint, (global int*)Pointer, Scope, Semantics, Value, true );
}

int SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicISub, _p3i32_i32_i32_i32, )( __local int *Pointer, int Scope, int Semantics, int Value )
{
    atomic_operation_1op( __builtin_IB_atomic_sub_local_i32, uint, (local int*)Pointer, Scope, Semantics, Value, false );
}

#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)

int SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicISub, _p4i32_i32_i32_i32, )( __generic int *Pointer, int Scope, int Semantics, int Value )
{
    if(SPIRV_BUILTIN(GenericCastToPtrExplicit, _p3i8_p4i8_i32, _ToLocal)(__builtin_astype((Pointer), __generic char*), StorageWorkgroup))
    {
        atomic_operation_1op( __builtin_IB_atomic_sub_local_i32, uint, (__local int*)Pointer, Scope, Semantics, Value, false );
    }
    else
    {
        atomic_operation_1op( __builtin_IB_atomic_sub_global_i32, uint, (__global int*)Pointer, Scope, Semantics, Value, true );
    }
}

#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)

#if defined(cl_khr_int64_base_atomics)

long SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicISub, _p0i64_i32_i32_i64, )( __private long *Pointer, int Scope, int Semantics, long Value )
{
    ulong orig = *Pointer;
    *Pointer -= Value;
    return orig;
}

long SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicISub, _p1i64_i32_i32_i64, )( __global long *Pointer, int Scope, int Semantics, long Value )
{
    atomic_operation_1op( __builtin_IB_atomic_sub_global_i64, ulong, (global long*)Pointer, Scope, Semantics, Value, true );
}

long SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicISub, _p3i64_i32_i32_i64, )( __local long *Pointer, int Scope, int Semantics, long Value )
{
    return __intel_atomic_binary(ATOMIC_SUB64, Pointer, Scope, Semantics, Value);
}

#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)

long SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicISub, _p4i64_i32_i32_i64, )( __generic long *Pointer, int Scope, int Semantics, long Value )
{
    if(SPIRV_BUILTIN(GenericCastToPtrExplicit, _p3i8_p4i8_i32, _ToLocal)(__builtin_astype((Pointer), __generic char*), StorageWorkgroup))
    {
        return SPIRV_BUILTIN(AtomicISub, _p3i64_i32_i32_i64, )((__local long*)Pointer, Scope, Semantics, Value );
    }
    else
    {
        return SPIRV_BUILTIN(AtomicISub, _p1i64_i32_i32_i64, )((__global long*)Pointer, Scope, Semantics, Value );
    }
}

#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)

#endif // defined(cl_khr_int64_base_atomics)


// Atomic SMin


int SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicSMin, _p0i32_i32_i32_i32, )( __private int *Pointer, int Scope, int Semantics, int Value)
{
    int orig = *Pointer;
    *Pointer = ( orig < Value ) ? orig : Value;
    return orig;
}

int SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicSMin, _p1i32_i32_i32_i32, )( __global int *Pointer, int Scope, int Semantics, int Value)
{
    atomic_operation_1op( __builtin_IB_atomic_min_global_i32, uint, (__global int*)Pointer, Scope, Semantics, Value, true );
}

int SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicSMin, _p3i32_i32_i32_i32, )( __local int *Pointer, int Scope, int Semantics, int Value)
{
    atomic_operation_1op( __builtin_IB_atomic_min_local_i32, uint, (local int*)Pointer, Scope, Semantics, Value, false );
}

#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)

int SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicSMin, _p4i32_i32_i32_i32, )( __generic int *Pointer, int Scope, int Semantics, int Value)
{
    if(SPIRV_BUILTIN(GenericCastToPtrExplicit, _p3i8_p4i8_i32, _ToLocal)(__builtin_astype((Pointer), __generic char*), StorageWorkgroup))
    {
        atomic_operation_1op( __builtin_IB_atomic_min_local_i32, uint, (__local int*)Pointer, Scope, Semantics, Value, false );
    }
    else
    {
        atomic_operation_1op( __builtin_IB_atomic_min_global_i32, uint, (__global int*)Pointer, Scope, Semantics, Value, true );
    }
}

#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)

#if defined(cl_khr_int64_extended_atomics)

long SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicSMin, _p0i64_i32_i32_i64, )( __private long *Pointer, int Scope, int Semantics, long Value)
{
    long orig = *Pointer;
    *Pointer = ( orig < Value ) ? orig : Value;
    return orig;
}

long SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicSMin, _p1i64_i32_i32_i64, )( __global long *Pointer, int Scope, int Semantics, long Value)
{
    atomic_operation_1op( __builtin_IB_atomic_min_global_i64, ulong, (__global long*)Pointer, Scope, Semantics, Value, true );
}

long SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicSMin, _p3i64_i32_i32_i64, )( __local long *Pointer, int Scope, int Semantics, long Value)
{
    return __intel_atomic_binary(ATOMIC_IMIN64, (volatile __local long *)Pointer, Scope, Semantics, Value);
}

#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)

long SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicSMin, _p4i64_i32_i32_i64, )( __generic long *Pointer, int Scope, int Semantics, long Value)
{
    if(SPIRV_BUILTIN(GenericCastToPtrExplicit, _p3i8_p4i8_i32, _ToLocal)(__builtin_astype((Pointer), __generic char*), StorageWorkgroup))
    {
        return SPIRV_BUILTIN(AtomicSMin, _p3i64_i32_i32_i64, )((__local long*)Pointer, Scope, Semantics, Value );
    }
    else
    {
        return SPIRV_BUILTIN(AtomicSMin, _p1i64_i32_i32_i64, )((__global long*)Pointer, Scope, Semantics, Value );
    }
}

#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)

#endif // defined(cl_khr_int64_extended_atomics)

// Atomic UMin

uint SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicUMin, _p0i32_i32_i32_i32, )( __private uint *Pointer, int Scope, int Semantics, uint Value )
{
    uint orig = *Pointer;
    *Pointer = ( orig < Value ) ? orig : Value;
    return orig;
}

uint SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicUMin, _p1i32_i32_i32_i32, )( __global uint *Pointer, int Scope, int Semantics, uint Value )
{
    atomic_operation_1op( __builtin_IB_atomic_min_global_u32, uint, Pointer, Scope, Semantics, Value, true );
}

uint SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicUMin, _p3i32_i32_i32_i32, )( __local uint *Pointer, int Scope, int Semantics, uint Value )
{
    atomic_operation_1op( __builtin_IB_atomic_min_local_u32, uint, Pointer, Scope, Semantics, Value, false );
}

#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)

uint SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicUMin, _p4i32_i32_i32_i32, )( __generic uint *Pointer, int Scope, int Semantics, uint Value )
{
    if(SPIRV_BUILTIN(GenericCastToPtrExplicit, _p3i8_p4i8_i32, _ToLocal)(__builtin_astype((Pointer), __generic char*), StorageWorkgroup))
    {
        atomic_operation_1op( __builtin_IB_atomic_min_local_u32, uint, (__local uint*)Pointer, Scope, Semantics, Value, false );
    }
    else
    {
        atomic_operation_1op( __builtin_IB_atomic_min_global_u32, uint, (__global uint*)Pointer, Scope, Semantics, Value, true );
    }
}

#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)

#if defined(cl_khr_int64_extended_atomics)

ulong SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicUMin, _p0i64_i32_i32_i64, )( __private ulong *Pointer, int Scope, int Semantics, ulong Value )
{
    ulong orig = *Pointer;
    *Pointer = ( orig < Value ) ? orig : Value;
    return orig;
}

ulong SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicUMin, _p1i64_i32_i32_i64, )( __global ulong *Pointer, int Scope, int Semantics, ulong Value )
{
    atomic_operation_1op( __builtin_IB_atomic_min_global_u64, ulong, Pointer, Scope, Semantics, Value, true );
}

ulong SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicUMin, _p3i64_i32_i32_i64, )( __local ulong *Pointer, int Scope, int Semantics, ulong Value )
{
    return __intel_atomic_binary(ATOMIC_UMIN64, Pointer, Scope, Semantics, Value);
}

#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)

ulong SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicUMin, _p4i64_i32_i32_i64, )( __generic ulong *Pointer, int Scope, int Semantics, ulong Value )
{
    if(SPIRV_BUILTIN(GenericCastToPtrExplicit, _p3i8_p4i8_i32, _ToLocal)(__builtin_astype((Pointer), __generic char*), StorageWorkgroup))
    {
        return SPIRV_BUILTIN(AtomicUMin, _p3i64_i32_i32_i64, )( (__local ulong*)Pointer, Scope, Semantics, Value );
    }
    else
    {
        return SPIRV_BUILTIN(AtomicUMin, _p1i64_i32_i32_i64, )( (__global ulong*)Pointer, Scope, Semantics, Value );
    }
}

#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)

#endif // defined(cl_khr_int64_extended_atomics)

// Atomic SMax


int SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicSMax, _p0i32_i32_i32_i32, )( __private int *Pointer, int Scope, int Semantics, int Value)
{
    int orig = *Pointer;
    *Pointer = ( orig > Value ) ? orig : Value;
    return orig;
}

int SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicSMax, _p1i32_i32_i32_i32, )( __global int *Pointer, int Scope, int Semantics, int Value)
{
    atomic_operation_1op( __builtin_IB_atomic_max_global_i32, uint, (global int*)Pointer, Scope, Semantics, Value, true );
}

int SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicSMax, _p3i32_i32_i32_i32, )( __local int *Pointer, int Scope, int Semantics, int Value)
{
    atomic_operation_1op( __builtin_IB_atomic_max_local_i32, uint, (local int*)Pointer, Scope, Semantics, Value, false );
}

#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)

int SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicSMax, _p4i32_i32_i32_i32, )( __generic int *Pointer, int Scope, int Semantics, int Value)
{
    if(SPIRV_BUILTIN(GenericCastToPtrExplicit, _p3i8_p4i8_i32, _ToLocal)(__builtin_astype((Pointer), __generic char*), StorageWorkgroup))
    {
        atomic_operation_1op( __builtin_IB_atomic_max_local_i32, uint, (__local int*)Pointer, Scope, Semantics, Value, false );
    }
    else
    {
        atomic_operation_1op( __builtin_IB_atomic_max_global_i32, uint, (__global int*)Pointer, Scope, Semantics, Value, true );
    }
}

#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)

#if defined(cl_khr_int64_extended_atomics)

long SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicSMax, _p0i64_i32_i32_i64, )( __private long *Pointer, int Scope, int Semantics, long Value)
{
    long orig = *Pointer;
    *Pointer = ( orig > Value ) ? orig : Value;
    return orig;
}

long SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicSMax, _p1i64_i32_i32_i64, )( __global long *Pointer, int Scope, int Semantics, long Value)
{
    atomic_operation_1op( __builtin_IB_atomic_max_global_i64, ulong, (global long*)Pointer, Scope, Semantics, Value, true );
}

long SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicSMax, _p3i64_i32_i32_i64, )( __local long *Pointer, int Scope, int Semantics, long Value)
{
    return __intel_atomic_binary(ATOMIC_IMAX64, (volatile __local long *)Pointer, Scope, Semantics, Value);
}

#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)

long SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicSMax, _p4i64_i32_i32_i64, )( __generic long *Pointer, int Scope, int Semantics, long Value)
{
    if(SPIRV_BUILTIN(GenericCastToPtrExplicit, _p3i8_p4i8_i32, _ToLocal)(__builtin_astype((Pointer), __generic char*), StorageWorkgroup))
    {
        return SPIRV_BUILTIN(AtomicSMax, _p3i64_i32_i32_i64, )( (__local long*)Pointer, Scope, Semantics, Value );
    }
    else
    {
        return SPIRV_BUILTIN(AtomicSMax, _p1i64_i32_i32_i64, )( (__global long*)Pointer, Scope, Semantics, Value );
    }
}

#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)

#endif // defined(cl_khr_int64_extended_atomics)

// Atomic UMax


uint SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicUMax, _p0i32_i32_i32_i32, )( __private uint *Pointer, int Scope, int Semantics, uint Value )
{
    uint orig = *Pointer;
    *Pointer = ( orig > Value ) ? orig : Value;
    return orig;
}

uint SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicUMax, _p1i32_i32_i32_i32, )( __global uint *Pointer, int Scope, int Semantics, uint Value )
{
    atomic_operation_1op( __builtin_IB_atomic_max_global_u32, uint, Pointer, Scope, Semantics, Value, true );
}

uint SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicUMax, _p3i32_i32_i32_i32, )( __local uint *Pointer, int Scope, int Semantics, uint Value )
{
    atomic_operation_1op( __builtin_IB_atomic_max_local_u32, uint, Pointer, Scope, Semantics, Value, false );
}

#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)

uint SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicUMax, _p4i32_i32_i32_i32, )( __generic uint *Pointer, int Scope, int Semantics, uint Value )
{
    if(SPIRV_BUILTIN(GenericCastToPtrExplicit, _p3i8_p4i8_i32, _ToLocal)(__builtin_astype((Pointer), __generic char*), StorageWorkgroup))
    {
        atomic_operation_1op( __builtin_IB_atomic_max_local_u32, uint, (__local uint*)Pointer, Scope, Semantics, Value, false );
    }
    else
    {
        atomic_operation_1op( __builtin_IB_atomic_max_global_u32, uint, (__global uint*)Pointer, Scope, Semantics, Value, true );
    }
}

#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)

#if defined(cl_khr_int64_extended_atomics)

ulong SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicUMax, _p0i64_i32_i32_i64, )( __private ulong *Pointer, int Scope, int Semantics, ulong Value )
{
    ulong orig = *Pointer;
    *Pointer = ( orig > Value ) ? orig : Value;
    return orig;
}

ulong SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicUMax, _p1i64_i32_i32_i64, )( __global ulong *Pointer, int Scope, int Semantics, ulong Value )
{
    atomic_operation_1op( __builtin_IB_atomic_max_global_u64, ulong, Pointer, Scope, Semantics, Value, true );
}

ulong SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicUMax, _p3i64_i32_i32_i64, )( __local ulong *Pointer, int Scope, int Semantics, ulong Value )
{
    return __intel_atomic_binary(ATOMIC_UMAX64, Pointer, Scope, Semantics, Value);
}

#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)

ulong SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicUMax, _p4i64_i32_i32_i64, )( __generic ulong *Pointer, int Scope, int Semantics, ulong Value )
{
    if(SPIRV_BUILTIN(GenericCastToPtrExplicit, _p3i8_p4i8_i32, _ToLocal)(__builtin_astype((Pointer), __generic char*), StorageWorkgroup))
    {
        return SPIRV_BUILTIN(AtomicUMax, _p3i64_i32_i32_i64, )( (__local ulong*)Pointer, Scope, Semantics, Value );
    }
    else
    {
        return SPIRV_BUILTIN(AtomicUMax, _p1i64_i32_i32_i64, )( (__global ulong*)Pointer, Scope, Semantics, Value );
    }
}

#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)

#endif // defined(cl_khr_int64_extended_atomics)

// Atomic And


int SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicAnd, _p0i32_i32_i32_i32, )( __private int *Pointer, int Scope, int Semantics, int Value )
{
    uint orig = *Pointer;
    *Pointer &= Value;
    return orig;
}

int SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicAnd, _p1i32_i32_i32_i32, )( __global int *Pointer, int Scope, int Semantics, int Value )
{
    atomic_operation_1op( __builtin_IB_atomic_and_global_i32, uint, (global int*)Pointer, Scope, Semantics, Value, true );
}

int SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicAnd, _p3i32_i32_i32_i32, )( __local int *Pointer, int Scope, int Semantics, int Value )
{
    atomic_operation_1op( __builtin_IB_atomic_and_local_i32, uint, (local int*)Pointer, Scope, Semantics, Value, false );
}

#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)

int SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicAnd, _p4i32_i32_i32_i32, )( __generic int *Pointer, int Scope, int Semantics, int Value )
{
    if(SPIRV_BUILTIN(GenericCastToPtrExplicit, _p3i8_p4i8_i32, _ToLocal)(__builtin_astype((Pointer), __generic char*), StorageWorkgroup))
    {
        atomic_operation_1op( __builtin_IB_atomic_and_local_i32, uint, (__local int*)Pointer, Scope, Semantics, Value, false );
    }
    else
    {
        atomic_operation_1op( __builtin_IB_atomic_and_global_i32, uint, (__global int*)Pointer, Scope, Semantics, Value, true );
    }
}

#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)

#if defined(cl_khr_int64_extended_atomics)

long SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicAnd, _p0i64_i32_i32_i64, )( __private long *Pointer, int Scope, int Semantics, long Value )
{
    ulong orig = *Pointer;
    *Pointer &= Value;
    return orig;
}

long SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicAnd, _p1i64_i32_i32_i64, )( __global long *Pointer, int Scope, int Semantics, long Value )
{
    atomic_operation_1op( __builtin_IB_atomic_and_global_i64, ulong, (global long*)Pointer, Scope, Semantics, Value, true );
}

long SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicAnd, _p3i64_i32_i32_i64, )( __local long *Pointer, int Scope, int Semantics, long Value )
{
    return __intel_atomic_binary(ATOMIC_AND64, Pointer, Scope, Semantics, Value);
}

#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)

long SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicAnd, _p4i64_i32_i32_i64, )( __generic long *Pointer, int Scope, int Semantics, long Value )
{
    if(SPIRV_BUILTIN(GenericCastToPtrExplicit, _p3i8_p4i8_i32, _ToLocal)(__builtin_astype((Pointer), __generic char*), StorageWorkgroup))
    {
        return SPIRV_BUILTIN(AtomicAnd, _p3i64_i32_i32_i64, )( (__local long*)Pointer, Scope, Semantics, Value );
    }
    else
    {
        return SPIRV_BUILTIN(AtomicAnd, _p1i64_i32_i32_i64, )( (__global long*)Pointer, Scope, Semantics, Value );
    }
}

#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)

#endif // defined(cl_khr_int64_extended_atomics)

// Atomic Or


int SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicOr, _p0i32_i32_i32_i32, )( __private int *Pointer, int Scope, int Semantics, int Value )
{
    uint orig = *Pointer;
    *Pointer |= Value;
    return orig;
}

int SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicOr, _p1i32_i32_i32_i32, )( __global int *Pointer, int Scope, int Semantics, int Value )
{
    atomic_operation_1op( __builtin_IB_atomic_or_global_i32, uint, (global int*)Pointer, Scope, Semantics, Value, true );
}

int SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicOr, _p3i32_i32_i32_i32, )( __local int *Pointer, int Scope, int Semantics, int Value )
{
    atomic_operation_1op( __builtin_IB_atomic_or_local_i32, uint, (local int*)Pointer, Scope, Semantics, Value, false );
}

#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)

int SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicOr, _p4i32_i32_i32_i32, )( __generic int *Pointer, int Scope, int Semantics, int Value )
{
    if(SPIRV_BUILTIN(GenericCastToPtrExplicit, _p3i8_p4i8_i32, _ToLocal)(__builtin_astype((Pointer), __generic char*), StorageWorkgroup))
    {
        atomic_operation_1op( __builtin_IB_atomic_or_local_i32, uint, (__local int*)Pointer, Scope, Semantics, Value, false );
    }
    else
    {
        atomic_operation_1op( __builtin_IB_atomic_or_global_i32, uint, (__global int*)Pointer, Scope, Semantics, Value, true );
    }
}

#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)

#if defined(cl_khr_int64_extended_atomics)

long SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicOr, _p0i64_i32_i32_i64, )( __private long *Pointer, int Scope, int Semantics, long Value )
{
    ulong orig = *Pointer;
    *Pointer |= Value;
    return orig;
}

long SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicOr, _p1i64_i32_i32_i64, )( __global long *Pointer, int Scope, int Semantics, long Value )
{
    atomic_operation_1op( __builtin_IB_atomic_or_global_i64, ulong, (global long*)Pointer, Scope, Semantics, Value, true );
}

long SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicOr, _p3i64_i32_i32_i64, )( __local long *Pointer, int Scope, int Semantics, long Value )
{
    return __intel_atomic_binary(ATOMIC_OR64, Pointer, Scope, Semantics, Value);
}

#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)

long SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicOr, _p4i64_i32_i32_i64, )( __generic long *Pointer, int Scope, int Semantics, long Value )
{
    if(SPIRV_BUILTIN(GenericCastToPtrExplicit, _p3i8_p4i8_i32, _ToLocal)(__builtin_astype((Pointer), __generic char*), StorageWorkgroup))
    {
        return SPIRV_BUILTIN(AtomicOr, _p3i64_i32_i32_i64, )( (__local long*)Pointer, Scope, Semantics, Value );
    }
    else
    {
        return SPIRV_BUILTIN(AtomicOr, _p1i64_i32_i32_i64, )( (__global long*)Pointer, Scope, Semantics, Value );
    }
}

#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)

#endif // defined(cl_khr_int64_extended_atomics)


// Atomic Xor


int SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicXor, _p0i32_i32_i32_i32, )( __private int *Pointer, int Scope, int Semantics, int Value )
{
    uint orig = *Pointer;
    *Pointer ^= Value;
    return orig;
}

int SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicXor, _p1i32_i32_i32_i32, )( __global int *Pointer, int Scope, int Semantics, int Value )
{
    atomic_operation_1op( __builtin_IB_atomic_xor_global_i32, uint, (global int*)Pointer, Scope, Semantics, Value, true );
}

int SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicXor, _p3i32_i32_i32_i32, )( __local int *Pointer, int Scope, int Semantics, int Value )
{
    atomic_operation_1op( __builtin_IB_atomic_xor_local_i32, uint, (local int*)Pointer, Scope, Semantics, Value, false );
}

#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)

int SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicXor, _p4i32_i32_i32_i32, )( __generic int *Pointer, int Scope, int Semantics, int Value )
{
    if(SPIRV_BUILTIN(GenericCastToPtrExplicit, _p3i8_p4i8_i32, _ToLocal)(__builtin_astype((Pointer), __generic char*), StorageWorkgroup))
    {
        atomic_operation_1op( __builtin_IB_atomic_xor_local_i32, uint, (__local int*)Pointer, Scope, Semantics, Value, false );
    }
    else
    {
        atomic_operation_1op( __builtin_IB_atomic_xor_global_i32, uint, (__global int*)Pointer, Scope, Semantics, Value, true );
    }
}

#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)

#if defined(cl_khr_int64_extended_atomics)

long SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicXor, _p0i64_i32_i32_i64, )( __private long *Pointer, int Scope, int Semantics, long Value )
{
    ulong orig = *Pointer;
    *Pointer ^= Value;
    return orig;
}

long SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicXor, _p1i64_i32_i32_i64, )( __global long *Pointer, int Scope, int Semantics, long Value )
{
    atomic_operation_1op( __builtin_IB_atomic_xor_global_i64, ulong, (global long*)Pointer, Scope, Semantics, Value, true );
}

long SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicXor, _p3i64_i32_i32_i64, )( __local long *Pointer, int Scope, int Semantics, long Value )
{
    return __intel_atomic_binary(ATOMIC_XOR64, Pointer, Scope, Semantics, Value);
}

#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)

long SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicXor, _p4i64_i32_i32_i64, )( __generic long *Pointer, int Scope, int Semantics, long Value )
{
    if(SPIRV_BUILTIN(GenericCastToPtrExplicit, _p3i8_p4i8_i32, _ToLocal)(__builtin_astype((Pointer), __generic char*), StorageWorkgroup))
    {
        return SPIRV_BUILTIN(AtomicXor, _p3i64_i32_i32_i64, )( (__local long*)Pointer, Scope, Semantics, Value );
    }
    else
    {
        return SPIRV_BUILTIN(AtomicXor, _p1i64_i32_i32_i64, )( (__global long*)Pointer, Scope, Semantics, Value );
    }
}

#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)

#endif // defined(cl_khr_int64_extended_atomics)

// Atomic FlagTestAndSet


bool SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicFlagTestAndSet, _p0i32_i32_i32, )( __private int *Pointer, int Scope, int Semantics )
{
    return (bool)SPIRV_BUILTIN(AtomicExchange, _p0i32_i32_i32_i32, )( Pointer, Scope, Semantics, ATOMIC_FLAG_TRUE );
}

bool SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicFlagTestAndSet, _p1i32_i32_i32, )( __global int *Pointer, int Scope, int Semantics )
{
    return (bool)SPIRV_BUILTIN(AtomicExchange, _p1i32_i32_i32_i32, )( Pointer, Scope, Semantics, ATOMIC_FLAG_TRUE );
}

bool SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicFlagTestAndSet, _p3i32_i32_i32, )( __local int *Pointer, int Scope, int Semantics )
{
    return (bool)SPIRV_BUILTIN(AtomicExchange, _p3i32_i32_i32_i32, )( Pointer, Scope, Semantics, ATOMIC_FLAG_TRUE );
}

#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)

bool SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicFlagTestAndSet, _p4i32_i32_i32, )( __generic int *Pointer, int Scope, int Semantics )
{
    return (bool)SPIRV_BUILTIN(AtomicExchange, _p4i32_i32_i32_i32, )( Pointer, Scope, Semantics, ATOMIC_FLAG_TRUE );
}

#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)


// Atomic FlagClear


void SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicFlagClear, _p0i32_i32_i32, )( __private int *Pointer, int Scope, int Semantics )
{
    SPIRV_BUILTIN(AtomicStore, _p0i32_i32_i32_i32, )( Pointer, Scope, Semantics, ATOMIC_FLAG_FALSE );
}

void SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicFlagClear, _p1i32_i32_i32, )( __global int *Pointer, int Scope, int Semantics )
{
    SPIRV_BUILTIN(AtomicStore, _p1i32_i32_i32_i32, )( Pointer, Scope, Semantics, ATOMIC_FLAG_FALSE );
}

void SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicFlagClear, _p3i32_i32_i32, )( __local int *Pointer, int Scope, int Semantics )
{
    SPIRV_BUILTIN(AtomicStore, _p3i32_i32_i32_i32, )( Pointer, Scope, Semantics, ATOMIC_FLAG_FALSE );
}

#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)

void SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicFlagClear, _p4i32_i32_i32, )( __generic int *Pointer, int Scope, int Semantics )
{
    SPIRV_BUILTIN(AtomicStore, _p4i32_i32_i32_i32, )( Pointer, Scope, Semantics, ATOMIC_FLAG_FALSE );
}

#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
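// For illustration only: the two flag builtins above are enough to build a
// simple spin lock. A minimal sketch in terms of these builtins (assuming
// 'flag' is a __global int the host initialized to ATOMIC_FLAG_FALSE):
//
//   // acquire: spin until the old value was 'clear'
//   while( SPIRV_BUILTIN(AtomicFlagTestAndSet, _p1i32_i32_i32, )( flag, Device, Acquire ) );
//   // ... critical section ...
//   // release (SPIR-V forbids Acquire semantics on a flag clear)
//   SPIRV_BUILTIN(AtomicFlagClear, _p1i32_i32_i32, )( flag, Device, Release );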
float SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicFAddEXT, _p0f32_i32_i32_f32, )( __private float *Pointer, int Scope, int Semantics, float Value)
{
    float orig = *Pointer;
    *Pointer += Value;
    return orig;
}

float SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicFAddEXT, _p1f32_i32_i32_f32, )( __global float *Pointer, int Scope, int Semantics, float Value)
{
    if(__UseNativeFP32GlobalAtomicAdd)
    {
        // atomic_operation_1op_as_float() expands to a block that returns the result.
        atomic_operation_1op_as_float( __builtin_IB_atomic_add_global_f32, float, Pointer, Scope, Semantics, Value, true );
    }
    // We don't use the GLOBAL_SPINLOCK_START and GLOBAL_SPINLOCK_END emulation here, since a do-while loop is more efficient for global atomics.
    float orig;
    float desired;
    do {
        orig = as_float(SPIRV_BUILTIN(AtomicLoad, _p1i32_i32_i32, )((__global int*)Pointer, Scope, Semantics));
        desired = orig + Value;
    } while(as_int(orig) != SPIRV_BUILTIN(AtomicCompareExchange, _p1i32_i32_i32_i32_i32_i32, )(
                (__global int*)Pointer, Scope, Semantics, Semantics,
                as_int(desired), as_int(orig)));
    return orig;
}

float SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicFAddEXT, _p3f32_i32_i32_f32, )( __local float *Pointer, int Scope, int Semantics, float Value)
{
    float orig;
    FENCE_PRE_OP(Scope, Semantics, false)
    LOCAL_SPINLOCK_START()
    orig = *Pointer;
    *Pointer = orig + Value;
    LOCAL_SPINLOCK_END()
    FENCE_POST_OP(Scope, Semantics, false)
    return orig;
}

#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
float SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicFAddEXT, _p4f32_i32_i32_f32, )( __generic float *Pointer, int Scope, int Semantics, float Value)
{
    if(SPIRV_BUILTIN(GenericCastToPtrExplicit, _p3i8_p4i8_i32, _ToLocal)(__builtin_astype((Pointer), __generic char*), StorageWorkgroup))
    {
        return SPIRV_BUILTIN(AtomicFAddEXT, _p3f32_i32_i32_f32, )((local float*)Pointer, Scope, Semantics, Value);
    }
    else
    {
        return SPIRV_BUILTIN(AtomicFAddEXT, _p1f32_i32_i32_f32, )((global float*)Pointer, Scope, Semantics, Value);
    }
}
#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
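// For reference, the loop above is the standard pattern for emulating a
// floating-point atomic with an integer compare-exchange: snapshot the old
// bits, compute the new value, and retry until no other thread intervened.
// A minimal OpenCL C 2.0 sketch of the same idea (illustrative only; the
// SPIRV_BUILTIN form above is what this library actually uses):
//
//   float atomic_fadd_emulated(volatile __global atomic_int *p, float v)
//   {
//       int old = atomic_load_explicit(p, memory_order_relaxed);
//       int desired;
//       do {
//           // recompute from 'old', which the CAS refreshes on failure
//           desired = as_int(as_float(old) + v);
//       } while(!atomic_compare_exchange_weak_explicit(p, &old, desired,
//                   memory_order_relaxed, memory_order_relaxed));
//       return as_float(old);
//   }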
double SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicFAddEXT, _p0f64_i32_i32_f64, )( __private double *Pointer, int Scope, int Semantics, double Value)
{
    double orig = *Pointer;
    *Pointer += Value;
    return orig;
}

double SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicFAddEXT, _p1f64_i32_i32_f64, )( __global double *Pointer, int Scope, int Semantics, double Value)
{
    // We don't use the GLOBAL_SPINLOCK_START and GLOBAL_SPINLOCK_END emulation here, since a do-while loop is more efficient for global atomics.
    // Another important reason for using the do-while loop emulation is to avoid a HW bug on XeHP SDV:
    // "NodeDSS works in fixed arbitration mode where writes are always prioritized over reads.
    // This is causing the IC read request to stall behind other pending write requests.
    // Since IC read is not progressing, the thread which acquired the lock is not proceeding
    // further to clear the lock and thus causing hang."
    // The do-while loop emulation doesn't expose the HW issue, since it reads the 'Pointer' value inside the loop.
    double orig;
    double desired;
    do {
        orig = as_double(SPIRV_BUILTIN(AtomicLoad, _p1i64_i32_i32, )((__global long*)Pointer, Scope, Semantics));
        desired = orig + Value;
    } while(as_long(orig) != SPIRV_BUILTIN(AtomicCompareExchange, _p1i64_i32_i32_i32_i64_i64, )(
                (__global long*)Pointer, Scope, Semantics, Semantics,
                as_long(desired), as_long(orig)));
    return orig;
}

double SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicFAddEXT, _p3f64_i32_i32_f64, )( __local double *Pointer, int Scope, int Semantics, double Value)
{
    double orig;
    FENCE_PRE_OP(Scope, Semantics, false)
    LOCAL_SPINLOCK_START()
    orig = *Pointer;
    *Pointer = orig + Value;
    LOCAL_SPINLOCK_END()
    FENCE_POST_OP(Scope, Semantics, false)
    return orig;
}

#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
double SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicFAddEXT, _p4f64_i32_i32_f64, )( __generic double *Pointer, int Scope, int Semantics, double Value)
{
    if(SPIRV_BUILTIN(GenericCastToPtrExplicit, _p3i8_p4i8_i32, _ToLocal)(__builtin_astype((Pointer), __generic char*), StorageWorkgroup))
    {
        return SPIRV_BUILTIN(AtomicFAddEXT, _p3f64_i32_i32_f64, )((local double*)Pointer, Scope, Semantics, Value);
    }
    else
    {
        return SPIRV_BUILTIN(AtomicFAddEXT, _p1f64_i32_i32_f64, )((global double*)Pointer, Scope, Semantics, Value);
    }
}
#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
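// Usage note: the AtomicFAddEXT overloads above implement SPIR-V's
// SPV_EXT_shader_atomic_float_add extension. From OpenCL C they are typically
// reached through the cl_ext_float_atomics extension; an illustrative kernel
// (assuming the device reports fp32 global add support):
//
//   #pragma OPENCL EXTENSION cl_ext_float_atomics : enable
//
//   __kernel void reduce_sum(volatile __global atomic_float *acc,
//                            __global const float *in)
//   {
//       atomic_fetch_add_explicit(acc, in[get_global_id(0)],
//                                 memory_order_relaxed, memory_scope_device);
//   }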
half SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicFMinEXT, _p0f16_i32_i32_f16, )( private half* Pointer, int Scope, int Semantics, half Value)
{
    half orig = *Pointer;
    *Pointer = (orig < Value) ? orig : Value;
    return orig;
}

half SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicFMinEXT, _p1f16_i32_i32_f16, )( global half* Pointer, int Scope, int Semantics, half Value)
{
    if(__UseNativeFP16AtomicMinMax)
    {
        atomic_operation_1op_as_half( __builtin_IB_atomic_min_global_f16, half, Pointer, Scope, Semantics, Value, true );
    }
    half orig;
    FENCE_PRE_OP(Scope, Semantics, true)
    GLOBAL_SPINLOCK_START()
    orig = *Pointer;
    *Pointer = (orig < Value) ? orig : Value;
    GLOBAL_SPINLOCK_END()
    FENCE_POST_OP(Scope, Semantics, true)
    return orig;
}

half SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicFMinEXT, _p3f16_i32_i32_f16, )( local half* Pointer, int Scope, int Semantics, half Value)
{
    if(__UseNativeFP16AtomicMinMax)
    {
        atomic_operation_1op_as_half( __builtin_IB_atomic_min_local_f16, half, Pointer, Scope, Semantics, Value, false );
    }
    half orig;
    FENCE_PRE_OP(Scope, Semantics, false)
    LOCAL_SPINLOCK_START()
    orig = *Pointer;
    *Pointer = (orig < Value) ? orig : Value;
    LOCAL_SPINLOCK_END()
    FENCE_POST_OP(Scope, Semantics, false)
    return orig;
}

#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
half SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicFMinEXT, _p4f16_i32_i32_f16, )( generic half* Pointer, int Scope, int Semantics, half Value)
{
    if (SPIRV_BUILTIN(GenericCastToPtrExplicit, _p3i8_p4i8_i32, _ToLocal)(__builtin_astype((Pointer), __generic char*), StorageWorkgroup))
    {
        return SPIRV_BUILTIN(AtomicFMinEXT, _p3f16_i32_i32_f16, )((__local half*)Pointer, Scope, Semantics, Value);
    }
    else
    {
        return SPIRV_BUILTIN(AtomicFMinEXT, _p1f16_i32_i32_f16, )((__global half*)Pointer, Scope, Semantics, Value);
    }
}
#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)

float SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicFMinEXT, _p0f32_i32_i32_f32, )( private float* Pointer, int Scope, int Semantics, float Value)
{
    float orig = *Pointer;
    *Pointer = (orig < Value) ? orig : Value;
    return orig;
}

float SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicFMinEXT, _p1f32_i32_i32_f32, )( global float* Pointer, int Scope, int Semantics, float Value)
{
    atomic_operation_1op_as_float(__builtin_IB_atomic_min_global_f32, float, Pointer, Scope, Semantics, Value, true);
}

float SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicFMinEXT, _p3f32_i32_i32_f32, )( local float* Pointer, int Scope, int Semantics, float Value)
{
    atomic_operation_1op_as_float(__builtin_IB_atomic_min_local_f32, float, Pointer, Scope, Semantics, Value, false);
}

#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
float SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicFMinEXT, _p4f32_i32_i32_f32, )( generic float* Pointer, int Scope, int Semantics, float Value)
{
    if (SPIRV_BUILTIN(GenericCastToPtrExplicit, _p3i8_p4i8_i32, _ToLocal)(__builtin_astype((Pointer), __generic char*), StorageWorkgroup))
    {
        return SPIRV_BUILTIN(AtomicFMinEXT, _p3f32_i32_i32_f32, )((__local float*)Pointer, Scope, Semantics, Value);
    }
    else
    {
        return SPIRV_BUILTIN(AtomicFMinEXT, _p1f32_i32_i32_f32, )((__global float*)Pointer, Scope, Semantics, Value);
    }
}
#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)

double SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicFMinEXT, _p0f64_i32_i32_f64, )( private double* Pointer, int Scope, int Semantics, double Value)
{
    double orig = *Pointer;
    *Pointer = (orig < Value) ? orig : Value;
    return orig;
}

double SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicFMinEXT, _p1f64_i32_i32_f64, )( global double* Pointer, int Scope, int Semantics, double Value)
{
    // We don't use the GLOBAL_SPINLOCK_START and GLOBAL_SPINLOCK_END emulation here, since a do-while loop is more efficient for global atomics.
    // Another important reason for using the do-while loop emulation is to avoid a HW bug on XeHP SDV:
    // "NodeDSS works in fixed arbitration mode where writes are always prioritized over reads.
    // This is causing the IC read request to stall behind other pending write requests.
    // Since IC read is not progressing, the thread which acquired the lock is not proceeding
    // further to clear the lock and thus causing hang."
    // The do-while loop emulation doesn't expose the HW issue, since it reads the 'Pointer' value inside the loop.
    double orig;
    double desired;
    do {
        orig = as_double(SPIRV_BUILTIN(AtomicLoad, _p1i64_i32_i32, )((__global long*)Pointer, Scope, Semantics));
        desired = ( orig < Value ) ? orig : Value;
    } while(as_long(orig) != SPIRV_BUILTIN(AtomicCompareExchange, _p1i64_i32_i32_i32_i64_i64, )(
                (__global long*)Pointer, Scope, Semantics, Semantics,
                as_long(desired), as_long(orig)));
    return orig;
}

double SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicFMinEXT, _p3f64_i32_i32_f64, )( local double* Pointer, int Scope, int Semantics, double Value)
{
    double orig;
    FENCE_PRE_OP(Scope, Semantics, false)
    LOCAL_SPINLOCK_START()
    orig = *Pointer;
    *Pointer = (orig < Value) ? orig : Value;
    LOCAL_SPINLOCK_END()
    FENCE_POST_OP(Scope, Semantics, false)
    return orig;
}

#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
double SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicFMinEXT, _p4f64_i32_i32_f64, )( generic double* Pointer, int Scope, int Semantics, double Value)
{
    if (SPIRV_BUILTIN(GenericCastToPtrExplicit, _p3i8_p4i8_i32, _ToLocal)(__builtin_astype((Pointer), __generic char*), StorageWorkgroup))
    {
        return SPIRV_BUILTIN(AtomicFMinEXT, _p3f64_i32_i32_f64, )((__local double*)Pointer, Scope, Semantics, Value);
    }
    else
    {
        return SPIRV_BUILTIN(AtomicFMinEXT, _p1f64_i32_i32_f64, )((__global double*)Pointer, Scope, Semantics, Value);
    }
}
#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)

half SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicFMaxEXT, _p0f16_i32_i32_f16, )( private half* Pointer, int Scope, int Semantics, half Value)
{
    half orig = *Pointer;
    *Pointer = (orig > Value) ? orig : Value;
    return orig;
}

half SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicFMaxEXT, _p1f16_i32_i32_f16, )( global half* Pointer, int Scope, int Semantics, half Value)
{
    if(__UseNativeFP16AtomicMinMax)
    {
        atomic_operation_1op_as_half( __builtin_IB_atomic_max_global_f16, half, Pointer, Scope, Semantics, Value, true );
    }
    half orig;
    FENCE_PRE_OP(Scope, Semantics, true)
    GLOBAL_SPINLOCK_START()
    orig = *Pointer;
    *Pointer = (orig > Value) ? orig : Value;
    GLOBAL_SPINLOCK_END()
    FENCE_POST_OP(Scope, Semantics, true)
    return orig;
}

half SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicFMaxEXT, _p3f16_i32_i32_f16, )( local half* Pointer, int Scope, int Semantics, half Value)
{
    if(__UseNativeFP16AtomicMinMax)
    {
        atomic_operation_1op_as_half( __builtin_IB_atomic_max_local_f16, half, Pointer, Scope, Semantics, Value, false );
    }
    half orig;
    FENCE_PRE_OP(Scope, Semantics, false)
    LOCAL_SPINLOCK_START()
    orig = *Pointer;
    *Pointer = (orig > Value) ? orig : Value;
    LOCAL_SPINLOCK_END()
    FENCE_POST_OP(Scope, Semantics, false)
    return orig;
}

#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
half SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicFMaxEXT, _p4f16_i32_i32_f16, )( generic half* Pointer, int Scope, int Semantics, half Value)
{
    if (SPIRV_BUILTIN(GenericCastToPtrExplicit, _p3i8_p4i8_i32, _ToLocal)(__builtin_astype((Pointer), __generic char*), StorageWorkgroup))
    {
        return SPIRV_BUILTIN(AtomicFMaxEXT, _p3f16_i32_i32_f16, )((__local half*)Pointer, Scope, Semantics, Value);
    }
    else
    {
        return SPIRV_BUILTIN(AtomicFMaxEXT, _p1f16_i32_i32_f16, )((__global half*)Pointer, Scope, Semantics, Value);
    }
}
#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)

float SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicFMaxEXT, _p0f32_i32_i32_f32, )( private float* Pointer, int Scope, int Semantics, float Value)
{
    float orig = *Pointer;
    *Pointer = (orig > Value) ? orig : Value;
    return orig;
}

float SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicFMaxEXT, _p1f32_i32_i32_f32, )( global float* Pointer, int Scope, int Semantics, float Value)
{
    atomic_operation_1op_as_float(__builtin_IB_atomic_max_global_f32, float, Pointer, Scope, Semantics, Value, true);
}

float SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicFMaxEXT, _p3f32_i32_i32_f32, )( local float* Pointer, int Scope, int Semantics, float Value)
{
    atomic_operation_1op_as_float(__builtin_IB_atomic_max_local_f32, float, Pointer, Scope, Semantics, Value, false);
}

#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
float SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicFMaxEXT, _p4f32_i32_i32_f32, )( generic float* Pointer, int Scope, int Semantics, float Value)
{
    if (SPIRV_BUILTIN(GenericCastToPtrExplicit, _p3i8_p4i8_i32, _ToLocal)(__builtin_astype((Pointer), __generic char*), StorageWorkgroup))
    {
        return SPIRV_BUILTIN(AtomicFMaxEXT, _p3f32_i32_i32_f32, )((__local float*)Pointer, Scope, Semantics, Value);
    }
    else
    {
        return SPIRV_BUILTIN(AtomicFMaxEXT, _p1f32_i32_i32_f32, )((__global float*)Pointer, Scope, Semantics, Value);
    }
}
#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
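// Note: f32 min/max map directly to the native
// __builtin_IB_atomic_{min,max}_{global,local}_f32 messages, f16 takes the
// native path only when __UseNativeFP16AtomicMinMax is set, and the f64
// overloads below fall back to compare-exchange (global) or spin-lock (local)
// emulation.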
double SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicFMaxEXT, _p0f64_i32_i32_f64, )( private double* Pointer, int Scope, int Semantics, double Value)
{
    double orig = *Pointer;
    *Pointer = (orig > Value) ? orig : Value;
    return orig;
}

double SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicFMaxEXT, _p1f64_i32_i32_f64, )( global double* Pointer, int Scope, int Semantics, double Value)
{
    // We don't use the GLOBAL_SPINLOCK_START and GLOBAL_SPINLOCK_END emulation here, since a do-while loop is more efficient for global atomics.
    // Another important reason for using the do-while loop emulation is to avoid a HW bug on XeHP SDV:
    // "NodeDSS works in fixed arbitration mode where writes are always prioritized over reads.
    // This is causing the IC read request to stall behind other pending write requests.
    // Since IC read is not progressing, the thread which acquired the lock is not proceeding
    // further to clear the lock and thus causing hang."
    // The do-while loop emulation doesn't expose the HW issue, since it reads the 'Pointer' value inside the loop.
    double orig;
    double desired;
    do {
        orig = as_double(SPIRV_BUILTIN(AtomicLoad, _p1i64_i32_i32, )((__global long*)Pointer, Scope, Semantics));
        desired = ( orig > Value ) ? orig : Value;
    } while(as_long(orig) != SPIRV_BUILTIN(AtomicCompareExchange, _p1i64_i32_i32_i32_i64_i64, )(
                (__global long*)Pointer, Scope, Semantics, Semantics,
                as_long(desired), as_long(orig)));
    return orig;
}

double SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicFMaxEXT, _p3f64_i32_i32_f64, )( local double* Pointer, int Scope, int Semantics, double Value)
{
    double orig;
    FENCE_PRE_OP(Scope, Semantics, false)
    LOCAL_SPINLOCK_START()
    orig = *Pointer;
    *Pointer = (orig > Value) ? orig : Value;
    LOCAL_SPINLOCK_END()
    FENCE_POST_OP(Scope, Semantics, false)
    return orig;
}

#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
double SPIRV_OVERLOADABLE SPIRV_BUILTIN(AtomicFMaxEXT, _p4f64_i32_i32_f64, )( generic double* Pointer, int Scope, int Semantics, double Value)
{
    if (SPIRV_BUILTIN(GenericCastToPtrExplicit, _p3i8_p4i8_i32, _ToLocal)(__builtin_astype((Pointer), __generic char*), StorageWorkgroup))
    {
        return SPIRV_BUILTIN(AtomicFMaxEXT, _p3f64_i32_i32_f64, )((__local double*)Pointer, Scope, Semantics, Value);
    }
    else
    {
        return SPIRV_BUILTIN(AtomicFMaxEXT, _p1f64_i32_i32_f64, )((__global double*)Pointer, Scope, Semantics, Value);
    }
}
#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)

#undef ATOMIC_FLAG_FALSE
#undef ATOMIC_FLAG_TRUE

#define KMP_LOCK_FREE 0
#define KMP_LOCK_BUSY 1

void __builtin_IB_kmp_acquire_lock(int *lock)
{
    volatile atomic_uint *lck = (volatile atomic_uint *)lock;
    uint expected = KMP_LOCK_FREE;
    // Test-and-test-and-set: spin on a relaxed load first to avoid hammering
    // the line with compare-exchange traffic, then attempt the acquiring CAS.
    while (atomic_load_explicit(lck, memory_order_relaxed) != KMP_LOCK_FREE ||
           !atomic_compare_exchange_strong_explicit(lck, &expected, KMP_LOCK_BUSY,
                                                    memory_order_acquire,
                                                    memory_order_relaxed)) {
        expected = KMP_LOCK_FREE;
    }
}

void __builtin_IB_kmp_release_lock(int *lock)
{
    volatile atomic_uint *lck = (volatile atomic_uint *)lock;
    atomic_store_explicit(lck, KMP_LOCK_FREE, memory_order_release);
}

#undef KMP_LOCK_FREE
#undef KMP_LOCK_BUSY

#undef SEMANTICS_PRE_OP_NEED_FENCE
#undef SEMANTICS_POST_OP_NEEDS_FENCE
#undef FENCE_PRE_OP
#undef FENCE_POST_OP
#undef LOCAL_SPINLOCK_START
#undef LOCAL_SPINLOCK_END
#undef GLOBAL_SPINLOCK_START
#undef GLOBAL_SPINLOCK_END

#undef atomic_operation_1op
#undef atomic_operation_1op_as_float
#undef atomic_operation_1op_as_double
#undef atomic_operation_1op_as_half
#undef atomic_operation_0op
#undef atomic_cmpxhg
#undef atomic_cmpxhg_as_float
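// For illustration: the KMP lock helpers above form a test-and-test-and-set
// lock. A hypothetical caller brackets its critical section like this, where
// 'lock' points to an int zero-initialized (KMP_LOCK_FREE) before first use:
//
//   __builtin_IB_kmp_acquire_lock(lock);
//   // ... critical section ...
//   __builtin_IB_kmp_release_lock(lock);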