/*========================== begin_copyright_notice ============================

Copyright (C) 2017-2021 Intel Corporation

SPDX-License-Identifier: MIT

============================= end_copyright_notice ===========================*/

#include "../Languages/OpenCL/IBiF_SPIRV_Utils.cl"

extern __constant int __OptDisable;

// EmitVISAPass support for __builtin_IB_memfence/__builtin_IB_typedmemfence requires some arguments
// to be constants, as those are used to prepare a message descriptor and so must be known at compile
// time. To ensure that all the arguments are constants on the O0 path, there is a special function
// marked with __attribute__((optnone)) which emits a separate call instruction for each
// argument configuration.

// MEMFENCE IMPLEMENTATION

void __attribute__((optnone)) __intel_memfence_optnone(bool flushRW, bool isGlobal, bool invalidateL1)
{
// Each arm of the if/else chain below calls __builtin_IB_memfence with literal
// arguments only; e.g. MEMFENCE_IF(false, true, false) expands to:
//
//   if (flushRW == false && isGlobal == true && invalidateL1 == false)
//   {
//       __builtin_IB_memfence(true, false, false, false, false, true, false);
//   }
//
// so the message descriptor can be formed at compile time even at O0.
#define MEMFENCE_IF(V1, V5, V6) \
if (flushRW == V1 && isGlobal == V5 && invalidateL1 == V6) \
{ \
    __builtin_IB_memfence(true, V1, false, false, false, V5, V6); \
}

    MEMFENCE_IF(false, false, false)
    else MEMFENCE_IF(false, false, true)
    else MEMFENCE_IF(false, true, false)
    else MEMFENCE_IF(false, true, true)
    else MEMFENCE_IF(true, false, false)
    else MEMFENCE_IF(true, false, true)
    else MEMFENCE_IF(true, true, false)
    else MEMFENCE_IF(true, true, true)

#undef MEMFENCE_IF
}

void __intel_memfence(bool flushRW, bool isGlobal, bool invalidateL1)
{
    __builtin_IB_memfence(true, flushRW, false, false, false, isGlobal, invalidateL1);
}

void __intel_memfence_handler(bool flushRW, bool isGlobal, bool invalidateL1)
{
    if (__OptDisable)
        __intel_memfence_optnone(flushRW, isGlobal, invalidateL1);
    else
        __intel_memfence(flushRW, isGlobal, invalidateL1);
}

// TYPEDMEMFENCE IMPLEMENTATION

void __attribute__((optnone)) __intel_typedmemfence_optnone(bool invalidateL1)
{
    if (invalidateL1)
        __builtin_IB_typedmemfence(true);
    else
        __builtin_IB_typedmemfence(false);
}

void __intel_typedmemfence(bool invalidateL1)
{
    __builtin_IB_typedmemfence(invalidateL1);
}

void __intel_typedmemfence_handler(bool invalidateL1)
{
    if (__OptDisable)
        __intel_typedmemfence_optnone(invalidateL1);
    else
        __intel_typedmemfence(invalidateL1);
}

// Barrier Instructions

static void __intel_atomic_work_item_fence( Scope_t Memory, uint Semantics )
{
    bool fence = Semantics & ( Acquire | Release | AcquireRelease | SequentiallyConsistent );

    bool invalidateL1 = Semantics & ( Acquire | AcquireRelease | SequentiallyConsistent );

    // We always need to 'fence' image memory (aka, flush caches, drain pipelines)
    fence |= ( Semantics & ImageMemory );

    if (fence)
    {
        if (Semantics & ImageMemory)
        {
            // An image fence requires a fence with R/W invalidate (L3 flush) + a flush
            // of the sampler cache
            __intel_typedmemfence_handler(invalidateL1);
        }
        // A global/local memory fence generally requires a hardware fence,
        // although some platforms may elide it; platform-specific checks are
        // performed in codegen.
        if (Semantics & WorkgroupMemory)
        {
            __intel_memfence_handler(false, false, false);
        }
        if (Semantics & CrossWorkgroupMemory)
        {
            bool flushL3 = Memory == Device || Memory == CrossDevice;
            __intel_memfence_handler(flushL3, true, invalidateL1);
        }
    }
}
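// Worked example (illustrative only, not part of the implementation): an
// OpenCL atomic_work_item_fence(CLK_GLOBAL_MEM_FENCE, memory_order_seq_cst,
// memory_scope_device) reaches the helper above roughly as
//
//   __intel_atomic_work_item_fence(Device, SequentiallyConsistent | CrossWorkgroupMemory);
//
// Here both fence and invalidateL1 evaluate to true, only the
// CrossWorkgroupMemory branch fires, and Memory == Device makes flushL3 true,
// so the net effect is
//
//   __intel_memfence_handler(true, true, true);
//
// i.e. a global hardware fence with an L3 flush and an L1 invalidate.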
void SPIRV_OVERLOADABLE SPIRV_BUILTIN(ControlBarrier, _i32_i32_i32, )(int Execution, int Memory, int Semantics)
{
    if (Execution != Subgroup)
    {
        // a subgroup barrier requires no fence
        __intel_atomic_work_item_fence( Memory, Semantics );
    }

    if( Execution <= Workgroup )
    {
        __builtin_IB_thread_group_barrier();
    }
    else if( Execution == Subgroup )
    {
        // nothing will be emitted, but we need to prevent optimizations from
        // splitting the control flow
        __builtin_IB_sub_group_barrier();
    }
}

void SPIRV_OVERLOADABLE SPIRV_BUILTIN(MemoryBarrier, _i32_i32, )(int Memory, int Semantics)
{
    __intel_atomic_work_item_fence( Memory, Semantics );
}


// Named Barrier

void __intel_getInitializedNamedBarrierArray(local uint* id)
{
    *id = 0;
    SPIRV_BUILTIN(ControlBarrier, _i32_i32_i32, )( Workgroup, 0, SequentiallyConsistent | WorkgroupMemory );
}

bool __intel_is_first_work_group_item( void );

local __namedBarrier* __builtin_spirv_OpNamedBarrierInitialize_i32_p3__namedBarrier_p3i32(int SubGroupCount, local __namedBarrier* nb_array, local uint* id)
{
    local __namedBarrier* NB = &nb_array[*id];
    NB->count = SubGroupCount;
    NB->orig_count = SubGroupCount;
    NB->inc = 0;
    SPIRV_BUILTIN(ControlBarrier, _i32_i32_i32, )( Workgroup, 0, SequentiallyConsistent | WorkgroupMemory );
    if (__intel_is_first_work_group_item())
    {
        (*id)++;
    }
    SPIRV_BUILTIN(ControlBarrier, _i32_i32_i32, )( Workgroup, 0, SequentiallyConsistent | WorkgroupMemory );
    return NB;
}


static INLINE OVERLOADABLE
uint AtomicCompareExchange(local uint *Pointer, uint Scope, uint Equal, uint Unequal, uint Value, uint Comparator)
{
    return SPIRV_BUILTIN(AtomicCompareExchange, _p3i32_i32_i32_i32_i32_i32, )((local int*)Pointer, Scope, Equal, Unequal, Value, Comparator);
}

static INLINE
uint SubgroupLocalId()
{
    return SPIRV_BUILTIN_NO_OP(BuiltInSubgroupLocalInvocationId, , )();
}

static INLINE OVERLOADABLE
uint AtomicLoad(local uint *Pointer, uint Scope, uint Semantics)
{
    return SPIRV_BUILTIN(AtomicLoad, _p3i32_i32_i32, )((local int*)Pointer, Scope, Semantics);
}

static INLINE OVERLOADABLE
void AtomicStore(local uint *Pointer, uint Scope, uint Semantics, uint Value)
{
    SPIRV_BUILTIN(AtomicStore, _p3i32_i32_i32_i32, )((local int*)Pointer, Scope, Semantics, Value);
}

static INLINE OVERLOADABLE
uint AtomicInc(local uint *Pointer, uint Scope, uint Semantics)
{
    return SPIRV_BUILTIN(AtomicIIncrement, _p3i32_i32_i32, )((local int*)Pointer, Scope, Semantics);
}

static INLINE
uint Broadcast(uint Execution, uint Value, uint3 LocalId)
{
    return SPIRV_BUILTIN(GroupBroadcast, _i32_i32_v3i32, )(Execution, as_int(Value), as_int3(LocalId));
}

static INLINE OVERLOADABLE
uint SubgroupAtomicCompareExchange(local uint *Pointer, uint Scope, uint Equal, uint Unequal, uint Value, uint Comparator)
{
    uint result = 0;
    if (SubgroupLocalId() == 0)
        result = AtomicCompareExchange((volatile local uint*)Pointer, Scope, Equal, Unequal, Value, Comparator);
    result = Broadcast(Subgroup, result, (uint3)0);
    return result;
}

static INLINE OVERLOADABLE
uint SubgroupAtomicInc(local uint *Pointer, uint Scope, uint Semantics)
{
    uint result = 0;
    if (SubgroupLocalId() == 0)
        result = AtomicInc((volatile local uint*)Pointer, Scope, Semantics);
    result = Broadcast(Subgroup, result, (uint3)0);
    return result;
}
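// The Subgroup* helpers above implement subgroup-uniform atomics: only lane 0
// touches memory and the result is broadcast, so each subgroup performs exactly
// one atomic operation. A sketch of the pattern they follow (assuming a
// subgroup-uniform Pointer and convergent callers):
//
//   uint result = 0;
//   if (SubgroupLocalId() == 0)                      // one lane does the atomic
//       result = AtomicInc(Pointer, Scope, Semantics);
//   result = Broadcast(Subgroup, result, (uint3)0);  // all lanes see its result
//
// This is why OpMemoryNamedBarrier below can decrement 'count' once per
// subgroup rather than once per work item.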
static void MemoryBarrier(Scope_t Memory, uint Semantics)
{
    SPIRV_BUILTIN(MemoryBarrier, _i32_i32, )(Memory, Semantics);
}

void __builtin_spirv_OpMemoryNamedBarrier_p3__namedBarrier_i32_i32(local __namedBarrier* NB, Scope_t Memory, uint Semantics)
{
    const uint AtomSema = SequentiallyConsistent | WorkgroupMemory;
    // Arrival phase: one CAS per subgroup decrements 'count' by 1.
    while (1)
    {
        const uint cnt = AtomicLoad(&NB->count, Workgroup, AtomSema);
        if (cnt > 0)
        {
            uint before = SubgroupAtomicCompareExchange(&NB->count, Workgroup, AtomSema, AtomSema, cnt - 1, cnt);
            if (before == cnt)
            {
                break;
            }
        }
    }

    // Wait until all participating subgroups have arrived.
    while (AtomicLoad(&NB->count, Workgroup, AtomSema) > 0);
    MemoryBarrier(Memory, Semantics);
    // Departure phase: the last subgroup to increment 'inc' re-arms the barrier.
    uint inc = SubgroupAtomicInc(&NB->inc, Workgroup, AtomSema);
    if (inc == ((NB->orig_count) - 1))
    {
        AtomicStore(&NB->inc, Workgroup, AtomSema, 0);
        AtomicStore(&NB->count, Workgroup, AtomSema, NB->orig_count);
    }
}

void __builtin_spirv_OpMemoryNamedBarrierWrapperOCL_p3__namedBarrier_i32(local __namedBarrier* barrier, cl_mem_fence_flags flags)
{
    __builtin_spirv_OpMemoryNamedBarrier_p3__namedBarrier_i32_i32(barrier, Workgroup, AcquireRelease | get_spirv_mem_fence(flags));
}

void __builtin_spirv_OpMemoryNamedBarrierWrapperOCL_p3__namedBarrier_i32_i32(local __namedBarrier* barrier, cl_mem_fence_flags flags, memory_scope scope)
{
    __builtin_spirv_OpMemoryNamedBarrier_p3__namedBarrier_i32_i32(barrier, get_spirv_mem_scope(scope), AcquireRelease | get_spirv_mem_fence(flags));
}
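// Protocol walkthrough (illustrative, assuming a barrier initialized with
// SubGroupCount == 2): each arriving subgroup CASes 'count' from cnt to
// cnt - 1, spins until 'count' reaches 0, applies the requested memory fence,
// and then increments 'inc'. The last subgroup to increment 'inc'
// (inc == orig_count - 1) resets inc to 0 and count to orig_count, re-arming
// the barrier for the next use.
//
// A hypothetical kernel-side sequence using the entry points in this file
// (normally the compiler generates these calls; the array size and subgroup
// count here are made up for illustration):
//
//   local __namedBarrier nb_array[1];
//   local uint id;
//   __intel_getInitializedNamedBarrierArray(&id);
//   local __namedBarrier* nb =
//       __builtin_spirv_OpNamedBarrierInitialize_i32_p3__namedBarrier_p3i32(2, nb_array, &id);
//   // ... each of the 2 participating subgroups then synchronizes with:
//   __builtin_spirv_OpMemoryNamedBarrierWrapperOCL_p3__namedBarrier_i32(nb, CLK_LOCAL_MEM_FENCE);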