1 //===- OCLUtil.h - OCL Utilities declarations -------------------*- C++ -*-===//
2 //
3 //                     The LLVM/SPIRV Translator
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 // Copyright (c) 2014 Advanced Micro Devices, Inc. All rights reserved.
9 //
10 // Permission is hereby granted, free of charge, to any person obtaining a
11 // copy of this software and associated documentation files (the "Software"),
12 // to deal with the Software without restriction, including without limitation
13 // the rights to use, copy, modify, merge, publish, distribute, sublicense,
14 // and/or sell copies of the Software, and to permit persons to whom the
15 // Software is furnished to do so, subject to the following conditions:
16 //
17 // Redistributions of source code must retain the above copyright notice,
18 // this list of conditions and the following disclaimers.
19 // Redistributions in binary form must reproduce the above copyright notice,
20 // this list of conditions and the following disclaimers in the documentation
21 // and/or other materials provided with the distribution.
22 // Neither the names of Advanced Micro Devices, Inc., nor the names of its
23 // contributors may be used to endorse or promote products derived from this
24 // Software without specific prior written permission.
25 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
26 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
27 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
28 // CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
29 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
30 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH
31 // THE SOFTWARE.
32 //
33 //===----------------------------------------------------------------------===//
34 //
35 // This file declares OCL utility functions.
36 //
37 //===----------------------------------------------------------------------===//
38 
39 #ifndef SPIRV_OCLUTIL_H
40 #define SPIRV_OCLUTIL_H
41 
42 #include "SPIRVInternal.h"
43 #include "llvm/ADT/SmallString.h"
44 #include "llvm/IR/IRBuilder.h"
45 #include "llvm/IR/Instructions.h"
46 #include "llvm/Support/Path.h"
47 
48 #include <atomic>
49 #include <functional>
50 #include <tuple>
51 #include <type_traits>
52 #include <utility>
53 using namespace SPIRV;
54 using namespace llvm;
55 using namespace spv;
56 
57 namespace OCLUtil {
58 
59 ///////////////////////////////////////////////////////////////////////////////
60 //
61 // Enums
62 //
63 ///////////////////////////////////////////////////////////////////////////////
64 
/// Individual OpenCL mem_fence flag bits. Each enumerator occupies its own
/// bit so that several of them can be combined with bitwise OR.
enum OCLMemFenceKind {
  OCLMF_Local = 1 << 0,
  OCLMF_Global = 1 << 1,
  OCLMF_Image = 1 << 2,
};
70 
71 // This enum declares extra constants for OpenCL mem_fence flag. It includes
72 // combinations of local/global/image flags.
73 enum OCLMemFenceExtendedKind {
74   OCLMFEx_Local = OCLMF_Local,
75   OCLMFEx_Global = OCLMF_Global,
76   OCLMFEx_Local_Global = OCLMF_Global | OCLMF_Local,
77   OCLMFEx_Image = OCLMF_Image,
78   OCLMFEx_Image_Local = OCLMF_Image | OCLMF_Local,
79   OCLMFEx_Image_Global = OCLMF_Image | OCLMF_Global,
80   OCLMFEx_Image_Local_Global = OCLMF_Image | OCLMF_Global | OCLMF_Local,
81 };
82 
/// OpenCL scope constants. The numeric values are spelled out explicitly;
/// they are the same values the enumerators receive by position.
enum OCLScopeKind {
  OCLMS_work_item = 0,
  OCLMS_work_group = 1,
  OCLMS_device = 2,
  OCLMS_all_svm_devices = 3,
  OCLMS_sub_group = 4,
};
90 
// The enum below declares constants corresponding to the memory
// synchronization order constants defined in
// https://www.khronos.org/registry/OpenCL/sdk/2.1/docs/man/xhtml/memory_order.html
// To avoid any inconsistency here, constants are explicitly initialized with
// the corresponding constants from the 'std::memory_order' enum.
//
// The namespace-scope names (std::memory_order_relaxed, ...) are used with an
// explicit cast on purpose: since C++20 std::memory_order is a scoped
// enumeration, so the nested spelling
// 'std::memory_order::memory_order_relaxed' no longer exists and a scoped
// enumerator does not convert to an integer implicitly. This form is valid
// from C++11 onwards and yields the same values.
enum OCLMemOrderKind {
  OCLMO_relaxed = static_cast<int>(std::memory_order_relaxed),
  OCLMO_acquire = static_cast<int>(std::memory_order_acquire),
  OCLMO_release = static_cast<int>(std::memory_order_release),
  OCLMO_acq_rel = static_cast<int>(std::memory_order_acq_rel),
  OCLMO_seq_cst = static_cast<int>(std::memory_order_seq_cst)
};
103 
/// Single-bit flag values (0x1, 0x2, 0x4, 0x8).
enum IntelFPGAMemoryAccessesVal {
  BurstCoalesce = 1 << 0,
  CacheSizeFlag = 1 << 1,
  DontStaticallyCoalesce = 1 << 2,
  PrefetchFlag = 1 << 3
};
110 
111 ///////////////////////////////////////////////////////////////////////////////
112 //
113 // Types
114 //
115 ///////////////////////////////////////////////////////////////////////////////
116 
117 typedef SPIRVMap<OCLMemFenceKind, MemorySemanticsMask> OCLMemFenceMap;
118 
119 typedef SPIRVMap<OCLMemFenceExtendedKind, MemorySemanticsMask>
120     OCLMemFenceExtendedMap;
121 
122 typedef SPIRVMap<OCLMemOrderKind, unsigned, MemorySemanticsMask> OCLMemOrderMap;
123 
124 typedef SPIRVMap<OCLScopeKind, Scope> OCLMemScopeMap;
125 
126 typedef SPIRVMap<std::string, SPIRVGroupOperationKind>
127     SPIRSPIRVGroupOperationMap;
128 
129 typedef SPIRVMap<std::string, SPIRVFPRoundingModeKind>
130     SPIRSPIRVFPRoundingModeMap;
131 
132 typedef SPIRVMap<std::string, Op, SPIRVInstruction> OCLSPIRVBuiltinMap;
133 
134 class OCL12Builtin;
135 typedef SPIRVMap<std::string, Op, OCL12Builtin> OCL12SPIRVBuiltinMap;
136 
137 typedef SPIRVMap<std::string, SPIRVBuiltinVariableKind>
138     SPIRSPIRVBuiltinVariableMap;
139 
140 /// Tuple of literals for atomic_work_item_fence (flag, order, scope)
141 typedef std::tuple<unsigned, OCLMemOrderKind, OCLScopeKind>
142     AtomicWorkItemFenceLiterals;
143 
144 /// Tuple of literals for work_group_barrier or sub_group_barrier
145 ///     (flag, mem_scope, exec_scope)
146 typedef std::tuple<unsigned, OCLScopeKind, OCLScopeKind> BarrierLiterals;
147 
148 class OCLOpaqueType;
149 typedef SPIRVMap<std::string, Op, OCLOpaqueType> OCLOpaqueTypeOpCodeMap;
150 
151 /// Information for translating OCL builtin.
152 struct OCLBuiltinTransInfo {
153   std::string UniqName;
154   std::string MangledName;
155   std::string Postfix; // Postfix to be added
156   /// Postprocessor of operands
157   std::function<void(std::vector<Value *> &)> PostProc;
158   Type *RetTy;      // Return type of the translated function
159   bool IsRetSigned; // When RetTy is int, determines if extensions
160                     // on it should be a sext or zet.
OCLBuiltinTransInfoOCLBuiltinTransInfo161   OCLBuiltinTransInfo() : RetTy(nullptr), IsRetSigned(false) {
162     PostProc = [](std::vector<Value *> &) {};
163   }
164 };
165 
166 ///////////////////////////////////////////////////////////////////////////////
167 //
168 // Constants
169 //
170 ///////////////////////////////////////////////////////////////////////////////
/// Names and name prefixes of OpenCL builtin functions.
namespace kOCLBuiltinName {
static const char All[] = "all";
static const char Any[] = "any";
// Declares ArbitraryFloat<OpName>INTEL holding
// "intel_arbitrary_float_<BuiltinSuffix>".
#define _SPIRV_OP(OpName, BuiltinSuffix)                                       \
  static const char ArbitraryFloat##OpName##INTEL[] =                          \
      "intel_arbitrary_float_" #BuiltinSuffix;
_SPIRV_OP(Cast, cast)
_SPIRV_OP(CastFromInt, cast_from_int)
_SPIRV_OP(CastToInt, cast_to_int)
_SPIRV_OP(Add, add)
_SPIRV_OP(Sub, sub)
_SPIRV_OP(Mul, mul)
_SPIRV_OP(Div, div)
_SPIRV_OP(GT, gt)
_SPIRV_OP(GE, ge)
_SPIRV_OP(LT, lt)
_SPIRV_OP(LE, le)
_SPIRV_OP(EQ, eq)
_SPIRV_OP(Recip, recip)
_SPIRV_OP(RSqrt, rsqrt)
_SPIRV_OP(Cbrt, cbrt)
_SPIRV_OP(Hypot, hypot)
_SPIRV_OP(Sqrt, sqrt)
_SPIRV_OP(Log, log)
_SPIRV_OP(Log2, log2)
_SPIRV_OP(Log10, log10)
_SPIRV_OP(Log1p, log1p)
_SPIRV_OP(Exp, exp)
_SPIRV_OP(Exp2, exp2)
_SPIRV_OP(Exp10, exp10)
_SPIRV_OP(Expm1, expm1)
_SPIRV_OP(Sin, sin)
_SPIRV_OP(Cos, cos)
_SPIRV_OP(SinCos, sincos)
_SPIRV_OP(SinPi, sinpi)
_SPIRV_OP(CosPi, cospi)
_SPIRV_OP(SinCosPi, sincospi)
_SPIRV_OP(ASin, asin)
_SPIRV_OP(ASinPi, asinpi)
_SPIRV_OP(ACos, acos)
_SPIRV_OP(ACosPi, acospi)
_SPIRV_OP(ATan, atan)
_SPIRV_OP(ATanPi, atanpi)
_SPIRV_OP(ATan2, atan2)
_SPIRV_OP(Pow, pow)
_SPIRV_OP(PowR, powr)
_SPIRV_OP(PowN, pown)
#undef _SPIRV_OP
static const char AsyncWorkGroupCopy[] = "async_work_group_copy";
static const char AsyncWorkGroupStridedCopy[] = "async_work_group_strided_copy";
static const char AtomPrefix[] = "atom_";
static const char AtomCmpXchg[] = "atom_cmpxchg";
static const char AtomicPrefix[] = "atomic_";
static const char AtomicCmpXchg[] = "atomic_cmpxchg";
static const char AtomicCmpXchgStrong[] = "atomic_compare_exchange_strong";
static const char AtomicCmpXchgStrongExplicit[] =
    "atomic_compare_exchange_strong_explicit";
static const char AtomicCmpXchgWeak[] = "atomic_compare_exchange_weak";
static const char AtomicCmpXchgWeakExplicit[] =
    "atomic_compare_exchange_weak_explicit";
static const char AtomicInit[] = "atomic_init";
static const char AtomicWorkItemFence[] = "atomic_work_item_fence";
static const char Barrier[] = "barrier";
static const char Clamp[] = "clamp";
static const char ConvertPrefix[] = "convert_";
static const char Dot[] = "dot";
static const char EnqueueKernel[] = "enqueue_kernel";
static const char FixedSqrtINTEL[] = "intel_arbitrary_fixed_sqrt";
static const char FixedRecipINTEL[] = "intel_arbitrary_fixed_recip";
static const char FixedRsqrtINTEL[] = "intel_arbitrary_fixed_rsqrt";
static const char FixedSinINTEL[] = "intel_arbitrary_fixed_sin";
static const char FixedCosINTEL[] = "intel_arbitrary_fixed_cos";
static const char FixedSinCosINTEL[] = "intel_arbitrary_fixed_sincos";
static const char FixedSinPiINTEL[] = "intel_arbitrary_fixed_sinpi";
static const char FixedCosPiINTEL[] = "intel_arbitrary_fixed_cospi";
static const char FixedSinCosPiINTEL[] = "intel_arbitrary_fixed_sincospi";
static const char FixedLogINTEL[] = "intel_arbitrary_fixed_log";
static const char FixedExpINTEL[] = "intel_arbitrary_fixed_exp";
static const char FMax[] = "fmax";
static const char FMin[] = "fmin";
static const char FPGARegIntel[] = "__builtin_intel_fpga_reg";
static const char GetFence[] = "get_fence";
static const char GetImageArraySize[] = "get_image_array_size";
static const char GetImageChannelOrder[] = "get_image_channel_order";
static const char GetImageChannelDataType[] = "get_image_channel_data_type";
static const char GetImageDepth[] = "get_image_depth";
static const char GetImageDim[] = "get_image_dim";
static const char GetImageHeight[] = "get_image_height";
static const char GetImageWidth[] = "get_image_width";
static const char IsFinite[] = "isfinite";
static const char IsNan[] = "isnan";
static const char IsNormal[] = "isnormal";
static const char IsInf[] = "isinf";
static const char Max[] = "max";
static const char MemFence[] = "mem_fence";
static const char ReadMemFence[] = "read_mem_fence";
static const char WriteMemFence[] = "write_mem_fence";
static const char Min[] = "min";
static const char Mix[] = "mix";
static const char NDRangePrefix[] = "ndrange_";
static const char Pipe[] = "pipe";
static const char ReadImage[] = "read_image";
static const char ReadPipe[] = "read_pipe";
static const char ReadPipeBlockingINTEL[] = "read_pipe_bl";
static const char RoundingPrefix[] = "_r";
static const char Sampled[] = "sampled_";
static const char SampledReadImage[] = "sampled_read_image";
static const char Signbit[] = "signbit";
static const char SmoothStep[] = "smoothstep";
static const char Step[] = "step";
static const char SubGroupPrefix[] = "sub_group_";
static const char SubGroupBarrier[] = "sub_group_barrier";
static const char SubPrefix[] = "sub_";
static const char ToGlobal[] = "to_global";
static const char ToLocal[] = "to_local";
static const char ToPrivate[] = "to_private";
static const char VLoadPrefix[] = "vload";
static const char VLoadAPrefix[] = "vloada";
static const char VLoadHalf[] = "vload_half";
static const char VStorePrefix[] = "vstore";
static const char VStoreAPrefix[] = "vstorea";
static const char WaitGroupEvent[] = "wait_group_events";
static const char WriteImage[] = "write_image";
static const char WorkGroupBarrier[] = "work_group_barrier";
static const char WritePipe[] = "write_pipe";
static const char WritePipeBlockingINTEL[] = "write_pipe_bl";
static const char WorkGroupPrefix[] = "work_group_";
static const char WorkGroupAll[] = "work_group_all";
static const char WorkGroupAny[] = "work_group_any";
static const char SubGroupAll[] = "sub_group_all";
static const char SubGroupAny[] = "sub_group_any";
static const char WorkPrefix[] = "work_";
static const char SubgroupBlockReadINTELPrefix[] = "intel_sub_group_block_read";
static const char SubgroupBlockWriteINTELPrefix[] =
    "intel_sub_group_block_write";
static const char SubgroupImageMediaBlockINTELPrefix[] =
    "intel_sub_group_media_block";
} // namespace kOCLBuiltinName
308 
/// Offset for OpenCL image channel order enumeration values.
constexpr unsigned OCLImageChannelOrderOffset = 0x10B0;

/// Offset for OpenCL image channel data type enumeration values.
constexpr unsigned OCLImageChannelDataTypeOffset = 0x10D0;
314 
315 /// OCL 1.x atomic memory order when translated to 2.0 atomics.
316 const OCLMemOrderKind OCLLegacyAtomicMemOrder = OCLMO_relaxed;
317 
318 /// OCL 1.x atomic memory scope when translated to 2.0 atomics.
319 const OCLScopeKind OCLLegacyAtomicMemScope = OCLMS_work_group;
320 
/// Encoded OpenCL version numbers (Major*10^5 + Minor*10^3 + Rev).
namespace kOCLVer {
constexpr unsigned CL12 = 102000; // OpenCL 1.2
constexpr unsigned CL20 = 200000; // OpenCL 2.0
constexpr unsigned CL21 = 201000; // OpenCL 2.1
constexpr unsigned CL30 = 300000; // OpenCL 3.0
} // namespace kOCLVer
327 
namespace OclExt {
/// Enumerators are numbered consecutively from zero in the order listed, so
/// the order must not be changed.
enum Kind {
  cl_images,
  cl_doubles,
  cl_khr_int64_base_atomics,
  cl_khr_int64_extended_atomics,
  cl_khr_fp16,
  cl_khr_gl_sharing,
  cl_khr_gl_event,
  cl_khr_d3d10_sharing,
  cl_khr_media_sharing,
  cl_khr_d3d11_sharing,
  cl_khr_global_int32_base_atomics,
  cl_khr_global_int32_extended_atomics,
  cl_khr_local_int32_base_atomics,
  cl_khr_local_int32_extended_atomics,
  cl_khr_byte_addressable_store,
  cl_khr_3d_image_writes,
  cl_khr_gl_msaa_sharing,
  cl_khr_depth_images,
  cl_khr_gl_depth_images,
  cl_khr_subgroups,
  cl_khr_mipmap_image,
  cl_khr_mipmap_image_writes,
  cl_khr_egl_event,
  cl_khr_srgb_image_writes,
  cl_khr_extended_bit_ops,
};
} // namespace OclExt
/// Name prefixes for the intel_sub_group_avc_* builtins and types.
namespace kOCLSubgroupsAVCIntel {
static const char Prefix[] = "intel_sub_group_avc_";
static const char MCEPrefix[] = "intel_sub_group_avc_mce_";
static const char IMEPrefix[] = "intel_sub_group_avc_ime_";
static const char REFPrefix[] = "intel_sub_group_avc_ref_";
static const char SICPrefix[] = "intel_sub_group_avc_sic_";
static const char TypePrefix[] = "opencl.intel_sub_group_avc_";
} // namespace kOCLSubgroupsAVCIntel
369 
370 ///////////////////////////////////////////////////////////////////////////////
371 //
372 // Functions
373 //
374 ///////////////////////////////////////////////////////////////////////////////
375 
376 /// Get instruction index for SPIR-V extended instruction for OpenCL.std
377 ///   extended instruction set.
378 /// \param MangledName The mangled name of OpenCL builtin function.
379 /// \param DemangledName The demangled name of OpenCL builtin function if
380 ///   not empty.
381 /// \return instruction index of extended instruction if the OpenCL builtin
382 ///   function is translated to an extended instruction, otherwise ~0U.
383 unsigned getExtOp(StringRef MangledName, StringRef DemangledName = "");
384 
385 /// Get literal arguments of call of atomic_work_item_fence.
386 AtomicWorkItemFenceLiterals getAtomicWorkItemFenceLiterals(CallInst *CI);
387 
388 /// Get literal arguments of call of work_group_barrier or sub_group_barrier.
389 BarrierLiterals getBarrierLiterals(CallInst *CI);
390 
391 /// Get number of memory order arguments for atomic builtin function.
392 size_t getAtomicBuiltinNumMemoryOrderArgs(StringRef Name);
393 
394 /// Get number of memory order arguments for spirv atomic builtin function.
395 size_t getSPIRVAtomicBuiltinNumMemoryOrderArgs(Op OC);
396 
397 /// Return true for OpenCL builtins which do compute operations
398 /// (like add, sub, min, max, inc, dec, ...) atomically
399 bool isComputeAtomicOCLBuiltin(StringRef DemangledName);
400 
401 /// Get OCL version from metadata opencl.ocl.version.
402 /// \param AllowMulti Allows multiple operands if true.
403 /// \return OCL version encoded as Major*10^5+Minor*10^3+Rev,
404 /// e.g. 201000 for OCL 2.1, 200000 for OCL 2.0, 102000 for OCL 1.2,
405 /// 0 if metadata not found.
406 /// If there are multiple operands, check they are identical.
407 unsigned getOCLVersion(Module *M, bool AllowMulti = false);
408 
409 /// Encode OpenCL version as Major*10^5+Minor*10^3+Rev.
410 unsigned encodeOCLVer(unsigned short Major, unsigned char Minor,
411                       unsigned char Rev);
412 
413 /// Decode OpenCL version which is encoded as Major*10^5+Minor*10^3+Rev
414 std::tuple<unsigned short, unsigned char, unsigned char>
415 decodeOCLVer(unsigned Ver);
416 
417 /// Decode a MDNode assuming it contains three integer constants.
418 void decodeMDNode(MDNode *N, unsigned &X, unsigned &Y, unsigned &Z);
419 
420 /// Get full path from debug info metadata
421 /// Return empty string if the path is not available.
getFullPath(const T * Scope)422 template <typename T> std::string getFullPath(const T *Scope) {
423   if (!Scope)
424     return std::string();
425   std::string Filename = Scope->getFilename().str();
426   if (sys::path::is_absolute(Filename))
427     return Filename;
428   SmallString<16> DirName = Scope->getDirectory();
429   sys::path::append(DirName, sys::path::Style::posix, Filename);
430   return DirName.str().str();
431 }
432 
433 /// Decode OpenCL vector type hint MDNode and encode it as SPIR-V execution
434 /// mode VecTypeHint.
435 unsigned transVecTypeHint(MDNode *Node);
436 
437 /// Decode SPIR-V encoding of vector type hint execution mode.
438 Type *decodeVecTypeHint(LLVMContext &C, unsigned Code);
439 
440 SPIRAddressSpace getOCLOpaqueTypeAddrSpace(Op OpCode);
441 SPIR::TypeAttributeEnum getOCLOpaqueTypeAddrSpace(SPIR::TypePrimitiveEnum Prim);
442 
mapOCLMemSemanticToSPIRV(unsigned MemFenceFlag,OCLMemOrderKind Order)443 inline unsigned mapOCLMemSemanticToSPIRV(unsigned MemFenceFlag,
444                                          OCLMemOrderKind Order) {
445   return OCLMemOrderMap::map(Order) | mapBitMask<OCLMemFenceMap>(MemFenceFlag);
446 }
447 
mapOCLMemFenceFlagToSPIRV(unsigned MemFenceFlag)448 inline unsigned mapOCLMemFenceFlagToSPIRV(unsigned MemFenceFlag) {
449   return mapBitMask<OCLMemFenceMap>(MemFenceFlag);
450 }
451 
452 inline std::pair<unsigned, OCLMemOrderKind>
mapSPIRVMemSemanticToOCL(unsigned Sema)453 mapSPIRVMemSemanticToOCL(unsigned Sema) {
454   return std::make_pair(
455       rmapBitMask<OCLMemFenceMap>(Sema),
456       OCLMemOrderMap::rmap(extractSPIRVMemOrderSemantic(Sema)));
457 }
458 
mapSPIRVMemOrderToOCL(unsigned Sema)459 inline OCLMemOrderKind mapSPIRVMemOrderToOCL(unsigned Sema) {
460   return OCLMemOrderMap::rmap(extractSPIRVMemOrderSemantic(Sema));
461 }
462 
463 /// Mutate call instruction to call OpenCL builtin function.
464 CallInst *mutateCallInstOCL(
465     Module *M, CallInst *CI,
466     std::function<std::string(CallInst *, std::vector<Value *> &)> ArgMutate,
467     AttributeList *Attrs = nullptr);
468 
469 /// Mutate call instruction to call OpenCL builtin function.
470 Instruction *mutateCallInstOCL(
471     Module *M, CallInst *CI,
472     std::function<std::string(CallInst *, std::vector<Value *> &, Type *&RetTy)>
473         ArgMutate,
474     std::function<Instruction *(CallInst *)> RetMutate,
475     AttributeList *Attrs = nullptr, bool TakeFuncName = false);
476 
477 /// Check if instruction is bitcast from spirv.ConstantSampler to spirv.Sampler
478 bool isSamplerInitializer(Instruction *Inst);
479 
480 /// Check if instruction is bitcast from spirv.ConstantPipeStorage
481 /// to spirv.PipeStorage
482 bool isPipeStorageInitializer(Instruction *Inst);
483 
484 /// Check (isSamplerInitializer || isPipeStorageInitializer)
485 bool isSpecialTypeInitializer(Instruction *Inst);
486 
487 bool isPipeOrAddressSpaceCastBI(const StringRef MangledName);
488 bool isEnqueueKernelBI(const StringRef MangledName);
489 bool isKernelQueryBI(const StringRef MangledName);
490 
491 /// Check that the type is the sampler_t
492 bool isSamplerTy(Type *Ty);
493 
494 // Checks if the binary operator is an unfused fmul + fadd instruction.
495 bool isUnfusedMulAdd(BinaryOperator *B);
496 
toString(const T * Object)497 template <typename T> std::string toString(const T *Object) {
498   std::string S;
499   llvm::raw_string_ostream RSOS(S);
500   Object->print(RSOS);
501   RSOS.flush();
502   return S;
503 }
504 
505 // Get data and vector size postfix for sugroup_block_{read|write} builtins
506 // as specified by cl_intel_subgroups* extensions.
507 // Scalar data assumed to be represented as vector of one element.
508 std::string getIntelSubgroupBlockDataPostfix(unsigned ElementBitSize,
509                                              unsigned VectorNumElements);
510 
511 void insertImageNameAccessQualifier(SPIRVAccessQualifierKind Acc,
512                                     std::string &Name);
513 } // namespace OCLUtil
514 
515 using namespace OCLUtil;
516 namespace SPIRV {
517 
518 template <class KeyTy, class ValTy, class Identifier = void>
519 Instruction *
520 getOrCreateSwitchFunc(StringRef MapName, Value *V,
521                       const SPIRVMap<KeyTy, ValTy, Identifier> &Map,
522                       bool IsReverse, Optional<int> DefaultCase,
523                       Instruction *InsertPoint, int KeyMask = 0) {
524   static_assert(std::is_convertible<KeyTy, int>::value &&
525                     std::is_convertible<ValTy, int>::value,
526                 "Can map only integer values");
527   Type *Ty = V->getType();
528   assert(Ty && Ty->isIntegerTy() && "Can't map non-integer types");
529   Module *M = InsertPoint->getModule();
530   Function *F = getOrCreateFunction(M, Ty, Ty, MapName);
531   if (!F->empty()) // The switch function already exists. just call it.
532     return addCallInst(M, MapName, Ty, V, nullptr, InsertPoint);
533 
534   F->setLinkage(GlobalValue::PrivateLinkage);
535 
536   LLVMContext &Ctx = M->getContext();
537   BasicBlock *BB = BasicBlock::Create(Ctx, "entry", F);
538   IRBuilder<> IRB(BB);
539   SwitchInst *SI;
540   F->arg_begin()->setName("key");
541   if (KeyMask) {
542     Value *MaskV = ConstantInt::get(Type::getInt32Ty(Ctx), KeyMask);
543     Value *NewKey = IRB.CreateAnd(MaskV, F->arg_begin());
544     NewKey->setName("key.masked");
545     SI = IRB.CreateSwitch(NewKey, BB);
546   } else {
547     SI = IRB.CreateSwitch(F->arg_begin(), BB);
548   }
549 
550   if (!DefaultCase) {
551     BasicBlock *DefaultBB = BasicBlock::Create(Ctx, "default", F);
552     IRBuilder<> DefaultIRB(DefaultBB);
553     DefaultIRB.CreateUnreachable();
554     SI->setDefaultDest(DefaultBB);
555   }
556 
557   Map.foreach ([&](int Key, int Val) {
558     if (IsReverse)
559       std::swap(Key, Val);
560     BasicBlock *CaseBB = BasicBlock::Create(Ctx, "case." + Twine(Key), F);
561     IRBuilder<> CaseIRB(CaseBB);
562     CaseIRB.CreateRet(CaseIRB.getInt32(Val));
563     SI->addCase(IRB.getInt32(Key), CaseBB);
564     if (Key == DefaultCase)
565       SI->setDefaultDest(CaseBB);
566   });
567   assert(SI->getDefaultDest() != BB && "Invalid default destination in switch");
568   return addCallInst(M, MapName, Ty, V, nullptr, InsertPoint);
569 }
570 
571 /// Performs conversion from OpenCL memory_scope into SPIR-V Scope.
572 ///
573 /// Supports both constant and non-constant values. To handle the latter case,
574 /// function with switch..case statement will be inserted into module which
575 /// \arg InsertBefore belongs to (in order to perform mapping at runtime)
576 ///
577 /// \param [in] MemScope memory_scope value which needs to be translated
578 /// \param [in] DefaultCase default value for switch..case construct if
579 ///             dynamic mapping is used
580 /// \param [in] InsertBefore insertion point for call into conversion function
581 ///             which is generated if \arg MemScope is not a constant
582 /// \returns \c Value corresponding to SPIR-V Scope equivalent to OpenCL
583 ///          memory_scope passed in \arg MemScope
584 Value *transOCLMemScopeIntoSPIRVScope(Value *MemScope,
585                                       Optional<int> DefaultCase,
586                                       Instruction *InsertBefore);
587 
588 /// Performs conversion from OpenCL memory_order into SPIR-V Memory Semantics.
589 ///
590 /// Supports both constant and non-constant values. To handle the latter case,
591 /// function with switch..case statement will be inserted into module which
592 /// \arg InsertBefore belongs to (in order to perform mapping at runtime)
593 ///
594 /// \param [in] MemOrder memory_scope value which needs to be translated
595 /// \param [in] DefaultCase default value for switch..case construct if
596 ///             dynamic mapping is used
597 /// \param [in] InsertBefore insertion point for call into conversion function
598 ///             which is generated if \arg MemOrder is not a constant
599 /// \returns \c Value corresponding to SPIR-V Memory Semantics equivalent to
600 ///          OpenCL memory_order passed in \arg MemOrder
601 Value *transOCLMemOrderIntoSPIRVMemorySemantics(Value *MemOrder,
602                                                 Optional<int> DefaultCase,
603                                                 Instruction *InsertBefore);
604 
605 /// Performs conversion from SPIR-V Scope into OpenCL memory_scope.
606 ///
607 /// Supports both constant and non-constant values. To handle the latter case,
608 /// function with switch..case statement will be inserted into module which
609 /// \arg InsertBefore belongs to (in order to perform mapping at runtime)
610 ///
611 /// \param [in] MemScope Scope value which needs to be translated
612 /// \param [in] InsertBefore insertion point for call into conversion function
613 ///             which is generated if \arg MemScope is not a constant
614 /// \returns \c Value corresponding to  OpenCL memory_scope equivalent to SPIR-V
615 ///          Scope passed in \arg MemScope
616 Value *transSPIRVMemoryScopeIntoOCLMemoryScope(Value *MemScope,
617                                                Instruction *InsertBefore);
618 
619 /// Performs conversion from SPIR-V Memory Semantics into OpenCL memory_order.
620 ///
621 /// Supports both constant and non-constant values. To handle the latter case,
622 /// function with switch..case statement will be inserted into module which
623 /// \arg InsertBefore belongs to (in order to perform mapping at runtime)
624 ///
625 /// \param [in] MemorySemantics Memory Semantics value which needs to be
626 ///             translated
627 /// \param [in] InsertBefore insertion point for call into conversion function
628 ///             which is generated if \arg MemorySemantics is not a constant
629 /// \returns \c Value corresponding to  OpenCL memory_order equivalent to SPIR-V
630 ///          Memory Semantics passed in \arg MemorySemantics
631 Value *transSPIRVMemorySemanticsIntoOCLMemoryOrder(Value *MemorySemantics,
632                                                    Instruction *InsertBefore);
633 
634 /// Performs conversion from SPIR-V Memory Semantics into OpenCL
635 /// mem_fence_flags.
636 ///
637 /// Supports both constant and non-constant values. To handle the latter case,
638 /// function with switch..case statement will be inserted into module which
639 /// \arg InsertBefore belongs to (in order to perform mapping at runtime)
640 ///
641 /// \param [in] MemorySemantics Memory Semantics value which needs to be
642 ///             translated
643 /// \param [in] InsertBefore insertion point for call into conversion function
644 ///             which is generated if \arg MemorySemantics is not a constant
645 /// \returns \c Value corresponding to  OpenCL mem_fence_flags equivalent to
646 ///          SPIR-V Memory Semantics passed in \arg MemorySemantics
647 Value *transSPIRVMemorySemanticsIntoOCLMemFenceFlags(Value *MemorySemantics,
648                                                      Instruction *InsertBefore);
649 
650 class SPIRVSubgroupsAVCIntelInst;
651 typedef SPIRVMap<std::string, Op, SPIRVSubgroupsAVCIntelInst>
652     OCLSPIRVSubgroupAVCIntelBuiltinMap;
653 
654 typedef SPIRVMap<AtomicRMWInst::BinOp, Op> LLVMSPIRVAtomicRmwOpCodeMap;
655 
656 class SPIRVFixedPointIntelInst;
657 template <>
init()658 inline void SPIRVMap<std::string, Op, SPIRVFixedPointIntelInst>::init() {
659 #define _SPIRV_OP(x, y) add("intel_arbitrary_fixed_" #x, OpFixed##y##INTEL);
660   _SPIRV_OP(sqrt, Sqrt)
661   _SPIRV_OP(recip, Recip)
662   _SPIRV_OP(rsqrt, Rsqrt)
663   _SPIRV_OP(sin, Sin)
664   _SPIRV_OP(cos, Cos)
665   _SPIRV_OP(sincos, SinCos)
666   _SPIRV_OP(sinpi, SinPi)
667   _SPIRV_OP(cospi, CosPi)
668   _SPIRV_OP(sincospi, SinCosPi)
669   _SPIRV_OP(log, Log)
670   _SPIRV_OP(exp, Exp)
671 #undef _SPIRV_OP
672 }
673 typedef SPIRVMap<std::string, Op, SPIRVFixedPointIntelInst>
674     SPIRVFixedPointIntelMap;
675 
676 class SPIRVArbFloatIntelInst;
677 template <>
init()678 inline void SPIRVMap<std::string, Op, SPIRVArbFloatIntelInst>::init() {
679 #define _SPIRV_OP(x, y)                                                        \
680   add("intel_arbitrary_float_" #y, OpArbitraryFloat##x##INTEL);
681   _SPIRV_OP(Cast, cast)
682   _SPIRV_OP(CastFromInt, cast_from_int)
683   _SPIRV_OP(CastToInt, cast_to_int)
684   _SPIRV_OP(Add, add)
685   _SPIRV_OP(Sub, sub)
686   _SPIRV_OP(Mul, mul)
687   _SPIRV_OP(Div, div)
688   _SPIRV_OP(GT, gt)
689   _SPIRV_OP(GE, ge)
690   _SPIRV_OP(LT, lt)
691   _SPIRV_OP(LE, le)
692   _SPIRV_OP(EQ, eq)
693   _SPIRV_OP(Recip, recip)
694   _SPIRV_OP(RSqrt, rsqrt)
695   _SPIRV_OP(Cbrt, cbrt)
696   _SPIRV_OP(Hypot, hypot)
697   _SPIRV_OP(Sqrt, sqrt)
698   _SPIRV_OP(Log, log)
699   _SPIRV_OP(Log2, log2)
700   _SPIRV_OP(Log10, log10)
701   _SPIRV_OP(Log1p, log1p)
702   _SPIRV_OP(Exp, exp)
703   _SPIRV_OP(Exp2, exp2)
704   _SPIRV_OP(Exp10, exp10)
705   _SPIRV_OP(Expm1, expm1)
706   _SPIRV_OP(Sin, sin)
707   _SPIRV_OP(Cos, cos)
708   _SPIRV_OP(SinCos, sincos)
709   _SPIRV_OP(SinPi, sinpi)
710   _SPIRV_OP(CosPi, cospi)
711   _SPIRV_OP(SinCosPi, sincospi)
712   _SPIRV_OP(ASin, asin)
713   _SPIRV_OP(ASinPi, asinpi)
714   _SPIRV_OP(ACos, acos)
715   _SPIRV_OP(ACosPi, acospi)
716   _SPIRV_OP(ATan, atan)
717   _SPIRV_OP(ATanPi, atanpi)
718   _SPIRV_OP(ATan2, atan2)
719   _SPIRV_OP(Pow, pow)
720   _SPIRV_OP(PowR, powr)
721   _SPIRV_OP(PowN, pown)
722 #undef _SPIRV_OP
723 }
724 typedef SPIRVMap<std::string, Op, SPIRVArbFloatIntelInst> SPIRVArbFloatIntelMap;
725 
726 } // namespace SPIRV
727 
728 #endif // SPIRV_OCLUTIL_H
729