//===- OCLUtil.cpp - OCL Utilities ----------------------------------------===//
//
//                     The LLVM/SPIRV Translator
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
// Copyright (c) 2014 Advanced Micro Devices, Inc. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a
// copy of this software and associated documentation files (the "Software"),
// to deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
// Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimers.
// Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimers in the documentation
// and/or other materials provided with the distribution.
// Neither the names of Advanced Micro Devices, Inc., nor the names of its
// contributors may be used to endorse or promote products derived from this
// Software without specific prior written permission.
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH
// THE SOFTWARE.
//
//===----------------------------------------------------------------------===//
//
// This file implements OCL utility functions.
//
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "oclutil"

#include "OCLUtil.h"
#include "SPIRVEntry.h"
#include "SPIRVFunction.h"
#include "SPIRVInstruction.h"
#include "SPIRVInternal.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/Instructions.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"

using namespace llvm;
using namespace SPIRV;

namespace OCLUtil {

#ifndef SPIRV_OCL_SPECIAL_TYPES_DEFAULT_ADDR_SPACE
#define SPIRV_OCL_SPECIAL_TYPES_DEFAULT_ADDR_SPACE SPIRAS_Private
#endif

#ifndef SPIRV_QUEUE_T_ADDR_SPACE
#define SPIRV_QUEUE_T_ADDR_SPACE SPIRV_OCL_SPECIAL_TYPES_DEFAULT_ADDR_SPACE
#endif

#ifndef SPIRV_EVENT_T_ADDR_SPACE
#define SPIRV_EVENT_T_ADDR_SPACE SPIRV_OCL_SPECIAL_TYPES_DEFAULT_ADDR_SPACE
#endif

#ifndef SPIRV_AVC_INTEL_T_ADDR_SPACE
#define SPIRV_AVC_INTEL_T_ADDR_SPACE SPIRV_OCL_SPECIAL_TYPES_DEFAULT_ADDR_SPACE
#endif

#ifndef SPIRV_CLK_EVENT_T_ADDR_SPACE
#define SPIRV_CLK_EVENT_T_ADDR_SPACE SPIRV_OCL_SPECIAL_TYPES_DEFAULT_ADDR_SPACE
#endif

#ifndef SPIRV_SAMPLER_T_ADDR_SPACE
#define SPIRV_SAMPLER_T_ADDR_SPACE SPIRAS_Constant
#endif

#ifndef SPIRV_RESERVE_ID_T_ADDR_SPACE
#define SPIRV_RESERVE_ID_T_ADDR_SPACE SPIRV_OCL_SPECIAL_TYPES_DEFAULT_ADDR_SPACE
#endif
// Excerpt from SPIR 2.0 spec.:
//   Pipe objects are represented using pointers to the opaque %opencl.pipe LLVM
//   structure type which reside in the global address space.
#ifndef SPIRV_PIPE_ADDR_SPACE
#define SPIRV_PIPE_ADDR_SPACE SPIRAS_Global
#endif
// Excerpt from SPIR 2.0 spec.:
//   Note: Images data types reside in global memory and hence should be marked
//   as such in the "kernel arg addr space" metadata.
#ifndef SPIRV_IMAGE_ADDR_SPACE
#define SPIRV_IMAGE_ADDR_SPACE SPIRAS_Global
#endif

} // namespace OCLUtil

///////////////////////////////////////////////////////////////////////////////
//
// Map definitions
//
///////////////////////////////////////////////////////////////////////////////

using namespace OCLUtil;
namespace SPIRV {

template <> void SPIRVMap<OCLMemFenceKind, MemorySemanticsMask>::init() {
  add(OCLMF_Local, MemorySemanticsWorkgroupMemoryMask);
  add(OCLMF_Global, MemorySemanticsCrossWorkgroupMemoryMask);
  add(OCLMF_Image, MemorySemanticsImageMemoryMask);
}

template <>
void SPIRVMap<OCLMemFenceExtendedKind, MemorySemanticsMask>::init() {
  add(OCLMFEx_Local, MemorySemanticsWorkgroupMemoryMask);
  add(OCLMFEx_Global, MemorySemanticsCrossWorkgroupMemoryMask);
  add(OCLMFEx_Local_Global, MemorySemanticsWorkgroupMemoryMask |
                                MemorySemanticsCrossWorkgroupMemoryMask);
  add(OCLMFEx_Image, MemorySemanticsImageMemoryMask);
  add(OCLMFEx_Image_Local,
      MemorySemanticsWorkgroupMemoryMask | MemorySemanticsImageMemoryMask);
  add(OCLMFEx_Image_Global,
      MemorySemanticsCrossWorkgroupMemoryMask | MemorySemanticsImageMemoryMask);
  add(OCLMFEx_Image_Local_Global, MemorySemanticsWorkgroupMemoryMask |
                                      MemorySemanticsCrossWorkgroupMemoryMask |
                                      MemorySemanticsImageMemoryMask);
}

template <>
void SPIRVMap<OCLMemOrderKind, unsigned, MemorySemanticsMask>::init() {
  add(OCLMO_relaxed, MemorySemanticsMaskNone);
  add(OCLMO_acquire, MemorySemanticsAcquireMask);
  add(OCLMO_release, MemorySemanticsReleaseMask);
  add(OCLMO_acq_rel, MemorySemanticsAcquireReleaseMask);
  add(OCLMO_seq_cst, MemorySemanticsSequentiallyConsistentMask);
}

template <> void SPIRVMap<OCLScopeKind, Scope>::init() {
  add(OCLMS_work_item, ScopeInvocation);
  add(OCLMS_work_group, ScopeWorkgroup);
  add(OCLMS_device, ScopeDevice);
  add(OCLMS_all_svm_devices, ScopeCrossDevice);
  add(OCLMS_sub_group, ScopeSubgroup);
}

template <> void SPIRVMap<std::string, SPIRVGroupOperationKind>::init() {
  add("reduce", GroupOperationReduce);
  add("scan_inclusive", GroupOperationInclusiveScan);
  add("scan_exclusive", GroupOperationExclusiveScan);
  add("ballot_bit_count", GroupOperationReduce);
  add("ballot_inclusive_scan", GroupOperationInclusiveScan);
  add("ballot_exclusive_scan", GroupOperationExclusiveScan);
  add("non_uniform_reduce", GroupOperationReduce);
  add("non_uniform_scan_inclusive", GroupOperationInclusiveScan);
  add("non_uniform_scan_exclusive", GroupOperationExclusiveScan);
  add("non_uniform_reduce_logical", GroupOperationReduce);
  add("non_uniform_scan_inclusive_logical", GroupOperationInclusiveScan);
  add("non_uniform_scan_exclusive_logical", GroupOperationExclusiveScan);
  add("clustered_reduce", GroupOperationClusteredReduce);
}

template <> void SPIRVMap<std::string, SPIRVFPRoundingModeKind>::init() {
  add("rte", FPRoundingModeRTE);
  add("rtz", FPRoundingModeRTZ);
  add("rtp", FPRoundingModeRTP);
  add("rtn", FPRoundingModeRTN);
}

template <> void SPIRVMap<OclExt::Kind, std::string>::init() {
#define _SPIRV_OP(x) add(OclExt::x, #x);
  _SPIRV_OP(cl_images)
  _SPIRV_OP(cl_doubles)
  _SPIRV_OP(cl_khr_int64_base_atomics)
  _SPIRV_OP(cl_khr_int64_extended_atomics)
  _SPIRV_OP(cl_khr_fp16)
  _SPIRV_OP(cl_khr_gl_sharing)
  _SPIRV_OP(cl_khr_gl_event)
  _SPIRV_OP(cl_khr_d3d10_sharing)
  _SPIRV_OP(cl_khr_media_sharing)
  _SPIRV_OP(cl_khr_d3d11_sharing)
  _SPIRV_OP(cl_khr_global_int32_base_atomics)
  _SPIRV_OP(cl_khr_global_int32_extended_atomics)
  _SPIRV_OP(cl_khr_local_int32_base_atomics)
  _SPIRV_OP(cl_khr_local_int32_extended_atomics)
  _SPIRV_OP(cl_khr_byte_addressable_store)
  _SPIRV_OP(cl_khr_3d_image_writes)
  _SPIRV_OP(cl_khr_gl_msaa_sharing)
  _SPIRV_OP(cl_khr_depth_images)
  _SPIRV_OP(cl_khr_gl_depth_images)
  _SPIRV_OP(cl_khr_subgroups)
  _SPIRV_OP(cl_khr_mipmap_image)
  _SPIRV_OP(cl_khr_mipmap_image_writes)
  _SPIRV_OP(cl_khr_egl_event)
  _SPIRV_OP(cl_khr_srgb_image_writes)
  _SPIRV_OP(cl_khr_extended_bit_ops)
#undef _SPIRV_OP
}

template <> void SPIRVMap<OclExt::Kind, SPIRVCapabilityKind>::init() {
  add(OclExt::cl_images, CapabilityImageBasic);
  add(OclExt::cl_doubles, CapabilityFloat64);
  add(OclExt::cl_khr_int64_base_atomics, CapabilityInt64Atomics);
  add(OclExt::cl_khr_int64_extended_atomics, CapabilityInt64Atomics);
  add(OclExt::cl_khr_fp16, CapabilityFloat16);
  add(OclExt::cl_khr_subgroups, CapabilityGroups);
  add(OclExt::cl_khr_mipmap_image, CapabilityImageMipmap);
  add(OclExt::cl_khr_mipmap_image_writes, CapabilityImageMipmap);
  add(OclExt::cl_khr_extended_bit_ops, CapabilityBitInstructions);
}

/// Map OpenCL work functions to SPIR-V builtin variables.
template <> void SPIRVMap<std::string, SPIRVBuiltinVariableKind>::init() {
  add("get_work_dim", BuiltInWorkDim);
  add("get_global_size", BuiltInGlobalSize);
  add("get_global_id", BuiltInGlobalInvocationId);
  add("get_global_offset", BuiltInGlobalOffset);
  add("get_local_size", BuiltInWorkgroupSize);
  add("get_enqueued_local_size", BuiltInEnqueuedWorkgroupSize);
  add("get_local_id", BuiltInLocalInvocationId);
  add("get_num_groups", BuiltInNumWorkgroups);
  add("get_group_id", BuiltInWorkgroupId);
  add("get_global_linear_id", BuiltInGlobalLinearId);
  add("get_local_linear_id", BuiltInLocalInvocationIndex);
  // cl_khr_subgroups
  add("get_sub_group_size", BuiltInSubgroupSize);
  add("get_max_sub_group_size", BuiltInSubgroupMaxSize);
  add("get_num_sub_groups", BuiltInNumSubgroups);
  add("get_enqueued_num_sub_groups", BuiltInNumEnqueuedSubgroups);
  add("get_sub_group_id", BuiltInSubgroupId);
  add("get_sub_group_local_id", BuiltInSubgroupLocalInvocationId);
  // cl_khr_subgroup_ballot
  add("get_sub_group_eq_mask", BuiltInSubgroupEqMask);
  add("get_sub_group_ge_mask", BuiltInSubgroupGeMask);
  add("get_sub_group_gt_mask", BuiltInSubgroupGtMask);
  add("get_sub_group_le_mask", BuiltInSubgroupLeMask);
  add("get_sub_group_lt_mask", BuiltInSubgroupLtMask);
}

// Maps a uniqued OCL builtin function name to a SPIR-V op code.
// A uniqued OCL builtin function name may differ from the real OCL builtin
// function name, e.g. atomic_umin is used instead of atomic_min when the
// parameter is an unsigned integer.
// work_group_ and sub_group_ functions are unified as group_ functions,
// except work_group_barrier.
class SPIRVInstruction;
template <> void SPIRVMap<std::string, Op, SPIRVInstruction>::init() {
#define _SPIRV_OP(x, y) add("atom_" #x, OpAtomic##y);
  // cl_khr_int64_base_atomics builtins
  _SPIRV_OP(add, IAdd)
  _SPIRV_OP(sub, ISub)
  _SPIRV_OP(xchg, Exchange)
  _SPIRV_OP(dec, IDecrement)
  _SPIRV_OP(inc, IIncrement)
  _SPIRV_OP(cmpxchg, CompareExchange)
  // cl_khr_int64_extended_atomics builtins
  _SPIRV_OP(min, SMin)
  _SPIRV_OP(max, SMax)
  _SPIRV_OP(and, And)
  _SPIRV_OP(or, Or)
  _SPIRV_OP(xor, Xor)
#undef _SPIRV_OP
#define _SPIRV_OP(x, y) add("atomic_" #x, Op##y);
  // CL 2.0 atomic builtins
  _SPIRV_OP(flag_test_and_set_explicit, AtomicFlagTestAndSet)
  _SPIRV_OP(flag_clear_explicit, AtomicFlagClear)
  _SPIRV_OP(load_explicit, AtomicLoad)
  _SPIRV_OP(store_explicit, AtomicStore)
  _SPIRV_OP(exchange_explicit, AtomicExchange)
  _SPIRV_OP(compare_exchange_strong_explicit, AtomicCompareExchange)
  _SPIRV_OP(compare_exchange_weak_explicit, AtomicCompareExchangeWeak)
  _SPIRV_OP(inc, AtomicIIncrement)
  _SPIRV_OP(dec, AtomicIDecrement)
  _SPIRV_OP(fetch_add_explicit, AtomicIAdd)
  _SPIRV_OP(fetch_sub_explicit, AtomicISub)
  _SPIRV_OP(fetch_umin_explicit, AtomicUMin)
  _SPIRV_OP(fetch_umax_explicit, AtomicUMax)
  _SPIRV_OP(fetch_min_explicit, AtomicSMin)
  _SPIRV_OP(fetch_max_explicit, AtomicSMax)
  _SPIRV_OP(fetch_and_explicit, AtomicAnd)
  _SPIRV_OP(fetch_or_explicit, AtomicOr)
  _SPIRV_OP(fetch_xor_explicit, AtomicXor)
#undef _SPIRV_OP
#define _SPIRV_OP(x, y) add(#x, Op##y);
  _SPIRV_OP(dot, Dot)
  _SPIRV_OP(async_work_group_copy, GroupAsyncCopy)
  _SPIRV_OP(async_work_group_strided_copy, GroupAsyncCopy)
  _SPIRV_OP(wait_group_events, GroupWaitEvents)
  _SPIRV_OP(isequal, FOrdEqual)
  _SPIRV_OP(isnotequal, FUnordNotEqual)
  _SPIRV_OP(isgreater, FOrdGreaterThan)
  _SPIRV_OP(isgreaterequal, FOrdGreaterThanEqual)
  _SPIRV_OP(isless, FOrdLessThan)
  _SPIRV_OP(islessequal, FOrdLessThanEqual)
  _SPIRV_OP(islessgreater, LessOrGreater)
  _SPIRV_OP(isordered, Ordered)
  _SPIRV_OP(isunordered, Unordered)
  _SPIRV_OP(isfinite, IsFinite)
  _SPIRV_OP(isinf, IsInf)
  _SPIRV_OP(isnan, IsNan)
  _SPIRV_OP(isnormal, IsNormal)
  _SPIRV_OP(signbit, SignBitSet)
  _SPIRV_OP(any, Any)
  _SPIRV_OP(all, All)
  _SPIRV_OP(popcount, BitCount)
  _SPIRV_OP(get_fence, GenericPtrMemSemantics)
  // CL 2.0 kernel enqueue builtins
  _SPIRV_OP(enqueue_marker, EnqueueMarker)
  _SPIRV_OP(enqueue_kernel, EnqueueKernel)
  _SPIRV_OP(get_kernel_sub_group_count_for_ndrange_impl,
            GetKernelNDrangeSubGroupCount)
  _SPIRV_OP(get_kernel_max_sub_group_size_for_ndrange_impl,
            GetKernelNDrangeMaxSubGroupSize)
  _SPIRV_OP(get_kernel_work_group_size_impl, GetKernelWorkGroupSize)
  _SPIRV_OP(get_kernel_preferred_work_group_size_multiple_impl,
            GetKernelPreferredWorkGroupSizeMultiple)
  _SPIRV_OP(retain_event, RetainEvent)
  _SPIRV_OP(release_event, ReleaseEvent)
  _SPIRV_OP(create_user_event, CreateUserEvent)
  _SPIRV_OP(is_valid_event, IsValidEvent)
  _SPIRV_OP(set_user_event_status, SetUserEventStatus)
  _SPIRV_OP(capture_event_profiling_info, CaptureEventProfilingInfo)
  _SPIRV_OP(get_default_queue, GetDefaultQueue)
  _SPIRV_OP(ndrange_1D, BuildNDRange)
  _SPIRV_OP(ndrange_2D, BuildNDRange)
  _SPIRV_OP(ndrange_3D, BuildNDRange)
  // Generic Address Space Casts
  _SPIRV_OP(to_global, GenericCastToPtrExplicit)
  _SPIRV_OP(to_local, GenericCastToPtrExplicit)
  _SPIRV_OP(to_private, GenericCastToPtrExplicit)
  // CL 2.0 pipe builtins
  _SPIRV_OP(read_pipe_2, ReadPipe)
  _SPIRV_OP(write_pipe_2, WritePipe)
  _SPIRV_OP(read_pipe_2_bl, ReadPipeBlockingINTEL)
  _SPIRV_OP(write_pipe_2_bl, WritePipeBlockingINTEL)
  _SPIRV_OP(read_pipe_4, ReservedReadPipe)
  _SPIRV_OP(write_pipe_4, ReservedWritePipe)
  _SPIRV_OP(reserve_read_pipe, ReserveReadPipePackets)
  _SPIRV_OP(reserve_write_pipe, ReserveWritePipePackets)
  _SPIRV_OP(commit_read_pipe, CommitReadPipe)
  _SPIRV_OP(commit_write_pipe, CommitWritePipe)
  _SPIRV_OP(is_valid_reserve_id, IsValidReserveId)
  _SPIRV_OP(group_reserve_read_pipe, GroupReserveReadPipePackets)
  _SPIRV_OP(group_reserve_write_pipe, GroupReserveWritePipePackets)
  _SPIRV_OP(group_commit_read_pipe, GroupCommitReadPipe)
  _SPIRV_OP(group_commit_write_pipe, GroupCommitWritePipe)
  _SPIRV_OP(get_pipe_num_packets_ro, GetNumPipePackets)
  _SPIRV_OP(get_pipe_num_packets_wo, GetNumPipePackets)
  _SPIRV_OP(get_pipe_max_packets_ro, GetMaxPipePackets)
  _SPIRV_OP(get_pipe_max_packets_wo, GetMaxPipePackets)
  // CL 2.0 workgroup builtins
  _SPIRV_OP(group_all, GroupAll)
  _SPIRV_OP(group_any, GroupAny)
  _SPIRV_OP(group_broadcast, GroupBroadcast)
  _SPIRV_OP(group_iadd, GroupIAdd)
  _SPIRV_OP(group_fadd, GroupFAdd)
  _SPIRV_OP(group_fmin, GroupFMin)
  _SPIRV_OP(group_umin, GroupUMin)
  _SPIRV_OP(group_smin, GroupSMin)
  _SPIRV_OP(group_fmax, GroupFMax)
  _SPIRV_OP(group_umax, GroupUMax)
  _SPIRV_OP(group_smax, GroupSMax)
  // CL image builtins
  _SPIRV_OP(SampledImage, SampledImage)
  _SPIRV_OP(ImageSampleExplicitLod, ImageSampleExplicitLod)
  _SPIRV_OP(read_image, ImageRead)
  _SPIRV_OP(write_image, ImageWrite)
  _SPIRV_OP(get_image_channel_data_type, ImageQueryFormat)
  _SPIRV_OP(get_image_channel_order, ImageQueryOrder)
  _SPIRV_OP(get_image_num_mip_levels, ImageQueryLevels)
  _SPIRV_OP(get_image_num_samples, ImageQuerySamples)
  // Intel Subgroups builtins
  _SPIRV_OP(intel_sub_group_shuffle, SubgroupShuffleINTEL)
  _SPIRV_OP(intel_sub_group_shuffle_down, SubgroupShuffleDownINTEL)
  _SPIRV_OP(intel_sub_group_shuffle_up, SubgroupShuffleUpINTEL)
  _SPIRV_OP(intel_sub_group_shuffle_xor, SubgroupShuffleXorINTEL)
  // Intel media_block_io builtins
  _SPIRV_OP(intel_sub_group_media_block_read, SubgroupImageMediaBlockReadINTEL)
  _SPIRV_OP(intel_sub_group_media_block_write,
            SubgroupImageMediaBlockWriteINTEL)
  // cl_khr_subgroup_non_uniform_vote
  _SPIRV_OP(group_elect, GroupNonUniformElect)
  _SPIRV_OP(group_non_uniform_all, GroupNonUniformAll)
  _SPIRV_OP(group_non_uniform_any, GroupNonUniformAny)
  _SPIRV_OP(group_non_uniform_all_equal, GroupNonUniformAllEqual)
  // cl_khr_subgroup_ballot
  _SPIRV_OP(group_non_uniform_broadcast, GroupNonUniformBroadcast)
  _SPIRV_OP(group_broadcast_first, GroupNonUniformBroadcastFirst)
  _SPIRV_OP(group_ballot, GroupNonUniformBallot)
  _SPIRV_OP(group_inverse_ballot, GroupNonUniformInverseBallot)
  _SPIRV_OP(group_ballot_bit_extract, GroupNonUniformBallotBitExtract)
  _SPIRV_OP(group_ballot_bit_count_iadd, GroupNonUniformBallotBitCount)
  _SPIRV_OP(group_ballot_find_lsb, GroupNonUniformBallotFindLSB)
  _SPIRV_OP(group_ballot_find_msb, GroupNonUniformBallotFindMSB)
  // cl_khr_subgroup_non_uniform_arithmetic
  _SPIRV_OP(group_non_uniform_iadd, GroupNonUniformIAdd)
  _SPIRV_OP(group_non_uniform_fadd, GroupNonUniformFAdd)
  _SPIRV_OP(group_non_uniform_imul, GroupNonUniformIMul)
  _SPIRV_OP(group_non_uniform_fmul, GroupNonUniformFMul)
  _SPIRV_OP(group_non_uniform_smin, GroupNonUniformSMin)
  _SPIRV_OP(group_non_uniform_umin, GroupNonUniformUMin)
  _SPIRV_OP(group_non_uniform_fmin, GroupNonUniformFMin)
  _SPIRV_OP(group_non_uniform_smax, GroupNonUniformSMax)
  _SPIRV_OP(group_non_uniform_umax, GroupNonUniformUMax)
  _SPIRV_OP(group_non_uniform_fmax, GroupNonUniformFMax)
  _SPIRV_OP(group_non_uniform_iand, GroupNonUniformBitwiseAnd)
  _SPIRV_OP(group_non_uniform_ior, GroupNonUniformBitwiseOr)
  _SPIRV_OP(group_non_uniform_ixor, GroupNonUniformBitwiseXor)
  _SPIRV_OP(group_non_uniform_logical_iand, GroupNonUniformLogicalAnd)
  _SPIRV_OP(group_non_uniform_logical_ior, GroupNonUniformLogicalOr)
  _SPIRV_OP(group_non_uniform_logical_ixor, GroupNonUniformLogicalXor)
  // cl_khr_subgroup_shuffle
  _SPIRV_OP(group_shuffle, GroupNonUniformShuffle)
  _SPIRV_OP(group_shuffle_xor, GroupNonUniformShuffleXor)
  // cl_khr_subgroup_shuffle_relative
  _SPIRV_OP(group_shuffle_up, GroupNonUniformShuffleUp)
  _SPIRV_OP(group_shuffle_down, GroupNonUniformShuffleDown)
  // cl_khr_extended_bit_ops
  _SPIRV_OP(bitfield_insert, BitFieldInsert)
  _SPIRV_OP(bitfield_extract_signed, BitFieldSExtract)
  _SPIRV_OP(bitfield_extract_unsigned, BitFieldUExtract)
  _SPIRV_OP(bit_reverse, BitReverse)
#undef _SPIRV_OP
}

template <> void SPIRVMap<std::string, Op, OCL12Builtin>::init() {
#define _SPIRV_OP(x, y) add(#x, Op##y);
  _SPIRV_OP(add, AtomicIAdd)
  _SPIRV_OP(sub, AtomicISub)
  _SPIRV_OP(xchg, AtomicExchange)
  _SPIRV_OP(cmpxchg, AtomicCompareExchange)
  _SPIRV_OP(inc, AtomicIIncrement)
  _SPIRV_OP(dec, AtomicIDecrement)
  _SPIRV_OP(min, AtomicSMin)
  _SPIRV_OP(max, AtomicSMax)
  _SPIRV_OP(umin, AtomicUMin)
  _SPIRV_OP(umax, AtomicUMax)
  _SPIRV_OP(and, AtomicAnd)
  _SPIRV_OP(or, AtomicOr)
  _SPIRV_OP(xor, AtomicXor)
#undef _SPIRV_OP
}

// SPV_INTEL_device_side_avc_motion_estimation extension builtins
class SPIRVSubgroupsAVCIntelInst;
template <> void SPIRVMap<std::string, Op, SPIRVSubgroupsAVCIntelInst>::init() {
  // Here is a workaround for a bug in the specification:
  // 'avc' is missing from the 'intel_sub_group_avc' prefix.
  add("intel_sub_group_ime_ref_window_size",
      OpSubgroupAvcImeRefWindowSizeINTEL);

#define _SPIRV_OP(x, y) add("intel_sub_group_avc_" #x, OpSubgroupAvc##y##INTEL);
  // Initialization phase functions
  _SPIRV_OP(ime_initialize, ImeInitialize)
  _SPIRV_OP(fme_initialize, FmeInitialize)
  _SPIRV_OP(bme_initialize, BmeInitialize)
  _SPIRV_OP(sic_initialize, SicInitialize)

  // Result and payload types conversion functions
  _SPIRV_OP(mce_convert_to_ime_payload, MceConvertToImePayload)
  _SPIRV_OP(mce_convert_to_ime_result, MceConvertToImeResult)
  _SPIRV_OP(mce_convert_to_ref_payload, MceConvertToRefPayload)
  _SPIRV_OP(mce_convert_to_ref_result, MceConvertToRefResult)
  _SPIRV_OP(mce_convert_to_sic_payload, MceConvertToSicPayload)
  _SPIRV_OP(mce_convert_to_sic_result, MceConvertToSicResult)
  _SPIRV_OP(ime_convert_to_mce_payload, ImeConvertToMcePayload)
  _SPIRV_OP(ime_convert_to_mce_result, ImeConvertToMceResult)
  _SPIRV_OP(ref_convert_to_mce_payload, RefConvertToMcePayload)
  _SPIRV_OP(ref_convert_to_mce_result, RefConvertToMceResult)
  _SPIRV_OP(sic_convert_to_mce_payload, SicConvertToMcePayload)
  _SPIRV_OP(sic_convert_to_mce_result, SicConvertToMceResult)
#undef _SPIRV_OP

// MCE instructions
#define _SPIRV_OP(x, y)                                                        \
  add("intel_sub_group_avc_mce_" #x, OpSubgroupAvcMce##y##INTEL);
  _SPIRV_OP(get_default_inter_base_multi_reference_penalty,
            GetDefaultInterBaseMultiReferencePenalty)
  _SPIRV_OP(set_inter_base_multi_reference_penalty,
            SetInterBaseMultiReferencePenalty)
  _SPIRV_OP(get_default_inter_shape_penalty, GetDefaultInterShapePenalty)
  _SPIRV_OP(set_inter_shape_penalty, SetInterShapePenalty)
  _SPIRV_OP(get_default_inter_direction_penalty,
            GetDefaultInterDirectionPenalty)
  _SPIRV_OP(set_inter_direction_penalty, SetInterDirectionPenalty)
  _SPIRV_OP(get_default_intra_luma_shape_penalty,
            GetDefaultIntraLumaShapePenalty)
  _SPIRV_OP(get_default_inter_motion_vector_cost_table,
            GetDefaultInterMotionVectorCostTable)
  _SPIRV_OP(get_default_high_penalty_cost_table, GetDefaultHighPenaltyCostTable)
  _SPIRV_OP(get_default_medium_penalty_cost_table,
            GetDefaultMediumPenaltyCostTable)
  _SPIRV_OP(get_default_low_penalty_cost_table, GetDefaultLowPenaltyCostTable)
  _SPIRV_OP(set_motion_vector_cost_function, SetMotionVectorCostFunction)
  _SPIRV_OP(get_default_intra_luma_mode_penalty, GetDefaultIntraLumaModePenalty)
  _SPIRV_OP(get_default_non_dc_luma_intra_penalty,
            GetDefaultNonDcLumaIntraPenalty)
  _SPIRV_OP(get_default_intra_chroma_mode_base_penalty,
            GetDefaultIntraChromaModeBasePenalty)
  _SPIRV_OP(set_ac_only_haar, SetAcOnlyHaar)
  _SPIRV_OP(set_source_interlaced_field_polarity,
            SetSourceInterlacedFieldPolarity)
  _SPIRV_OP(set_single_reference_interlaced_field_polarity,
            SetSingleReferenceInterlacedFieldPolarity)
  _SPIRV_OP(set_dual_reference_interlaced_field_polarities,
            SetDualReferenceInterlacedFieldPolarities)
  _SPIRV_OP(get_motion_vectors, GetMotionVectors)
  _SPIRV_OP(get_inter_distortions, GetInterDistortions)
  _SPIRV_OP(get_best_inter_distortion, GetBestInterDistortions)
  _SPIRV_OP(get_inter_major_shape, GetInterMajorShape)
  _SPIRV_OP(get_inter_minor_shapes, GetInterMinorShape)
  _SPIRV_OP(get_inter_directions, GetInterDirections)
  _SPIRV_OP(get_inter_motion_vector_count, GetInterMotionVectorCount)
  _SPIRV_OP(get_inter_reference_ids, GetInterReferenceIds)
  _SPIRV_OP(get_inter_reference_interlaced_field_polarities,
            GetInterReferenceInterlacedFieldPolarities)
#undef _SPIRV_OP

// IME instructions
#define _SPIRV_OP(x, y)                                                        \
  add("intel_sub_group_avc_ime_" #x, OpSubgroupAvcIme##y##INTEL);
  _SPIRV_OP(set_single_reference, SetSingleReference)
  _SPIRV_OP(set_dual_reference, SetDualReference)
  _SPIRV_OP(ref_window_size, RefWindowSize)
  _SPIRV_OP(adjust_ref_offset, AdjustRefOffset)
  _SPIRV_OP(set_max_motion_vector_count, SetMaxMotionVectorCount)
  _SPIRV_OP(set_unidirectional_mix_disable, SetUnidirectionalMixDisable)
  _SPIRV_OP(set_early_search_termination_threshold,
            SetEarlySearchTerminationThreshold)
  _SPIRV_OP(set_weighted_sad, SetWeightedSad)
  _SPIRV_OP(evaluate_with_single_reference, EvaluateWithSingleReference)
  _SPIRV_OP(evaluate_with_dual_reference, EvaluateWithDualReference)
  _SPIRV_OP(evaluate_with_single_reference_streamin,
            EvaluateWithSingleReferenceStreamin)
  _SPIRV_OP(evaluate_with_dual_reference_streamin,
            EvaluateWithDualReferenceStreamin)
  _SPIRV_OP(evaluate_with_single_reference_streamout,
            EvaluateWithSingleReferenceStreamout)
  _SPIRV_OP(evaluate_with_dual_reference_streamout,
            EvaluateWithDualReferenceStreamout)
  _SPIRV_OP(evaluate_with_single_reference_streaminout,
            EvaluateWithSingleReferenceStreaminout)
  _SPIRV_OP(evaluate_with_dual_reference_streaminout,
            EvaluateWithDualReferenceStreaminout)
  _SPIRV_OP(get_single_reference_streamin, GetSingleReferenceStreamin)
  _SPIRV_OP(get_dual_reference_streamin, GetDualReferenceStreamin)
  _SPIRV_OP(strip_single_reference_streamout, StripSingleReferenceStreamout)
  _SPIRV_OP(strip_dual_reference_streamout, StripDualReferenceStreamout)
  _SPIRV_OP(get_border_reached, GetBorderReached)
  _SPIRV_OP(get_truncated_search_indication, GetTruncatedSearchIndication)
  _SPIRV_OP(get_unidirectional_early_search_termination,
            GetUnidirectionalEarlySearchTermination)
  _SPIRV_OP(get_weighting_pattern_minimum_motion_vector,
            GetWeightingPatternMinimumMotionVector)
  _SPIRV_OP(get_weighting_pattern_minimum_distortion,
            GetWeightingPatternMinimumDistortion)
#undef _SPIRV_OP

#define _SPIRV_OP(x, y)                                                        \
  add("intel_sub_group_avc_ime_get_streamout_major_shape_" #x,                 \
      OpSubgroupAvcImeGetStreamout##y##INTEL);
  _SPIRV_OP(motion_vectors_single_reference,
            SingleReferenceMajorShapeMotionVectors)
  _SPIRV_OP(distortions_single_reference, SingleReferenceMajorShapeDistortions)
  _SPIRV_OP(reference_ids_single_reference,
            SingleReferenceMajorShapeReferenceIds)
  _SPIRV_OP(motion_vectors_dual_reference, DualReferenceMajorShapeMotionVectors)
  _SPIRV_OP(distortions_dual_reference, DualReferenceMajorShapeDistortions)
  _SPIRV_OP(reference_ids_dual_reference, DualReferenceMajorShapeReferenceIds)
#undef _SPIRV_OP

// REF instructions
#define _SPIRV_OP(x, y)                                                        \
  add("intel_sub_group_avc_ref_" #x, OpSubgroupAvcRef##y##INTEL);
  _SPIRV_OP(set_bidirectional_mix_disable, SetBidirectionalMixDisable)
  _SPIRV_OP(set_bilinear_filter_enable, SetBilinearFilterEnable)
  _SPIRV_OP(evaluate_with_single_reference, EvaluateWithSingleReference)
  _SPIRV_OP(evaluate_with_dual_reference, EvaluateWithDualReference)
  _SPIRV_OP(evaluate_with_multi_reference, EvaluateWithMultiReference)
  _SPIRV_OP(evaluate_with_multi_reference_interlaced,
            EvaluateWithMultiReferenceInterlaced)
#undef _SPIRV_OP

// SIC instructions
#define _SPIRV_OP(x, y)                                                        \
  add("intel_sub_group_avc_sic_" #x, OpSubgroupAvcSic##y##INTEL);
  _SPIRV_OP(configure_skc, ConfigureSkc)
  _SPIRV_OP(configure_ipe_luma, ConfigureIpeLuma)
  _SPIRV_OP(configure_ipe_luma_chroma, ConfigureIpeLumaChroma)
  _SPIRV_OP(get_motion_vector_mask, GetMotionVectorMask)
  _SPIRV_OP(set_intra_luma_shape_penalty, SetIntraLumaShapePenalty)
  _SPIRV_OP(set_intra_luma_mode_cost_function, SetIntraLumaModeCostFunction)
  _SPIRV_OP(set_intra_chroma_mode_cost_function, SetIntraChromaModeCostFunction)
  _SPIRV_OP(set_skc_bilinear_filter_enable, SetBilinearFilterEnable)
  _SPIRV_OP(set_skc_forward_transform_enable, SetSkcForwardTransformEnable)
  _SPIRV_OP(set_block_based_raw_skip_sad, SetBlockBasedRawSkipSad)
  _SPIRV_OP(evaluate_ipe, EvaluateIpe)
  _SPIRV_OP(evaluate_with_single_reference, EvaluateWithSingleReference)
  _SPIRV_OP(evaluate_with_dual_reference, EvaluateWithDualReference)
  _SPIRV_OP(evaluate_with_multi_reference, EvaluateWithMultiReference)
  _SPIRV_OP(evaluate_with_multi_reference_interlaced,
            EvaluateWithMultiReferenceInterlaced)
  _SPIRV_OP(get_ipe_luma_shape, GetIpeLumaShape)
  _SPIRV_OP(get_best_ipe_luma_distortion, GetBestIpeLumaDistortion)
  _SPIRV_OP(get_best_ipe_chroma_distortion, GetBestIpeChromaDistortion)
  _SPIRV_OP(get_packed_ipe_luma_modes, GetPackedIpeLumaModes)
  _SPIRV_OP(get_ipe_chroma_mode, GetIpeChromaMode)
  _SPIRV_OP(get_packed_skc_luma_count_threshold, GetPackedSkcLumaCountThreshold)
  _SPIRV_OP(get_packed_skc_luma_sum_threshold, GetPackedSkcLumaSumThreshold)
  _SPIRV_OP(get_inter_raw_sads, GetInterRawSads)
#undef _SPIRV_OP
}

template <> void SPIRVMap<std::string, Op, OCLOpaqueType>::init() {
  add("opencl.event_t", OpTypeEvent);
  add("opencl.pipe_t", OpTypePipe);
  add("opencl.clk_event_t", OpTypeDeviceEvent);
  add("opencl.reserve_id_t", OpTypeReserveId);
  add("opencl.queue_t", OpTypeQueue);
  add("opencl.sampler_t", OpTypeSampler);
}

template <> void LLVMSPIRVAtomicRmwOpCodeMap::init() {
  add(llvm::AtomicRMWInst::Xchg, OpAtomicExchange);
  add(llvm::AtomicRMWInst::Add, OpAtomicIAdd);
  add(llvm::AtomicRMWInst::Sub, OpAtomicISub);
  add(llvm::AtomicRMWInst::And, OpAtomicAnd);
  add(llvm::AtomicRMWInst::Or, OpAtomicOr);
  add(llvm::AtomicRMWInst::Xor, OpAtomicXor);
  add(llvm::AtomicRMWInst::Max, OpAtomicSMax);
  add(llvm::AtomicRMWInst::Min, OpAtomicSMin);
  add(llvm::AtomicRMWInst::UMax, OpAtomicUMax);
  add(llvm::AtomicRMWInst::UMin, OpAtomicUMin);
}

} // namespace SPIRV

///////////////////////////////////////////////////////////////////////////////
//
// Functions for getting builtin call info
//
///////////////////////////////////////////////////////////////////////////////

namespace OCLUtil {

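// The returned literals correspond, in order, to the arguments of the
// OpenCL C 2.0 built-in atomic_work_item_fence(cl_mem_fence_flags flags,
// memory_order order, memory_scope scope).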
AtomicWorkItemFenceLiterals getAtomicWorkItemFenceLiterals(CallInst *CI) {
  return std::make_tuple(getArgAsInt(CI, 0),
                         static_cast<OCLMemOrderKind>(getArgAsInt(CI, 1)),
                         static_cast<OCLScopeKind>(getArgAsInt(CI, 2)));
}

size_t getAtomicBuiltinNumMemoryOrderArgs(StringRef Name) {
  if (Name.startswith("atomic_compare_exchange"))
    return 2;
  return 1;
}

size_t getSPIRVAtomicBuiltinNumMemoryOrderArgs(Op OC) {
  if (OC == OpAtomicCompareExchange || OC == OpAtomicCompareExchangeWeak)
    return 2;
  return 1;
}

bool isComputeAtomicOCLBuiltin(StringRef DemangledName) {
  if (!DemangledName.startswith(kOCLBuiltinName::AtomicPrefix) &&
      !DemangledName.startswith(kOCLBuiltinName::AtomPrefix))
    return false;

  return llvm::StringSwitch<bool>(DemangledName)
      .EndsWith("add", true)
      .EndsWith("sub", true)
      .EndsWith("inc", true)
      .EndsWith("dec", true)
      .EndsWith("cmpxchg", true)
      .EndsWith("min", true)
      .EndsWith("max", true)
      .EndsWith("and", true)
      .EndsWith("or", true)
      .EndsWith("xor", true)
      .EndsWith("add_explicit", true)
      .EndsWith("sub_explicit", true)
      .EndsWith("or_explicit", true)
      .EndsWith("xor_explicit", true)
      .EndsWith("and_explicit", true)
      .EndsWith("min_explicit", true)
      .EndsWith("max_explicit", true)
      .Default(false);
}

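// Decodes the arguments of barrier/work_group_barrier/sub_group_barrier into
// (memory fence flags, memory scope, execution scope). The single-argument
// form defaults the memory scope to the work-group; the execution scope is
// derived from the built-in name.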
BarrierLiterals getBarrierLiterals(CallInst *CI) {
  auto N = CI->getNumArgOperands();
  assert(N == 1 || N == 2);

  StringRef DemangledName;
  assert(CI->getCalledFunction() && "Unexpected indirect call");
  if (!oclIsBuiltin(CI->getCalledFunction()->getName(), DemangledName)) {
    assert(0 &&
           "call must be a builtin (work_group_barrier or sub_group_barrier)");
  }

  OCLScopeKind Scope = OCLMS_work_group;
  if (DemangledName == kOCLBuiltinName::SubGroupBarrier) {
    Scope = OCLMS_sub_group;
  }

  return std::make_tuple(getArgAsInt(CI, 0),
                         N == 1 ? OCLMS_work_group
                                : static_cast<OCLScopeKind>(getArgAsInt(CI, 1)),
                         Scope);
}

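// Looks up a demangled OCL built-in name in the extended instruction set,
// retrying with a signedness/float prefix derived from the last parameter
// type, e.g. a call to max with unsigned arguments is looked up as u_max.
// Returns ~0U if no extended instruction matches.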
unsigned getExtOp(StringRef OrigName, StringRef GivenDemangledName) {
  std::string DemangledName{GivenDemangledName};
  if (DemangledName.empty() || !oclIsBuiltin(OrigName, GivenDemangledName))
    return ~0U;
  LLVM_DEBUG(dbgs() << "getExtOp: demangled name: " << DemangledName << '\n');
  OCLExtOpKind EOC;
  bool Found = OCLExtOpMap::rfind(DemangledName, &EOC);
  if (!Found) {
    std::string Prefix;
    switch (lastFuncParamType(OrigName)) {
    case ParamType::UNSIGNED:
      Prefix = "u_";
      break;
    case ParamType::SIGNED:
      Prefix = "s_";
      break;
    case ParamType::FLOAT:
      Prefix = "f";
      break;
    case ParamType::UNKNOWN:
      break;
    }
    Found = OCLExtOpMap::rfind(Prefix + DemangledName, &EOC);
  }
  if (Found)
    return EOC;
  else
    return ~0U;
}

///////////////////////////////////////////////////////////////////////////////
//
// Functions for getting module info
//
///////////////////////////////////////////////////////////////////////////////

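// Encodes an OpenCL version as (Major * 100 + Minor) * 1000 + Rev, e.g.
// OpenCL 2.0 becomes 200000 and OpenCL 1.2 becomes 102000; decodeOCLVer
// performs the inverse transformation.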
unsigned encodeOCLVer(unsigned short Major, unsigned char Minor,
                      unsigned char Rev) {
  return (Major * 100 + Minor) * 1000 + Rev;
}

std::tuple<unsigned short, unsigned char, unsigned char>
decodeOCLVer(unsigned Ver) {
  unsigned short Major = Ver / 100000;
  unsigned char Minor = (Ver % 100000) / 1000;
  unsigned char Rev = Ver % 1000;
  return std::make_tuple(Major, Minor, Rev);
}

unsigned getOCLVersion(Module *M, bool AllowMulti) {
  NamedMDNode *NamedMD = M->getNamedMetadata(kSPIR2MD::OCLVer);
  if (!NamedMD)
    return 0;
  assert(NamedMD->getNumOperands() > 0 && "Invalid SPIR");
  if (!AllowMulti && NamedMD->getNumOperands() != 1)
    report_fatal_error("Multiple OCL version metadata not allowed");

  // If the module was linked with another module, there may be multiple
  // operands.
  auto GetVer = [=](unsigned I) {
    auto MD = NamedMD->getOperand(I);
    return std::make_pair(getMDOperandAsInt(MD, 0), getMDOperandAsInt(MD, 1));
  };
  auto Ver = GetVer(0);
  for (unsigned I = 1, E = NamedMD->getNumOperands(); I != E; ++I)
    if (Ver != GetVer(I))
      report_fatal_error("OCL version mismatch");

  return encodeOCLVer(Ver.first, Ver.second, 0);
}

void decodeMDNode(MDNode *N, unsigned &X, unsigned &Y, unsigned &Z) {
  if (N == NULL)
    return;
  X = getMDOperandAsInt(N, 0);
  Y = getMDOperandAsInt(N, 1);
  Z = getMDOperandAsInt(N, 2);
}

/// Encode LLVM type by SPIR-V execution mode VecTypeHint
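/// The scalar element type is encoded in the low 16 bits (0 = 8-bit, 1 =
/// 16-bit, 2 = 32-bit, 3 = 64-bit integer, 4 = half, 5 = float, 6 = double)
/// and the vector width in the high 16 bits, e.g. a float4 hint is encoded
/// as (4 << 16) | 5.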
unsigned encodeVecTypeHint(Type *Ty) {
  if (Ty->isHalfTy())
    return 4;
  if (Ty->isFloatTy())
    return 5;
  if (Ty->isDoubleTy())
    return 6;
  if (IntegerType *IntTy = dyn_cast<IntegerType>(Ty)) {
    switch (IntTy->getIntegerBitWidth()) {
    case 8:
      return 0;
    case 16:
      return 1;
    case 32:
      return 2;
    case 64:
      return 3;
    default:
      llvm_unreachable("invalid integer type");
    }
  }
  if (FixedVectorType *VecTy = dyn_cast<FixedVectorType>(Ty)) {
    Type *EleTy = VecTy->getElementType();
    unsigned Size = VecTy->getNumElements();
    return Size << 16 | encodeVecTypeHint(EleTy);
  }
  llvm_unreachable("invalid type");
  return ~0U;
}

Type *decodeVecTypeHint(LLVMContext &C, unsigned Code) {
  unsigned VecWidth = Code >> 16;
  unsigned Scalar = Code & 0xFFFF;
  Type *ST = nullptr;
  switch (Scalar) {
  case 0:
  case 1:
  case 2:
  case 3:
    ST = IntegerType::get(C, 1 << (3 + Scalar));
    break;
  case 4:
    ST = Type::getHalfTy(C);
    break;
  case 5:
    ST = Type::getFloatTy(C);
    break;
  case 6:
    ST = Type::getDoubleTy(C);
    break;
  default:
    llvm_unreachable("Invalid vec type hint");
    return nullptr;
  }
  if (VecWidth < 1)
    return ST;
  return FixedVectorType::get(ST, VecWidth);
}

unsigned transVecTypeHint(MDNode *Node) {
  return encodeVecTypeHint(getMDOperandAsType(Node, 0));
}

SPIRAddressSpace getOCLOpaqueTypeAddrSpace(Op OpCode) {
  switch (OpCode) {
  case OpTypeQueue:
    return SPIRV_QUEUE_T_ADDR_SPACE;
  case OpTypeEvent:
    return SPIRV_EVENT_T_ADDR_SPACE;
  case OpTypeDeviceEvent:
    return SPIRV_CLK_EVENT_T_ADDR_SPACE;
  case OpTypeReserveId:
    return SPIRV_RESERVE_ID_T_ADDR_SPACE;
  case OpTypePipe:
  case OpTypePipeStorage:
    return SPIRV_PIPE_ADDR_SPACE;
  case OpTypeImage:
  case OpTypeSampledImage:
    return SPIRV_IMAGE_ADDR_SPACE;
  case OpConstantSampler:
  case OpTypeSampler:
    return SPIRV_SAMPLER_T_ADDR_SPACE;
  default:
    if (isSubgroupAvcINTELTypeOpCode(OpCode))
      return SPIRV_AVC_INTEL_T_ADDR_SPACE;
    assert(false && "No address space is determined for some OCL type");
    return SPIRV_OCL_SPECIAL_TYPES_DEFAULT_ADDR_SPACE;
  }
}

static SPIR::TypeAttributeEnum mapAddrSpaceEnums(SPIRAddressSpace Addrspace) {
  switch (Addrspace) {
  case SPIRAS_Private:
    return SPIR::ATTR_PRIVATE;
  case SPIRAS_Global:
    return SPIR::ATTR_GLOBAL;
  case SPIRAS_Constant:
    return SPIR::ATTR_CONSTANT;
  case SPIRAS_Local:
    return SPIR::ATTR_LOCAL;
  case SPIRAS_Generic:
    return SPIR::ATTR_GENERIC;
  case SPIRAS_GlobalDevice:
    return SPIR::ATTR_GLOBAL_DEVICE;
  case SPIRAS_GlobalHost:
    return SPIR::ATTR_GLOBAL_HOST;
  default:
    llvm_unreachable("Invalid addrspace enum member");
  }
  return SPIR::ATTR_NONE;
}

SPIR::TypeAttributeEnum
getOCLOpaqueTypeAddrSpace(SPIR::TypePrimitiveEnum Prim) {
  switch (Prim) {
  case SPIR::PRIMITIVE_QUEUE_T:
    return mapAddrSpaceEnums(SPIRV_QUEUE_T_ADDR_SPACE);
  case SPIR::PRIMITIVE_EVENT_T:
    return mapAddrSpaceEnums(SPIRV_EVENT_T_ADDR_SPACE);
  case SPIR::PRIMITIVE_CLK_EVENT_T:
    return mapAddrSpaceEnums(SPIRV_CLK_EVENT_T_ADDR_SPACE);
  case SPIR::PRIMITIVE_RESERVE_ID_T:
    return mapAddrSpaceEnums(SPIRV_RESERVE_ID_T_ADDR_SPACE);
  case SPIR::PRIMITIVE_PIPE_RO_T:
  case SPIR::PRIMITIVE_PIPE_WO_T:
    return mapAddrSpaceEnums(SPIRV_PIPE_ADDR_SPACE);
  case SPIR::PRIMITIVE_IMAGE1D_RO_T:
  case SPIR::PRIMITIVE_IMAGE1D_ARRAY_RO_T:
  case SPIR::PRIMITIVE_IMAGE1D_BUFFER_RO_T:
  case SPIR::PRIMITIVE_IMAGE2D_RO_T:
  case SPIR::PRIMITIVE_IMAGE2D_ARRAY_RO_T:
  case SPIR::PRIMITIVE_IMAGE2D_DEPTH_RO_T:
  case SPIR::PRIMITIVE_IMAGE2D_ARRAY_DEPTH_RO_T:
  case SPIR::PRIMITIVE_IMAGE2D_MSAA_RO_T:
  case SPIR::PRIMITIVE_IMAGE2D_ARRAY_MSAA_RO_T:
  case SPIR::PRIMITIVE_IMAGE2D_MSAA_DEPTH_RO_T:
  case SPIR::PRIMITIVE_IMAGE2D_ARRAY_MSAA_DEPTH_RO_T:
  case SPIR::PRIMITIVE_IMAGE3D_RO_T:
  case SPIR::PRIMITIVE_IMAGE1D_WO_T:
  case SPIR::PRIMITIVE_IMAGE1D_ARRAY_WO_T:
  case SPIR::PRIMITIVE_IMAGE1D_BUFFER_WO_T:
  case SPIR::PRIMITIVE_IMAGE2D_WO_T:
  case SPIR::PRIMITIVE_IMAGE2D_ARRAY_WO_T:
  case SPIR::PRIMITIVE_IMAGE2D_DEPTH_WO_T:
  case SPIR::PRIMITIVE_IMAGE2D_ARRAY_DEPTH_WO_T:
  case SPIR::PRIMITIVE_IMAGE2D_MSAA_WO_T:
  case SPIR::PRIMITIVE_IMAGE2D_ARRAY_MSAA_WO_T:
  case SPIR::PRIMITIVE_IMAGE2D_MSAA_DEPTH_WO_T:
  case SPIR::PRIMITIVE_IMAGE2D_ARRAY_MSAA_DEPTH_WO_T:
  case SPIR::PRIMITIVE_IMAGE3D_WO_T:
  case SPIR::PRIMITIVE_IMAGE1D_RW_T:
  case SPIR::PRIMITIVE_IMAGE1D_ARRAY_RW_T:
  case SPIR::PRIMITIVE_IMAGE1D_BUFFER_RW_T:
  case SPIR::PRIMITIVE_IMAGE2D_RW_T:
  case SPIR::PRIMITIVE_IMAGE2D_ARRAY_RW_T:
  case SPIR::PRIMITIVE_IMAGE2D_DEPTH_RW_T:
  case SPIR::PRIMITIVE_IMAGE2D_ARRAY_DEPTH_RW_T:
  case SPIR::PRIMITIVE_IMAGE2D_MSAA_RW_T:
  case SPIR::PRIMITIVE_IMAGE2D_ARRAY_MSAA_RW_T:
  case SPIR::PRIMITIVE_IMAGE2D_MSAA_DEPTH_RW_T:
  case SPIR::PRIMITIVE_IMAGE2D_ARRAY_MSAA_DEPTH_RW_T:
  case SPIR::PRIMITIVE_IMAGE3D_RW_T:
    return mapAddrSpaceEnums(SPIRV_IMAGE_ADDR_SPACE);
  default:
    llvm_unreachable("No address space is determined for a SPIR primitive");
  }
  return SPIR::ATTR_NONE;
}

// Fetch type of invoke function passed to device execution built-ins
static FunctionType *getBlockInvokeTy(Function *F, unsigned BlockIdx) {
  auto Params = F->getFunctionType()->params();
  PointerType *FuncPtr = cast<PointerType>(Params[BlockIdx]);
  return cast<FunctionType>(FuncPtr->getElementType());
}

class OCLBuiltinFuncMangleInfo : public SPIRV::BuiltinFuncMangleInfo {
public:
  OCLBuiltinFuncMangleInfo(Function *F) : F(F) {}
  OCLBuiltinFuncMangleInfo(ArrayRef<Type *> ArgTypes)
      : ArgTypes(ArgTypes.vec()) {}
  Type *getArgTy(unsigned I) { return F->getFunctionType()->getParamType(I); }
  void init(StringRef UniqName) override {
    // Make a local copy as we will modify the string in init function
    std::string TempStorage = UniqName.str();
    auto NameRef = StringRef(TempStorage);

    // Helper functions to erase substrings from NameRef (i.e. TempStorage)
    auto EraseSubstring = [&NameRef, &TempStorage](const std::string &ToErase) {
      size_t Pos = TempStorage.find(ToErase);
      if (Pos != std::string::npos) {
        TempStorage.erase(Pos, ToErase.length());
        // re-take StringRef as TempStorage was updated
        NameRef = StringRef(TempStorage);
      }
    };
    auto EraseSymbol = [&NameRef, &TempStorage](size_t Index) {
      TempStorage.erase(Index, 1);
      // re-take StringRef as TempStorage was updated
      NameRef = StringRef(TempStorage);
    };

    if (NameRef.startswith("async_work_group")) {
      addUnsignedArg(-1);
      setArgAttr(1, SPIR::ATTR_CONST);
    } else if (NameRef.startswith("printf"))
      setVarArg(1);
    else if (NameRef.startswith("write_imageui"))
      addUnsignedArg(2);
    else if (NameRef.equals("prefetch")) {
      addUnsignedArg(1);
      setArgAttr(0, SPIR::ATTR_CONST);
    } else if (NameRef.equals("get_kernel_work_group_size") ||
               NameRef.equals(
                   "get_kernel_preferred_work_group_size_multiple")) {
      assert(F && "lack of necessary information");
      const size_t BlockArgIdx = 0;
      FunctionType *InvokeTy = getBlockInvokeTy(F, BlockArgIdx);
      if (InvokeTy->getNumParams() > 1)
        setLocalArgBlock(BlockArgIdx);
    } else if (NameRef.equals("enqueue_kernel")) {
      assert(F && "lack of necessary information");
      setEnumArg(1, SPIR::PRIMITIVE_KERNEL_ENQUEUE_FLAGS_T);
      addUnsignedArg(3);
      setArgAttr(4, SPIR::ATTR_CONST);
      // Arguments other than the block context are pointers to local memory,
      // so this built-in must be mangled accordingly.
      const size_t BlockArgIdx = 6;
      FunctionType *InvokeTy = getBlockInvokeTy(F, BlockArgIdx);
      if (InvokeTy->getNumParams() > 1) {
        setLocalArgBlock(BlockArgIdx);
        addUnsignedArg(BlockArgIdx + 1);
        setVarArg(BlockArgIdx + 2);
      }
    } else if (NameRef.startswith("get_") || NameRef.equals("nan") ||
               NameRef.equals("mem_fence") || NameRef.startswith("shuffle")) {
      addUnsignedArg(-1);
      if (NameRef.startswith(kOCLBuiltinName::GetFence)) {
        setArgAttr(0, SPIR::ATTR_CONST);
        addVoidPtrArg(0);
      }
    } else if (NameRef.contains("barrier")) {
      addUnsignedArg(0);
      if (NameRef.equals("work_group_barrier") ||
          NameRef.equals("sub_group_barrier"))
        setEnumArg(1, SPIR::PRIMITIVE_MEMORY_SCOPE);
    } else if (NameRef.startswith("atomic_work_item_fence")) {
      addUnsignedArg(0);
      setEnumArg(1, SPIR::PRIMITIVE_MEMORY_ORDER);
      setEnumArg(2, SPIR::PRIMITIVE_MEMORY_SCOPE);
    } else if (NameRef.startswith("atom_")) {
      setArgAttr(0, SPIR::ATTR_VOLATILE);
      if (NameRef.endswith("_umax") || NameRef.endswith("_umin")) {
        addUnsignedArg(-1);
        // We need to remove u to match OpenCL C built-in function name
        EraseSymbol(5);
      }
    } else if (NameRef.startswith("atomic")) {
      setArgAttr(0, SPIR::ATTR_VOLATILE);
      if (NameRef.contains("_umax") || NameRef.contains("_umin")) {
        addUnsignedArg(-1);
        // We need to remove u to match OpenCL C built-in function name
        if (NameRef.contains("_fetch"))
          EraseSymbol(13);
        else
          EraseSymbol(7);
      }
      if (NameRef.contains("store_explicit") ||
          NameRef.contains("exchange_explicit") ||
          (NameRef.startswith("atomic_fetch") &&
           NameRef.contains("explicit"))) {
        setEnumArg(2, SPIR::PRIMITIVE_MEMORY_ORDER);
        setEnumArg(3, SPIR::PRIMITIVE_MEMORY_SCOPE);
      } else if (NameRef.contains("load_explicit") ||
                 (NameRef.startswith("atomic_flag") &&
                  NameRef.contains("explicit"))) {
        setEnumArg(1, SPIR::PRIMITIVE_MEMORY_ORDER);
        setEnumArg(2, SPIR::PRIMITIVE_MEMORY_SCOPE);
      } else if (NameRef.endswith("compare_exchange_strong_explicit") ||
                 NameRef.endswith("compare_exchange_weak_explicit")) {
        setEnumArg(3, SPIR::PRIMITIVE_MEMORY_ORDER);
        setEnumArg(4, SPIR::PRIMITIVE_MEMORY_ORDER);
        setEnumArg(5, SPIR::PRIMITIVE_MEMORY_SCOPE);
      }
      // Don't set atomic property to the first argument of 1.2 atomic
      // built-ins.
      if (!NameRef.endswith("xchg") && // covers _cmpxchg too
          (NameRef.contains("fetch") ||
           !(NameRef.endswith("_add") || NameRef.endswith("_sub") ||
             NameRef.endswith("_inc") || NameRef.endswith("_dec") ||
             NameRef.endswith("_min") || NameRef.endswith("_max") ||
             NameRef.endswith("_and") || NameRef.endswith("_or") ||
             NameRef.endswith("_xor")))) {
        addAtomicArg(0);
      }
    } else if (NameRef.startswith("uconvert_")) {
      addUnsignedArg(0);
      NameRef = NameRef.drop_front(1);
      UnmangledName.erase(0, 1);
    } else if (NameRef.startswith("s_")) {
      if (NameRef.equals("s_upsample"))
        addUnsignedArg(1);
      NameRef = NameRef.drop_front(2);
    } else if (NameRef.startswith("u_")) {
      addUnsignedArg(-1);
      NameRef = NameRef.drop_front(2);
    } else if (NameRef.equals("fclamp")) {
      NameRef = NameRef.drop_front(1);
    }
    // handle [read|write]pipe builtins (plus two i32 literal args
    // required by SPIR 2.0 provisional specification):
    else if (NameRef.equals("read_pipe_2") || NameRef.equals("write_pipe_2")) {
      // with 2 arguments (plus two i32 literals):
      // int read_pipe (read_only pipe gentype p, gentype *ptr)
      // int write_pipe (write_only pipe gentype p, const gentype *ptr)
      addVoidPtrArg(1);
      addUnsignedArg(2);
      addUnsignedArg(3);
      // OpenCL-like representation of blocking pipes
    } else if (NameRef.equals("read_pipe_2_bl") ||
               NameRef.equals("write_pipe_2_bl")) {
      // with 2 arguments (plus two i32 literals):
      // int read_pipe_bl (read_only pipe gentype p, gentype *ptr)
      // int write_pipe_bl (write_only pipe gentype p, const gentype *ptr)
      addVoidPtrArg(1);
      addUnsignedArg(2);
      addUnsignedArg(3);
    } else if (NameRef.equals("read_pipe_4") ||
               NameRef.equals("write_pipe_4")) {
      // with 4 arguments (plus two i32 literals):
      // int read_pipe (read_only pipe gentype p, reserve_id_t reserve_id, uint
      // index, gentype *ptr) int write_pipe (write_only pipe gentype p,
      // reserve_id_t reserve_id, uint index, const gentype *ptr)
      addUnsignedArg(2);
      addVoidPtrArg(3);
      addUnsignedArg(4);
      addUnsignedArg(5);
    } else if (NameRef.contains("reserve_read_pipe") ||
               NameRef.contains("reserve_write_pipe")) {
      // process [|work_group|sub_group]reserve[read|write]pipe builtins
      addUnsignedArg(1);
      addUnsignedArg(2);
      addUnsignedArg(3);
    } else if (NameRef.contains("commit_read_pipe") ||
               NameRef.contains("commit_write_pipe")) {
      // process [|work_group|sub_group]commit[read|write]pipe builtins
      addUnsignedArg(2);
      addUnsignedArg(3);
    } else if (NameRef.equals("capture_event_profiling_info")) {
      addVoidPtrArg(2);
      setEnumArg(1, SPIR::PRIMITIVE_CLK_PROFILING_INFO);
    } else if (NameRef.equals("enqueue_marker")) {
      setArgAttr(2, SPIR::ATTR_CONST);
      addUnsignedArg(1);
    } else if (NameRef.startswith("vload")) {
      addUnsignedArg(0);
      setArgAttr(1, SPIR::ATTR_CONST);
    } else if (NameRef.startswith("vstore")) {
      addUnsignedArg(1);
    } else if (NameRef.startswith("ndrange_")) {
      addUnsignedArg(-1);
      if (NameRef[8] == '2' || NameRef[8] == '3') {
        setArgAttr(-1, SPIR::ATTR_CONST);
      }
    } else if (NameRef.contains("umax")) {
      addUnsignedArg(-1);
      EraseSymbol(NameRef.find("umax"));
    } else if (NameRef.contains("umin")) {
      addUnsignedArg(-1);
      EraseSymbol(NameRef.find("umin"));
    } else if (NameRef.contains("broadcast")) {
      addUnsignedArg(-1);
    } else if (NameRef.startswith(kOCLBuiltinName::SampledReadImage)) {
      NameRef.consume_front(kOCLBuiltinName::Sampled);
      addSamplerArg(1);
    } else if (NameRef.contains(kOCLSubgroupsAVCIntel::Prefix)) {
      if (NameRef.contains("evaluate_ipe"))
        addSamplerArg(1);
      else if (NameRef.contains("evaluate_with_single_reference"))
        addSamplerArg(2);
      else if (NameRef.contains("evaluate_with_multi_reference")) {
        addUnsignedArg(1);
        std::string PostFix = "_interlaced";
        if (NameRef.contains(PostFix)) {
          addUnsignedArg(2);
          addSamplerArg(3);
          EraseSubstring(PostFix);
        } else
          addSamplerArg(2);
      } else if (NameRef.contains("evaluate_with_dual_reference"))
        addSamplerArg(3);
      else if (NameRef.contains("fme_initialize"))
        addUnsignedArgs(0, 6);
      else if (NameRef.contains("bme_initialize"))
        addUnsignedArgs(0, 7);
      else if (NameRef.contains("set_inter_base_multi_reference_penalty") ||
               NameRef.contains("set_inter_shape_penalty") ||
               NameRef.contains("set_inter_direction_penalty"))
        addUnsignedArg(0);
      else if (NameRef.contains("set_motion_vector_cost_function"))
        addUnsignedArgs(0, 2);
      else if (NameRef.contains("interlaced_field_polarity"))
        addUnsignedArg(0);
      else if (NameRef.contains("interlaced_field_polarities"))
        addUnsignedArgs(0, 1);
      else if (NameRef.contains(kOCLSubgroupsAVCIntel::MCEPrefix)) {
        if (NameRef.contains("get_default"))
          addUnsignedArgs(0, 1);
      } else if (NameRef.contains(kOCLSubgroupsAVCIntel::IMEPrefix)) {
        if (NameRef.contains("initialize"))
          addUnsignedArgs(0, 2);
        else if (NameRef.contains("set_single_reference"))
          addUnsignedArg(1);
        else if (NameRef.contains("set_dual_reference"))
          addUnsignedArg(2);
        else if (NameRef.contains("set_weighted_sad") ||
                 NameRef.contains("set_early_search_termination_threshold"))
          addUnsignedArg(0);
        else if (NameRef.contains("adjust_ref_offset"))
          addUnsignedArgs(1, 3);
        else if (NameRef.contains("set_max_motion_vector_count") ||
                 NameRef.contains("get_border_reached"))
          addUnsignedArg(0);
        else if (NameRef.contains("shape_distortions") ||
                 NameRef.contains("shape_motion_vectors") ||
                 NameRef.contains("shape_reference_ids")) {
          if (NameRef.contains("single_reference")) {
            addUnsignedArg(1);
            EraseSubstring("_single_reference");
          } else if (NameRef.contains("dual_reference")) {
            addUnsignedArgs(1, 2);
            EraseSubstring("_dual_reference");
          }
        } else if (NameRef.contains("ref_window_size"))
          addUnsignedArg(0);
      } else if (NameRef.contains(kOCLSubgroupsAVCIntel::SICPrefix)) {
        if (NameRef.contains("initialize") ||
            NameRef.contains("set_intra_luma_shape_penalty"))
          addUnsignedArg(0);
        else if (NameRef.contains("configure_ipe")) {
          if (NameRef.contains("_luma")) {
            addUnsignedArgs(0, 6);
            EraseSubstring("_luma");
          }
          if (NameRef.contains("_chroma")) {
            addUnsignedArgs(7, 9);
            EraseSubstring("_chroma");
          }
        } else if (NameRef.contains("configure_skc"))
          addUnsignedArgs(0, 4);
        else if (NameRef.contains("set_skc")) {
          if (NameRef.contains("forward_transform_enable"))
            addUnsignedArg(0);
        } else if (NameRef.contains("set_block")) {
          if (NameRef.contains("based_raw_skip_sad"))
            addUnsignedArg(0);
        } else if (NameRef.contains("get_motion_vector_mask")) {
          addUnsignedArgs(0, 1);
        } else if (NameRef.contains("luma_mode_cost_function"))
          addUnsignedArgs(0, 2);
        else if (NameRef.contains("chroma_mode_cost_function"))
          addUnsignedArg(0);
      }
    } else if (NameRef.startswith("intel_sub_group_shuffle")) {
      if (NameRef.endswith("_down") || NameRef.endswith("_up"))
        addUnsignedArg(2);
      else
        addUnsignedArg(1);
    } else if (NameRef.startswith("intel_sub_group_block_write")) {
      // Distinguish between a write to an image and a write to other data
      // types: the position of the uint argument differs even though the
      // builtin name is the same.
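      // For illustration, a rough sketch of the cl_intel_subgroups forms (not
      // the full overload set):
      //   void intel_sub_group_block_write(__global uint *p, uint data);
      //   void intel_sub_group_block_write(write_only image2d_t image,
      //                                    int2 byte_coord, uint data);
      // so the unsigned data operand is argument 1 for the pointer form and
      // argument 2 for the image form.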
      auto *Arg0Ty = getArgTy(0);
      if (Arg0Ty->isPointerTy() &&
          Arg0Ty->getPointerElementType()->isIntegerTy()) {
        addUnsignedArg(0);
        addUnsignedArg(1);
      } else {
        addUnsignedArg(2);
      }
    } else if (NameRef.startswith("intel_sub_group_block_read")) {
      // Distinguish between a read from an image and a read from other data
      // types: the position of the uint argument differs even though the
      // builtin name is the same.
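      // Again only a sketch: the pointer form reads from a const __global
      // uint *, so argument 0 is marked const and unsigned, while the image
      // form takes (image2d_t, int2 coord) and needs no unsigned-argument
      // annotation here.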
      auto *Arg0Ty = getArgTy(0);
      if (Arg0Ty->isPointerTy() &&
          Arg0Ty->getPointerElementType()->isIntegerTy()) {
        setArgAttr(0, SPIR::ATTR_CONST);
        addUnsignedArg(0);
      }
    } else if (NameRef.startswith("intel_sub_group_media_block_write")) {
      addUnsignedArg(3);
    } else if (NameRef.startswith(kOCLBuiltinName::SubGroupPrefix)) {
      if (NameRef.contains("ballot")) {
        if (NameRef.contains("inverse") || NameRef.contains("bit_count") ||
            NameRef.contains("inclusive_scan") ||
            NameRef.contains("exclusive_scan") ||
            NameRef.contains("find_lsb") || NameRef.contains("find_msb"))
          addUnsignedArg(0);
        else if (NameRef.contains("bit_extract")) {
          addUnsignedArgs(0, 1);
        }
      } else if (NameRef.contains("shuffle") || NameRef.contains("clustered"))
        addUnsignedArg(1);
    } else if (NameRef.startswith("bitfield_insert")) {
      addUnsignedArgs(2, 3);
    } else if (NameRef.startswith("bitfield_extract_signed") ||
               NameRef.startswith("bitfield_extract_unsigned")) {
      addUnsignedArgs(1, 2);
    }

    // Store the final version of a function name
    UnmangledName = NameRef.str();
  }
  // Auxiliary information; it is expected to be relevant at the moment the
  // init method is called.
  Function *F;                  // SPIRV decorated function
  // TODO: ArgTypes argument should get removed once all SPV-IR related issues
  // are resolved
  std::vector<Type *> ArgTypes; // Arguments of OCL builtin
};

CallInst *mutateCallInstOCL(
    Module *M, CallInst *CI,
    std::function<std::string(CallInst *, std::vector<Value *> &)> ArgMutate,
    AttributeList *Attrs) {
  OCLBuiltinFuncMangleInfo BtnInfo(CI->getCalledFunction());
  return mutateCallInst(M, CI, ArgMutate, &BtnInfo, Attrs);
}

Instruction *mutateCallInstOCL(
    Module *M, CallInst *CI,
    std::function<std::string(CallInst *, std::vector<Value *> &, Type *&RetTy)>
        ArgMutate,
    std::function<Instruction *(CallInst *)> RetMutate, AttributeList *Attrs,
    bool TakeFuncName) {
  OCLBuiltinFuncMangleInfo BtnInfo(CI->getCalledFunction());
  return mutateCallInst(M, CI, ArgMutate, RetMutate, &BtnInfo, Attrs,
                        TakeFuncName);
}

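// Returns the names of the source and destination struct element types of a
// pointer-to-struct bitcast, or a pair of empty strings if the cast is absent
// or either side is not a named struct type.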
static std::pair<StringRef, StringRef>
getSrcAndDstElememntTypeName(BitCastInst *BIC) {
  if (!BIC)
    return std::pair<StringRef, StringRef>("", "");

  Type *SrcTy = BIC->getSrcTy();
  Type *DstTy = BIC->getDestTy();
  if (SrcTy->isPointerTy())
    SrcTy = SrcTy->getPointerElementType();
  if (DstTy->isPointerTy())
    DstTy = DstTy->getPointerElementType();
  auto SrcST = dyn_cast<StructType>(SrcTy);
  auto DstST = dyn_cast<StructType>(DstTy);
  if (!DstST || !DstST->hasName() || !SrcST || !SrcST->hasName())
    return std::pair<StringRef, StringRef>("", "");

  return std::make_pair(SrcST->getName(), DstST->getName());
}

bool isSamplerInitializer(Instruction *Inst) {
  BitCastInst *BIC = dyn_cast<BitCastInst>(Inst);
  auto Names = getSrcAndDstElememntTypeName(BIC);
  if (Names.second == getSPIRVTypeName(kSPIRVTypeName::Sampler) &&
      Names.first == getSPIRVTypeName(kSPIRVTypeName::ConstantSampler))
    return true;

  return false;
}

bool isPipeStorageInitializer(Instruction *Inst) {
  BitCastInst *BIC = dyn_cast<BitCastInst>(Inst);
  auto Names = getSrcAndDstElememntTypeName(BIC);
  if (Names.second == getSPIRVTypeName(kSPIRVTypeName::PipeStorage) &&
      Names.first == getSPIRVTypeName(kSPIRVTypeName::ConstantPipeStorage))
    return true;

  return false;
}

bool isSpecialTypeInitializer(Instruction *Inst) {
  return isSamplerInitializer(Inst) || isPipeStorageInitializer(Inst);
}

bool isSamplerTy(Type *Ty) {
  auto PTy = dyn_cast<PointerType>(Ty);
  if (!PTy)
    return false;

  auto STy = dyn_cast<StructType>(PTy->getElementType());
  return STy && STy->hasName() && STy->getName() == kSPR2TypeName::Sampler;
}

bool isPipeOrAddressSpaceCastBI(const StringRef MangledName) {
  return MangledName == "write_pipe_2" || MangledName == "read_pipe_2" ||
         MangledName == "write_pipe_2_bl" || MangledName == "read_pipe_2_bl" ||
         MangledName == "write_pipe_4" || MangledName == "read_pipe_4" ||
         MangledName == "reserve_write_pipe" ||
         MangledName == "reserve_read_pipe" ||
         MangledName == "commit_write_pipe" ||
         MangledName == "commit_read_pipe" ||
         MangledName == "work_group_reserve_write_pipe" ||
         MangledName == "work_group_reserve_read_pipe" ||
         MangledName == "work_group_commit_write_pipe" ||
         MangledName == "work_group_commit_read_pipe" ||
         MangledName == "get_pipe_num_packets_ro" ||
         MangledName == "get_pipe_max_packets_ro" ||
         MangledName == "get_pipe_num_packets_wo" ||
         MangledName == "get_pipe_max_packets_wo" ||
         MangledName == "sub_group_reserve_write_pipe" ||
         MangledName == "sub_group_reserve_read_pipe" ||
         MangledName == "sub_group_commit_write_pipe" ||
         MangledName == "sub_group_commit_read_pipe" ||
         MangledName == "to_global" || MangledName == "to_local" ||
         MangledName == "to_private";
}

bool isEnqueueKernelBI(const StringRef MangledName) {
  return MangledName == "__enqueue_kernel_basic" ||
         MangledName == "__enqueue_kernel_basic_events" ||
         MangledName == "__enqueue_kernel_varargs" ||
         MangledName == "__enqueue_kernel_events_varargs";
}

bool isKernelQueryBI(const StringRef MangledName) {
  return MangledName == "__get_kernel_work_group_size_impl" ||
         MangledName == "__get_kernel_sub_group_count_for_ndrange_impl" ||
         MangledName == "__get_kernel_max_sub_group_size_for_ndrange_impl" ||
         MangledName == "__get_kernel_preferred_work_group_size_multiple_impl";
}

// isUnfusedMulAdd checks whether we have the following pattern in LLVM IR
// (the most common candidate for fp contraction):
//
//   %mul = fmul float %a, %b
//   %add = fadd float %mul, %c
//
// This pattern indicates that fp contraction could have been disabled by
// #pragma OPENCL FP_CONTRACT OFF. When contraction is enabled (by a pragma or
// by clang's -ffp-contract=fast), clang would generate:
//
//   %0 = call float @llvm.fmuladd.f32(float %a, float %b, float %c)
//
// or
//
//   %mul = fmul contract float %a, %b
//   %add = fadd contract float %mul, %c
//
// Note that optimizations may form an unfused fmul + fadd pair from patterns
// such as fadd + load or fadd + call, so this check is quite restrictive (see
// the comment below).
//
bool isUnfusedMulAdd(BinaryOperator *B) {
  if (B->getOpcode() != Instruction::FAdd &&
      B->getOpcode() != Instruction::FSub)
    return false;

  if (B->hasAllowContract()) {
    // If this fadd or fsub itself has a contract flag, the operation can be
    // contracted regardless of the operands.
    return false;
  }

  // Otherwise, we cannot easily tell if the operation can be a candidate for
  // contraction or not. Consider the following cases:
  //
  //   %mul = alloca float
  //   %t1 = fmul float %a, %b
  //   store float %t1, float* %mul
  //   %t2 = load float, float* %mul
  //   %r = fadd float %t2, %c
  //
  // LLVM IR does not allow %r to be contracted. However, after an optimization
  // it becomes a candidate for contraction if ContractionOFF is not set in
  // SPIR-V:
  //
  //   %t1 = fmul float %a, %b
  //   %r = fadd float %t1, %c
  //
  // To be on the safe side, we disallow everything that is even remotely
  // similar to fmul + fadd.
  return true;
}

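// Builds the element-size/vector-length postfix used by the
// intel_sub_group_block_read/write builtins. For example (derived from the
// mapping below): (8, 2) -> "_uc2", (16, 1) -> "_us", (32, 8) -> "8",
// (64, 4) -> "_ul4"; the 32-bit scalar case yields an empty postfix since the
// "_ui" spelling is only an alias.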
std::string getIntelSubgroupBlockDataPostfix(unsigned ElementBitSize,
                                             unsigned VectorNumElements) {
  std::ostringstream OSS;
  switch (ElementBitSize) {
  case 8:
    OSS << "_uc";
    break;
  case 16:
    OSS << "_us";
    break;
  case 32:
    // Intentionally does nothing since _ui variant is only an alias.
    break;
  case 64:
    OSS << "_ul";
    break;
  default:
    llvm_unreachable(
        "Incorrect data bitsize for intel_subgroup_block builtins");
  }
  switch (VectorNumElements) {
  case 1:
    break;
  case 2:
  case 4:
  case 8:
    OSS << VectorNumElements;
    break;
  case 16:
    assert(ElementBitSize == 8 &&
           "16 elements vector allowed only for char builtins");
    OSS << VectorNumElements;
    break;
  default:
    llvm_unreachable(
        "Incorrect vector length for intel_subgroup_block builtins");
  }
  return OSS.str();
}

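// Inserts the short access-qualifier spelling into an image type name just
// before its trailing character; e.g. (assuming the usual OpenCL spelling of
// image type names) "image2d_t" with read_only access becomes "image2d_ro_t".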
void insertImageNameAccessQualifier(SPIRVAccessQualifierKind Acc,
                                    std::string &Name) {
  std::string QName = rmap<std::string>(Acc);
  // transform: read_only -> ro, write_only -> wo, read_write -> rw
  QName = QName.substr(0, 1) + QName.substr(QName.find("_") + 1, 1) + "_";
  assert(!Name.empty() && "image name should not be empty");
  Name.insert(Name.size() - 1, QName);
}
} // namespace OCLUtil

Value *SPIRV::transOCLMemScopeIntoSPIRVScope(Value *MemScope,
                                             Optional<int> DefaultCase,
                                             Instruction *InsertBefore) {
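  // A constant memory_scope operand (e.g. memory_scope_work_group) is folded
  // directly into the matching SPIR-V scope constant (Workgroup in that
  // example); a non-constant operand falls through to the dynamic mapping
  // below.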
  if (auto *C = dyn_cast<ConstantInt>(MemScope)) {
    return ConstantInt::get(
        C->getType(), map<Scope>(static_cast<OCLScopeKind>(C->getZExtValue())));
  }

  // If memory_scope is not a constant, then we have to insert dynamic mapping:
  return getOrCreateSwitchFunc(kSPIRVName::TranslateOCLMemScope, MemScope,
                               OCLMemScopeMap::getMap(), /* IsReverse */ false,
                               DefaultCase, InsertBefore);
}

Value *SPIRV::transOCLMemOrderIntoSPIRVMemorySemantics(
    Value *MemOrder, Optional<int> DefaultCase, Instruction *InsertBefore) {
  if (auto *C = dyn_cast<ConstantInt>(MemOrder)) {
    return ConstantInt::get(
        C->getType(), mapOCLMemSemanticToSPIRV(
                          0, static_cast<OCLMemOrderKind>(C->getZExtValue())));
  }

  return getOrCreateSwitchFunc(kSPIRVName::TranslateOCLMemOrder, MemOrder,
                               OCLMemOrderMap::getMap(), /* IsReverse */ false,
                               DefaultCase, InsertBefore);
}

Value *
SPIRV::transSPIRVMemoryScopeIntoOCLMemoryScope(Value *MemScope,
                                               Instruction *InsertBefore) {
  if (auto *C = dyn_cast<ConstantInt>(MemScope)) {
    return ConstantInt::get(C->getType(), rmap<OCLScopeKind>(static_cast<Scope>(
                                              C->getZExtValue())));
  }

  if (auto *CI = dyn_cast<CallInst>(MemScope)) {
    Function *F = CI->getCalledFunction();
    if (F && F->getName().equals(kSPIRVName::TranslateOCLMemScope)) {
      // In case the SPIR-V module was created from an OpenCL program by
      // *this* SPIR-V generator, we know that the value passed to
      // __translate_ocl_memory_scope is what we should pass to the
      // OpenCL builtin now.
      return CI->getArgOperand(0);
    }
  }

  return getOrCreateSwitchFunc(kSPIRVName::TranslateSPIRVMemScope, MemScope,
                               OCLMemScopeMap::getRMap(),
                               /* IsReverse */ true, None, InsertBefore);
}

Value *
SPIRV::transSPIRVMemorySemanticsIntoOCLMemoryOrder(Value *MemorySemantics,
                                                   Instruction *InsertBefore) {
  if (auto *C = dyn_cast<ConstantInt>(MemorySemantics)) {
    return ConstantInt::get(C->getType(),
                            mapSPIRVMemSemanticToOCL(C->getZExtValue()).second);
  }

  if (auto *CI = dyn_cast<CallInst>(MemorySemantics)) {
    Function *F = CI->getCalledFunction();
    if (F && F->getName().equals(kSPIRVName::TranslateOCLMemOrder)) {
      // In case the SPIR-V module was created from an OpenCL program by
      // *this* SPIR-V generator, we know that the value passed to
      // __translate_ocl_memory_order is what we should pass to the
      // OpenCL builtin now.
      return CI->getArgOperand(0);
    }
  }

  // SPIR-V MemorySemantics encodes both the OCL mem_fence_flags and the OCL
  // mem_order, so we need to apply a mask to extract only the ordering bits.
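  // For instance, a semantics value of AcquireRelease | WorkgroupMemory keeps
  // only the AcquireRelease bit under this mask, which maps to
  // memory_order_acq_rel; the memory-kind bits are handled by the fence-flags
  // translation below.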
  int Mask = MemorySemanticsMaskNone | MemorySemanticsAcquireMask |
             MemorySemanticsReleaseMask | MemorySemanticsAcquireReleaseMask |
             MemorySemanticsSequentiallyConsistentMask;
  return getOrCreateSwitchFunc(kSPIRVName::TranslateSPIRVMemOrder,
                               MemorySemantics, OCLMemOrderMap::getRMap(),
                               /* IsReverse */ true, None, InsertBefore, Mask);
}

Value *SPIRV::transSPIRVMemorySemanticsIntoOCLMemFenceFlags(
    Value *MemorySemantics, Instruction *InsertBefore) {
  if (auto *C = dyn_cast<ConstantInt>(MemorySemantics)) {
    return ConstantInt::get(C->getType(),
                            mapSPIRVMemSemanticToOCL(C->getZExtValue()).first);
  }

  // TODO: any possible optimizations?
  // SPIR-V MemorySemantics encodes both the OCL mem_fence_flags and the OCL
  // mem_order, so we need to apply a mask to extract only the memory-kind
  // bits.
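  // For instance, the WorkgroupMemory bit kept by this mask corresponds to
  // CLK_LOCAL_MEM_FENCE, while CrossWorkgroupMemory corresponds to
  // CLK_GLOBAL_MEM_FENCE.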
  int Mask = MemorySemanticsWorkgroupMemoryMask |
             MemorySemanticsCrossWorkgroupMemoryMask |
             MemorySemanticsImageMemoryMask;
  return getOrCreateSwitchFunc(kSPIRVName::TranslateSPIRVMemFence,
                               MemorySemantics,
                               OCLMemFenceExtendedMap::getRMap(),
                               /* IsReverse */ true, None, InsertBefore, Mask);
}

void llvm::mangleOpenClBuiltin(const std::string &UniqName,
                               ArrayRef<Type *> ArgTypes,
                               std::string &MangledName) {
  OCLUtil::OCLBuiltinFuncMangleInfo BtnInfo(ArgTypes);
  MangledName = SPIRV::mangleBuiltin(UniqName, ArgTypes, &BtnInfo);
}
