//===- OCLUtil.cpp - OCL Utilities ----------------------------------------===//
//
// The LLVM/SPIRV Translator
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
// Copyright (c) 2014 Advanced Micro Devices, Inc. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a
// copy of this software and associated documentation files (the "Software"),
// to deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
// Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimers.
// Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimers in the documentation
// and/or other materials provided with the distribution.
// Neither the names of Advanced Micro Devices, Inc., nor the names of its
// contributors may be used to endorse or promote products derived from this
// Software without specific prior written permission.
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH
// THE SOFTWARE.
//
//===----------------------------------------------------------------------===//
//
// This file implements OCL utility functions.
//
//===----------------------------------------------------------------------===//
38 #define DEBUG_TYPE "oclutil"
39
40 #include "OCLUtil.h"
41 #include "SPIRVEntry.h"
42 #include "SPIRVFunction.h"
43 #include "SPIRVInstruction.h"
44 #include "SPIRVInternal.h"
45 #include "llvm/ADT/StringSwitch.h"
46 #include "llvm/IR/IRBuilder.h"
47 #include "llvm/IR/InstVisitor.h"
48 #include "llvm/IR/Instructions.h"
49 #include "llvm/Pass.h"
50 #include "llvm/Support/CommandLine.h"
51 #include "llvm/Support/Debug.h"
52
53 using namespace llvm;
54 using namespace SPIRV;
55
namespace OCLUtil {

// Address spaces used for the OpenCL special (opaque) types.
// SPIRV_OCL_SPECIAL_TYPES_DEFAULT_ADDR_SPACE is the fallback for most of
// them; each per-type macro below may be pre-defined at build time to
// override the default chosen here.
#ifndef SPIRV_OCL_SPECIAL_TYPES_DEFAULT_ADDR_SPACE
#define SPIRV_OCL_SPECIAL_TYPES_DEFAULT_ADDR_SPACE SPIRAS_Private
#endif

#ifndef SPIRV_QUEUE_T_ADDR_SPACE
#define SPIRV_QUEUE_T_ADDR_SPACE SPIRV_OCL_SPECIAL_TYPES_DEFAULT_ADDR_SPACE
#endif

#ifndef SPIRV_EVENT_T_ADDR_SPACE
#define SPIRV_EVENT_T_ADDR_SPACE SPIRV_OCL_SPECIAL_TYPES_DEFAULT_ADDR_SPACE
#endif

#ifndef SPIRV_AVC_INTEL_T_ADDR_SPACE
#define SPIRV_AVC_INTEL_T_ADDR_SPACE SPIRV_OCL_SPECIAL_TYPES_DEFAULT_ADDR_SPACE
#endif

#ifndef SPIRV_CLK_EVENT_T_ADDR_SPACE
#define SPIRV_CLK_EVENT_T_ADDR_SPACE SPIRV_OCL_SPECIAL_TYPES_DEFAULT_ADDR_SPACE
#endif

// Samplers get the constant address space rather than the shared default.
#ifndef SPIRV_SAMPLER_T_ADDR_SPACE
#define SPIRV_SAMPLER_T_ADDR_SPACE SPIRAS_Constant
#endif

#ifndef SPIRV_RESERVE_ID_T_ADDR_SPACE
#define SPIRV_RESERVE_ID_T_ADDR_SPACE SPIRV_OCL_SPECIAL_TYPES_DEFAULT_ADDR_SPACE
#endif
// Excerpt from SPIR 2.0 spec.:
//   Pipe objects are represented using pointers to the opaque %opencl.pipe
//   LLVM structure type which reside in the global address space.
#ifndef SPIRV_PIPE_ADDR_SPACE
#define SPIRV_PIPE_ADDR_SPACE SPIRAS_Global
#endif
// Excerpt from SPIR 2.0 spec.:
//   Note: Images data types reside in global memory and hence should be
//   marked as such in the "kernel arg addr space" metadata.
#ifndef SPIRV_IMAGE_ADDR_SPACE
#define SPIRV_IMAGE_ADDR_SPACE SPIRAS_Global
#endif

} // namespace OCLUtil
99
///////////////////////////////////////////////////////////////////////////////
//
// Map definitions
//
///////////////////////////////////////////////////////////////////////////////

106 using namespace OCLUtil;
107 namespace SPIRV {
108
/// Map OpenCL mem_fence flag bits to SPIR-V memory-semantics mask bits.
template <> void SPIRVMap<OCLMemFenceKind, MemorySemanticsMask>::init() {
  add(OCLMF_Local, MemorySemanticsWorkgroupMemoryMask);
  add(OCLMF_Global, MemorySemanticsCrossWorkgroupMemoryMask);
  add(OCLMF_Image, MemorySemanticsImageMemoryMask);
}
114
/// Map the extended (combined) OpenCL memory-fence flag values to the
/// corresponding combinations of SPIR-V memory-semantics mask bits.
template <>
void SPIRVMap<OCLMemFenceExtendedKind, MemorySemanticsMask>::init() {
  add(OCLMFEx_Local, MemorySemanticsWorkgroupMemoryMask);
  add(OCLMFEx_Global, MemorySemanticsCrossWorkgroupMemoryMask);
  add(OCLMFEx_Local_Global, MemorySemanticsWorkgroupMemoryMask |
                                MemorySemanticsCrossWorkgroupMemoryMask);
  add(OCLMFEx_Image, MemorySemanticsImageMemoryMask);
  add(OCLMFEx_Image_Local,
      MemorySemanticsWorkgroupMemoryMask | MemorySemanticsImageMemoryMask);
  add(OCLMFEx_Image_Global,
      MemorySemanticsCrossWorkgroupMemoryMask | MemorySemanticsImageMemoryMask);
  add(OCLMFEx_Image_Local_Global, MemorySemanticsWorkgroupMemoryMask |
                                      MemorySemanticsCrossWorkgroupMemoryMask |
                                      MemorySemanticsImageMemoryMask);
}
130
/// Map OpenCL C11-style memory_order values to SPIR-V memory-semantics
/// ordering bits.
template <>
void SPIRVMap<OCLMemOrderKind, unsigned, MemorySemanticsMask>::init() {
  add(OCLMO_relaxed, MemorySemanticsMaskNone);
  add(OCLMO_acquire, MemorySemanticsAcquireMask);
  add(OCLMO_release, MemorySemanticsReleaseMask);
  add(OCLMO_acq_rel, MemorySemanticsAcquireReleaseMask);
  add(OCLMO_seq_cst, MemorySemanticsSequentiallyConsistentMask);
}
139
/// Map OpenCL memory_scope values to SPIR-V scope enumerants.
template <> void SPIRVMap<OCLScopeKind, Scope>::init() {
  add(OCLMS_work_item, ScopeInvocation);
  add(OCLMS_work_group, ScopeWorkgroup);
  add(OCLMS_device, ScopeDevice);
  add(OCLMS_all_svm_devices, ScopeCrossDevice);
  add(OCLMS_sub_group, ScopeSubgroup);
}
147
/// Map the operation substring of OpenCL group/subgroup builtin names to the
/// SPIR-V group-operation kind. Several distinct name spellings deliberately
/// map onto the same SPIR-V group operation (e.g. all the *_reduce forms).
template <> void SPIRVMap<std::string, SPIRVGroupOperationKind>::init() {
  add("reduce", GroupOperationReduce);
  add("scan_inclusive", GroupOperationInclusiveScan);
  add("scan_exclusive", GroupOperationExclusiveScan);
  add("ballot_bit_count", GroupOperationReduce);
  add("ballot_inclusive_scan", GroupOperationInclusiveScan);
  add("ballot_exclusive_scan", GroupOperationExclusiveScan);
  add("non_uniform_reduce", GroupOperationReduce);
  add("non_uniform_scan_inclusive", GroupOperationInclusiveScan);
  add("non_uniform_scan_exclusive", GroupOperationExclusiveScan);
  add("non_uniform_reduce_logical", GroupOperationReduce);
  add("non_uniform_scan_inclusive_logical", GroupOperationInclusiveScan);
  add("non_uniform_scan_exclusive_logical", GroupOperationExclusiveScan);
  add("clustered_reduce", GroupOperationClusteredReduce);
}
163
/// Map OpenCL rounding-mode name suffixes (e.g. convert_int_rte) to the
/// SPIR-V FPRoundingMode decoration values.
template <> void SPIRVMap<std::string, SPIRVFPRoundingModeKind>::init() {
  add("rte", FPRoundingModeRTE); // round to nearest even
  add("rtz", FPRoundingModeRTZ); // round toward zero
  add("rtp", FPRoundingModeRTP); // round toward +inf
  add("rtn", FPRoundingModeRTN); // round toward -inf
}
170
/// Map OclExt enumerators to their OpenCL extension-string spellings.
/// The macro stringizes the enumerator, so each enumerator must be spelled
/// exactly like the extension name it represents.
template <> void SPIRVMap<OclExt::Kind, std::string>::init() {
#define _SPIRV_OP(x) add(OclExt::x, #x);
  _SPIRV_OP(cl_images)
  _SPIRV_OP(cl_doubles)
  _SPIRV_OP(cl_khr_int64_base_atomics)
  _SPIRV_OP(cl_khr_int64_extended_atomics)
  _SPIRV_OP(cl_khr_fp16)
  _SPIRV_OP(cl_khr_gl_sharing)
  _SPIRV_OP(cl_khr_gl_event)
  _SPIRV_OP(cl_khr_d3d10_sharing)
  _SPIRV_OP(cl_khr_media_sharing)
  _SPIRV_OP(cl_khr_d3d11_sharing)
  _SPIRV_OP(cl_khr_global_int32_base_atomics)
  _SPIRV_OP(cl_khr_global_int32_extended_atomics)
  _SPIRV_OP(cl_khr_local_int32_base_atomics)
  _SPIRV_OP(cl_khr_local_int32_extended_atomics)
  _SPIRV_OP(cl_khr_byte_addressable_store)
  _SPIRV_OP(cl_khr_3d_image_writes)
  _SPIRV_OP(cl_khr_gl_msaa_sharing)
  _SPIRV_OP(cl_khr_depth_images)
  _SPIRV_OP(cl_khr_gl_depth_images)
  _SPIRV_OP(cl_khr_subgroups)
  _SPIRV_OP(cl_khr_mipmap_image)
  _SPIRV_OP(cl_khr_mipmap_image_writes)
  _SPIRV_OP(cl_khr_egl_event)
  _SPIRV_OP(cl_khr_srgb_image_writes)
  _SPIRV_OP(cl_khr_extended_bit_ops)
#undef _SPIRV_OP
}
200
/// Map OpenCL extensions to the SPIR-V capability they require.
/// Extensions without a corresponding SPIR-V capability are simply absent
/// from this map. Note that several extensions share one capability
/// (e.g. both int64 atomics extensions map to Int64Atomics).
template <> void SPIRVMap<OclExt::Kind, SPIRVCapabilityKind>::init() {
  add(OclExt::cl_images, CapabilityImageBasic);
  add(OclExt::cl_doubles, CapabilityFloat64);
  add(OclExt::cl_khr_int64_base_atomics, CapabilityInt64Atomics);
  add(OclExt::cl_khr_int64_extended_atomics, CapabilityInt64Atomics);
  add(OclExt::cl_khr_fp16, CapabilityFloat16);
  add(OclExt::cl_khr_subgroups, CapabilityGroups);
  add(OclExt::cl_khr_mipmap_image, CapabilityImageMipmap);
  add(OclExt::cl_khr_mipmap_image_writes, CapabilityImageMipmap);
  add(OclExt::cl_khr_extended_bit_ops, CapabilityBitInstructions);
}
212
/// Map OpenCL work-item query functions to SPIR-V builtin variables.
template <> void SPIRVMap<std::string, SPIRVBuiltinVariableKind>::init() {
  add("get_work_dim", BuiltInWorkDim);
  add("get_global_size", BuiltInGlobalSize);
  add("get_global_id", BuiltInGlobalInvocationId);
  add("get_global_offset", BuiltInGlobalOffset);
  add("get_local_size", BuiltInWorkgroupSize);
  add("get_enqueued_local_size", BuiltInEnqueuedWorkgroupSize);
  add("get_local_id", BuiltInLocalInvocationId);
  add("get_num_groups", BuiltInNumWorkgroups);
  add("get_group_id", BuiltInWorkgroupId);
  add("get_global_linear_id", BuiltInGlobalLinearId);
  add("get_local_linear_id", BuiltInLocalInvocationIndex);
  // cl_khr_subgroups
  add("get_sub_group_size", BuiltInSubgroupSize);
  add("get_max_sub_group_size", BuiltInSubgroupMaxSize);
  add("get_num_sub_groups", BuiltInNumSubgroups);
  add("get_enqueued_num_sub_groups", BuiltInNumEnqueuedSubgroups);
  add("get_sub_group_id", BuiltInSubgroupId);
  add("get_sub_group_local_id", BuiltInSubgroupLocalInvocationId);
  // cl_khr_subgroup_ballot
  add("get_sub_group_eq_mask", BuiltInSubgroupEqMask);
  add("get_sub_group_ge_mask", BuiltInSubgroupGeMask);
  add("get_sub_group_gt_mask", BuiltInSubgroupGtMask);
  add("get_sub_group_le_mask", BuiltInSubgroupLeMask);
  add("get_sub_group_lt_mask", BuiltInSubgroupLtMask);
}
240
// Maps uniqued OCL builtin function name to SPIR-V op code.
// A uniqued OCL builtin function name may be different from the real
// OCL builtin function name. e.g. instead of atomic_min, atomic_umin
// is used for atomic_min with unsigned integer parameter.
// work_group_ and sub_group_ functions are unified as group_ functions
// except work_group_barrier.
class SPIRVInstruction;
template <> void SPIRVMap<std::string, Op, SPIRVInstruction>::init() {
// _SPIRV_OP is redefined for each builtin-name prefix handled below.
#define _SPIRV_OP(x, y) add("atom_" #x, OpAtomic##y);
  // cl_khr_int64_base_atomics builtins
  _SPIRV_OP(add, IAdd)
  _SPIRV_OP(sub, ISub)
  _SPIRV_OP(xchg, Exchange)
  _SPIRV_OP(dec, IDecrement)
  _SPIRV_OP(inc, IIncrement)
  _SPIRV_OP(cmpxchg, CompareExchange)
  // cl_khr_int64_extended_atomics builtins
  _SPIRV_OP(min, SMin)
  _SPIRV_OP(max, SMax)
  _SPIRV_OP(and, And)
  _SPIRV_OP(or, Or)
  _SPIRV_OP(xor, Xor)
#undef _SPIRV_OP
#define _SPIRV_OP(x, y) add("atomic_" #x, Op##y);
  // CL 2.0 atomic builtins
  _SPIRV_OP(flag_test_and_set_explicit, AtomicFlagTestAndSet)
  _SPIRV_OP(flag_clear_explicit, AtomicFlagClear)
  _SPIRV_OP(load_explicit, AtomicLoad)
  _SPIRV_OP(store_explicit, AtomicStore)
  _SPIRV_OP(exchange_explicit, AtomicExchange)
  _SPIRV_OP(compare_exchange_strong_explicit, AtomicCompareExchange)
  _SPIRV_OP(compare_exchange_weak_explicit, AtomicCompareExchangeWeak)
  _SPIRV_OP(inc, AtomicIIncrement)
  _SPIRV_OP(dec, AtomicIDecrement)
  _SPIRV_OP(fetch_add_explicit, AtomicIAdd)
  _SPIRV_OP(fetch_sub_explicit, AtomicISub)
  _SPIRV_OP(fetch_umin_explicit, AtomicUMin)
  _SPIRV_OP(fetch_umax_explicit, AtomicUMax)
  _SPIRV_OP(fetch_min_explicit, AtomicSMin)
  _SPIRV_OP(fetch_max_explicit, AtomicSMax)
  _SPIRV_OP(fetch_and_explicit, AtomicAnd)
  _SPIRV_OP(fetch_or_explicit, AtomicOr)
  _SPIRV_OP(fetch_xor_explicit, AtomicXor)
#undef _SPIRV_OP
// Remaining entries use the builtin name verbatim as the key.
#define _SPIRV_OP(x, y) add(#x, Op##y);
  _SPIRV_OP(dot, Dot)
  _SPIRV_OP(async_work_group_copy, GroupAsyncCopy)
  _SPIRV_OP(async_work_group_strided_copy, GroupAsyncCopy)
  _SPIRV_OP(wait_group_events, GroupWaitEvents)
  _SPIRV_OP(isequal, FOrdEqual)
  _SPIRV_OP(isnotequal, FUnordNotEqual)
  _SPIRV_OP(isgreater, FOrdGreaterThan)
  _SPIRV_OP(isgreaterequal, FOrdGreaterThanEqual)
  _SPIRV_OP(isless, FOrdLessThan)
  _SPIRV_OP(islessequal, FOrdLessThanEqual)
  _SPIRV_OP(islessgreater, LessOrGreater)
  _SPIRV_OP(isordered, Ordered)
  _SPIRV_OP(isunordered, Unordered)
  _SPIRV_OP(isfinite, IsFinite)
  _SPIRV_OP(isinf, IsInf)
  _SPIRV_OP(isnan, IsNan)
  _SPIRV_OP(isnormal, IsNormal)
  _SPIRV_OP(signbit, SignBitSet)
  _SPIRV_OP(any, Any)
  _SPIRV_OP(all, All)
  _SPIRV_OP(popcount, BitCount)
  _SPIRV_OP(get_fence, GenericPtrMemSemantics)
  // CL 2.0 kernel enqueue builtins
  _SPIRV_OP(enqueue_marker, EnqueueMarker)
  _SPIRV_OP(enqueue_kernel, EnqueueKernel)
  _SPIRV_OP(get_kernel_sub_group_count_for_ndrange_impl,
            GetKernelNDrangeSubGroupCount)
  _SPIRV_OP(get_kernel_max_sub_group_size_for_ndrange_impl,
            GetKernelNDrangeMaxSubGroupSize)
  _SPIRV_OP(get_kernel_work_group_size_impl, GetKernelWorkGroupSize)
  _SPIRV_OP(get_kernel_preferred_work_group_size_multiple_impl,
            GetKernelPreferredWorkGroupSizeMultiple)
  _SPIRV_OP(retain_event, RetainEvent)
  _SPIRV_OP(release_event, ReleaseEvent)
  _SPIRV_OP(create_user_event, CreateUserEvent)
  _SPIRV_OP(is_valid_event, IsValidEvent)
  _SPIRV_OP(set_user_event_status, SetUserEventStatus)
  _SPIRV_OP(capture_event_profiling_info, CaptureEventProfilingInfo)
  _SPIRV_OP(get_default_queue, GetDefaultQueue)
  _SPIRV_OP(ndrange_1D, BuildNDRange)
  _SPIRV_OP(ndrange_2D, BuildNDRange)
  _SPIRV_OP(ndrange_3D, BuildNDRange)
  // Generic Address Space Casts
  _SPIRV_OP(to_global, GenericCastToPtrExplicit)
  _SPIRV_OP(to_local, GenericCastToPtrExplicit)
  _SPIRV_OP(to_private, GenericCastToPtrExplicit)
  // CL 2.0 pipe builtins
  _SPIRV_OP(read_pipe_2, ReadPipe)
  _SPIRV_OP(write_pipe_2, WritePipe)
  _SPIRV_OP(read_pipe_2_bl, ReadPipeBlockingINTEL)
  _SPIRV_OP(write_pipe_2_bl, WritePipeBlockingINTEL)
  _SPIRV_OP(read_pipe_4, ReservedReadPipe)
  _SPIRV_OP(write_pipe_4, ReservedWritePipe)
  _SPIRV_OP(reserve_read_pipe, ReserveReadPipePackets)
  _SPIRV_OP(reserve_write_pipe, ReserveWritePipePackets)
  _SPIRV_OP(commit_read_pipe, CommitReadPipe)
  _SPIRV_OP(commit_write_pipe, CommitWritePipe)
  _SPIRV_OP(is_valid_reserve_id, IsValidReserveId)
  _SPIRV_OP(group_reserve_read_pipe, GroupReserveReadPipePackets)
  _SPIRV_OP(group_reserve_write_pipe, GroupReserveWritePipePackets)
  _SPIRV_OP(group_commit_read_pipe, GroupCommitReadPipe)
  _SPIRV_OP(group_commit_write_pipe, GroupCommitWritePipe)
  _SPIRV_OP(get_pipe_num_packets_ro, GetNumPipePackets)
  _SPIRV_OP(get_pipe_num_packets_wo, GetNumPipePackets)
  _SPIRV_OP(get_pipe_max_packets_ro, GetMaxPipePackets)
  _SPIRV_OP(get_pipe_max_packets_wo, GetMaxPipePackets)
  // CL 2.0 workgroup builtins
  _SPIRV_OP(group_all, GroupAll)
  _SPIRV_OP(group_any, GroupAny)
  _SPIRV_OP(group_broadcast, GroupBroadcast)
  _SPIRV_OP(group_iadd, GroupIAdd)
  _SPIRV_OP(group_fadd, GroupFAdd)
  _SPIRV_OP(group_fmin, GroupFMin)
  _SPIRV_OP(group_umin, GroupUMin)
  _SPIRV_OP(group_smin, GroupSMin)
  _SPIRV_OP(group_fmax, GroupFMax)
  _SPIRV_OP(group_umax, GroupUMax)
  _SPIRV_OP(group_smax, GroupSMax)
  // CL image builtins
  _SPIRV_OP(SampledImage, SampledImage)
  _SPIRV_OP(ImageSampleExplicitLod, ImageSampleExplicitLod)
  _SPIRV_OP(read_image, ImageRead)
  _SPIRV_OP(write_image, ImageWrite)
  _SPIRV_OP(get_image_channel_data_type, ImageQueryFormat)
  _SPIRV_OP(get_image_channel_order, ImageQueryOrder)
  _SPIRV_OP(get_image_num_mip_levels, ImageQueryLevels)
  _SPIRV_OP(get_image_num_samples, ImageQuerySamples)
  // Intel Subgroups builtins
  _SPIRV_OP(intel_sub_group_shuffle, SubgroupShuffleINTEL)
  _SPIRV_OP(intel_sub_group_shuffle_down, SubgroupShuffleDownINTEL)
  _SPIRV_OP(intel_sub_group_shuffle_up, SubgroupShuffleUpINTEL)
  _SPIRV_OP(intel_sub_group_shuffle_xor, SubgroupShuffleXorINTEL)
  // Intel media_block_io builtins
  _SPIRV_OP(intel_sub_group_media_block_read, SubgroupImageMediaBlockReadINTEL)
  _SPIRV_OP(intel_sub_group_media_block_write,
            SubgroupImageMediaBlockWriteINTEL)
  // cl_khr_subgroup_non_uniform_vote
  _SPIRV_OP(group_elect, GroupNonUniformElect)
  _SPIRV_OP(group_non_uniform_all, GroupNonUniformAll)
  _SPIRV_OP(group_non_uniform_any, GroupNonUniformAny)
  _SPIRV_OP(group_non_uniform_all_equal, GroupNonUniformAllEqual)
  // cl_khr_subgroup_ballot
  _SPIRV_OP(group_non_uniform_broadcast, GroupNonUniformBroadcast)
  _SPIRV_OP(group_broadcast_first, GroupNonUniformBroadcastFirst)
  _SPIRV_OP(group_ballot, GroupNonUniformBallot)
  _SPIRV_OP(group_inverse_ballot, GroupNonUniformInverseBallot)
  _SPIRV_OP(group_ballot_bit_extract, GroupNonUniformBallotBitExtract)
  _SPIRV_OP(group_ballot_bit_count_iadd, GroupNonUniformBallotBitCount)
  _SPIRV_OP(group_ballot_find_lsb, GroupNonUniformBallotFindLSB)
  _SPIRV_OP(group_ballot_find_msb, GroupNonUniformBallotFindMSB)
  // cl_khr_subgroup_non_uniform_arithmetic
  _SPIRV_OP(group_non_uniform_iadd, GroupNonUniformIAdd)
  _SPIRV_OP(group_non_uniform_fadd, GroupNonUniformFAdd)
  _SPIRV_OP(group_non_uniform_imul, GroupNonUniformIMul)
  _SPIRV_OP(group_non_uniform_fmul, GroupNonUniformFMul)
  _SPIRV_OP(group_non_uniform_smin, GroupNonUniformSMin)
  _SPIRV_OP(group_non_uniform_umin, GroupNonUniformUMin)
  _SPIRV_OP(group_non_uniform_fmin, GroupNonUniformFMin)
  _SPIRV_OP(group_non_uniform_smax, GroupNonUniformSMax)
  _SPIRV_OP(group_non_uniform_umax, GroupNonUniformUMax)
  _SPIRV_OP(group_non_uniform_fmax, GroupNonUniformFMax)
  _SPIRV_OP(group_non_uniform_iand, GroupNonUniformBitwiseAnd)
  _SPIRV_OP(group_non_uniform_ior, GroupNonUniformBitwiseOr)
  _SPIRV_OP(group_non_uniform_ixor, GroupNonUniformBitwiseXor)
  _SPIRV_OP(group_non_uniform_logical_iand, GroupNonUniformLogicalAnd)
  _SPIRV_OP(group_non_uniform_logical_ior, GroupNonUniformLogicalOr)
  _SPIRV_OP(group_non_uniform_logical_ixor, GroupNonUniformLogicalXor)
  // cl_khr_subgroup_shuffle
  _SPIRV_OP(group_shuffle, GroupNonUniformShuffle)
  _SPIRV_OP(group_shuffle_xor, GroupNonUniformShuffleXor)
  // cl_khr_subgroup_shuffle_relative
  _SPIRV_OP(group_shuffle_up, GroupNonUniformShuffleUp)
  _SPIRV_OP(group_shuffle_down, GroupNonUniformShuffleDown)
  // cl_khr_extended_bit_ops
  _SPIRV_OP(bitfield_insert, BitFieldInsert)
  _SPIRV_OP(bitfield_extract_signed, BitFieldSExtract)
  _SPIRV_OP(bitfield_extract_unsigned, BitFieldUExtract)
  _SPIRV_OP(bit_reverse, BitReverse)
#undef _SPIRV_OP
}
426
/// Map the uniqued operation part of OpenCL 1.2 atomic builtin names
/// (atomic_*/atom_*) to SPIR-V atomic opcodes. Note: min/max here are the
/// signed variants; umin/umax are the uniqued names used for unsigned
/// arguments (see the comment on the SPIRVInstruction map above).
template <> void SPIRVMap<std::string, Op, OCL12Builtin>::init() {
#define _SPIRV_OP(x, y) add(#x, Op##y);
  _SPIRV_OP(add, AtomicIAdd)
  _SPIRV_OP(sub, AtomicISub)
  _SPIRV_OP(xchg, AtomicExchange)
  _SPIRV_OP(cmpxchg, AtomicCompareExchange)
  _SPIRV_OP(inc, AtomicIIncrement)
  _SPIRV_OP(dec, AtomicIDecrement)
  _SPIRV_OP(min, AtomicSMin)
  _SPIRV_OP(max, AtomicSMax)
  _SPIRV_OP(umin, AtomicUMin)
  _SPIRV_OP(umax, AtomicUMax)
  _SPIRV_OP(and, AtomicAnd)
  _SPIRV_OP(or, AtomicOr)
  _SPIRV_OP(xor, AtomicXor)
#undef _SPIRV_OP
}
444
// SPV_INTEL_device_side_avc_motion_estimation extension builtins.
// Maps the OpenCL builtin names of the extension to the corresponding
// OpSubgroupAvc*INTEL opcodes, one section per instruction group.
class SPIRVSubgroupsAVCIntelInst;
template <> void SPIRVMap<std::string, Op, SPIRVSubgroupsAVCIntelInst>::init() {
  // Here is a workaround for a bug in the specification:
  // 'avc' missed in 'intel_sub_group_avc' prefix.
  add("intel_sub_group_ime_ref_window_size",
      OpSubgroupAvcImeRefWindowSizeINTEL);

#define _SPIRV_OP(x, y) add("intel_sub_group_avc_" #x, OpSubgroupAvc##y##INTEL);
  // Initialization phase functions
  _SPIRV_OP(ime_initialize, ImeInitialize)
  _SPIRV_OP(fme_initialize, FmeInitialize)
  _SPIRV_OP(bme_initialize, BmeInitialize)
  _SPIRV_OP(sic_initialize, SicInitialize)

  // Result and payload types conversion functions
  _SPIRV_OP(mce_convert_to_ime_payload, MceConvertToImePayload)
  _SPIRV_OP(mce_convert_to_ime_result, MceConvertToImeResult)
  _SPIRV_OP(mce_convert_to_ref_payload, MceConvertToRefPayload)
  _SPIRV_OP(mce_convert_to_ref_result, MceConvertToRefResult)
  _SPIRV_OP(mce_convert_to_sic_payload, MceConvertToSicPayload)
  _SPIRV_OP(mce_convert_to_sic_result, MceConvertToSicResult)
  _SPIRV_OP(ime_convert_to_mce_payload, ImeConvertToMcePayload)
  _SPIRV_OP(ime_convert_to_mce_result, ImeConvertToMceResult)
  _SPIRV_OP(ref_convert_to_mce_payload, RefConvertToMcePayload)
  _SPIRV_OP(ref_convert_to_mce_result, RefConvertToMceResult)
  _SPIRV_OP(sic_convert_to_mce_payload, SicConvertToMcePayload)
  _SPIRV_OP(sic_convert_to_mce_result, SicConvertToMceResult)
#undef _SPIRV_OP

// MCE instructions
#define _SPIRV_OP(x, y)                                                        \
  add("intel_sub_group_avc_mce_" #x, OpSubgroupAvcMce##y##INTEL);
  _SPIRV_OP(get_default_inter_base_multi_reference_penalty,
            GetDefaultInterBaseMultiReferencePenalty)
  _SPIRV_OP(set_inter_base_multi_reference_penalty,
            SetInterBaseMultiReferencePenalty)
  _SPIRV_OP(get_default_inter_shape_penalty, GetDefaultInterShapePenalty)
  _SPIRV_OP(set_inter_shape_penalty, SetInterShapePenalty)
  _SPIRV_OP(get_default_inter_direction_penalty,
            GetDefaultInterDirectionPenalty)
  _SPIRV_OP(set_inter_direction_penalty, SetInterDirectionPenalty)
  _SPIRV_OP(get_default_intra_luma_shape_penalty,
            GetDefaultIntraLumaShapePenalty)
  _SPIRV_OP(get_default_inter_motion_vector_cost_table,
            GetDefaultInterMotionVectorCostTable)
  _SPIRV_OP(get_default_high_penalty_cost_table, GetDefaultHighPenaltyCostTable)
  _SPIRV_OP(get_default_medium_penalty_cost_table,
            GetDefaultMediumPenaltyCostTable)
  _SPIRV_OP(get_default_low_penalty_cost_table, GetDefaultLowPenaltyCostTable)
  _SPIRV_OP(set_motion_vector_cost_function, SetMotionVectorCostFunction)
  _SPIRV_OP(get_default_intra_luma_mode_penalty, GetDefaultIntraLumaModePenalty)
  _SPIRV_OP(get_default_non_dc_luma_intra_penalty,
            GetDefaultNonDcLumaIntraPenalty)
  _SPIRV_OP(get_default_intra_chroma_mode_base_penalty,
            GetDefaultIntraChromaModeBasePenalty)
  _SPIRV_OP(set_ac_only_haar, SetAcOnlyHaar)
  _SPIRV_OP(set_source_interlaced_field_polarity,
            SetSourceInterlacedFieldPolarity)
  _SPIRV_OP(set_single_reference_interlaced_field_polarity,
            SetSingleReferenceInterlacedFieldPolarity)
  _SPIRV_OP(set_dual_reference_interlaced_field_polarities,
            SetDualReferenceInterlacedFieldPolarities)
  _SPIRV_OP(get_motion_vectors, GetMotionVectors)
  _SPIRV_OP(get_inter_distortions, GetInterDistortions)
  _SPIRV_OP(get_best_inter_distortion, GetBestInterDistortions)
  _SPIRV_OP(get_inter_major_shape, GetInterMajorShape)
  _SPIRV_OP(get_inter_minor_shapes, GetInterMinorShape)
  _SPIRV_OP(get_inter_directions, GetInterDirections)
  _SPIRV_OP(get_inter_motion_vector_count, GetInterMotionVectorCount)
  _SPIRV_OP(get_inter_reference_ids, GetInterReferenceIds)
  _SPIRV_OP(get_inter_reference_interlaced_field_polarities,
            GetInterReferenceInterlacedFieldPolarities)
#undef _SPIRV_OP

// IME instructions
#define _SPIRV_OP(x, y)                                                        \
  add("intel_sub_group_avc_ime_" #x, OpSubgroupAvcIme##y##INTEL);
  _SPIRV_OP(set_single_reference, SetSingleReference)
  _SPIRV_OP(set_dual_reference, SetDualReference)
  _SPIRV_OP(ref_window_size, RefWindowSize)
  _SPIRV_OP(adjust_ref_offset, AdjustRefOffset)
  _SPIRV_OP(set_max_motion_vector_count, SetMaxMotionVectorCount)
  _SPIRV_OP(set_unidirectional_mix_disable, SetUnidirectionalMixDisable)
  _SPIRV_OP(set_early_search_termination_threshold,
            SetEarlySearchTerminationThreshold)
  _SPIRV_OP(set_weighted_sad, SetWeightedSad)
  _SPIRV_OP(evaluate_with_single_reference, EvaluateWithSingleReference)
  _SPIRV_OP(evaluate_with_dual_reference, EvaluateWithDualReference)
  _SPIRV_OP(evaluate_with_single_reference_streamin,
            EvaluateWithSingleReferenceStreamin)
  _SPIRV_OP(evaluate_with_dual_reference_streamin,
            EvaluateWithDualReferenceStreamin)
  _SPIRV_OP(evaluate_with_single_reference_streamout,
            EvaluateWithSingleReferenceStreamout)
  _SPIRV_OP(evaluate_with_dual_reference_streamout,
            EvaluateWithDualReferenceStreamout)
  _SPIRV_OP(evaluate_with_single_reference_streaminout,
            EvaluateWithSingleReferenceStreaminout)
  _SPIRV_OP(evaluate_with_dual_reference_streaminout,
            EvaluateWithDualReferenceStreaminout)
  _SPIRV_OP(get_single_reference_streamin, GetSingleReferenceStreamin)
  _SPIRV_OP(get_dual_reference_streamin, GetDualReferenceStreamin)
  _SPIRV_OP(strip_single_reference_streamout, StripSingleReferenceStreamout)
  _SPIRV_OP(strip_dual_reference_streamout, StripDualReferenceStreamout)
  _SPIRV_OP(get_border_reached, GetBorderReached)
  _SPIRV_OP(get_truncated_search_indication, GetTruncatedSearchIndication)
  _SPIRV_OP(get_unidirectional_early_search_termination,
            GetUnidirectionalEarlySearchTermination)
  _SPIRV_OP(get_weighting_pattern_minimum_motion_vector,
            GetWeightingPatternMinimumMotionVector)
  _SPIRV_OP(get_weighting_pattern_minimum_distortion,
            GetWeightingPatternMinimumDistortion)
#undef _SPIRV_OP

// IME streamout major-shape accessors
#define _SPIRV_OP(x, y)                                                        \
  add("intel_sub_group_avc_ime_get_streamout_major_shape_" #x,                 \
      OpSubgroupAvcImeGetStreamout##y##INTEL);
  _SPIRV_OP(motion_vectors_single_reference,
            SingleReferenceMajorShapeMotionVectors)
  _SPIRV_OP(distortions_single_reference, SingleReferenceMajorShapeDistortions)
  _SPIRV_OP(reference_ids_single_reference,
            SingleReferenceMajorShapeReferenceIds)
  _SPIRV_OP(motion_vectors_dual_reference, DualReferenceMajorShapeMotionVectors)
  _SPIRV_OP(distortions_dual_reference, DualReferenceMajorShapeDistortions)
  _SPIRV_OP(reference_ids_dual_reference, DualReferenceMajorShapeReferenceIds)
#undef _SPIRV_OP

// REF instructions
#define _SPIRV_OP(x, y)                                                        \
  add("intel_sub_group_avc_ref_" #x, OpSubgroupAvcRef##y##INTEL);
  _SPIRV_OP(set_bidirectional_mix_disable, SetBidirectionalMixDisable)
  _SPIRV_OP(set_bilinear_filter_enable, SetBilinearFilterEnable)
  _SPIRV_OP(evaluate_with_single_reference, EvaluateWithSingleReference)
  _SPIRV_OP(evaluate_with_dual_reference, EvaluateWithDualReference)
  _SPIRV_OP(evaluate_with_multi_reference, EvaluateWithMultiReference)
  _SPIRV_OP(evaluate_with_multi_reference_interlaced,
            EvaluateWithMultiReferenceInterlaced)
#undef _SPIRV_OP

// SIC instructions
#define _SPIRV_OP(x, y)                                                        \
  add("intel_sub_group_avc_sic_" #x, OpSubgroupAvcSic##y##INTEL);
  _SPIRV_OP(configure_skc, ConfigureSkc)
  _SPIRV_OP(configure_ipe_luma, ConfigureIpeLuma)
  _SPIRV_OP(configure_ipe_luma_chroma, ConfigureIpeLumaChroma)
  _SPIRV_OP(get_motion_vector_mask, GetMotionVectorMask)
  _SPIRV_OP(set_intra_luma_shape_penalty, SetIntraLumaShapePenalty)
  _SPIRV_OP(set_intra_luma_mode_cost_function, SetIntraLumaModeCostFunction)
  _SPIRV_OP(set_intra_chroma_mode_cost_function, SetIntraChromaModeCostFunction)
  _SPIRV_OP(set_skc_bilinear_filter_enable, SetBilinearFilterEnable)
  _SPIRV_OP(set_skc_forward_transform_enable, SetSkcForwardTransformEnable)
  _SPIRV_OP(set_block_based_raw_skip_sad, SetBlockBasedRawSkipSad)
  _SPIRV_OP(evaluate_ipe, EvaluateIpe)
  _SPIRV_OP(evaluate_with_single_reference, EvaluateWithSingleReference)
  _SPIRV_OP(evaluate_with_dual_reference, EvaluateWithDualReference)
  _SPIRV_OP(evaluate_with_multi_reference, EvaluateWithMultiReference)
  _SPIRV_OP(evaluate_with_multi_reference_interlaced,
            EvaluateWithMultiReferenceInterlaced)
  _SPIRV_OP(get_ipe_luma_shape, GetIpeLumaShape)
  _SPIRV_OP(get_best_ipe_luma_distortion, GetBestIpeLumaDistortion)
  _SPIRV_OP(get_best_ipe_chroma_distortion, GetBestIpeChromaDistortion)
  _SPIRV_OP(get_packed_ipe_luma_modes, GetPackedIpeLumaModes)
  _SPIRV_OP(get_ipe_chroma_mode, GetIpeChromaMode)
  _SPIRV_OP(get_packed_skc_luma_count_threshold, GetPackedSkcLumaCountThreshold)
  _SPIRV_OP(get_packed_skc_luma_sum_threshold, GetPackedSkcLumaSumThreshold)
  _SPIRV_OP(get_inter_raw_sads, GetInterRawSads)
#undef _SPIRV_OP
}
614
/// Map the LLVM opaque struct names of OpenCL special types to the SPIR-V
/// type-declaration opcodes they lower to.
template <> void SPIRVMap<std::string, Op, OCLOpaqueType>::init() {
  add("opencl.event_t", OpTypeEvent);
  add("opencl.pipe_t", OpTypePipe);
  add("opencl.clk_event_t", OpTypeDeviceEvent);
  add("opencl.reserve_id_t", OpTypeReserveId);
  add("opencl.queue_t", OpTypeQueue);
  add("opencl.sampler_t", OpTypeSampler);
}
623
/// Map LLVM atomicrmw binary operations to SPIR-V atomic opcodes.
/// Operations with no SPIR-V counterpart (e.g. FAdd/FSub in this LLVM
/// version) are intentionally absent.
template <> void LLVMSPIRVAtomicRmwOpCodeMap::init() {
  add(llvm::AtomicRMWInst::Xchg, OpAtomicExchange);
  add(llvm::AtomicRMWInst::Add, OpAtomicIAdd);
  add(llvm::AtomicRMWInst::Sub, OpAtomicISub);
  add(llvm::AtomicRMWInst::And, OpAtomicAnd);
  add(llvm::AtomicRMWInst::Or, OpAtomicOr);
  add(llvm::AtomicRMWInst::Xor, OpAtomicXor);
  add(llvm::AtomicRMWInst::Max, OpAtomicSMax);
  add(llvm::AtomicRMWInst::Min, OpAtomicSMin);
  add(llvm::AtomicRMWInst::UMax, OpAtomicUMax);
  add(llvm::AtomicRMWInst::UMin, OpAtomicUMin);
}
636
637 } // namespace SPIRV
638
///////////////////////////////////////////////////////////////////////////////
//
// Functions for getting builtin call info
//
///////////////////////////////////////////////////////////////////////////////

645 namespace OCLUtil {
646
getAtomicWorkItemFenceLiterals(CallInst * CI)647 AtomicWorkItemFenceLiterals getAtomicWorkItemFenceLiterals(CallInst *CI) {
648 return std::make_tuple(getArgAsInt(CI, 0),
649 static_cast<OCLMemOrderKind>(getArgAsInt(CI, 1)),
650 static_cast<OCLScopeKind>(getArgAsInt(CI, 2)));
651 }
652
getAtomicBuiltinNumMemoryOrderArgs(StringRef Name)653 size_t getAtomicBuiltinNumMemoryOrderArgs(StringRef Name) {
654 if (Name.startswith("atomic_compare_exchange"))
655 return 2;
656 return 1;
657 }
658
getSPIRVAtomicBuiltinNumMemoryOrderArgs(Op OC)659 size_t getSPIRVAtomicBuiltinNumMemoryOrderArgs(Op OC) {
660 if (OC == OpAtomicCompareExchange || OC == OpAtomicCompareExchangeWeak)
661 return 2;
662 return 1;
663 }
664
isComputeAtomicOCLBuiltin(StringRef DemangledName)665 bool isComputeAtomicOCLBuiltin(StringRef DemangledName) {
666 if (!DemangledName.startswith(kOCLBuiltinName::AtomicPrefix) &&
667 !DemangledName.startswith(kOCLBuiltinName::AtomPrefix))
668 return false;
669
670 return llvm::StringSwitch<bool>(DemangledName)
671 .EndsWith("add", true)
672 .EndsWith("sub", true)
673 .EndsWith("inc", true)
674 .EndsWith("dec", true)
675 .EndsWith("cmpxchg", true)
676 .EndsWith("min", true)
677 .EndsWith("max", true)
678 .EndsWith("and", true)
679 .EndsWith("or", true)
680 .EndsWith("xor", true)
681 .EndsWith("add_explicit", true)
682 .EndsWith("sub_explicit", true)
683 .EndsWith("or_explicit", true)
684 .EndsWith("xor_explicit", true)
685 .EndsWith("and_explicit", true)
686 .EndsWith("min_explicit", true)
687 .EndsWith("max_explicit", true)
688 .Default(false);
689 }
690
/// Decode the literal arguments of a work_group_barrier()/sub_group_barrier()
/// call into (memory-fence flags, memory scope, execution scope).
/// The one-argument form defaults the memory scope to work-group.
BarrierLiterals getBarrierLiterals(CallInst *CI) {
  // barrier()/work_group_barrier(flags) or work_group_barrier(flags, scope).
  auto N = CI->getNumArgOperands();
  assert(N == 1 || N == 2);

  StringRef DemangledName;
  assert(CI->getCalledFunction() && "Unexpected indirect call");
  if (!oclIsBuiltin(CI->getCalledFunction()->getName(), DemangledName)) {
    assert(0 &&
           "call must a builtin (work_group_barrier or sub_group_barrier)");
  }

  // Execution scope: sub-group for sub_group_barrier, work-group otherwise.
  OCLScopeKind Scope = OCLMS_work_group;
  if (DemangledName == kOCLBuiltinName::SubGroupBarrier) {
    Scope = OCLMS_sub_group;
  }

  return std::make_tuple(getArgAsInt(CI, 0),
                         N == 1 ? OCLMS_work_group
                                : static_cast<OCLScopeKind>(getArgAsInt(CI, 1)),
                         Scope);
}
712
/// Map a builtin function name onto its OpenCL.std extended-instruction
/// opcode. Returns ~0U when \p OrigName is not an extended instruction.
unsigned getExtOp(StringRef OrigName, StringRef GivenDemangledName) {
  std::string DemangledName{GivenDemangledName};
  // NOTE(review): with '||' an empty GivenDemangledName bails out without
  // attempting to demangle OrigName, and a successful oclIsBuiltin call only
  // updates the local parameter copy GivenDemangledName, which is never read
  // afterwards — confirm this is intended rather than demangling into
  // DemangledName here.
  if (DemangledName.empty() || !oclIsBuiltin(OrigName, GivenDemangledName))
    return ~0U;
  LLVM_DEBUG(dbgs() << "getExtOp: demangled name: " << DemangledName << '\n');
  OCLExtOpKind EOC;
  bool Found = OCLExtOpMap::rfind(DemangledName, &EOC);
  if (!Found) {
    // Retry with a signedness/float prefix inferred from the mangled name's
    // last parameter type, e.g. "abs" -> "s_abs" / "u_abs", "min" -> "fmin".
    std::string Prefix;
    switch (lastFuncParamType(OrigName)) {
    case ParamType::UNSIGNED:
      Prefix = "u_";
      break;
    case ParamType::SIGNED:
      Prefix = "s_";
      break;
    case ParamType::FLOAT:
      Prefix = "f";
      break;
    case ParamType::UNKNOWN:
      break;
    }
    Found = OCLExtOpMap::rfind(Prefix + DemangledName, &EOC);
  }
  if (Found)
    return EOC;
  else
    return ~0U;
}
742
743 ///////////////////////////////////////////////////////////////////////////////
744 //
745 // Functions for getting module info
746 //
747 ///////////////////////////////////////////////////////////////////////////////
748
/// Pack an OpenCL version triple into a single integer.
/// Examples: 2.0.0 -> 200000, 1.2.1 -> 102001.
unsigned encodeOCLVer(unsigned short Major, unsigned char Minor,
                      unsigned char Rev) {
  const unsigned MajorMinor = Major * 100u + Minor;
  return MajorMinor * 1000u + Rev;
}
753
/// Unpack an integer produced by encodeOCLVer back into
/// (major, minor, revision).
std::tuple<unsigned short, unsigned char, unsigned char>
decodeOCLVer(unsigned Ver) {
  const auto Major = static_cast<unsigned short>(Ver / 100000);
  const auto Minor = static_cast<unsigned char>((Ver / 1000) % 100);
  const auto Rev = static_cast<unsigned char>(Ver % 1000);
  return {Major, Minor, Rev};
}
761
/// Read the OpenCL version from module metadata (kSPIR2MD::OCLVer) and return
/// it encoded as by encodeOCLVer, with the revision forced to 0. Returns 0
/// when the metadata is absent. With \p AllowMulti, several operands (as may
/// appear after linking modules) are accepted provided they all agree;
/// otherwise more than one operand is a fatal error.
unsigned getOCLVersion(Module *M, bool AllowMulti) {
  NamedMDNode *NamedMD = M->getNamedMetadata(kSPIR2MD::OCLVer);
  if (!NamedMD)
    return 0;
  assert(NamedMD->getNumOperands() > 0 && "Invalid SPIR");
  if (!AllowMulti && NamedMD->getNumOperands() != 1)
    report_fatal_error("Multiple OCL version metadata not allowed");

  // If the module was linked with another module, there may be multiple
  // operands.
  auto GetVer = [=](unsigned I) {
    auto MD = NamedMD->getOperand(I);
    // Each operand is a (major, minor) integer pair.
    return std::make_pair(getMDOperandAsInt(MD, 0), getMDOperandAsInt(MD, 1));
  };
  auto Ver = GetVer(0);
  for (unsigned I = 1, E = NamedMD->getNumOperands(); I != E; ++I)
    if (Ver != GetVer(I))
      report_fatal_error("OCL version mismatch");

  return encodeOCLVer(Ver.first, Ver.second, 0);
}
783
decodeMDNode(MDNode * N,unsigned & X,unsigned & Y,unsigned & Z)784 void decodeMDNode(MDNode *N, unsigned &X, unsigned &Y, unsigned &Z) {
785 if (N == NULL)
786 return;
787 X = getMDOperandAsInt(N, 0);
788 Y = getMDOperandAsInt(N, 1);
789 Z = getMDOperandAsInt(N, 2);
790 }
791
/// Encode LLVM type by SPIR-V execution mode VecTypeHint.
/// Layout of the returned value: (vector width << 16) | scalar-type code,
/// where the scalar codes are 0=i8, 1=i16, 2=i32, 3=i64, 4=half, 5=float,
/// 6=double. A scalar type is encoded with a width field of 0.
unsigned encodeVecTypeHint(Type *Ty) {
  if (Ty->isHalfTy())
    return 4;
  if (Ty->isFloatTy())
    return 5;
  if (Ty->isDoubleTy())
    return 6;
  if (IntegerType *IntTy = dyn_cast<IntegerType>(Ty)) {
    switch (IntTy->getIntegerBitWidth()) {
    case 8:
      return 0;
    case 16:
      return 1;
    case 32:
      return 2;
    case 64:
      return 3;
    default:
      llvm_unreachable("invalid integer type");
    }
  }
  if (FixedVectorType *VecTy = dyn_cast<FixedVectorType>(Ty)) {
    // Recurse on the element type and place the element count in the
    // upper 16 bits.
    Type *EleTy = VecTy->getElementType();
    unsigned Size = VecTy->getNumElements();
    return Size << 16 | encodeVecTypeHint(EleTy);
  }
  llvm_unreachable("invalid type");
  return ~0U;
}
822
/// Inverse of encodeVecTypeHint: rebuild the LLVM type from a VecTypeHint
/// code. The low 16 bits select the scalar type (0..3 -> i8/i16/i32/i64 via
/// 1 << (3 + code); 4/5/6 -> half/float/double); the high 16 bits hold the
/// vector width, with 0 meaning a plain scalar.
Type *decodeVecTypeHint(LLVMContext &C, unsigned Code) {
  unsigned VecWidth = Code >> 16;
  unsigned Scalar = Code & 0xFFFF;
  Type *ST = nullptr;
  switch (Scalar) {
  case 0:
  case 1:
  case 2:
  case 3:
    ST = IntegerType::get(C, 1 << (3 + Scalar));
    break;
  case 4:
    ST = Type::getHalfTy(C);
    break;
  case 5:
    ST = Type::getFloatTy(C);
    break;
  case 6:
    ST = Type::getDoubleTy(C);
    break;
  default:
    llvm_unreachable("Invalid vec type hint");
    return nullptr;
  }
  // Width 0 (unsigned, so "< 1") encodes a scalar hint.
  if (VecWidth < 1)
    return ST;
  return FixedVectorType::get(ST, VecWidth);
}
851
transVecTypeHint(MDNode * Node)852 unsigned transVecTypeHint(MDNode *Node) {
853 return encodeVecTypeHint(getMDOperandAsType(Node, 0));
854 }
855
/// Return the address space used for a pointer to the OCL opaque type that
/// corresponds to the given SPIR-V type opcode (queue, event, pipe, image,
/// sampler, etc.), falling back to the default special-types address space
/// (with an assertion) for unknown opcodes.
SPIRAddressSpace getOCLOpaqueTypeAddrSpace(Op OpCode) {
  switch (OpCode) {
  case OpTypeQueue:
    return SPIRV_QUEUE_T_ADDR_SPACE;
  case OpTypeEvent:
    return SPIRV_EVENT_T_ADDR_SPACE;
  case OpTypeDeviceEvent:
    return SPIRV_CLK_EVENT_T_ADDR_SPACE;
  case OpTypeReserveId:
    return SPIRV_RESERVE_ID_T_ADDR_SPACE;
  case OpTypePipe:
  case OpTypePipeStorage:
    return SPIRV_PIPE_ADDR_SPACE;
  case OpTypeImage:
  case OpTypeSampledImage:
    return SPIRV_IMAGE_ADDR_SPACE;
  case OpConstantSampler:
  case OpTypeSampler:
    return SPIRV_SAMPLER_T_ADDR_SPACE;
  default:
    // Intel subgroup AVC types live in their own address space.
    if (isSubgroupAvcINTELTypeOpCode(OpCode))
      return SPIRV_AVC_INTEL_T_ADDR_SPACE;
    assert(false && "No address space is determined for some OCL type");
    return SPIRV_OCL_SPECIAL_TYPES_DEFAULT_ADDR_SPACE;
  }
}
882
/// Map a SPIR address-space enum onto the corresponding SPIR name-mangler
/// type attribute used when mangling pointer parameters.
static SPIR::TypeAttributeEnum mapAddrSpaceEnums(SPIRAddressSpace Addrspace) {
  switch (Addrspace) {
  case SPIRAS_Private:
    return SPIR::ATTR_PRIVATE;
  case SPIRAS_Global:
    return SPIR::ATTR_GLOBAL;
  case SPIRAS_Constant:
    return SPIR::ATTR_CONSTANT;
  case SPIRAS_Local:
    return SPIR::ATTR_LOCAL;
  case SPIRAS_Generic:
    return SPIR::ATTR_GENERIC;
  case SPIRAS_GlobalDevice:
    return SPIR::ATTR_GLOBAL_DEVICE;
  case SPIRAS_GlobalHost:
    return SPIR::ATTR_GLOBAL_HOST;
  default:
    llvm_unreachable("Invalid addrspace enum member");
  }
  // Unreachable; kept to satisfy compilers that require a return here.
  return SPIR::ATTR_NONE;
}
904
/// Overload keyed by SPIR mangler primitive: return the address-space
/// mangling attribute for pointers to the OCL opaque type represented by
/// \p Prim (queue/event/pipe and every image access-qualifier variant).
SPIR::TypeAttributeEnum
getOCLOpaqueTypeAddrSpace(SPIR::TypePrimitiveEnum Prim) {
  switch (Prim) {
  case SPIR::PRIMITIVE_QUEUE_T:
    return mapAddrSpaceEnums(SPIRV_QUEUE_T_ADDR_SPACE);
  case SPIR::PRIMITIVE_EVENT_T:
    return mapAddrSpaceEnums(SPIRV_EVENT_T_ADDR_SPACE);
  case SPIR::PRIMITIVE_CLK_EVENT_T:
    return mapAddrSpaceEnums(SPIRV_CLK_EVENT_T_ADDR_SPACE);
  case SPIR::PRIMITIVE_RESERVE_ID_T:
    return mapAddrSpaceEnums(SPIRV_RESERVE_ID_T_ADDR_SPACE);
  case SPIR::PRIMITIVE_PIPE_RO_T:
  case SPIR::PRIMITIVE_PIPE_WO_T:
    return mapAddrSpaceEnums(SPIRV_PIPE_ADDR_SPACE);
  // All image flavors (dims x array x depth x MSAA x access qualifier)
  // share the image address space.
  case SPIR::PRIMITIVE_IMAGE1D_RO_T:
  case SPIR::PRIMITIVE_IMAGE1D_ARRAY_RO_T:
  case SPIR::PRIMITIVE_IMAGE1D_BUFFER_RO_T:
  case SPIR::PRIMITIVE_IMAGE2D_RO_T:
  case SPIR::PRIMITIVE_IMAGE2D_ARRAY_RO_T:
  case SPIR::PRIMITIVE_IMAGE2D_DEPTH_RO_T:
  case SPIR::PRIMITIVE_IMAGE2D_ARRAY_DEPTH_RO_T:
  case SPIR::PRIMITIVE_IMAGE2D_MSAA_RO_T:
  case SPIR::PRIMITIVE_IMAGE2D_ARRAY_MSAA_RO_T:
  case SPIR::PRIMITIVE_IMAGE2D_MSAA_DEPTH_RO_T:
  case SPIR::PRIMITIVE_IMAGE2D_ARRAY_MSAA_DEPTH_RO_T:
  case SPIR::PRIMITIVE_IMAGE3D_RO_T:
  case SPIR::PRIMITIVE_IMAGE1D_WO_T:
  case SPIR::PRIMITIVE_IMAGE1D_ARRAY_WO_T:
  case SPIR::PRIMITIVE_IMAGE1D_BUFFER_WO_T:
  case SPIR::PRIMITIVE_IMAGE2D_WO_T:
  case SPIR::PRIMITIVE_IMAGE2D_ARRAY_WO_T:
  case SPIR::PRIMITIVE_IMAGE2D_DEPTH_WO_T:
  case SPIR::PRIMITIVE_IMAGE2D_ARRAY_DEPTH_WO_T:
  case SPIR::PRIMITIVE_IMAGE2D_MSAA_WO_T:
  case SPIR::PRIMITIVE_IMAGE2D_ARRAY_MSAA_WO_T:
  case SPIR::PRIMITIVE_IMAGE2D_MSAA_DEPTH_WO_T:
  case SPIR::PRIMITIVE_IMAGE2D_ARRAY_MSAA_DEPTH_WO_T:
  case SPIR::PRIMITIVE_IMAGE3D_WO_T:
  case SPIR::PRIMITIVE_IMAGE1D_RW_T:
  case SPIR::PRIMITIVE_IMAGE1D_ARRAY_RW_T:
  case SPIR::PRIMITIVE_IMAGE1D_BUFFER_RW_T:
  case SPIR::PRIMITIVE_IMAGE2D_RW_T:
  case SPIR::PRIMITIVE_IMAGE2D_ARRAY_RW_T:
  case SPIR::PRIMITIVE_IMAGE2D_DEPTH_RW_T:
  case SPIR::PRIMITIVE_IMAGE2D_ARRAY_DEPTH_RW_T:
  case SPIR::PRIMITIVE_IMAGE2D_MSAA_RW_T:
  case SPIR::PRIMITIVE_IMAGE2D_ARRAY_MSAA_RW_T:
  case SPIR::PRIMITIVE_IMAGE2D_MSAA_DEPTH_RW_T:
  case SPIR::PRIMITIVE_IMAGE2D_ARRAY_MSAA_DEPTH_RW_T:
  case SPIR::PRIMITIVE_IMAGE3D_RW_T:
    return mapAddrSpaceEnums(SPIRV_IMAGE_ADDR_SPACE);
  default:
    llvm_unreachable("No address space is determined for a SPIR primitive");
  }
  // Unreachable; kept to satisfy compilers that require a return here.
  return SPIR::ATTR_NONE;
}
961
// Fetch type of invoke function passed to device execution built-ins
/// \p BlockIdx is the index of the block-invoke parameter in \p F's
/// signature; that parameter must be a pointer to a function type.
static FunctionType *getBlockInvokeTy(Function *F, unsigned BlockIdx) {
  auto Params = F->getFunctionType()->params();
  PointerType *FuncPtr = cast<PointerType>(Params[BlockIdx]);
  return cast<FunctionType>(FuncPtr->getElementType());
}
968
969 class OCLBuiltinFuncMangleInfo : public SPIRV::BuiltinFuncMangleInfo {
970 public:
OCLBuiltinFuncMangleInfo(Function * F)971 OCLBuiltinFuncMangleInfo(Function *F) : F(F) {}
OCLBuiltinFuncMangleInfo(ArrayRef<Type * > ArgTypes)972 OCLBuiltinFuncMangleInfo(ArrayRef<Type *> ArgTypes)
973 : ArgTypes(ArgTypes.vec()) {}
getArgTy(unsigned I)974 Type *getArgTy(unsigned I) { return F->getFunctionType()->getParamType(I); }
init(StringRef UniqName)975 void init(StringRef UniqName) override {
976 // Make a local copy as we will modify the string in init function
977 std::string TempStorage = UniqName.str();
978 auto NameRef = StringRef(TempStorage);
979
980 // Helper functions to erase substrings from NameRef (i.e. TempStorage)
981 auto EraseSubstring = [&NameRef, &TempStorage](const std::string &ToErase) {
982 size_t Pos = TempStorage.find(ToErase);
983 if (Pos != std::string::npos) {
984 TempStorage.erase(Pos, ToErase.length());
985 // re-take StringRef as TempStorage was updated
986 NameRef = StringRef(TempStorage);
987 }
988 };
989 auto EraseSymbol = [&NameRef, &TempStorage](size_t Index) {
990 TempStorage.erase(Index, 1);
991 // re-take StringRef as TempStorage was updated
992 NameRef = StringRef(TempStorage);
993 };
994
995 if (NameRef.startswith("async_work_group")) {
996 addUnsignedArg(-1);
997 setArgAttr(1, SPIR::ATTR_CONST);
998 } else if (NameRef.startswith("printf"))
999 setVarArg(1);
1000 else if (NameRef.startswith("write_imageui"))
1001 addUnsignedArg(2);
1002 else if (NameRef.equals("prefetch")) {
1003 addUnsignedArg(1);
1004 setArgAttr(0, SPIR::ATTR_CONST);
1005 } else if (NameRef.equals("get_kernel_work_group_size") ||
1006 NameRef.equals(
1007 "get_kernel_preferred_work_group_size_multiple")) {
1008 assert(F && "lack of necessary information");
1009 const size_t BlockArgIdx = 0;
1010 FunctionType *InvokeTy = getBlockInvokeTy(F, BlockArgIdx);
1011 if (InvokeTy->getNumParams() > 1)
1012 setLocalArgBlock(BlockArgIdx);
1013 } else if (NameRef.equals("enqueue_kernel")) {
1014 assert(F && "lack of necessary information");
1015 setEnumArg(1, SPIR::PRIMITIVE_KERNEL_ENQUEUE_FLAGS_T);
1016 addUnsignedArg(3);
1017 setArgAttr(4, SPIR::ATTR_CONST);
1018 // If there are arguments other then block context then these are pointers
1019 // to local memory so this built-in must be mangled accordingly.
1020 const size_t BlockArgIdx = 6;
1021 FunctionType *InvokeTy = getBlockInvokeTy(F, BlockArgIdx);
1022 if (InvokeTy->getNumParams() > 1) {
1023 setLocalArgBlock(BlockArgIdx);
1024 addUnsignedArg(BlockArgIdx + 1);
1025 setVarArg(BlockArgIdx + 2);
1026 }
1027 } else if (NameRef.startswith("get_") || NameRef.equals("nan") ||
1028 NameRef.equals("mem_fence") || NameRef.startswith("shuffle")) {
1029 addUnsignedArg(-1);
1030 if (NameRef.startswith(kOCLBuiltinName::GetFence)) {
1031 setArgAttr(0, SPIR::ATTR_CONST);
1032 addVoidPtrArg(0);
1033 }
1034 } else if (NameRef.contains("barrier")) {
1035 addUnsignedArg(0);
1036 if (NameRef.equals("work_group_barrier") ||
1037 NameRef.equals("sub_group_barrier"))
1038 setEnumArg(1, SPIR::PRIMITIVE_MEMORY_SCOPE);
1039 } else if (NameRef.startswith("atomic_work_item_fence")) {
1040 addUnsignedArg(0);
1041 setEnumArg(1, SPIR::PRIMITIVE_MEMORY_ORDER);
1042 setEnumArg(2, SPIR::PRIMITIVE_MEMORY_SCOPE);
1043 } else if (NameRef.startswith("atom_")) {
1044 setArgAttr(0, SPIR::ATTR_VOLATILE);
1045 if (NameRef.endswith("_umax") || NameRef.endswith("_umin")) {
1046 addUnsignedArg(-1);
1047 // We need to remove u to match OpenCL C built-in function name
1048 EraseSymbol(5);
1049 }
1050 } else if (NameRef.startswith("atomic")) {
1051 setArgAttr(0, SPIR::ATTR_VOLATILE);
1052 if (NameRef.contains("_umax") || NameRef.contains("_umin")) {
1053 addUnsignedArg(-1);
1054 // We need to remove u to match OpenCL C built-in function name
1055 if (NameRef.contains("_fetch"))
1056 EraseSymbol(13);
1057 else
1058 EraseSymbol(7);
1059 }
1060 if (NameRef.contains("store_explicit") ||
1061 NameRef.contains("exchange_explicit") ||
1062 (NameRef.startswith("atomic_fetch") &&
1063 NameRef.contains("explicit"))) {
1064 setEnumArg(2, SPIR::PRIMITIVE_MEMORY_ORDER);
1065 setEnumArg(3, SPIR::PRIMITIVE_MEMORY_SCOPE);
1066 } else if (NameRef.contains("load_explicit") ||
1067 (NameRef.startswith("atomic_flag") &&
1068 NameRef.contains("explicit"))) {
1069 setEnumArg(1, SPIR::PRIMITIVE_MEMORY_ORDER);
1070 setEnumArg(2, SPIR::PRIMITIVE_MEMORY_SCOPE);
1071 } else if (NameRef.endswith("compare_exchange_strong_explicit") ||
1072 NameRef.endswith("compare_exchange_weak_explicit")) {
1073 setEnumArg(3, SPIR::PRIMITIVE_MEMORY_ORDER);
1074 setEnumArg(4, SPIR::PRIMITIVE_MEMORY_ORDER);
1075 setEnumArg(5, SPIR::PRIMITIVE_MEMORY_SCOPE);
1076 }
1077 // Don't set atomic property to the first argument of 1.2 atomic
1078 // built-ins.
1079 if (!NameRef.endswith("xchg") && // covers _cmpxchg too
1080 (NameRef.contains("fetch") ||
1081 !(NameRef.endswith("_add") || NameRef.endswith("_sub") ||
1082 NameRef.endswith("_inc") || NameRef.endswith("_dec") ||
1083 NameRef.endswith("_min") || NameRef.endswith("_max") ||
1084 NameRef.endswith("_and") || NameRef.endswith("_or") ||
1085 NameRef.endswith("_xor")))) {
1086 addAtomicArg(0);
1087 }
1088 } else if (NameRef.startswith("uconvert_")) {
1089 addUnsignedArg(0);
1090 NameRef = NameRef.drop_front(1);
1091 UnmangledName.erase(0, 1);
1092 } else if (NameRef.startswith("s_")) {
1093 if (NameRef.equals("s_upsample"))
1094 addUnsignedArg(1);
1095 NameRef = NameRef.drop_front(2);
1096 } else if (NameRef.startswith("u_")) {
1097 addUnsignedArg(-1);
1098 NameRef = NameRef.drop_front(2);
1099 } else if (NameRef.equals("fclamp")) {
1100 NameRef = NameRef.drop_front(1);
1101 }
1102 // handle [read|write]pipe builtins (plus two i32 literal args
1103 // required by SPIR 2.0 provisional specification):
1104 else if (NameRef.equals("read_pipe_2") || NameRef.equals("write_pipe_2")) {
1105 // with 2 arguments (plus two i32 literals):
1106 // int read_pipe (read_only pipe gentype p, gentype *ptr)
1107 // int write_pipe (write_only pipe gentype p, const gentype *ptr)
1108 addVoidPtrArg(1);
1109 addUnsignedArg(2);
1110 addUnsignedArg(3);
1111 // OpenCL-like representation of blocking pipes
1112 } else if (NameRef.equals("read_pipe_2_bl") ||
1113 NameRef.equals("write_pipe_2_bl")) {
1114 // with 2 arguments (plus two i32 literals):
1115 // int read_pipe_bl (read_only pipe gentype p, gentype *ptr)
1116 // int write_pipe_bl (write_only pipe gentype p, const gentype *ptr)
1117 addVoidPtrArg(1);
1118 addUnsignedArg(2);
1119 addUnsignedArg(3);
1120 } else if (NameRef.equals("read_pipe_4") ||
1121 NameRef.equals("write_pipe_4")) {
1122 // with 4 arguments (plus two i32 literals):
1123 // int read_pipe (read_only pipe gentype p, reserve_id_t reserve_id, uint
1124 // index, gentype *ptr) int write_pipe (write_only pipe gentype p,
1125 // reserve_id_t reserve_id, uint index, const gentype *ptr)
1126 addUnsignedArg(2);
1127 addVoidPtrArg(3);
1128 addUnsignedArg(4);
1129 addUnsignedArg(5);
1130 } else if (NameRef.contains("reserve_read_pipe") ||
1131 NameRef.contains("reserve_write_pipe")) {
1132 // process [|work_group|sub_group]reserve[read|write]pipe builtins
1133 addUnsignedArg(1);
1134 addUnsignedArg(2);
1135 addUnsignedArg(3);
1136 } else if (NameRef.contains("commit_read_pipe") ||
1137 NameRef.contains("commit_write_pipe")) {
1138 // process [|work_group|sub_group]commit[read|write]pipe builtins
1139 addUnsignedArg(2);
1140 addUnsignedArg(3);
1141 } else if (NameRef.equals("capture_event_profiling_info")) {
1142 addVoidPtrArg(2);
1143 setEnumArg(1, SPIR::PRIMITIVE_CLK_PROFILING_INFO);
1144 } else if (NameRef.equals("enqueue_marker")) {
1145 setArgAttr(2, SPIR::ATTR_CONST);
1146 addUnsignedArg(1);
1147 } else if (NameRef.startswith("vload")) {
1148 addUnsignedArg(0);
1149 setArgAttr(1, SPIR::ATTR_CONST);
1150 } else if (NameRef.startswith("vstore")) {
1151 addUnsignedArg(1);
1152 } else if (NameRef.startswith("ndrange_")) {
1153 addUnsignedArg(-1);
1154 if (NameRef[8] == '2' || NameRef[8] == '3') {
1155 setArgAttr(-1, SPIR::ATTR_CONST);
1156 }
1157 } else if (NameRef.contains("umax")) {
1158 addUnsignedArg(-1);
1159 EraseSymbol(NameRef.find("umax"));
1160 } else if (NameRef.contains("umin")) {
1161 addUnsignedArg(-1);
1162 EraseSymbol(NameRef.find("umin"));
1163 } else if (NameRef.contains("broadcast")) {
1164 addUnsignedArg(-1);
1165 } else if (NameRef.startswith(kOCLBuiltinName::SampledReadImage)) {
1166 NameRef.consume_front(kOCLBuiltinName::Sampled);
1167 addSamplerArg(1);
1168 } else if (NameRef.contains(kOCLSubgroupsAVCIntel::Prefix)) {
1169 if (NameRef.contains("evaluate_ipe"))
1170 addSamplerArg(1);
1171 else if (NameRef.contains("evaluate_with_single_reference"))
1172 addSamplerArg(2);
1173 else if (NameRef.contains("evaluate_with_multi_reference")) {
1174 addUnsignedArg(1);
1175 std::string PostFix = "_interlaced";
1176 if (NameRef.contains(PostFix)) {
1177 addUnsignedArg(2);
1178 addSamplerArg(3);
1179 EraseSubstring(PostFix);
1180 } else
1181 addSamplerArg(2);
1182 } else if (NameRef.contains("evaluate_with_dual_reference"))
1183 addSamplerArg(3);
1184 else if (NameRef.contains("fme_initialize"))
1185 addUnsignedArgs(0, 6);
1186 else if (NameRef.contains("bme_initialize"))
1187 addUnsignedArgs(0, 7);
1188 else if (NameRef.contains("set_inter_base_multi_reference_penalty") ||
1189 NameRef.contains("set_inter_shape_penalty") ||
1190 NameRef.contains("set_inter_direction_penalty"))
1191 addUnsignedArg(0);
1192 else if (NameRef.contains("set_motion_vector_cost_function"))
1193 addUnsignedArgs(0, 2);
1194 else if (NameRef.contains("interlaced_field_polarity"))
1195 addUnsignedArg(0);
1196 else if (NameRef.contains("interlaced_field_polarities"))
1197 addUnsignedArgs(0, 1);
1198 else if (NameRef.contains(kOCLSubgroupsAVCIntel::MCEPrefix)) {
1199 if (NameRef.contains("get_default"))
1200 addUnsignedArgs(0, 1);
1201 } else if (NameRef.contains(kOCLSubgroupsAVCIntel::IMEPrefix)) {
1202 if (NameRef.contains("initialize"))
1203 addUnsignedArgs(0, 2);
1204 else if (NameRef.contains("set_single_reference"))
1205 addUnsignedArg(1);
1206 else if (NameRef.contains("set_dual_reference"))
1207 addUnsignedArg(2);
1208 else if (NameRef.contains("set_weighted_sad") ||
1209 NameRef.contains("set_early_search_termination_threshold"))
1210 addUnsignedArg(0);
1211 else if (NameRef.contains("adjust_ref_offset"))
1212 addUnsignedArgs(1, 3);
1213 else if (NameRef.contains("set_max_motion_vector_count") ||
1214 NameRef.contains("get_border_reached"))
1215 addUnsignedArg(0);
1216 else if (NameRef.contains("shape_distortions") ||
1217 NameRef.contains("shape_motion_vectors") ||
1218 NameRef.contains("shape_reference_ids")) {
1219 if (NameRef.contains("single_reference")) {
1220 addUnsignedArg(1);
1221 EraseSubstring("_single_reference");
1222 } else if (NameRef.contains("dual_reference")) {
1223 addUnsignedArgs(1, 2);
1224 EraseSubstring("_dual_reference");
1225 }
1226 } else if (NameRef.contains("ref_window_size"))
1227 addUnsignedArg(0);
1228 } else if (NameRef.contains(kOCLSubgroupsAVCIntel::SICPrefix)) {
1229 if (NameRef.contains("initialize") ||
1230 NameRef.contains("set_intra_luma_shape_penalty"))
1231 addUnsignedArg(0);
1232 else if (NameRef.contains("configure_ipe")) {
1233 if (NameRef.contains("_luma")) {
1234 addUnsignedArgs(0, 6);
1235 EraseSubstring("_luma");
1236 }
1237 if (NameRef.contains("_chroma")) {
1238 addUnsignedArgs(7, 9);
1239 EraseSubstring("_chroma");
1240 }
1241 } else if (NameRef.contains("configure_skc"))
1242 addUnsignedArgs(0, 4);
1243 else if (NameRef.contains("set_skc")) {
1244 if (NameRef.contains("forward_transform_enable"))
1245 addUnsignedArg(0);
1246 } else if (NameRef.contains("set_block")) {
1247 if (NameRef.contains("based_raw_skip_sad"))
1248 addUnsignedArg(0);
1249 } else if (NameRef.contains("get_motion_vector_mask")) {
1250 addUnsignedArgs(0, 1);
1251 } else if (NameRef.contains("luma_mode_cost_function"))
1252 addUnsignedArgs(0, 2);
1253 else if (NameRef.contains("chroma_mode_cost_function"))
1254 addUnsignedArg(0);
1255 }
1256 } else if (NameRef.startswith("intel_sub_group_shuffle")) {
1257 if (NameRef.endswith("_down") || NameRef.endswith("_up"))
1258 addUnsignedArg(2);
1259 else
1260 addUnsignedArg(1);
1261 } else if (NameRef.startswith("intel_sub_group_block_write")) {
1262 // distinguish write to image and other data types as position
1263 // of uint argument is different though name is the same.
1264 auto *Arg0Ty = getArgTy(0);
1265 if (Arg0Ty->isPointerTy() &&
1266 Arg0Ty->getPointerElementType()->isIntegerTy()) {
1267 addUnsignedArg(0);
1268 addUnsignedArg(1);
1269 } else {
1270 addUnsignedArg(2);
1271 }
1272 } else if (NameRef.startswith("intel_sub_group_block_read")) {
1273 // distinguish read from image and other data types as position
1274 // of uint argument is different though name is the same.
1275 auto *Arg0Ty = getArgTy(0);
1276 if (Arg0Ty->isPointerTy() &&
1277 Arg0Ty->getPointerElementType()->isIntegerTy()) {
1278 setArgAttr(0, SPIR::ATTR_CONST);
1279 addUnsignedArg(0);
1280 }
1281 } else if (NameRef.startswith("intel_sub_group_media_block_write")) {
1282 addUnsignedArg(3);
1283 } else if (NameRef.startswith(kOCLBuiltinName::SubGroupPrefix)) {
1284 if (NameRef.contains("ballot")) {
1285 if (NameRef.contains("inverse") || NameRef.contains("bit_count") ||
1286 NameRef.contains("inclusive_scan") ||
1287 NameRef.contains("exclusive_scan") ||
1288 NameRef.contains("find_lsb") || NameRef.contains("find_msb"))
1289 addUnsignedArg(0);
1290 else if (NameRef.contains("bit_extract")) {
1291 addUnsignedArgs(0, 1);
1292 }
1293 } else if (NameRef.contains("shuffle") || NameRef.contains("clustered"))
1294 addUnsignedArg(1);
1295 } else if (NameRef.startswith("bitfield_insert")) {
1296 addUnsignedArgs(2, 3);
1297 } else if (NameRef.startswith("bitfield_extract_signed") ||
1298 NameRef.startswith("bitfield_extract_unsigned")) {
1299 addUnsignedArgs(1, 2);
1300 }
1301
1302 // Store the final version of a function name
1303 UnmangledName = NameRef.str();
1304 }
1305 // Auxiliarry information, it is expected that it is relevant at the moment
1306 // the init method is called.
1307 Function *F; // SPIRV decorated function
1308 // TODO: ArgTypes argument should get removed once all SPV-IR related issues
1309 // are resolved
1310 std::vector<Type *> ArgTypes; // Arguments of OCL builtin
1311 };
1312
/// Mutate an OCL builtin call in place: \p ArgMutate rewrites the argument
/// list and returns the new (unmangled) builtin name; the call is then
/// re-created with an OCL-mangled name derived via OCLBuiltinFuncMangleInfo.
CallInst *mutateCallInstOCL(
    Module *M, CallInst *CI,
    std::function<std::string(CallInst *, std::vector<Value *> &)> ArgMutate,
    AttributeList *Attrs) {
  OCLBuiltinFuncMangleInfo BtnInfo(CI->getCalledFunction());
  return mutateCallInst(M, CI, ArgMutate, &BtnInfo, Attrs);
}
1320
/// Overload that additionally lets \p ArgMutate change the return type and
/// \p RetMutate post-process the new call's result; returns the instruction
/// that replaces the original call's value.
Instruction *mutateCallInstOCL(
    Module *M, CallInst *CI,
    std::function<std::string(CallInst *, std::vector<Value *> &, Type *&RetTy)>
        ArgMutate,
    std::function<Instruction *(CallInst *)> RetMutate, AttributeList *Attrs,
    bool TakeFuncName) {
  OCLBuiltinFuncMangleInfo BtnInfo(CI->getCalledFunction());
  return mutateCallInst(M, CI, ArgMutate, RetMutate, &BtnInfo, Attrs,
                        TakeFuncName);
}
1331
1332 static std::pair<StringRef, StringRef>
getSrcAndDstElememntTypeName(BitCastInst * BIC)1333 getSrcAndDstElememntTypeName(BitCastInst *BIC) {
1334 if (!BIC)
1335 return std::pair<StringRef, StringRef>("", "");
1336
1337 Type *SrcTy = BIC->getSrcTy();
1338 Type *DstTy = BIC->getDestTy();
1339 if (SrcTy->isPointerTy())
1340 SrcTy = SrcTy->getPointerElementType();
1341 if (DstTy->isPointerTy())
1342 DstTy = DstTy->getPointerElementType();
1343 auto SrcST = dyn_cast<StructType>(SrcTy);
1344 auto DstST = dyn_cast<StructType>(DstTy);
1345 if (!DstST || !DstST->hasName() || !SrcST || !SrcST->hasName())
1346 return std::pair<StringRef, StringRef>("", "");
1347
1348 return std::make_pair(SrcST->getName(), DstST->getName());
1349 }
1350
isSamplerInitializer(Instruction * Inst)1351 bool isSamplerInitializer(Instruction *Inst) {
1352 BitCastInst *BIC = dyn_cast<BitCastInst>(Inst);
1353 auto Names = getSrcAndDstElememntTypeName(BIC);
1354 if (Names.second == getSPIRVTypeName(kSPIRVTypeName::Sampler) &&
1355 Names.first == getSPIRVTypeName(kSPIRVTypeName::ConstantSampler))
1356 return true;
1357
1358 return false;
1359 }
1360
isPipeStorageInitializer(Instruction * Inst)1361 bool isPipeStorageInitializer(Instruction *Inst) {
1362 BitCastInst *BIC = dyn_cast<BitCastInst>(Inst);
1363 auto Names = getSrcAndDstElememntTypeName(BIC);
1364 if (Names.second == getSPIRVTypeName(kSPIRVTypeName::PipeStorage) &&
1365 Names.first == getSPIRVTypeName(kSPIRVTypeName::ConstantPipeStorage))
1366 return true;
1367
1368 return false;
1369 }
1370
/// A "special type" initializer is a bitcast materializing either a sampler
/// or a pipe-storage constant.
bool isSpecialTypeInitializer(Instruction *Inst) {
  return isSamplerInitializer(Inst) || isPipeStorageInitializer(Inst);
}
1374
isSamplerTy(Type * Ty)1375 bool isSamplerTy(Type *Ty) {
1376 auto PTy = dyn_cast<PointerType>(Ty);
1377 if (!PTy)
1378 return false;
1379
1380 auto STy = dyn_cast<StructType>(PTy->getElementType());
1381 return STy && STy->hasName() && STy->getName() == kSPR2TypeName::Sampler;
1382 }
1383
isPipeOrAddressSpaceCastBI(const StringRef MangledName)1384 bool isPipeOrAddressSpaceCastBI(const StringRef MangledName) {
1385 return MangledName == "write_pipe_2" || MangledName == "read_pipe_2" ||
1386 MangledName == "write_pipe_2_bl" || MangledName == "read_pipe_2_bl" ||
1387 MangledName == "write_pipe_4" || MangledName == "read_pipe_4" ||
1388 MangledName == "reserve_write_pipe" ||
1389 MangledName == "reserve_read_pipe" ||
1390 MangledName == "commit_write_pipe" ||
1391 MangledName == "commit_read_pipe" ||
1392 MangledName == "work_group_reserve_write_pipe" ||
1393 MangledName == "work_group_reserve_read_pipe" ||
1394 MangledName == "work_group_commit_write_pipe" ||
1395 MangledName == "work_group_commit_read_pipe" ||
1396 MangledName == "get_pipe_num_packets_ro" ||
1397 MangledName == "get_pipe_max_packets_ro" ||
1398 MangledName == "get_pipe_num_packets_wo" ||
1399 MangledName == "get_pipe_max_packets_wo" ||
1400 MangledName == "sub_group_reserve_write_pipe" ||
1401 MangledName == "sub_group_reserve_read_pipe" ||
1402 MangledName == "sub_group_commit_write_pipe" ||
1403 MangledName == "sub_group_commit_read_pipe" ||
1404 MangledName == "to_global" || MangledName == "to_local" ||
1405 MangledName == "to_private";
1406 }
1407
isEnqueueKernelBI(const StringRef MangledName)1408 bool isEnqueueKernelBI(const StringRef MangledName) {
1409 return MangledName == "__enqueue_kernel_basic" ||
1410 MangledName == "__enqueue_kernel_basic_events" ||
1411 MangledName == "__enqueue_kernel_varargs" ||
1412 MangledName == "__enqueue_kernel_events_varargs";
1413 }
1414
isKernelQueryBI(const StringRef MangledName)1415 bool isKernelQueryBI(const StringRef MangledName) {
1416 return MangledName == "__get_kernel_work_group_size_impl" ||
1417 MangledName == "__get_kernel_sub_group_count_for_ndrange_impl" ||
1418 MangledName == "__get_kernel_max_sub_group_size_for_ndrange_impl" ||
1419 MangledName == "__get_kernel_preferred_work_group_size_multiple_impl";
1420 }
1421
// isUnfusedMulAdd checks if we have the following (most common for fp
// contraction) pattern in LLVM IR:
1424 //
1425 // %mul = fmul float %a, %b
1426 // %add = fadd float %mul, %c
1427 //
1428 // This pattern indicates that fp contraction could have been disabled by
1429 // #pragma OPENCL FP_CONTRACT OFF. When contraction is enabled (by a pragma or
1430 // by clang's -ffp-contract=fast), clang would generate:
1431 //
1432 // %0 = call float @llvm.fmuladd.f32(float %a, float %b, float %c)
1433 //
1434 // or
1435 //
1436 // %mul = fmul contract float %a, %b
1437 // %add = fadd contract float %mul, %c
1438 //
1439 // Note that optimizations may form an unfused fmuladd from fadd+load or
1440 // fadd+call, so this check is quite restrictive (see the comment below).
1441 //
isUnfusedMulAdd(BinaryOperator * B)1442 bool isUnfusedMulAdd(BinaryOperator *B) {
1443 if (B->getOpcode() != Instruction::FAdd &&
1444 B->getOpcode() != Instruction::FSub)
1445 return false;
1446
1447 if (B->hasAllowContract()) {
1448 // If this fadd or fsub itself has a contract flag, the operation can be
1449 // contracted regardless of the operands.
1450 return false;
1451 }
1452
1453 // Otherwise, we cannot easily tell if the operation can be a candidate for
1454 // contraction or not. Consider the following cases:
1455 //
1456 // %mul = alloca float
1457 // %t1 = fmul float %a, %b
1458 // store float* %mul, float %t
1459 // %t2 = load %mul
1460 // %r = fadd float %t2, %c
1461 //
1462 // LLVM IR does not allow %r to be contracted. However, after an optimization
1463 // it becomes a candidate for contraction if ContractionOFF is not set in
1464 // SPIR-V:
1465 //
1466 // %t1 = fmul float %a, %b
1467 // %r = fadd float %t1, %c
1468 //
1469 // To be on a safe side, we disallow everything that is even remotely similar
1470 // to fmul + fadd.
1471 return true;
1472 }
1473
getIntelSubgroupBlockDataPostfix(unsigned ElementBitSize,unsigned VectorNumElements)1474 std::string getIntelSubgroupBlockDataPostfix(unsigned ElementBitSize,
1475 unsigned VectorNumElements) {
1476 std::ostringstream OSS;
1477 switch (ElementBitSize) {
1478 case 8:
1479 OSS << "_uc";
1480 break;
1481 case 16:
1482 OSS << "_us";
1483 break;
1484 case 32:
1485 // Intentionally does nothing since _ui variant is only an alias.
1486 break;
1487 case 64:
1488 OSS << "_ul";
1489 break;
1490 default:
1491 llvm_unreachable(
1492 "Incorrect data bitsize for intel_subgroup_block builtins");
1493 }
1494 switch (VectorNumElements) {
1495 case 1:
1496 break;
1497 case 2:
1498 case 4:
1499 case 8:
1500 OSS << VectorNumElements;
1501 break;
1502 case 16:
1503 assert(ElementBitSize == 8 &&
1504 "16 elements vector allowed only for char builtins");
1505 OSS << VectorNumElements;
1506 break;
1507 default:
1508 llvm_unreachable(
1509 "Incorrect vector length for intel_subgroup_block builtins");
1510 }
1511 return OSS.str();
1512 }
1513
/// Insert an access-qualifier abbreviation into an image type name:
/// read_only -> "ro_", write_only -> "wo_", read_write -> "rw_".
/// The abbreviation is inserted just before the last character of \p Name
/// (presumably before the trailing "t" of names like "...image2d_t" —
/// TODO confirm against callers).
void insertImageNameAccessQualifier(SPIRVAccessQualifierKind Acc,
                                    std::string &Name) {
  std::string QName = rmap<std::string>(Acc);
  // transform: read_only -> ro, write_only -> wo, read_write -> rw
  QName = QName.substr(0, 1) + QName.substr(QName.find("_") + 1, 1) + "_";
  assert(!Name.empty() && "image name should not be empty");
  Name.insert(Name.size() - 1, QName);
}
1522 } // namespace OCLUtil
1523
transOCLMemScopeIntoSPIRVScope(Value * MemScope,Optional<int> DefaultCase,Instruction * InsertBefore)1524 Value *SPIRV::transOCLMemScopeIntoSPIRVScope(Value *MemScope,
1525 Optional<int> DefaultCase,
1526 Instruction *InsertBefore) {
1527 if (auto *C = dyn_cast<ConstantInt>(MemScope)) {
1528 return ConstantInt::get(
1529 C->getType(), map<Scope>(static_cast<OCLScopeKind>(C->getZExtValue())));
1530 }
1531
1532 // If memory_scope is not a constant, then we have to insert dynamic mapping:
1533 return getOrCreateSwitchFunc(kSPIRVName::TranslateOCLMemScope, MemScope,
1534 OCLMemScopeMap::getMap(), /* IsReverse */ false,
1535 DefaultCase, InsertBefore);
1536 }
1537
transOCLMemOrderIntoSPIRVMemorySemantics(Value * MemOrder,Optional<int> DefaultCase,Instruction * InsertBefore)1538 Value *SPIRV::transOCLMemOrderIntoSPIRVMemorySemantics(
1539 Value *MemOrder, Optional<int> DefaultCase, Instruction *InsertBefore) {
1540 if (auto *C = dyn_cast<ConstantInt>(MemOrder)) {
1541 return ConstantInt::get(
1542 C->getType(), mapOCLMemSemanticToSPIRV(
1543 0, static_cast<OCLMemOrderKind>(C->getZExtValue())));
1544 }
1545
1546 return getOrCreateSwitchFunc(kSPIRVName::TranslateOCLMemOrder, MemOrder,
1547 OCLMemOrderMap::getMap(), /* IsReverse */ false,
1548 DefaultCase, InsertBefore);
1549 }
1550
1551 Value *
transSPIRVMemoryScopeIntoOCLMemoryScope(Value * MemScope,Instruction * InsertBefore)1552 SPIRV::transSPIRVMemoryScopeIntoOCLMemoryScope(Value *MemScope,
1553 Instruction *InsertBefore) {
1554 if (auto *C = dyn_cast<ConstantInt>(MemScope)) {
1555 return ConstantInt::get(C->getType(), rmap<OCLScopeKind>(static_cast<Scope>(
1556 C->getZExtValue())));
1557 }
1558
1559 if (auto *CI = dyn_cast<CallInst>(MemScope)) {
1560 Function *F = CI->getCalledFunction();
1561 if (F && F->getName().equals(kSPIRVName::TranslateOCLMemScope)) {
1562 // In case the SPIR-V module was created from an OpenCL program by
1563 // *this* SPIR-V generator, we know that the value passed to
1564 // __translate_ocl_memory_scope is what we should pass to the
1565 // OpenCL builtin now.
1566 return CI->getArgOperand(0);
1567 }
1568 }
1569
1570 return getOrCreateSwitchFunc(kSPIRVName::TranslateSPIRVMemScope, MemScope,
1571 OCLMemScopeMap::getRMap(),
1572 /* IsReverse */ true, None, InsertBefore);
1573 }
1574
1575 Value *
transSPIRVMemorySemanticsIntoOCLMemoryOrder(Value * MemorySemantics,Instruction * InsertBefore)1576 SPIRV::transSPIRVMemorySemanticsIntoOCLMemoryOrder(Value *MemorySemantics,
1577 Instruction *InsertBefore) {
1578 if (auto *C = dyn_cast<ConstantInt>(MemorySemantics)) {
1579 return ConstantInt::get(C->getType(),
1580 mapSPIRVMemSemanticToOCL(C->getZExtValue()).second);
1581 }
1582
1583 if (auto *CI = dyn_cast<CallInst>(MemorySemantics)) {
1584 Function *F = CI->getCalledFunction();
1585 if (F && F->getName().equals(kSPIRVName::TranslateOCLMemOrder)) {
1586 // In case the SPIR-V module was created from an OpenCL program by
1587 // *this* SPIR-V generator, we know that the value passed to
1588 // __translate_ocl_memory_order is what we should pass to the
1589 // OpenCL builtin now.
1590 return CI->getArgOperand(0);
1591 }
1592 }
1593
1594 // SPIR-V MemorySemantics contains both OCL mem_fence_flags and mem_order and
1595 // therefore, we need to apply mask
1596 int Mask = MemorySemanticsMaskNone | MemorySemanticsAcquireMask |
1597 MemorySemanticsReleaseMask | MemorySemanticsAcquireReleaseMask |
1598 MemorySemanticsSequentiallyConsistentMask;
1599 return getOrCreateSwitchFunc(kSPIRVName::TranslateSPIRVMemOrder,
1600 MemorySemantics, OCLMemOrderMap::getRMap(),
1601 /* IsReverse */ true, None, InsertBefore, Mask);
1602 }
1603
transSPIRVMemorySemanticsIntoOCLMemFenceFlags(Value * MemorySemantics,Instruction * InsertBefore)1604 Value *SPIRV::transSPIRVMemorySemanticsIntoOCLMemFenceFlags(
1605 Value *MemorySemantics, Instruction *InsertBefore) {
1606 if (auto *C = dyn_cast<ConstantInt>(MemorySemantics)) {
1607 return ConstantInt::get(C->getType(),
1608 mapSPIRVMemSemanticToOCL(C->getZExtValue()).first);
1609 }
1610
1611 // TODO: any possible optimizations?
1612 // SPIR-V MemorySemantics contains both OCL mem_fence_flags and mem_order and
1613 // therefore, we need to apply mask
1614 int Mask = MemorySemanticsWorkgroupMemoryMask |
1615 MemorySemanticsCrossWorkgroupMemoryMask |
1616 MemorySemanticsImageMemoryMask;
1617 return getOrCreateSwitchFunc(kSPIRVName::TranslateSPIRVMemFence,
1618 MemorySemantics,
1619 OCLMemFenceExtendedMap::getRMap(),
1620 /* IsReverse */ true, None, InsertBefore, Mask);
1621 }
1622
mangleOpenClBuiltin(const std::string & UniqName,ArrayRef<Type * > ArgTypes,std::string & MangledName)1623 void llvm::mangleOpenClBuiltin(const std::string &UniqName,
1624 ArrayRef<Type *> ArgTypes,
1625 std::string &MangledName) {
1626 OCLUtil::OCLBuiltinFuncMangleInfo BtnInfo(ArgTypes);
1627 MangledName = SPIRV::mangleBuiltin(UniqName, ArgTypes, &BtnInfo);
1628 }
1629