1 /*========================== begin_copyright_notice ============================
2
3 Copyright (C) 2017-2021 Intel Corporation
4
5 SPDX-License-Identifier: MIT
6
7 ============================= end_copyright_notice ===========================*/
8
9 #include "common/LLVMWarningsPush.hpp"
10 #include <llvm/Support/ScaledNumber.h>
11 #include "llvm/ADT/PostOrderIterator.h"
12 #include <llvm/IR/LLVMContext.h>
13 #include <llvm/IR/Verifier.h>
14 #include <llvm/Analysis/CFGPrinter.h>
15 #include <llvm/Analysis/Passes.h>
16 #include <llvm/Pass.h>
17 #include <llvm/IR/PassManager.h>
18 #include <llvm/Transforms/IPO.h>
19 #include <llvm/Transforms/IPO/AlwaysInliner.h>
20 #include <llvm/Transforms/Scalar.h>
21 #include <llvm/IR/Module.h>
22 #include <llvm/IR/Function.h>
23 #include <llvm/Analysis/TargetLibraryInfo.h>
24 #include <llvm/Transforms/InstCombine/InstCombineWorklist.h>
25 #include <llvm/Transforms/InstCombine/InstCombine.h>
26
27 #include <llvmWrapper/Transforms/Utils.h>
28
29 #include "common/LLVMWarningsPop.hpp"
30
31 #include "AdaptorCommon/AddImplicitArgs.hpp"
32 #include "AdaptorCommon/ProcessFuncAttributes.h"
33 #include "AdaptorCommon/LegalizeFunctionSignatures.h"
34 #include "AdaptorCommon/TypesLegalizationPass.hpp"
35 #include "common/LLVMUtils.h"
36
37 #include "Compiler/CISACodeGen/ShaderCodeGen.hpp"
38 #include "Compiler/CISACodeGen/EstimateFunctionSize.h"
39 #include "Compiler/CISACodeGen/FixAddrSpaceCast.h"
40 #include "Compiler/CISACodeGen/ResolveGAS.h"
41 #include "Compiler/CISACodeGen/ResolvePredefinedConstant.h"
42 #include "Compiler/CISACodeGen/SimplifyConstant.h"
43 #include "Compiler/CISACodeGen/FoldKnownWorkGroupSizes.h"
44
45 #include "Compiler/HandleFRemInstructions.hpp"
46 #include "Compiler/Optimizer/BuiltInFuncImport.h"
47 #include "Compiler/Optimizer/CodeAssumption.hpp"
48 #include "Compiler/Optimizer/Scalarizer.h"
49 #include "Compiler/Optimizer/OpenCLPasses/DebuggerSupport/ImplicitGIDPass.hpp"
50 #include "Compiler/Optimizer/OpenCLPasses/ExtenstionFuncs/ExtensionArgAnalysis.hpp"
51 #include "Compiler/Optimizer/OpenCLPasses/ExtenstionFuncs/ExtensionFuncsAnalysis.hpp"
52 #include "Compiler/Optimizer/OpenCLPasses/ExtenstionFuncs/ExtensionFuncResolution.hpp"
53 #include "Compiler/Optimizer/OpenCLPasses/ImageFuncs/ImageFuncsAnalysis.hpp"
54 #include "Compiler/Optimizer/OpenCLPasses/ImageFuncs/ImageFuncResolution.hpp"
55 #include "Compiler/Optimizer/OpenCLPasses/PrivateMemory/PrivateMemoryUsageAnalysis.hpp"
56 #include "Compiler/Optimizer/OpenCLPasses/PrivateMemory/PrivateMemoryResolution.hpp"
57 #include "Compiler/Optimizer/OpenCLPasses/ProgramScopeConstants/ProgramScopeConstantAnalysis.hpp"
58 #include "Compiler/Optimizer/OpenCLPasses/ProgramScopeConstants/ProgramScopeConstantResolution.hpp"
59 #include "Compiler/Optimizer/OpenCLPasses/WIFuncs/WIFuncsAnalysis.hpp"
60 #include "Compiler/Optimizer/OpenCLPasses/WIFuncs/WIFuncResolution.hpp"
61 #include "Compiler/Optimizer/OpenCLPasses/ResourceAllocator/ResourceAllocator.hpp"
62 #include "Compiler/Optimizer/OpenCLPasses/BreakConstantExpr/BreakConstantExpr.hpp"
63 #include "Compiler/Optimizer/OpenCLPasses/LocalBuffers/InlineLocalsResolution.hpp"
64 #include "Compiler/Optimizer/OpenCLPasses/ReplaceUnsupportedIntrinsics/ReplaceUnsupportedIntrinsics.hpp"
65 #include "Compiler/Optimizer/OpenCLPasses/Atomics/ResolveOCLAtomics.hpp"
66 #include "Compiler/Optimizer/OpenCLPasses/WGFuncs/WGFuncResolution.hpp"
67 #include "Compiler/Optimizer/OpenCLPasses/AlignmentAnalysis/AlignmentAnalysis.hpp"
68 #include "Compiler/Optimizer/PreCompiledFuncImport.hpp"
69 #include "Compiler/Optimizer/OpenCLPasses/OpenCLPrintf/OpenCLPrintfAnalysis.hpp"
70 #include "Compiler/Optimizer/OpenCLPasses/OpenCLPrintf/OpenCLPrintfResolution.hpp"
71 #include "Compiler/Optimizer/OpenCLPasses/AggregateArguments/AggregateArguments.hpp"
72 #include "Compiler/Optimizer/OCLBIConverter.h"
73 #include "Compiler/Optimizer/OpenCLPasses/SetFastMathFlags/SetFastMathFlags.hpp"
74 #include "Compiler/Optimizer/OpenCLPasses/CorrectlyRoundedDivSqrt/CorrectlyRoundedDivSqrt.hpp"
75 #include "Compiler/Optimizer/OpenCLPasses/GenericAddressResolution/GenericAddressDynamicResolution.hpp"
76 #include "Compiler/Optimizer/OpenCLPasses/AddressSpaceAliasAnalysis/AddressSpaceAliasAnalysis.h"
77 #include "Compiler/Optimizer/OpenCLPasses/DeviceEnqueueFuncs/DeviceEnqueue.hpp"
78 #include "Compiler/Optimizer/OpenCLPasses/DeviceEnqueueFuncs/TransformBlocks.hpp"
79 #include "Compiler/Optimizer/OpenCLPasses/UndefinedReferences/UndefinedReferencesPass.hpp"
80 #include "Compiler/Optimizer/OpenCLPasses/SubGroupFuncs/SubGroupFuncsResolution.hpp"
81 #include "Compiler/Optimizer/OpenCLPasses/BIFTransforms/BIFTransforms.hpp"
82 #include "Compiler/Optimizer/OpenCLPasses/BreakdownIntrinsic.h"
83 #include "Compiler/Optimizer/OpenCLPasses/TransformUnmaskedFunctionsPass.h"
84 #include "Compiler/Optimizer/OpenCLPasses/StatelessToStatefull/StatelessToStatefull.hpp"
85 #include "Compiler/Optimizer/OpenCLPasses/KernelFunctionCloning.h"
86 #include "Compiler/Legalizer/TypeLegalizerPass.h"
87 #include "Compiler/Optimizer/OpenCLPasses/ClampLoopUnroll/ClampLoopUnroll.hpp"
88 #include "Compiler/Optimizer/OpenCLPasses/Image3dToImage2darray/Image3dToImage2darray.hpp"
89 #include "Compiler/Optimizer/OpenCLPasses/RewriteLocalSize/RewriteLocalSize.hpp"
90 #include "Compiler/MetaDataApi/PurgeMetaDataUtils.hpp"
91 #include "Compiler/MetaDataUtilsWrapper.h"
92 #include "Compiler/SPIRMetaDataTranslation.h"
93 #include "Compiler/Optimizer/OpenCLPasses/ErrorCheckPass.h"
94 #include "Compiler/Optimizer/OpenCLPasses/JointMatrixFuncsResolutionPass.h"
95 #include "Compiler/MetaDataApi/IGCMetaDataHelper.h"
96 #include "Compiler/CodeGenContextWrapper.hpp"
97 #include "Compiler/FixResourcePtr.hpp"
98 #include "Compiler/InitializePasses.h"
99 #include "Compiler/MetaDataApi/SpirMetaDataApi.h"
100 #include "Compiler/Optimizer/FixFastMathFlags.hpp"
101 #include "Compiler/CustomUnsafeOptPass.hpp"
102 #include "MoveStaticAllocas.h"
103 #include "PreprocessSPVIR.h"
104 #include "LowerInvokeSIMD.hpp"
105 #include "Compiler/Optimizer/IGCInstCombiner/IGCInstructionCombining.hpp"
106
107 #include "common/debug/Debug.hpp"
108 #include "common/igc_regkeys.hpp"
109 #include "common/debug/Dump.hpp"
110 #include "common/MemStats.h"
111
112 #include <iStdLib/utility.h>
113
114 #include "Compiler/CISACodeGen/DebugInfo.hpp"
115 #include "Compiler/CISACodeGen/TimeStatsCounter.h"
116 #include "Compiler/DebugInfo/ScalarVISAModule.h"
117 #include "Compiler/DebugInfo/Utils.h"
118 #include "DebugInfo/VISADebugEmitter.hpp"
119
120 #include <string>
121 #include <algorithm>
122
123
124 #include <Metrics/IGCMetric.h>
125
126
127
128 using namespace llvm;
129 using namespace IGC::IGCMD;
130 using namespace IGC::Debug;
131
132 namespace IGC
133 {
getOCLMajorVersion(const SPIRMD::SpirMetaDataUtils & spirMDUtils)134 int getOCLMajorVersion(const SPIRMD::SpirMetaDataUtils &spirMDUtils)
135 {
136 int oclMajor = 0, oclMinor = 0;
137 if (spirMDUtils.isOpenCLVersionsHasValue())
138 {
139 SPIRMD::VersionMetaDataHandle oclVersion = spirMDUtils.getOpenCLVersionsItem(0);
140 oclMajor = oclVersion->getMajor();
141 oclMinor = oclVersion->getMinor();
142 }
143 else
144 {
145 if (!spirMDUtils.empty_CompilerOptions())
146 {
147 // check compiler options
148 for (auto i = spirMDUtils.getCompilerOptionsItem(0)->begin(), e = spirMDUtils.getCompilerOptionsItem(0)->end(); i != e; ++i)
149 {
150 if (StringRef(*i).startswith("-cl-std=CL") && i->length() >= 13)
151 {
152 oclMajor = i->at(10) - '0';
153 oclMinor = i->at(12) - '0';
154 break;
155 }
156 }
157 }
158 // default is 1.2
159 if (!isLegalOCLVersion(oclMajor, oclMinor))
160 {
161 oclMajor = 1;
162 oclMinor = 2;
163 }
164 }
165 return oclMajor;
166 }
167
CommonOCLBasedPasses(OpenCLProgramContext * pContext,std::unique_ptr<llvm::Module> BuiltinGenericModule,std::unique_ptr<llvm::Module> BuiltinSizeModule)168 static void CommonOCLBasedPasses(
169 OpenCLProgramContext* pContext,
170 std::unique_ptr<llvm::Module> BuiltinGenericModule,
171 std::unique_ptr<llvm::Module> BuiltinSizeModule)
172 {
173 #if defined( _DEBUG )
174 llvm::verifyModule(*pContext->getModule());
175 #endif
176
177 COMPILER_TIME_START(pContext, TIME_UnificationPasses);
178
179 pContext->metrics.Init(&pContext->hash,
180 pContext->getModule()->getNamedMetadata("llvm.dbg.cu") != nullptr);
181 pContext->metrics.CollectFunctions(pContext->getModule());
182
183 unify_opt_PreProcess(pContext);
184
185 DumpLLVMIR(pContext, "beforeUnification");
186
187 // override the data layout to match Gen HW
188 int pointerSize = getPointerSize(*pContext->getModule());
189 std::string layoutstr;
190 if (pointerSize == 4)
191 {
192 layoutstr = "e-p:32:32:32";
193 }
194 else {
195 layoutstr = "e-p:64:64:64";
196 }
197 layoutstr += "-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64"\
198 "-f32:32:32-f64:64:64-v16:16:16-v24:32:32"\
199 "-v32:32:32-v48:64:64-v64:64:64-v96:128:128"\
200 "-v128:128:128-v192:256:256-v256:256:256"\
201 "-v512:512:512-v1024:1024:1024-n8:16:32";
202
203 StringRef dataLayout = layoutstr;
204 pContext->getModule()->setDataLayout(dataLayout);
205 BuiltinGenericModule->setDataLayout(dataLayout);
206 if( BuiltinSizeModule )
207 {
208 BuiltinSizeModule->setDataLayout(dataLayout);
209 }
210
211 MetaDataUtils *pMdUtils = pContext->getMetaDataUtils();
212
213 //extracting OCL version major before SPIRMetadataTranslation pass deletes its metadata node
214 const SPIRMD::SpirMetaDataUtils spirMDUtils(&(*pContext->getModule()));
215 int OCLMajor = getOCLMajorVersion(spirMDUtils);
216
217 CompOptions &CompilerOpts = pContext->getModuleMetaData()->compOpt;
218
219 // check OpenCL build options
220 bool shouldForceCR = pContext->m_Options.CorrectlyRoundedSqrt;
221
222 CompilerOpts.replaceGlobalOffsetsByZero =
223 pContext->m_InternalOptions.replaceGlobalOffsetsByZero;
224
225 CompilerOpts.SubgroupIndependentForwardProgressRequired =
226 (pContext->m_Options.NoSubgroupIFP == false);
227
228 if (OCLMajor >= 2)
229 {
230 CompilerOpts.UniformWGS =
231 pContext->m_Options.UniformWGS;
232 }
233
234 CompilerOpts.GreaterThan2GBBufferRequired =
235 !pContext->m_InternalOptions.Use32BitPtrArith;
236
237 CompilerOpts.GreaterThan4GBBufferRequired =
238 pContext->m_InternalOptions.IntelGreaterThan4GBBufferRequired;
239
240 CompilerOpts.DisableA64WA =
241 pContext->m_InternalOptions.IntelDisableA64WA;
242
243 CompilerOpts.ForceEnableA64WA =
244 pContext->m_InternalOptions.IntelForceEnableA64WA;
245
246 CompilerOpts.HasPositivePointerOffset =
247 pContext->m_InternalOptions.IntelHasPositivePointerOffset;
248
249 CompilerOpts.HasBufferOffsetArg =
250 pContext->m_InternalOptions.IntelHasBufferOffsetArg;
251
252 CompilerOpts.UseBindlessMode =
253 pContext->m_InternalOptions.UseBindlessMode;
254
255 CompilerOpts.UseLegacyBindlessMode =
256 pContext->m_InternalOptions.UseBindlessLegacyMode;
257
258 CompilerOpts.PreferBindlessImages =
259 pContext->m_InternalOptions.PreferBindlessImages ||
260 pContext->m_InternalOptions.UseBindlessMode;
261
262 if (CompilerOpts.PreferBindlessImages) {
263 pContext->getModuleMetaData()->UseBindlessImage = true;
264 }
265
266 CompilerOpts.EnableTakeGlobalAddress =
267 pContext->m_Options.EnableTakeGlobalAddress;
268
269 CompilerOpts.IsLibraryCompilation =
270 pContext->m_Options.IsLibraryCompilation;
271
272 CompilerOpts.EnableZEBinary =
273 pContext->m_InternalOptions.EnableZEBinary;
274
275 IGCPassManager mpmSPIR(pContext, "Unify");
276 #ifdef IGC_SCALAR_USE_KHRONOS_SPIRV_TRANSLATOR
277 mpmSPIR.add(new PreprocessSPVIR());
278 #endif // IGC_SCALAR_USE_KHRONOS_SPIRV_TRANSLATOR
279 mpmSPIR.add(new TypesLegalizationPass());
280 mpmSPIR.add(new TargetLibraryInfoWrapperPass());
281 mpmSPIR.add(createDeadCodeEliminationPass());
282 mpmSPIR.add(new MetaDataUtilsWrapper(pMdUtils, pContext->getModuleMetaData()));
283 mpmSPIR.add(new CodeGenContextWrapper(pContext));
284 mpmSPIR.add(new SPIRMetaDataTranslation());
285 mpmSPIR.run(*pContext->getModule());
286
287 bool isOptDisabled = CompilerOpts.OptDisable;
288 IGCPassManager mpm(pContext, "Unify");
289
290 // right now we don't support any standard function in the code gen
291 // maybe we want to support some at some point to take advantage of LLVM optimizations
292 TargetLibraryInfoImpl TLI;
293 TLI.disableAllFunctions();
294
295 mpm.add( new llvm::TargetLibraryInfoWrapperPass(TLI));
296
297 // This should be removed, once FE will be updated to use LLVM IR that supports
298 // AllowContract and ApproxFunc FastMathFlags.
299 mpm.add(new FixFastMathFlags());
300
301 mpm.add(new MetaDataUtilsWrapper(pMdUtils, pContext->getModuleMetaData()));
302 mpm.add(new CodeGenContextWrapper(pContext));
303
304 if (IGC_IS_FLAG_ENABLED(EnableUnmaskedFunctions))
305 {
306 mpm.add(new TransformUnmaskedFunctionsPass());
307 }
308
309 mpm.add(new ClampLoopUnroll(256));
310
311 mpm.add(new MoveStaticAllocas());
312
313 // Skip this pass if OCL version < 2.0
314 if (!(OCLMajor < 2))
315 {
316 mpm.add(createTransformBlocksPass());
317 }
318
319 // Clone kernel function being used as user function.
320 mpm.add(createKernelFunctionCloningPass());
321
322 mpm.add(new CorrectlyRoundedDivSqrt(shouldForceCR, false));
323 if(IGC_IS_FLAG_ENABLED(EnableIntelFast))
324 {
325 mpm.add(createBIFTransformsPass());
326 }
327
328 if(pContext->m_InternalOptions.KernelDebugEnable)
329 {
330 mpm.add(new ImplicitGlobalId());
331 }
332
333 if (IGC_IS_FLAG_ENABLED(EnableCodeAssumption))
334 {
335 mpm.add(new CodeAssumption());
336 }
337
338 if (pContext->m_instrTypes.hasFRem)
339 {
340 mpm.add(new HandleFRemInstructions());
341 }
342
343 mpm.add(new JointMatrixFuncsResolutionPass(pContext));
344
345 mpm.add(new PreBIImportAnalysis());
346 mpm.add(createTimeStatsCounterPass(pContext, TIME_Unify_BuiltinImport, STATS_COUNTER_START));
347 mpm.add(createBuiltInImportPass(std::move(BuiltinGenericModule), std::move(BuiltinSizeModule)));
348 mpm.add(createTimeStatsCounterPass(pContext, TIME_Unify_BuiltinImport, STATS_COUNTER_END));
349
350 if (IGC_GET_FLAG_VALUE(AllowMem2Reg))
351 {
352 mpm.add(createPromoteMemoryToRegisterPass());
353 }
354
355 mpm.add(new CatchAllLineNumber());
356
357
358 // OCL has built-ins so it always need to run inlining
359 {
360 // Estimate maximal function size in the module and disable subroutine if not profitable.
361 mpm.add(createEstimateFunctionSizePass());
362 mpm.add(createProcessFuncAttributesPass());
363 FastMathFlags Mask;
364 Mask.setFast();
365 Mask.setNoSignedZeros(false);
366 mpm.add(new SetFastMathFlags(Mask));
367
368 // Report undef references after setting func attribs for import linking
369 mpm.add(new UndefinedReferencesPass());
370
371 if (!IGC::ForceAlwaysInline())
372 {
373 int Threshold = IGC_GET_FLAG_VALUE(OCLInlineThreshold);
374 mpm.add(createFunctionInliningPass(Threshold));
375 }
376 else
377 {
378 mpm.add(createAlwaysInlinerLegacyPass());
379 }
380 // The inliner sometimes fails to delete unused functions, this cleans up the remaining mess.
381 mpm.add(createGlobalDCEPass());
382
383 // Check after GlobalDCE in case of doubles in dead functions
384 mpm.add(new ErrorCheck());
385
386 mpm.add(new LowerInvokeSIMD());
387
388 // Fix illegal argument/return types in function calls not already inlined.
389 // Structs/arrays are not allowed to be passed by value.
390 // Return types are not allowed to be more than 64-bits.
391 // This pass changes all illegal function signatures to be passed by pointer instead.
392 // NOTE: SPIR-V adaptor already handles this for struct types
393 if (pContext->m_instrTypes.hasSubroutines)
394 {
395 mpm.add(new LegalizeFunctionSignatures());
396 }
397
398 mpm.add(createProcessBuiltinMetaDataPass());
399 mpm.add(new PurgeMetaDataUtils());
400 }
401
402 // OpenCL WI + image function resolution
403
404 // OCLTODO : do another DCE that will get rid of unused WI func calls before this?
405 // We can save passing of unused implicit args from the runtime
406
407 // Adding Mem2Reg pass in order to help ImageFuncsAnalysis to identify the image arguments
408 // that the image functions operate on
409 // Clang output is: alloca --> store image func arg into allocated address -->
410 // load image arg from stored address --> call func on loaded image
411 // After Mem2Reg: call func on image func arg
412
413 mpm.add(createSROAPass());
414
415 mpm.add(new BreakConstantExpr());
416
417 if (IGC_IS_FLAG_ENABLED(EnableGASResolver))
418 {
419 // Add fix up of illegal `addrspacecast` in respect to OCL 2.0 spec.
420 mpm.add(createFixAddrSpaceCastPass());
421 mpm.add(createResolveGASPass());
422
423 if (IGC_IS_FLAG_ENABLED(EnableLowerGPCallArg))
424 mpm.add(createLowerGPCallArg());
425
426 // Run another round of constant breaking as GAS resolving may generate constants (constant address)
427 mpm.add(new BreakConstantExpr());
428 }
429
430 if (CompilerOpts.UniformWGS)
431 mpm.add(new RewriteLocalSize());
432
433 mpm.add(createSROAPass());
434 mpm.add(new BreakConstantExpr());
435
436 mpm.add(CreateFoldKnownWorkGroupSizes());
437
438 // 64-bit atomics have to be resolved before AddImplicitArgs pass as it uses
439 // local ids for spin lock initialization
440 mpm.add(new ResolveOCLAtomics());
441
442 // Run the AlignmentAnalysis pass before the passes which add implicit arguments, to ensure we do not lose load/store alignment information.
443 // For example, ProgramScopeConstantResolution will relocate the buffer's base to an i8* typed pointer.
444 mpm.add(new AlignmentAnalysis());
445
446 // Analysis passes
447 mpm.add(new WIFuncsAnalysis());
448 mpm.add(new ImageFuncsAnalysis());
449 mpm.add(new OpenCLPrintfAnalysis());
450 mpm.add(createDeadCodeEliminationPass());
451 mpm.add(new ProgramScopeConstantAnalysis());
452 mpm.add(new PrivateMemoryUsageAnalysis());
453 mpm.add(new AggregateArgumentsAnalysis());
454 mpm.add(new ExtensionFuncsAnalysis());
455 mpm.add(new ExtensionArgAnalysis());
456 mpm.add(new DeviceEnqueueFuncsAnalysis());
457 mpm.add(createGenericAddressAnalysisPass());
458
459 mpm.add(new BuiltinCallGraphAnalysis());
460
461 // Adding implicit args based on Analysis passes
462 mpm.add(new AddImplicitArgs());
463
464 // Resolution passes
465 mpm.add(new WIFuncResolution());
466 mpm.add(new OpenCLPrintfResolution());
467 mpm.add(new ResolveOCLAtomics());
468 mpm.add(new ResourceAllocator());
469 mpm.add(new SubGroupFuncsResolution());
470
471 // Run InlineLocals and GenericAddressDynamic together
472 mpm.add(new InlineLocalsResolution());
473
474 mpm.add(new WGFuncResolution());
475 mpm.add(new ResolveAggregateArguments());
476 mpm.add(new ExtensionFuncsResolution());
477 mpm.add(new DeviceEnqueueFuncsResolution());
478
479 mpm.add(createDeadCodeEliminationPass());
480
481 mpm.add(createBuiltinsConverterPass());
482
483 // check for unsupported intrinsics
484 mpm.add(new ErrorCheck());
485
486 mpm.add(new ImageFuncResolution());
487 mpm.add(new Image3dToImage2darray());
488
489 // Break down the intrinsics into smaller operations (eg. fmuladd to fmul add)
490 mpm.add(new BreakdownIntrinsicPass());
491
492 {
493 if(IGC_IS_FLAG_ENABLED(EnableConstantPromotion))
494 {
495 mpm.add(createSimplifyConstantPass());
496 mpm.add(createPromoteConstantPass());
497 }
498 mpm.add(createIGCInstructionCombiningPass());
499
500 // Instcombine can create constant expressions, which are not handled by the program scope constant resolution pass
501 mpm.add(new BreakConstantExpr());
502
503 // Run constant lowering conservatively for tests where constant
504 // objects are over-written after casting pointers in constant address
505 // space into ones in private address.
506 //
507 // NOTE: Per OpenCL C standard (both 1.2 and 2.0), that's illegal.
508 //
509 // This has to be run after instcombine to allow memcpy from GlobalVariable arrays private
510 // allocs to be optimized away.
511 mpm.add(new ProgramScopeConstantResolution(true));
512 }
513
514 // TODO: Run CheckInstrTypes after builtin import to determine if builtins have allocas.
515 mpm.add(createSROAPass());
516 mpm.add(createIGCInstructionCombiningPass());
517 // See the comment above (it's copied as is).
518 // Instcombine can create constant expressions, which are not handled by the program scope constant resolution pass.
519 // For example, in InsertDummyKernelForSymbolTablePass addresses of indirectly called functions
520 // should be processed and without BreakConstantExpr the addresses are not found.
521 mpm.add(new BreakConstantExpr());
522
523 // true means selective scalarization
524 mpm.add(createScalarizerPass(IGC_IS_FLAG_ENABLED(EnableSelectiveScalarizer)));
525
526 // Create a dummy kernel to attach the symbol table if necessary
527 // Only needed if function pointers, externally linked functions, or relocatable global variables are present
528 mpm.add(createInsertDummyKernelForSymbolTablePass());
529
530 FastMathFlags Mask;
531 Mask.setNoSignedZeros(true);
532 mpm.add(new SetFastMathFlags(Mask));
533 mpm.add(new FixResourcePtr());
534
535 if(isOptDisabled)
536 {
537 // Run additional predefined constant resolving when optimization is
538 // disabled. It's definitely a workaround so far.
539 mpm.add(createResolvePredefinedConstantPass());
540 }
541
542 mpm.add(createLowerSwitchPass());
543 mpm.add(createTypeLegalizerPass());
544 mpm.run(*pContext->getModule());
545
546 // Following functions checks whether -g option is specified.
547 // The flag is set only in SPIRMetadataTranslationPass which
548 // is run in above mpm.run statement. The downside of calling
549 // this function here, as opposed to beginning of this function,
550 // is that unreferenced constants will be eliminated. So
551 // debugger will not be able to query those variables.
552 insertOCLMissingDebugConstMetadata(pContext);
553
554 COMPILER_TIME_END(pContext, TIME_UnificationPasses);
555
556 DumpLLVMIR(pContext, "afterUnification");
557
558 MEM_SNAPSHOT(IGC::SMS_AFTER_UNIFICATION);
559 }
560
UnifyIROCL(OpenCLProgramContext * pContext,std::unique_ptr<llvm::Module> BuiltinGenericModule,std::unique_ptr<llvm::Module> BuiltinSizeModule)561 void UnifyIROCL(
562 OpenCLProgramContext* pContext,
563 std::unique_ptr<llvm::Module> BuiltinGenericModule,
564 std::unique_ptr<llvm::Module> BuiltinSizeModule)
565 {
566 CommonOCLBasedPasses(pContext, std::move(BuiltinGenericModule), std::move(BuiltinSizeModule));
567 }
568
UnifyIRSPIR(OpenCLProgramContext * pContext,std::unique_ptr<llvm::Module> BuiltinGenericModule,std::unique_ptr<llvm::Module> BuiltinSizeModule)569 void UnifyIRSPIR(
570 OpenCLProgramContext* pContext,
571 std::unique_ptr<llvm::Module> BuiltinGenericModule,
572 std::unique_ptr<llvm::Module> BuiltinSizeModule)
573 {
574 CommonOCLBasedPasses(pContext, std::move(BuiltinGenericModule), std::move(BuiltinSizeModule));
575 }
576
577 }
578