1 /*========================== begin_copyright_notice ============================
2 
3 Copyright (C) 2017-2021 Intel Corporation
4 
5 SPDX-License-Identifier: MIT
6 
7 ============================= end_copyright_notice ===========================*/
8 
9 #include "common/LLVMWarningsPush.hpp"
10 #include <llvm/Support/ScaledNumber.h>
11 #include "llvm/ADT/PostOrderIterator.h"
12 #include <llvm/IR/LLVMContext.h>
13 #include <llvm/IR/Verifier.h>
14 #include <llvm/Analysis/CFGPrinter.h>
15 #include <llvm/Analysis/Passes.h>
16 #include <llvm/Pass.h>
17 #include <llvm/IR/PassManager.h>
18 #include <llvm/Transforms/IPO.h>
19 #include <llvm/Transforms/IPO/AlwaysInliner.h>
20 #include <llvm/Transforms/Scalar.h>
21 #include <llvm/IR/Module.h>
22 #include <llvm/IR/Function.h>
23 #include <llvm/Analysis/TargetLibraryInfo.h>
24 #include <llvm/Transforms/InstCombine/InstCombineWorklist.h>
25 #include <llvm/Transforms/InstCombine/InstCombine.h>
26 
27 #include <llvmWrapper/Transforms/Utils.h>
28 
29 #include "common/LLVMWarningsPop.hpp"
30 
31 #include "AdaptorCommon/AddImplicitArgs.hpp"
32 #include "AdaptorCommon/ProcessFuncAttributes.h"
33 #include "AdaptorCommon/LegalizeFunctionSignatures.h"
34 #include "AdaptorCommon/TypesLegalizationPass.hpp"
35 #include "common/LLVMUtils.h"
36 
37 #include "Compiler/CISACodeGen/ShaderCodeGen.hpp"
38 #include "Compiler/CISACodeGen/EstimateFunctionSize.h"
39 #include "Compiler/CISACodeGen/FixAddrSpaceCast.h"
40 #include "Compiler/CISACodeGen/ResolveGAS.h"
41 #include "Compiler/CISACodeGen/ResolvePredefinedConstant.h"
42 #include "Compiler/CISACodeGen/SimplifyConstant.h"
43 #include "Compiler/CISACodeGen/FoldKnownWorkGroupSizes.h"
44 
45 #include "Compiler/HandleFRemInstructions.hpp"
46 #include "Compiler/Optimizer/BuiltInFuncImport.h"
47 #include "Compiler/Optimizer/CodeAssumption.hpp"
48 #include "Compiler/Optimizer/Scalarizer.h"
49 #include "Compiler/Optimizer/OpenCLPasses/DebuggerSupport/ImplicitGIDPass.hpp"
50 #include "Compiler/Optimizer/OpenCLPasses/ExtenstionFuncs/ExtensionArgAnalysis.hpp"
51 #include "Compiler/Optimizer/OpenCLPasses/ExtenstionFuncs/ExtensionFuncsAnalysis.hpp"
52 #include "Compiler/Optimizer/OpenCLPasses/ExtenstionFuncs/ExtensionFuncResolution.hpp"
53 #include "Compiler/Optimizer/OpenCLPasses/ImageFuncs/ImageFuncsAnalysis.hpp"
54 #include "Compiler/Optimizer/OpenCLPasses/ImageFuncs/ImageFuncResolution.hpp"
55 #include "Compiler/Optimizer/OpenCLPasses/PrivateMemory/PrivateMemoryUsageAnalysis.hpp"
56 #include "Compiler/Optimizer/OpenCLPasses/PrivateMemory/PrivateMemoryResolution.hpp"
57 #include "Compiler/Optimizer/OpenCLPasses/ProgramScopeConstants/ProgramScopeConstantAnalysis.hpp"
58 #include "Compiler/Optimizer/OpenCLPasses/ProgramScopeConstants/ProgramScopeConstantResolution.hpp"
59 #include "Compiler/Optimizer/OpenCLPasses/WIFuncs/WIFuncsAnalysis.hpp"
60 #include "Compiler/Optimizer/OpenCLPasses/WIFuncs/WIFuncResolution.hpp"
61 #include "Compiler/Optimizer/OpenCLPasses/ResourceAllocator/ResourceAllocator.hpp"
62 #include "Compiler/Optimizer/OpenCLPasses/BreakConstantExpr/BreakConstantExpr.hpp"
63 #include "Compiler/Optimizer/OpenCLPasses/LocalBuffers/InlineLocalsResolution.hpp"
64 #include "Compiler/Optimizer/OpenCLPasses/ReplaceUnsupportedIntrinsics/ReplaceUnsupportedIntrinsics.hpp"
65 #include "Compiler/Optimizer/OpenCLPasses/Atomics/ResolveOCLAtomics.hpp"
66 #include "Compiler/Optimizer/OpenCLPasses/WGFuncs/WGFuncResolution.hpp"
67 #include "Compiler/Optimizer/OpenCLPasses/AlignmentAnalysis/AlignmentAnalysis.hpp"
68 #include "Compiler/Optimizer/PreCompiledFuncImport.hpp"
69 #include "Compiler/Optimizer/OpenCLPasses/OpenCLPrintf/OpenCLPrintfAnalysis.hpp"
70 #include "Compiler/Optimizer/OpenCLPasses/OpenCLPrintf/OpenCLPrintfResolution.hpp"
71 #include "Compiler/Optimizer/OpenCLPasses/AggregateArguments/AggregateArguments.hpp"
72 #include "Compiler/Optimizer/OCLBIConverter.h"
73 #include "Compiler/Optimizer/OpenCLPasses/SetFastMathFlags/SetFastMathFlags.hpp"
74 #include "Compiler/Optimizer/OpenCLPasses/CorrectlyRoundedDivSqrt/CorrectlyRoundedDivSqrt.hpp"
75 #include "Compiler/Optimizer/OpenCLPasses/GenericAddressResolution/GenericAddressDynamicResolution.hpp"
76 #include "Compiler/Optimizer/OpenCLPasses/AddressSpaceAliasAnalysis/AddressSpaceAliasAnalysis.h"
77 #include "Compiler/Optimizer/OpenCLPasses/DeviceEnqueueFuncs/DeviceEnqueue.hpp"
78 #include "Compiler/Optimizer/OpenCLPasses/DeviceEnqueueFuncs/TransformBlocks.hpp"
79 #include "Compiler/Optimizer/OpenCLPasses/UndefinedReferences/UndefinedReferencesPass.hpp"
80 #include "Compiler/Optimizer/OpenCLPasses/SubGroupFuncs/SubGroupFuncsResolution.hpp"
81 #include "Compiler/Optimizer/OpenCLPasses/BIFTransforms/BIFTransforms.hpp"
82 #include "Compiler/Optimizer/OpenCLPasses/BreakdownIntrinsic.h"
83 #include "Compiler/Optimizer/OpenCLPasses/TransformUnmaskedFunctionsPass.h"
84 #include "Compiler/Optimizer/OpenCLPasses/StatelessToStatefull/StatelessToStatefull.hpp"
85 #include "Compiler/Optimizer/OpenCLPasses/KernelFunctionCloning.h"
86 #include "Compiler/Legalizer/TypeLegalizerPass.h"
87 #include "Compiler/Optimizer/OpenCLPasses/ClampLoopUnroll/ClampLoopUnroll.hpp"
88 #include "Compiler/Optimizer/OpenCLPasses/Image3dToImage2darray/Image3dToImage2darray.hpp"
89 #include "Compiler/Optimizer/OpenCLPasses/RewriteLocalSize/RewriteLocalSize.hpp"
90 #include "Compiler/MetaDataApi/PurgeMetaDataUtils.hpp"
91 #include "Compiler/MetaDataUtilsWrapper.h"
92 #include "Compiler/SPIRMetaDataTranslation.h"
93 #include "Compiler/Optimizer/OpenCLPasses/ErrorCheckPass.h"
94 #include "Compiler/Optimizer/OpenCLPasses/JointMatrixFuncsResolutionPass.h"
95 #include "Compiler/MetaDataApi/IGCMetaDataHelper.h"
96 #include "Compiler/CodeGenContextWrapper.hpp"
97 #include "Compiler/FixResourcePtr.hpp"
98 #include "Compiler/InitializePasses.h"
99 #include "Compiler/MetaDataApi/SpirMetaDataApi.h"
100 #include "Compiler/Optimizer/FixFastMathFlags.hpp"
101 #include "Compiler/CustomUnsafeOptPass.hpp"
102 #include "MoveStaticAllocas.h"
103 #include "PreprocessSPVIR.h"
104 #include "LowerInvokeSIMD.hpp"
105 #include "Compiler/Optimizer/IGCInstCombiner/IGCInstructionCombining.hpp"
106 
107 #include "common/debug/Debug.hpp"
108 #include "common/igc_regkeys.hpp"
109 #include "common/debug/Dump.hpp"
110 #include "common/MemStats.h"
111 
112 #include <iStdLib/utility.h>
113 
114 #include "Compiler/CISACodeGen/DebugInfo.hpp"
115 #include "Compiler/CISACodeGen/TimeStatsCounter.h"
116 #include "Compiler/DebugInfo/ScalarVISAModule.h"
117 #include "Compiler/DebugInfo/Utils.h"
118 #include "DebugInfo/VISADebugEmitter.hpp"
119 
120 #include <string>
121 #include <algorithm>
122 
123 
124 #include <Metrics/IGCMetric.h>
125 
126 
127 
128 using namespace llvm;
129 using namespace IGC::IGCMD;
130 using namespace IGC::Debug;
131 
132 namespace IGC
133 {
getOCLMajorVersion(const SPIRMD::SpirMetaDataUtils & spirMDUtils)134     int getOCLMajorVersion(const SPIRMD::SpirMetaDataUtils &spirMDUtils)
135     {
136         int oclMajor = 0, oclMinor = 0;
137         if (spirMDUtils.isOpenCLVersionsHasValue())
138         {
139             SPIRMD::VersionMetaDataHandle oclVersion = spirMDUtils.getOpenCLVersionsItem(0);
140             oclMajor = oclVersion->getMajor();
141             oclMinor = oclVersion->getMinor();
142         }
143         else
144         {
145             if (!spirMDUtils.empty_CompilerOptions())
146             {
147                 // check compiler options
148                 for (auto i = spirMDUtils.getCompilerOptionsItem(0)->begin(), e = spirMDUtils.getCompilerOptionsItem(0)->end(); i != e; ++i)
149                 {
150                     if (StringRef(*i).startswith("-cl-std=CL") && i->length() >= 13)
151                     {
152                         oclMajor = i->at(10) - '0';
153                         oclMinor = i->at(12) - '0';
154                         break;
155                     }
156                 }
157             }
158             // default is 1.2
159             if (!isLegalOCLVersion(oclMajor, oclMinor))
160             {
161                 oclMajor = 1;
162                 oclMinor = 2;
163             }
164         }
165         return oclMajor;
166     }
167 
CommonOCLBasedPasses(OpenCLProgramContext * pContext,std::unique_ptr<llvm::Module> BuiltinGenericModule,std::unique_ptr<llvm::Module> BuiltinSizeModule)168 static void CommonOCLBasedPasses(
169     OpenCLProgramContext* pContext,
170     std::unique_ptr<llvm::Module> BuiltinGenericModule,
171     std::unique_ptr<llvm::Module> BuiltinSizeModule)
172 {
173 #if defined( _DEBUG )
174     llvm::verifyModule(*pContext->getModule());
175 #endif
176 
177     COMPILER_TIME_START(pContext, TIME_UnificationPasses);
178 
179     pContext->metrics.Init(&pContext->hash,
180         pContext->getModule()->getNamedMetadata("llvm.dbg.cu") != nullptr);
181     pContext->metrics.CollectFunctions(pContext->getModule());
182 
183     unify_opt_PreProcess(pContext);
184 
185     DumpLLVMIR(pContext, "beforeUnification");
186 
187     // override the data layout to match Gen HW
188     int pointerSize = getPointerSize(*pContext->getModule());
189     std::string layoutstr;
190     if (pointerSize == 4)
191     {
192         layoutstr = "e-p:32:32:32";
193     }
194     else {
195         layoutstr = "e-p:64:64:64";
196     }
197     layoutstr += "-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64"\
198         "-f32:32:32-f64:64:64-v16:16:16-v24:32:32"\
199         "-v32:32:32-v48:64:64-v64:64:64-v96:128:128"\
200         "-v128:128:128-v192:256:256-v256:256:256"\
201         "-v512:512:512-v1024:1024:1024-n8:16:32";
202 
203     StringRef dataLayout = layoutstr;
204     pContext->getModule()->setDataLayout(dataLayout);
205     BuiltinGenericModule->setDataLayout(dataLayout);
206     if( BuiltinSizeModule )
207     {
208         BuiltinSizeModule->setDataLayout(dataLayout);
209     }
210 
211     MetaDataUtils *pMdUtils = pContext->getMetaDataUtils();
212 
213     //extracting OCL version major before SPIRMetadataTranslation pass deletes its metadata node
214     const SPIRMD::SpirMetaDataUtils spirMDUtils(&(*pContext->getModule()));
215     int OCLMajor = getOCLMajorVersion(spirMDUtils);
216 
217     CompOptions &CompilerOpts = pContext->getModuleMetaData()->compOpt;
218 
219     // check OpenCL build options
220     bool shouldForceCR = pContext->m_Options.CorrectlyRoundedSqrt;
221 
222     CompilerOpts.replaceGlobalOffsetsByZero =
223         pContext->m_InternalOptions.replaceGlobalOffsetsByZero;
224 
225     CompilerOpts.SubgroupIndependentForwardProgressRequired =
226         (pContext->m_Options.NoSubgroupIFP == false);
227 
228     if (OCLMajor >= 2)
229     {
230         CompilerOpts.UniformWGS =
231             pContext->m_Options.UniformWGS;
232     }
233 
234     CompilerOpts.GreaterThan2GBBufferRequired =
235         !pContext->m_InternalOptions.Use32BitPtrArith;
236 
237     CompilerOpts.GreaterThan4GBBufferRequired =
238         pContext->m_InternalOptions.IntelGreaterThan4GBBufferRequired;
239 
240     CompilerOpts.DisableA64WA =
241         pContext->m_InternalOptions.IntelDisableA64WA;
242 
243     CompilerOpts.ForceEnableA64WA =
244         pContext->m_InternalOptions.IntelForceEnableA64WA;
245 
246     CompilerOpts.HasPositivePointerOffset =
247         pContext->m_InternalOptions.IntelHasPositivePointerOffset;
248 
249     CompilerOpts.HasBufferOffsetArg =
250         pContext->m_InternalOptions.IntelHasBufferOffsetArg;
251 
252     CompilerOpts.UseBindlessMode =
253         pContext->m_InternalOptions.UseBindlessMode;
254 
255     CompilerOpts.UseLegacyBindlessMode =
256         pContext->m_InternalOptions.UseBindlessLegacyMode;
257 
258     CompilerOpts.PreferBindlessImages =
259         pContext->m_InternalOptions.PreferBindlessImages ||
260         pContext->m_InternalOptions.UseBindlessMode;
261 
262     if (CompilerOpts.PreferBindlessImages) {
263         pContext->getModuleMetaData()->UseBindlessImage = true;
264     }
265 
266     CompilerOpts.EnableTakeGlobalAddress =
267         pContext->m_Options.EnableTakeGlobalAddress;
268 
269     CompilerOpts.IsLibraryCompilation =
270         pContext->m_Options.IsLibraryCompilation;
271 
272     CompilerOpts.EnableZEBinary =
273         pContext->m_InternalOptions.EnableZEBinary;
274 
275     IGCPassManager mpmSPIR(pContext, "Unify");
276 #ifdef IGC_SCALAR_USE_KHRONOS_SPIRV_TRANSLATOR
277     mpmSPIR.add(new PreprocessSPVIR());
278 #endif // IGC_SCALAR_USE_KHRONOS_SPIRV_TRANSLATOR
279     mpmSPIR.add(new TypesLegalizationPass());
280     mpmSPIR.add(new TargetLibraryInfoWrapperPass());
281     mpmSPIR.add(createDeadCodeEliminationPass());
282     mpmSPIR.add(new MetaDataUtilsWrapper(pMdUtils, pContext->getModuleMetaData()));
283     mpmSPIR.add(new CodeGenContextWrapper(pContext));
284     mpmSPIR.add(new SPIRMetaDataTranslation());
285     mpmSPIR.run(*pContext->getModule());
286 
287     bool isOptDisabled = CompilerOpts.OptDisable;
288     IGCPassManager mpm(pContext, "Unify");
289 
290     // right now we don't support any standard function in the code gen
291     // maybe we want to support some at some point to take advantage of LLVM optimizations
292     TargetLibraryInfoImpl TLI;
293     TLI.disableAllFunctions();
294 
295     mpm.add( new llvm::TargetLibraryInfoWrapperPass(TLI));
296 
297     // This should be removed, once FE will be updated to use LLVM IR that supports
298     // AllowContract and ApproxFunc FastMathFlags.
299     mpm.add(new FixFastMathFlags());
300 
301     mpm.add(new MetaDataUtilsWrapper(pMdUtils, pContext->getModuleMetaData()));
302     mpm.add(new CodeGenContextWrapper(pContext));
303 
304     if (IGC_IS_FLAG_ENABLED(EnableUnmaskedFunctions))
305     {
306         mpm.add(new TransformUnmaskedFunctionsPass());
307     }
308 
309     mpm.add(new ClampLoopUnroll(256));
310 
311     mpm.add(new MoveStaticAllocas());
312 
313     // Skip this pass if OCL version < 2.0
314     if (!(OCLMajor < 2))
315     {
316         mpm.add(createTransformBlocksPass());
317     }
318 
319     // Clone kernel function being used as user function.
320     mpm.add(createKernelFunctionCloningPass());
321 
322     mpm.add(new CorrectlyRoundedDivSqrt(shouldForceCR, false));
323     if(IGC_IS_FLAG_ENABLED(EnableIntelFast))
324     {
325         mpm.add(createBIFTransformsPass());
326     }
327 
328     if(pContext->m_InternalOptions.KernelDebugEnable)
329     {
330         mpm.add(new ImplicitGlobalId());
331     }
332 
333     if (IGC_IS_FLAG_ENABLED(EnableCodeAssumption))
334     {
335         mpm.add(new CodeAssumption());
336     }
337 
338     if (pContext->m_instrTypes.hasFRem)
339     {
340         mpm.add(new HandleFRemInstructions());
341     }
342 
343     mpm.add(new JointMatrixFuncsResolutionPass(pContext));
344 
345     mpm.add(new PreBIImportAnalysis());
346     mpm.add(createTimeStatsCounterPass(pContext, TIME_Unify_BuiltinImport, STATS_COUNTER_START));
347     mpm.add(createBuiltInImportPass(std::move(BuiltinGenericModule), std::move(BuiltinSizeModule)));
348     mpm.add(createTimeStatsCounterPass(pContext, TIME_Unify_BuiltinImport, STATS_COUNTER_END));
349 
350     if (IGC_GET_FLAG_VALUE(AllowMem2Reg))
351     {
352         mpm.add(createPromoteMemoryToRegisterPass());
353     }
354 
355     mpm.add(new CatchAllLineNumber());
356 
357 
358     // OCL has built-ins so it always need to run inlining
359     {
360         // Estimate maximal function size in the module and disable subroutine if not profitable.
361         mpm.add(createEstimateFunctionSizePass());
362         mpm.add(createProcessFuncAttributesPass());
363         FastMathFlags Mask;
364         Mask.setFast();
365         Mask.setNoSignedZeros(false);
366         mpm.add(new SetFastMathFlags(Mask));
367 
368         // Report undef references after setting func attribs for import linking
369         mpm.add(new UndefinedReferencesPass());
370 
371         if (!IGC::ForceAlwaysInline())
372         {
373             int Threshold = IGC_GET_FLAG_VALUE(OCLInlineThreshold);
374             mpm.add(createFunctionInliningPass(Threshold));
375         }
376         else
377         {
378             mpm.add(createAlwaysInlinerLegacyPass());
379         }
380         // The inliner sometimes fails to delete unused functions, this cleans up the remaining mess.
381         mpm.add(createGlobalDCEPass());
382 
383         // Check after GlobalDCE in case of doubles in dead functions
384         mpm.add(new ErrorCheck());
385 
386         mpm.add(new LowerInvokeSIMD());
387 
388         // Fix illegal argument/return types in function calls not already inlined.
389         // Structs/arrays are not allowed to be passed by value.
390         // Return types are not allowed to be more than 64-bits.
391         // This pass changes all illegal function signatures to be passed by pointer instead.
392         // NOTE: SPIR-V adaptor already handles this for struct types
393         if (pContext->m_instrTypes.hasSubroutines)
394         {
395             mpm.add(new LegalizeFunctionSignatures());
396         }
397 
398         mpm.add(createProcessBuiltinMetaDataPass());
399         mpm.add(new PurgeMetaDataUtils());
400     }
401 
402     // OpenCL WI + image function resolution
403 
404     // OCLTODO : do another DCE that will get rid of unused WI func calls before this?
405     // We can save passing of unused implicit args from the runtime
406 
407     // Adding Mem2Reg pass in order to help ImageFuncsAnalysis to identify the image arguments
408     // that the image functions operate on
409     // Clang output is: alloca --> store image func arg into allocated address -->
410     //                  load image arg from stored address --> call func on loaded image
411     // After Mem2Reg: call func on image func arg
412 
413     mpm.add(createSROAPass());
414 
415     mpm.add(new BreakConstantExpr());
416 
417     if (IGC_IS_FLAG_ENABLED(EnableGASResolver))
418     {
419         // Add fix up of illegal `addrspacecast` in respect to OCL 2.0 spec.
420         mpm.add(createFixAddrSpaceCastPass());
421         mpm.add(createResolveGASPass());
422 
423         if (IGC_IS_FLAG_ENABLED(EnableLowerGPCallArg))
424             mpm.add(createLowerGPCallArg());
425 
426         // Run another round of constant breaking as GAS resolving may generate constants (constant address)
427         mpm.add(new BreakConstantExpr());
428     }
429 
430     if (CompilerOpts.UniformWGS)
431         mpm.add(new RewriteLocalSize());
432 
433     mpm.add(createSROAPass());
434     mpm.add(new BreakConstantExpr());
435 
436     mpm.add(CreateFoldKnownWorkGroupSizes());
437 
438     // 64-bit atomics have to be resolved before AddImplicitArgs pass as it uses
439     // local ids for spin lock initialization
440     mpm.add(new ResolveOCLAtomics());
441 
442     // Run the AlignmentAnalysis pass before the passes which add implicit arguments, to ensure we do not lose load/store alignment information.
443     // For example, ProgramScopeConstantResolution will relocate the buffer's base to an i8* typed pointer.
444     mpm.add(new AlignmentAnalysis());
445 
446     // Analysis passes
447     mpm.add(new WIFuncsAnalysis());
448     mpm.add(new ImageFuncsAnalysis());
449     mpm.add(new OpenCLPrintfAnalysis());
450     mpm.add(createDeadCodeEliminationPass());
451     mpm.add(new ProgramScopeConstantAnalysis());
452     mpm.add(new PrivateMemoryUsageAnalysis());
453     mpm.add(new AggregateArgumentsAnalysis());
454     mpm.add(new ExtensionFuncsAnalysis());
455     mpm.add(new ExtensionArgAnalysis());
456     mpm.add(new DeviceEnqueueFuncsAnalysis());
457     mpm.add(createGenericAddressAnalysisPass());
458 
459     mpm.add(new BuiltinCallGraphAnalysis());
460 
461     // Adding implicit args based on Analysis passes
462     mpm.add(new AddImplicitArgs());
463 
464     // Resolution passes
465     mpm.add(new WIFuncResolution());
466     mpm.add(new OpenCLPrintfResolution());
467     mpm.add(new ResolveOCLAtomics());
468     mpm.add(new ResourceAllocator());
469     mpm.add(new SubGroupFuncsResolution());
470 
471     // Run InlineLocals and GenericAddressDynamic together
472     mpm.add(new InlineLocalsResolution());
473 
474     mpm.add(new WGFuncResolution());
475     mpm.add(new ResolveAggregateArguments());
476     mpm.add(new ExtensionFuncsResolution());
477     mpm.add(new DeviceEnqueueFuncsResolution());
478 
479     mpm.add(createDeadCodeEliminationPass());
480 
481     mpm.add(createBuiltinsConverterPass());
482 
483     // check for unsupported intrinsics
484     mpm.add(new ErrorCheck());
485 
486     mpm.add(new ImageFuncResolution());
487     mpm.add(new Image3dToImage2darray());
488 
489     // Break down the intrinsics into smaller operations (eg. fmuladd to fmul add)
490     mpm.add(new BreakdownIntrinsicPass());
491 
492     {
493         if(IGC_IS_FLAG_ENABLED(EnableConstantPromotion))
494         {
495             mpm.add(createSimplifyConstantPass());
496             mpm.add(createPromoteConstantPass());
497         }
498         mpm.add(createIGCInstructionCombiningPass());
499 
500         // Instcombine can create constant expressions, which are not handled by the program scope constant resolution pass
501         mpm.add(new BreakConstantExpr());
502 
503         // Run constant lowering conservatively for tests where constant
504         // objects are over-written after casting pointers in constant address
505         // space into ones in private address.
506         //
507         // NOTE: Per OpenCL C standard (both 1.2 and 2.0), that's illegal.
508         //
509         // This has to be run after instcombine to allow memcpy from GlobalVariable arrays private
510         // allocs to be optimized away.
511         mpm.add(new ProgramScopeConstantResolution(true));
512     }
513 
514     // TODO: Run CheckInstrTypes after builtin import to determine if builtins have allocas.
515     mpm.add(createSROAPass());
516     mpm.add(createIGCInstructionCombiningPass());
517     // See the comment above (it's copied as is).
518     // Instcombine can create constant expressions, which are not handled by the program scope constant resolution pass.
519     // For example, in InsertDummyKernelForSymbolTablePass addresses of indirectly called functions
520     // should be processed and without BreakConstantExpr the addresses are not found.
521     mpm.add(new BreakConstantExpr());
522 
523     // true means selective scalarization
524     mpm.add(createScalarizerPass(IGC_IS_FLAG_ENABLED(EnableSelectiveScalarizer)));
525 
526     // Create a dummy kernel to attach the symbol table if necessary
527     // Only needed if function pointers, externally linked functions, or relocatable global variables are present
528     mpm.add(createInsertDummyKernelForSymbolTablePass());
529 
530     FastMathFlags Mask;
531     Mask.setNoSignedZeros(true);
532     mpm.add(new SetFastMathFlags(Mask));
533     mpm.add(new FixResourcePtr());
534 
535     if(isOptDisabled)
536     {
537         // Run additional predefined constant resolving when optimization is
538         // disabled. It's definitely a workaround so far.
539         mpm.add(createResolvePredefinedConstantPass());
540     }
541 
542     mpm.add(createLowerSwitchPass());
543     mpm.add(createTypeLegalizerPass());
544     mpm.run(*pContext->getModule());
545 
546     // Following functions checks whether -g option is specified.
547     // The flag is set only in SPIRMetadataTranslationPass which
548     // is run in above mpm.run statement. The downside of calling
549     // this function here, as opposed to beginning of this function,
550     // is that unreferenced constants will be eliminated. So
551     // debugger will not be able to query those variables.
552     insertOCLMissingDebugConstMetadata(pContext);
553 
554     COMPILER_TIME_END(pContext, TIME_UnificationPasses);
555 
556     DumpLLVMIR(pContext, "afterUnification");
557 
558     MEM_SNAPSHOT(IGC::SMS_AFTER_UNIFICATION);
559 }
560 
UnifyIROCL(OpenCLProgramContext * pContext,std::unique_ptr<llvm::Module> BuiltinGenericModule,std::unique_ptr<llvm::Module> BuiltinSizeModule)561 void UnifyIROCL(
562     OpenCLProgramContext* pContext,
563     std::unique_ptr<llvm::Module> BuiltinGenericModule,
564     std::unique_ptr<llvm::Module> BuiltinSizeModule)
565 {
566     CommonOCLBasedPasses(pContext, std::move(BuiltinGenericModule), std::move(BuiltinSizeModule));
567 }
568 
UnifyIRSPIR(OpenCLProgramContext * pContext,std::unique_ptr<llvm::Module> BuiltinGenericModule,std::unique_ptr<llvm::Module> BuiltinSizeModule)569 void UnifyIRSPIR(
570     OpenCLProgramContext* pContext,
571     std::unique_ptr<llvm::Module> BuiltinGenericModule,
572     std::unique_ptr<llvm::Module> BuiltinSizeModule)
573 {
574     CommonOCLBasedPasses(pContext, std::move(BuiltinGenericModule), std::move(BuiltinSizeModule));
575 }
576 
577 }
578