1 /*========================== begin_copyright_notice ============================
2 
3 Copyright (C) 2017-2021 Intel Corporation
4 
5 SPDX-License-Identifier: MIT
6 
7 ============================= end_copyright_notice ===========================*/
8 
9 #pragma once
10 
11 #include "Compiler/MetaDataUtilsWrapper.h"
12 #include "Compiler/CodeGenContextWrapper.hpp"
13 #include "Compiler/CISACodeGen/TranslationTable.hpp"
14 
15 #include "common/LLVMWarningsPush.hpp"
16 #include <llvmWrapper/IR/InstrTypes.h>
17 #include <llvm/Pass.h>
18 #include <llvm/IR/Function.h>
19 #include <llvm/IR/Instructions.h>
20 #include <llvm/IR/IntrinsicInst.h>
21 #include <llvm/Support/raw_ostream.h>
22 #include <llvm/ADT/Statistic.h>
23 #include <llvm/ADT/DenseMap.h>
24 #include <llvm/ADT/DenseSet.h>
25 #include <llvm/ADT/SmallSet.h>
26 #include <llvm/IR/Module.h>
27 #include <llvm/IR/Value.h>
28 #include <llvm/IR/InstIterator.h>
29 #include <llvm/IR/Dominators.h>
30 #include <llvm/Analysis/PostDominators.h>
31 #include "common/LLVMWarningsPop.hpp"
32 
33 
34 #ifdef OCL_SPECIFIC
35 #include "RuntimeServices.h"
36 #include "SoaAllocaAnalysis.h"
37 #include "Logger.h"
38 #endif
39 
40 #include <vector>
41 
42 namespace IGC
43 {
44     class BranchInfo;
45     class WIAnalysis;
46 
    // This is a workaround: enums nested inside a class definition cannot be forward-declared.
    // A better solution would be to hoist the WIDependancy enum out into a standalone
    // enum class (C++11), separate from the WIAnalysis pass class. However, that would
    // require updating many places in the current CodeGen code.
    // Thus WIAnalysis::WIDependancy and WIBaseClass::WIDependancy name the same type,
    // and no conversion between them is required.
53     class WIBaseClass
54     {
55     public:
56         /// @brief describes the type of dependency on the work item
57         enum WIDependancy : uint8_t {
58             UNIFORM_GLOBAL    = 0,  /// Same for all work-items within a shader.
59             UNIFORM_WORKGROUP = 1,  /// Same for all work-items within a work group (compute).
60             UNIFORM_THREAD    = 2,  /// Same for all work-items within a HW thread.
61             CONSECUTIVE       = 3,  /// Elements are consecutive
62             PTR_CONSECUTIVE   = 4,  /// Elements are pointers which are consecutive
63             STRIDED           = 5,  /// Elements are in strides
64             RANDOM            = 6,  /// Unknown or non consecutive order
65             NumDeps           = 7,  /// Overall amount of dependencies
66             INVALID           = 8
67         };
68     };
69 
70     // Provide FastValueMapAttributeInfo for WIDependancy.
71     template<> struct FastValueMapAttributeInfo<WIBaseClass::WIDependancy> {
getEmptyAttributeIGC::FastValueMapAttributeInfo72         static inline WIBaseClass::WIDependancy getEmptyAttribute() { return WIBaseClass::INVALID; }
73     };
74 
75     class WIAnalysisRunner
76     {
77     public:
78         void init(
79             llvm::Function* F,
80             llvm::DominatorTree* DT,
81             llvm::PostDominatorTree* PDT,
82             IGCMD::MetaDataUtils* MDUtils,
83             CodeGenContext* CGCtx,
84             ModuleMetaData* ModMD,
85             TranslationTable* TransTable);
86 
WIAnalysisRunner(llvm::Function * F,llvm::DominatorTree * DT,llvm::PostDominatorTree * PDT,IGCMD::MetaDataUtils * MDUtils,CodeGenContext * CGCtx,ModuleMetaData * ModMD,TranslationTable * TransTable)87         WIAnalysisRunner(
88             llvm::Function* F,
89             llvm::DominatorTree* DT,
90             llvm::PostDominatorTree* PDT,
91             IGCMD::MetaDataUtils* MDUtils,
92             CodeGenContext* CGCtx,
93             ModuleMetaData* ModMD,
94             TranslationTable* TransTable)
95         {
96             init(F, DT, PDT, MDUtils, CGCtx, ModMD, TransTable);
97         }
98 
WIAnalysisRunner()99         WIAnalysisRunner() {}
~WIAnalysisRunner()100         ~WIAnalysisRunner() {}
101 
102         bool run();
103 
104         /// @brief Returns the type of dependency the instruction has on
105         /// the work-item
106         /// @param val llvm::Value to test
107         /// @return Dependency kind
108         WIBaseClass::WIDependancy whichDepend(const llvm::Value* val) const;
109 
110         /// @brief Returns True if 'val' is uniform
111         /// @param val llvm::Value to test
112         bool isUniform(const llvm::Value* val) const;
113         bool isWorkGroupOrGlobalUniform(const llvm::Value* val) const;
114         bool isGlobalUniform(const llvm::Value* val) const;
115 
116         /// incremental update of the dep-map on individual value
117         /// without propagation. Exposed for later pass.
incUpdateDepend(const llvm::Value * val,WIBaseClass::WIDependancy dep)118         void incUpdateDepend(const llvm::Value* val, WIBaseClass::WIDependancy dep)
119         {
120             m_depMap.SetAttribute(val, dep);
121         }
122 
123         /// check if a value is defined inside divergent control-flow
insideDivergentCF(const llvm::Value * val) const124         bool insideDivergentCF(const llvm::Value* val) const
125         {
126             return(llvm::isa<llvm::Instruction>(val) &&
127                 m_ctrlBranches.find(llvm::cast<llvm::Instruction>(val)->getParent()) != m_ctrlBranches.end());
128         }
129 
130         /// check if a value is defined inside workgroup divergent control-flow.
131         /// This will return false if the value is in the influence region
132         /// of only global and workgroup uniform branches.
133         bool insideWorkgroupDivergentCF(const llvm::Value* val) const;
134 
releaseMemory()135         void releaseMemory()
136         {
137             m_ctrlBranches.clear();
138             m_changed1.clear();
139             m_changed2.clear();
140             m_allocaDepMap.clear();
141             m_storeDepMap.clear();
142             m_depMap.clear();
143             m_forcedUniforms.clear();
144         }
145 
146         /// print - print m_deps in human readable form
147         void print(llvm::raw_ostream& OS, const llvm::Module* = 0) const;
148 
149         /// dump - Dump the m_deps to a file.
150         void dump() const;
151 
152         // helper for dumping WI info into files with lock
153         void lock_print();
154 
155     private:
156         WIBaseClass::WIDependancy getCFDependency(const llvm::BasicBlock* BB) const;
157 
158         struct AllocaDep
159         {
160             std::vector<const llvm::StoreInst*> stores;
161             std::vector<const llvm::IntrinsicInst*> lifetimes;
162             bool assume_uniform;
163         };
164 
165         /// @brief Update dependency relations between all values
166         void updateDeps();
167 
168         /// @brief mark the arguments dependency based on the metadata set
169         void updateArgsDependency(llvm::Function* pF);
170 
171         /*! \name Dependency Calculation Functions
172          *  \{ */
173          /// @brief Calculate the dependency type for the instruction
174          /// @param inst Instruction to inspect
175          /// @return Type of dependency.
176         void calculate_dep(const llvm::Value* val);
177         WIBaseClass::WIDependancy calculate_dep(const llvm::BinaryOperator* inst);
178         WIBaseClass::WIDependancy calculate_dep(const llvm::CallInst* inst);
179         WIBaseClass::WIDependancy calculate_dep(const llvm::GetElementPtrInst* inst);
180         WIBaseClass::WIDependancy calculate_dep(const llvm::PHINode* inst);
181         WIBaseClass::WIDependancy calculate_dep(const llvm::SelectInst* inst);
182         WIBaseClass::WIDependancy calculate_dep(const llvm::AllocaInst* inst);
183         WIBaseClass::WIDependancy calculate_dep(const llvm::CastInst* inst);
184         WIBaseClass::WIDependancy calculate_dep(const llvm::VAArgInst* inst);
185         WIBaseClass::WIDependancy calculate_dep(const llvm::LoadInst* inst);
186 
187         WIBaseClass::WIDependancy calculate_dep_terminator(const IGCLLVM::TerminatorInst* inst);
188         /*! \} */
189 
190         /// @brief do the trivial checking WI-dep
191         /// @param I instruction to check
192         /// @return Dependency type. Returns Uniform if all operands are
193         ///         Uniform, Random otherwise
194         WIBaseClass::WIDependancy calculate_dep_simple(const llvm::Instruction* I);
195 
196         /// @brief update the WI-dep from a divergent branch,
197         ///        affected instructions are added to m_pChangedNew
198         /// @param the divergent branch
199         void update_cf_dep(const IGCLLVM::TerminatorInst* TI);
200 
201         /// @brief update the WI-dep for a sequence of insert-elements forming a vector
202         ///        affected instructions are added to m_pChangedNew
203         /// @param the insert-element instruction
204         void updateInsertElements(const llvm::InsertElementInst* inst);
205 
206         /// @check phi divergence at a join-blk due to a divergent branch
207         void updatePHIDepAtJoin(llvm::BasicBlock* blk, BranchInfo* brInfo);
208 
209         void updateDepMap(const llvm::Instruction* inst, WIBaseClass::WIDependancy dep);
210 
211         /// @brief Provide known dependency type for requested value
212         /// @param val llvm::Value to examine
213         /// @return Dependency type. Returns Uniform for unknown type
214         WIBaseClass::WIDependancy getDependency(const llvm::Value* val);
215 
216         /// @brief return true if there is calculated dependency type for requested value
217         /// @param val llvm::Value to examine
218         /// @return true if value has dependency type, false otherwise.
219         bool hasDependency(const llvm::Value* val) const;
220 
221         /// @brief return true if all uses of this value are marked RANDOM
222         bool allUsesRandom(const llvm::Value* val);
223 
224         /// @brief return true if any of the use require the value to be uniform
225         bool needToBeUniform(const llvm::Value* val);
226 
227         /// @brief return true is the instruction is simple and making it random is cheap
228         bool isInstructionSimple(const llvm::Instruction* inst);
229 
230         /// @brief return true if all the source operands are defined outside the region
231         bool isRegionInvariant(const llvm::Instruction* inst, BranchInfo* brInfo, unsigned level);
232 
233         /// @brief update dependency structure for Alloca
234         bool TrackAllocaDep(const llvm::Value* I, AllocaDep& dep);
235 
236         void checkLocalIdUniform(
237             llvm::Function* F,
238             bool& IsLxUniform,
239             bool& IsLyUniform,
240             bool& IsLzUniform);
241 
242     private:
243 #ifdef OCL_SPECIFIC
244         // @brief pointer to Soa alloca analysis performed for this function
245         SoaAllocaAnalysis* m_soaAllocaAnalysis;
246         /// Runtime services pointer
247         RuntimeServices* m_rtServices;
248 #endif
249 
250         /// The WIAnalysis follows pointer arithmetic
251         ///  and Index arithmetic when calculating dependency
252         ///  properties. If a part of the index is lost due to
253         ///  a transformation, it is acceptable.
254         ///  This constant decides how many bits need to be
255         ///  preserved before we give up on the analysis.
256         static const unsigned int MinIndexBitwidthToPreserve;
257 
258         /// Stores an updated list of all dependencies
259         /// for each block, store the list of diverging branches that affect it
260         llvm::DenseMap<const llvm::BasicBlock*, llvm::SmallPtrSet<const llvm::Instruction*, 4>> m_ctrlBranches;
261 
262         /// Iteratively one set holds the changed from the previous iteration and
263         /// the other holds the new changed values from the current iteration.
264         std::vector<const llvm::Value*> m_changed1;
265         std::vector<const llvm::Value*> m_changed2;
266         /// ptr to m_changed1, m_changed2
267         std::vector<const llvm::Value*>* m_pChangedOld;
268         std::vector<const llvm::Value*>* m_pChangedNew;
269 
270         /// <summary>
271         ///  hold the vector-defs that are promoted from an uniform alloca
272         ///  therefore, need to be forced into uniform no matter what.
273         /// </summary>
274         std::vector<const llvm::Value*> m_forcedUniforms;
275 
276         llvm::Function* m_func;
277         llvm::DominatorTree* DT;
278         llvm::PostDominatorTree* PDT;
279         IGC::IGCMD::MetaDataUtils* m_pMdUtils;
280         IGC::CodeGenContext* m_CGCtx;
281         IGC::ModuleMetaData* m_ModMD;
282         IGC::TranslationTable* m_TT;
283 
284         // Allow access to all the store into an alloca if we were able to track it
285         llvm::DenseMap<const llvm::AllocaInst*, AllocaDep> m_allocaDepMap;
286         // reverse map to allow to know what alloca to update when store changes
287         llvm::DenseMap<const llvm::StoreInst*, const llvm::AllocaInst*> m_storeDepMap;
288 
289         IGC::FastValueMap<WIBaseClass::WIDependancy, FastValueMapAttributeInfo<WIBaseClass::WIDependancy>> m_depMap;
290 
291         // For dumpping WIA info per each invocation
292         static llvm::DenseMap<const llvm::Function*, int> m_funcInvocationId;
293     };
294 
295     /// @brief Work Item Analysis class used to provide information on
296     ///  individual instructions. The analysis class detects values which
297     ///  depend in work-item and describe their dependency.
298     ///  The algorithm used is recursive and new instructions are updated
299     ///  according to their operands (which are already calculated).
300     ///  original code for OCL vectorizer
301     ///
302     class WIAnalysis : public llvm::FunctionPass, public WIBaseClass
303     {
304     public:
305         static char ID; // Pass identification, replacement for typeid
306 
307         WIAnalysis();
308 
~WIAnalysis()309         ~WIAnalysis() {}
310 
311         /// @brief Provides name of pass
getPassName() const312         llvm::StringRef getPassName() const override
313         {
314             return "WIAnalysis";
315         }
316 
getAnalysisUsage(llvm::AnalysisUsage & AU) const317         void getAnalysisUsage(llvm::AnalysisUsage& AU) const override
318         {
319             // Analysis pass preserve all
320             AU.setPreservesAll();
321 #ifdef OCL_SPECIFIC
322             AU.addRequired<SoaAllocaAnalysis>();
323 #endif
324             AU.addRequired<llvm::DominatorTreeWrapperPass>();
325             AU.addRequired<llvm::PostDominatorTreeWrapperPass>();
326             AU.addRequired<MetaDataUtilsWrapper>();
327             AU.addRequired<CodeGenContextWrapper>();
328             AU.addRequired<TranslationTable>();
329         }
330 
331         /// @brief LLVM llvm::Function pass entry
332         /// @param F llvm::Function to transform
333         /// @return True if changed
334         bool runOnFunction(llvm::Function& F) override;
335 
336         /// print - print m_deps in human readable form
337         void print(llvm::raw_ostream& OS, const llvm::Module* = 0) const override;
338 
339         /// dump - Dump the m_deps to dbgs().
340         void dump() const;
341     public:
342         /// @brief Returns the type of dependency the instruction has on
343         /// the work-item
344         /// @param val llvm::Value to test
345         /// @return Dependency kind
346         WIDependancy whichDepend(const llvm::Value* val);
347 
348         /// @brief Returns True if 'val' is uniform
349         /// @param val llvm::Value to test
350         bool isUniform(const llvm::Value* val) const;  // Return true for any uniform
351         bool isWorkGroupOrGlobalUniform(const llvm::Value* val);
352         bool isGlobalUniform(const llvm::Value* val);
353 
354         /// incremental update of the dep-map on individual value
355         /// without propagation. Exposed for later pass.
356         void incUpdateDepend(const llvm::Value* val, WIDependancy dep);
357 
358         /// check if a value is defined inside divergent control-flow
359         bool insideDivergentCF(const llvm::Value* val) const;
360 
361         /// check if a value is defined inside workgroup divergent control-flow
362         /// This will return false if the value is in the influence region
363         /// of only global and workgroup uniform branches.
364         bool insideWorkgroupDivergentCF(const llvm::Value* val) const;
365 
releaseMemory()366         void releaseMemory() override
367         {
368             Runner.releaseMemory();
369         }
370 
371 
372         /// Return true if Dep is any of uniform dependancy.
isDepUniform(WIDependancy Dep)373         static bool isDepUniform(WIDependancy Dep) {
374             return Dep == WIDependancy::UNIFORM_GLOBAL ||
375                 Dep == WIDependancy::UNIFORM_WORKGROUP ||
376                 Dep == WIDependancy::UNIFORM_THREAD;
377         }
378     private:
379         WIAnalysisRunner Runner;
380     };
381 
382 } // namespace IGC
383