1 /*========================== begin_copyright_notice ============================ 2 3 Copyright (C) 2017-2021 Intel Corporation 4 5 SPDX-License-Identifier: MIT 6 7 ============================= end_copyright_notice ===========================*/ 8 9 #pragma once 10 11 #include "Compiler/MetaDataUtilsWrapper.h" 12 #include "Compiler/CodeGenContextWrapper.hpp" 13 #include "Compiler/CISACodeGen/TranslationTable.hpp" 14 15 #include "common/LLVMWarningsPush.hpp" 16 #include <llvmWrapper/IR/InstrTypes.h> 17 #include <llvm/Pass.h> 18 #include <llvm/IR/Function.h> 19 #include <llvm/IR/Instructions.h> 20 #include <llvm/IR/IntrinsicInst.h> 21 #include <llvm/Support/raw_ostream.h> 22 #include <llvm/ADT/Statistic.h> 23 #include <llvm/ADT/DenseMap.h> 24 #include <llvm/ADT/DenseSet.h> 25 #include <llvm/ADT/SmallSet.h> 26 #include <llvm/IR/Module.h> 27 #include <llvm/IR/Value.h> 28 #include <llvm/IR/InstIterator.h> 29 #include <llvm/IR/Dominators.h> 30 #include <llvm/Analysis/PostDominators.h> 31 #include "common/LLVMWarningsPop.hpp" 32 33 34 #ifdef OCL_SPECIFIC 35 #include "RuntimeServices.h" 36 #include "SoaAllocaAnalysis.h" 37 #include "Logger.h" 38 #endif 39 40 #include <vector> 41 42 namespace IGC 43 { 44 class BranchInfo; 45 class WIAnalysis; 46 47 //This is a trick, since we cannot forward-declare enums embedded in class definitions. 48 // The better solution is to completely hoist-out the WIDependency enum into a separate enum class 49 // (c++ 11) and have it separate from WIAnalysis pass class. Nevertheless, that would require 50 // updating many places in the current CodeGen code... 51 // thus: WIAnalysis::WIDependancy ~ WIBaseClass::WIDependancy, the types are equal and do not require 52 // conversion 53 class WIBaseClass 54 { 55 public: 56 /// @brief describes the type of dependency on the work item 57 enum WIDependancy : uint8_t { 58 UNIFORM_GLOBAL = 0, /// Same for all work-items within a shader. 59 UNIFORM_WORKGROUP = 1, /// Same for all work-items within a work group (compute). 60 UNIFORM_THREAD = 2, /// Same for all work-items within a HW thread. 61 CONSECUTIVE = 3, /// Elements are consecutive 62 PTR_CONSECUTIVE = 4, /// Elements are pointers which are consecutive 63 STRIDED = 5, /// Elements are in strides 64 RANDOM = 6, /// Unknown or non consecutive order 65 NumDeps = 7, /// Overall amount of dependencies 66 INVALID = 8 67 }; 68 }; 69 70 // Provide FastValueMapAttributeInfo for WIDependancy. 71 template<> struct FastValueMapAttributeInfo<WIBaseClass::WIDependancy> { getEmptyAttributeIGC::FastValueMapAttributeInfo72 static inline WIBaseClass::WIDependancy getEmptyAttribute() { return WIBaseClass::INVALID; } 73 }; 74 75 class WIAnalysisRunner 76 { 77 public: 78 void init( 79 llvm::Function* F, 80 llvm::DominatorTree* DT, 81 llvm::PostDominatorTree* PDT, 82 IGCMD::MetaDataUtils* MDUtils, 83 CodeGenContext* CGCtx, 84 ModuleMetaData* ModMD, 85 TranslationTable* TransTable); 86 WIAnalysisRunner(llvm::Function * F,llvm::DominatorTree * DT,llvm::PostDominatorTree * PDT,IGCMD::MetaDataUtils * MDUtils,CodeGenContext * CGCtx,ModuleMetaData * ModMD,TranslationTable * TransTable)87 WIAnalysisRunner( 88 llvm::Function* F, 89 llvm::DominatorTree* DT, 90 llvm::PostDominatorTree* PDT, 91 IGCMD::MetaDataUtils* MDUtils, 92 CodeGenContext* CGCtx, 93 ModuleMetaData* ModMD, 94 TranslationTable* TransTable) 95 { 96 init(F, DT, PDT, MDUtils, CGCtx, ModMD, TransTable); 97 } 98 WIAnalysisRunner()99 WIAnalysisRunner() {} ~WIAnalysisRunner()100 ~WIAnalysisRunner() {} 101 102 bool run(); 103 104 /// @brief Returns the type of dependency the instruction has on 105 /// the work-item 106 /// @param val llvm::Value to test 107 /// @return Dependency kind 108 WIBaseClass::WIDependancy whichDepend(const llvm::Value* val) const; 109 110 /// @brief Returns True if 'val' is uniform 111 /// @param val llvm::Value to test 112 bool isUniform(const llvm::Value* val) const; 113 bool isWorkGroupOrGlobalUniform(const llvm::Value* val) const; 114 bool isGlobalUniform(const llvm::Value* val) const; 115 116 /// incremental update of the dep-map on individual value 117 /// without propagation. Exposed for later pass. incUpdateDepend(const llvm::Value * val,WIBaseClass::WIDependancy dep)118 void incUpdateDepend(const llvm::Value* val, WIBaseClass::WIDependancy dep) 119 { 120 m_depMap.SetAttribute(val, dep); 121 } 122 123 /// check if a value is defined inside divergent control-flow insideDivergentCF(const llvm::Value * val) const124 bool insideDivergentCF(const llvm::Value* val) const 125 { 126 return(llvm::isa<llvm::Instruction>(val) && 127 m_ctrlBranches.find(llvm::cast<llvm::Instruction>(val)->getParent()) != m_ctrlBranches.end()); 128 } 129 130 /// check if a value is defined inside workgroup divergent control-flow. 131 /// This will return false if the value is in the influence region 132 /// of only global and workgroup uniform branches. 133 bool insideWorkgroupDivergentCF(const llvm::Value* val) const; 134 releaseMemory()135 void releaseMemory() 136 { 137 m_ctrlBranches.clear(); 138 m_changed1.clear(); 139 m_changed2.clear(); 140 m_allocaDepMap.clear(); 141 m_storeDepMap.clear(); 142 m_depMap.clear(); 143 m_forcedUniforms.clear(); 144 } 145 146 /// print - print m_deps in human readable form 147 void print(llvm::raw_ostream& OS, const llvm::Module* = 0) const; 148 149 /// dump - Dump the m_deps to a file. 150 void dump() const; 151 152 // helper for dumping WI info into files with lock 153 void lock_print(); 154 155 private: 156 WIBaseClass::WIDependancy getCFDependency(const llvm::BasicBlock* BB) const; 157 158 struct AllocaDep 159 { 160 std::vector<const llvm::StoreInst*> stores; 161 std::vector<const llvm::IntrinsicInst*> lifetimes; 162 bool assume_uniform; 163 }; 164 165 /// @brief Update dependency relations between all values 166 void updateDeps(); 167 168 /// @brief mark the arguments dependency based on the metadata set 169 void updateArgsDependency(llvm::Function* pF); 170 171 /*! \name Dependency Calculation Functions 172 * \{ */ 173 /// @brief Calculate the dependency type for the instruction 174 /// @param inst Instruction to inspect 175 /// @return Type of dependency. 176 void calculate_dep(const llvm::Value* val); 177 WIBaseClass::WIDependancy calculate_dep(const llvm::BinaryOperator* inst); 178 WIBaseClass::WIDependancy calculate_dep(const llvm::CallInst* inst); 179 WIBaseClass::WIDependancy calculate_dep(const llvm::GetElementPtrInst* inst); 180 WIBaseClass::WIDependancy calculate_dep(const llvm::PHINode* inst); 181 WIBaseClass::WIDependancy calculate_dep(const llvm::SelectInst* inst); 182 WIBaseClass::WIDependancy calculate_dep(const llvm::AllocaInst* inst); 183 WIBaseClass::WIDependancy calculate_dep(const llvm::CastInst* inst); 184 WIBaseClass::WIDependancy calculate_dep(const llvm::VAArgInst* inst); 185 WIBaseClass::WIDependancy calculate_dep(const llvm::LoadInst* inst); 186 187 WIBaseClass::WIDependancy calculate_dep_terminator(const IGCLLVM::TerminatorInst* inst); 188 /*! \} */ 189 190 /// @brief do the trivial checking WI-dep 191 /// @param I instruction to check 192 /// @return Dependency type. Returns Uniform if all operands are 193 /// Uniform, Random otherwise 194 WIBaseClass::WIDependancy calculate_dep_simple(const llvm::Instruction* I); 195 196 /// @brief update the WI-dep from a divergent branch, 197 /// affected instructions are added to m_pChangedNew 198 /// @param the divergent branch 199 void update_cf_dep(const IGCLLVM::TerminatorInst* TI); 200 201 /// @brief update the WI-dep for a sequence of insert-elements forming a vector 202 /// affected instructions are added to m_pChangedNew 203 /// @param the insert-element instruction 204 void updateInsertElements(const llvm::InsertElementInst* inst); 205 206 /// @check phi divergence at a join-blk due to a divergent branch 207 void updatePHIDepAtJoin(llvm::BasicBlock* blk, BranchInfo* brInfo); 208 209 void updateDepMap(const llvm::Instruction* inst, WIBaseClass::WIDependancy dep); 210 211 /// @brief Provide known dependency type for requested value 212 /// @param val llvm::Value to examine 213 /// @return Dependency type. Returns Uniform for unknown type 214 WIBaseClass::WIDependancy getDependency(const llvm::Value* val); 215 216 /// @brief return true if there is calculated dependency type for requested value 217 /// @param val llvm::Value to examine 218 /// @return true if value has dependency type, false otherwise. 219 bool hasDependency(const llvm::Value* val) const; 220 221 /// @brief return true if all uses of this value are marked RANDOM 222 bool allUsesRandom(const llvm::Value* val); 223 224 /// @brief return true if any of the use require the value to be uniform 225 bool needToBeUniform(const llvm::Value* val); 226 227 /// @brief return true is the instruction is simple and making it random is cheap 228 bool isInstructionSimple(const llvm::Instruction* inst); 229 230 /// @brief return true if all the source operands are defined outside the region 231 bool isRegionInvariant(const llvm::Instruction* inst, BranchInfo* brInfo, unsigned level); 232 233 /// @brief update dependency structure for Alloca 234 bool TrackAllocaDep(const llvm::Value* I, AllocaDep& dep); 235 236 void checkLocalIdUniform( 237 llvm::Function* F, 238 bool& IsLxUniform, 239 bool& IsLyUniform, 240 bool& IsLzUniform); 241 242 private: 243 #ifdef OCL_SPECIFIC 244 // @brief pointer to Soa alloca analysis performed for this function 245 SoaAllocaAnalysis* m_soaAllocaAnalysis; 246 /// Runtime services pointer 247 RuntimeServices* m_rtServices; 248 #endif 249 250 /// The WIAnalysis follows pointer arithmetic 251 /// and Index arithmetic when calculating dependency 252 /// properties. If a part of the index is lost due to 253 /// a transformation, it is acceptable. 254 /// This constant decides how many bits need to be 255 /// preserved before we give up on the analysis. 256 static const unsigned int MinIndexBitwidthToPreserve; 257 258 /// Stores an updated list of all dependencies 259 /// for each block, store the list of diverging branches that affect it 260 llvm::DenseMap<const llvm::BasicBlock*, llvm::SmallPtrSet<const llvm::Instruction*, 4>> m_ctrlBranches; 261 262 /// Iteratively one set holds the changed from the previous iteration and 263 /// the other holds the new changed values from the current iteration. 264 std::vector<const llvm::Value*> m_changed1; 265 std::vector<const llvm::Value*> m_changed2; 266 /// ptr to m_changed1, m_changed2 267 std::vector<const llvm::Value*>* m_pChangedOld; 268 std::vector<const llvm::Value*>* m_pChangedNew; 269 270 /// <summary> 271 /// hold the vector-defs that are promoted from an uniform alloca 272 /// therefore, need to be forced into uniform no matter what. 273 /// </summary> 274 std::vector<const llvm::Value*> m_forcedUniforms; 275 276 llvm::Function* m_func; 277 llvm::DominatorTree* DT; 278 llvm::PostDominatorTree* PDT; 279 IGC::IGCMD::MetaDataUtils* m_pMdUtils; 280 IGC::CodeGenContext* m_CGCtx; 281 IGC::ModuleMetaData* m_ModMD; 282 IGC::TranslationTable* m_TT; 283 284 // Allow access to all the store into an alloca if we were able to track it 285 llvm::DenseMap<const llvm::AllocaInst*, AllocaDep> m_allocaDepMap; 286 // reverse map to allow to know what alloca to update when store changes 287 llvm::DenseMap<const llvm::StoreInst*, const llvm::AllocaInst*> m_storeDepMap; 288 289 IGC::FastValueMap<WIBaseClass::WIDependancy, FastValueMapAttributeInfo<WIBaseClass::WIDependancy>> m_depMap; 290 291 // For dumpping WIA info per each invocation 292 static llvm::DenseMap<const llvm::Function*, int> m_funcInvocationId; 293 }; 294 295 /// @brief Work Item Analysis class used to provide information on 296 /// individual instructions. The analysis class detects values which 297 /// depend in work-item and describe their dependency. 298 /// The algorithm used is recursive and new instructions are updated 299 /// according to their operands (which are already calculated). 300 /// original code for OCL vectorizer 301 /// 302 class WIAnalysis : public llvm::FunctionPass, public WIBaseClass 303 { 304 public: 305 static char ID; // Pass identification, replacement for typeid 306 307 WIAnalysis(); 308 ~WIAnalysis()309 ~WIAnalysis() {} 310 311 /// @brief Provides name of pass getPassName() const312 llvm::StringRef getPassName() const override 313 { 314 return "WIAnalysis"; 315 } 316 getAnalysisUsage(llvm::AnalysisUsage & AU) const317 void getAnalysisUsage(llvm::AnalysisUsage& AU) const override 318 { 319 // Analysis pass preserve all 320 AU.setPreservesAll(); 321 #ifdef OCL_SPECIFIC 322 AU.addRequired<SoaAllocaAnalysis>(); 323 #endif 324 AU.addRequired<llvm::DominatorTreeWrapperPass>(); 325 AU.addRequired<llvm::PostDominatorTreeWrapperPass>(); 326 AU.addRequired<MetaDataUtilsWrapper>(); 327 AU.addRequired<CodeGenContextWrapper>(); 328 AU.addRequired<TranslationTable>(); 329 } 330 331 /// @brief LLVM llvm::Function pass entry 332 /// @param F llvm::Function to transform 333 /// @return True if changed 334 bool runOnFunction(llvm::Function& F) override; 335 336 /// print - print m_deps in human readable form 337 void print(llvm::raw_ostream& OS, const llvm::Module* = 0) const override; 338 339 /// dump - Dump the m_deps to dbgs(). 340 void dump() const; 341 public: 342 /// @brief Returns the type of dependency the instruction has on 343 /// the work-item 344 /// @param val llvm::Value to test 345 /// @return Dependency kind 346 WIDependancy whichDepend(const llvm::Value* val); 347 348 /// @brief Returns True if 'val' is uniform 349 /// @param val llvm::Value to test 350 bool isUniform(const llvm::Value* val) const; // Return true for any uniform 351 bool isWorkGroupOrGlobalUniform(const llvm::Value* val); 352 bool isGlobalUniform(const llvm::Value* val); 353 354 /// incremental update of the dep-map on individual value 355 /// without propagation. Exposed for later pass. 356 void incUpdateDepend(const llvm::Value* val, WIDependancy dep); 357 358 /// check if a value is defined inside divergent control-flow 359 bool insideDivergentCF(const llvm::Value* val) const; 360 361 /// check if a value is defined inside workgroup divergent control-flow 362 /// This will return false if the value is in the influence region 363 /// of only global and workgroup uniform branches. 364 bool insideWorkgroupDivergentCF(const llvm::Value* val) const; 365 releaseMemory()366 void releaseMemory() override 367 { 368 Runner.releaseMemory(); 369 } 370 371 372 /// Return true if Dep is any of uniform dependancy. isDepUniform(WIDependancy Dep)373 static bool isDepUniform(WIDependancy Dep) { 374 return Dep == WIDependancy::UNIFORM_GLOBAL || 375 Dep == WIDependancy::UNIFORM_WORKGROUP || 376 Dep == WIDependancy::UNIFORM_THREAD; 377 } 378 private: 379 WIAnalysisRunner Runner; 380 }; 381 382 } // namespace IGC 383