1 /*========================== begin_copyright_notice ============================ 2 3 Copyright (C) 2017-2021 Intel Corporation 4 5 SPDX-License-Identifier: MIT 6 7 ============================= end_copyright_notice ===========================*/ 8 9 #pragma once 10 #include "Compiler/CISACodeGen/WIAnalysis.hpp" 11 #include "Compiler/CISACodeGen/PatternMatchPass.hpp" 12 #include "Compiler/CISACodeGen/DeSSA.hpp" 13 #include "Compiler/CISACodeGen/CoalescingEngine.hpp" 14 #include "Compiler/CISACodeGen/BlockCoalescing.hpp" 15 #include "common/LLVMWarningsPush.hpp" 16 #include "llvm/ADT/DenseMap.h" 17 #include "llvm/IR/Dominators.h" 18 #include "llvm/ADT/TinyPtrVector.h" 19 #include <llvm/IR/Function.h> 20 #include <llvm/IR/Instructions.h> 21 #include <llvm/IR/InstIterator.h> 22 #include <llvm/IR/InstVisitor.h> 23 #include "llvm/Pass.h" 24 #include "llvmWrapper/IR/DerivedTypes.h" 25 #include "llvm/Support/raw_ostream.h" 26 #include "common/LLVMWarningsPop.hpp" 27 #include "Compiler/CISACodeGen/RegisterEstimator.hpp" 28 #include <list> 29 #include <map> 30 #include <algorithm> 31 #include "Probe/Assertion.h" 32 33 namespace IGC { 34 35 struct SSubVecDesc 36 { 37 // Denote a subvector of BaseVector starting at StartElementOffset. 38 // StartElementOffset is in the unit of BaseVector's element type. 39 // 40 // This can potentially denote subvector and basevector relationship 41 // among vector values of different element sizes. For now, subvector 42 // and basevector have the same element size (could be differnt types, 43 // such as int32_t and float, etc). Here is the example showing the 44 // relationship among them: 45 // Given the aliasing relation: 46 // Aliaser[0:n] --> BaseVector[0:m] 47 // where (StartElementOffset + n) <= m. Then, 48 // Aliaser = BaseVector[StartElementOffset, StartElementOffset+n] 49 50 // Aliaser 51 // It is a dessa node value; either scalar or subvector 52 llvm::Value* Aliaser; 53 54 // Aliasee: 55 llvm::Value* BaseVector; 56 57 // Valid only if this entry is for BaseVector, ie, Aliaser == BaseVector 58 llvm::SmallVector<SSubVecDesc*, 16> Aliasers; 59 60 // In the unit of BaseVector's element size 61 short StartElementOffset; // in the unit of BaseVector's element type 62 short NumElts; // the number of element of Aliaser 63 SSubVecDescIGC::SSubVecDesc64 SSubVecDesc(llvm::Value* V) 65 : Aliaser(V), BaseVector(V), StartElementOffset(0) 66 { 67 IGCLLVM::FixedVectorType* VTy = llvm::dyn_cast<IGCLLVM::FixedVectorType>(V->getType()); 68 NumElts = VTy ? (short)VTy->getNumElements() : 1; 69 } 70 71 // Temporary : tobedeleted SSubVecDescIGC::SSubVecDesc72 SSubVecDesc() 73 : Aliaser(nullptr), BaseVector(nullptr), 74 StartElementOffset(0), NumElts(0) 75 {} 76 }; 77 78 // Represent a Vector's element at index = EltIx. 79 struct SVecElement { 80 llvm::Value* Vec; 81 llvm::Value* Elem; 82 int EltIx; 83 SVecElementIGC::SVecElement84 SVecElement() : Vec(nullptr), Elem(nullptr), EltIx(-1) {} 85 }; 86 87 // A temporary struct for capturing vector/sub-vector relation. 88 // For example: 89 // extElt: 90 // s0 = extElt From, 4 91 // s1 = extElt From, 5 92 // ... 93 // 94 // cast: 95 // s0 = castinst s0 96 // s1 = castinst s1 97 // ... 98 // 99 // insELt: 100 // V0 = insElt Undef, s0, 0 101 // V1 = insElt V0, s1, 1 102 // ...... 103 // Vn = insElt Vn-1, sn, n 104 // 105 // where s0-s1 are typically from extElt, but not necessary. 106 // And they can from different vectors. Sometimes, castInst 107 // are present between ins and ext. Here, two cases are 108 // considered: 109 // 110 // case 1: Insert to (x0 and x1 are inserted to y) 111 // case 1.1 112 // int4 x0, x1; 113 // int8 y = (x0, x1) 114 // 115 // case 1.2 116 // int4 y = (s0, s1, s2, s3) 117 // 118 // case 2: Extract from (y is extracted from x) 119 // int8 x 120 // int4 y = x.s0123 (use half of x) 121 // 122 // 123 // A vector is used to keep this info. Each element of the 124 // vector corresponds to a single IEI. So, for vector size 125 // of N, there are N elements. The vector is defined as the 126 // following inside the class: 127 // SmallVector<SVecInsExtInfo, 16> VecInsEltInfoTy 128 // 129 struct SVecInsEltInfo { 130 llvm::InsertElementInst* IEI; 131 llvm::Value* Elt; 132 133 // If Elt is null, EEI must not be null, which 134 // indicates that (FromVec, FromVec_eltIx) is 135 // used as scalar operands in this IEI. 136 llvm::ExtractElementInst* EEI; 137 llvm::Value* FromVec; 138 int FromVec_eltIx; 139 SVecInsEltInfoIGC::SVecInsEltInfo140 SVecInsEltInfo() 141 : IEI(nullptr), Elt(nullptr), 142 EEI(nullptr), FromVec(nullptr), FromVec_eltIx(0) 143 {} 144 }; 145 146 /// RPE based analysis for querying variable reuse status. 147 /// 148 /// Let two instructions DInst and UInst be defined in the same basic block, 149 /// 150 /// DInst = ... 151 /// UInst = DInst op Other 152 /// 153 /// and assume it is legal to use the same CVariable for DInst and UInst. This 154 /// analysis determines if this reuse will be applied or not. When overall 155 /// register pressure is low, this decision could be most aggressive. When DInst 156 /// and UInst are acrossing a high pressure region (defined below), then the 157 /// reuse will only be applied less aggressively. 158 /// 159 /// Denote by RPE(x) the estimated register pressure at point x. Let Threshold 160 /// be a predefined threshold constant. We say pair (DInst, UInst) is crossing a 161 /// high register pressure region if 162 /// 163 /// (1) RPE(x) >= Threshold for any x between DInst and UInst (inclusive), or 164 /// (2) RPE(x) >= Threshold for any use x of UInst. 165 /// 166 class VariableReuseAnalysis : public llvm::FunctionPass, 167 public llvm::InstVisitor<VariableReuseAnalysis> 168 { 169 public: 170 static char ID; 171 172 VariableReuseAnalysis(); ~VariableReuseAnalysis()173 ~VariableReuseAnalysis() {} 174 175 typedef llvm::SmallVector<SVecInsEltInfo, 32> VecInsEltInfoTy; 176 typedef std::map<llvm::Value*, SSubVecDesc*> AliasMapTy; // ordered map 177 typedef llvm::SmallVector<llvm::Value*, 32> ValueVectorTy; 178 typedef llvm::DenseMap<llvm::Value*, llvm::Value*> Val2ValMapTy; 179 180 // following to be deleted 181 typedef llvm::DenseMap<llvm::Value*, SSubVecDesc> ValueAliasMapTy; 182 typedef llvm::DenseMap<llvm::Value*, llvm::TinyPtrVector<llvm::Value*> > AliasRootMapTy; 183 typedef llvm::SmallVector<SVecElement, 32> VecEltTy; 184 185 virtual bool runOnFunction(llvm::Function& F) override; 186 187 // Need to perform this after WI/LiveVars/DeSSA/CoalescingEnging. 188 // (todo: check if coalescing can be merged into dessa completely) getAnalysisUsage(llvm::AnalysisUsage & AU) const189 virtual void getAnalysisUsage(llvm::AnalysisUsage& AU) const override { 190 // AU.addRequired<RegisterEstimator>(); 191 AU.setPreservesAll(); 192 AU.addRequired<llvm::DominatorTreeWrapperPass>(); 193 AU.addRequired<WIAnalysis>(); 194 AU.addRequired<LiveVarsAnalysis>(); 195 AU.addRequired<CodeGenPatternMatch>(); 196 AU.addRequired<DeSSA>(); 197 AU.addRequired<CoalescingEngine>(); 198 AU.addRequired<BlockCoalescing>(); 199 AU.addRequired<CodeGenContextWrapper>(); 200 } 201 getPassName() const202 llvm::StringRef getPassName() const override { 203 return "VariableReuseAnalysis"; 204 } 205 206 /// Initialize per-function states. In particular, check if the entire function 207 /// has a low pressure. BeginFunction(llvm::Function * F,unsigned SimdSize)208 void BeginFunction(llvm::Function* F, unsigned SimdSize) { 209 m_SimdSize = (uint16_t)SimdSize; 210 if (m_RPE) { 211 if (m_RPE->isGRFPressureLow(m_SimdSize)) 212 m_IsFunctionPressureLow = Status::True; 213 else 214 m_IsFunctionPressureLow = Status::False; 215 } 216 } 217 isCurFunctionPressureLow() const218 bool isCurFunctionPressureLow() const { 219 return m_IsFunctionPressureLow == Status::True; 220 } 221 isCurBlockPressureLow() const222 bool isCurBlockPressureLow() const { 223 return m_IsBlockPressureLow == Status::True; 224 } 225 226 /// RAII class to initialize and cleanup basic block level cache. 227 class EnterBlockRAII { 228 public: EnterBlockRAII(VariableReuseAnalysis * VRA,llvm::BasicBlock * BB)229 explicit EnterBlockRAII(VariableReuseAnalysis* VRA, llvm::BasicBlock* BB) 230 : VRA(VRA) { 231 VRA->BeginBlock(BB); 232 } ~EnterBlockRAII()233 ~EnterBlockRAII() { VRA->EndBlock(); } 234 VariableReuseAnalysis* VRA; 235 }; 236 friend class EnterBlockRAII; 237 238 // Check use instruction's legality and its pressure impact. 239 bool checkUseInst(llvm::Instruction* UInst, LiveVars* LV); 240 241 // Check def instruction's legality and its pressure impact. 242 bool checkDefInst(llvm::Instruction* DInst, llvm::Instruction* UInst, 243 LiveVars* LV); 244 245 // Visitor 246 void visitExtractElementInst(llvm::ExtractElementInst& I); 247 isAliasedValue(llvm::Value * V)248 bool isAliasedValue(llvm::Value* V) { 249 if (m_pCtx->getVectorCoalescingControl() > 0) { 250 return isAliased(V); 251 } 252 return false; 253 } 254 255 // getRootValue(): 256 // return dessa root value; if dessa root value 257 // is null, return itself. 258 llvm::Value* getRootValue(llvm::Value* V); 259 // getAliasRootValue() 260 // return alias root value if it exists, itself otherwise. 261 llvm::Value* getAliasRootValue(llvm::Value* V); // to be deleted 262 263 /// printAlias - print value aliasing info in human readable form 264 void printAlias(llvm::raw_ostream& OS, const llvm::Function* F = nullptr) const; 265 /// dumpAalias - dump alias info to dbgs(). 266 void dumpAlias() const; 267 268 // 269 // m_aliasMap: 270 // For mapping aliaser to aliasee: 271 // aliaser -> aliasee 272 // where aliasee is a vector value and aliaser could be a scalar or 273 // a vector values. 274 // 275 // Properties of the map: 276 // 1. No chain aliases 277 // No following: 278 // Vec0 -> Vec1 279 // v0 -> Vec0 280 // Instead, they are represented as follows: 281 // Vec0 -> Vec1 282 // v0 -> Vec1 283 // 2. Aliasee is an aliaser to itself (for convenience) 284 // Vec0 -> Vec0 285 // When this entry is seen, we know Vec0 is an aliasee, 286 // also called a alias root value. 287 // 3. Liveness of aliaser and aliasee are not combined 288 // Unlike dessa alias, in which aliser's liveness is merged 289 // into aliasee's. Here, aliaser's liveness is nerver merged 290 // into aliasee's. 291 // 292 // Note: 293 // notation: 294 // aliasCC(v) : all values that share the same alias root as v. 295 // dessaCC(v) : all values in the same dessa congruent class as v. 296 // subAlias(v, startIx, nelts) : 297 // all v in aliasCC(v) whose elements overlap 298 // v's baseVector[startIx : startIx+nelts-1]. 299 // For example, V is of int4, s0, s1, s2, s3 are scalars that are aliased 300 // to V's element at 0, 1, 2, and 2, respectively. 301 // s0 --> v[0] 302 // s1 --> v[1] 303 // s2 --> v[2] 304 // s3 --> v[3] 305 // aliasCC(s0) = aliasCC(s1) = aliasCC(s2) = aliasCC(s3) = aliasCC(v) 306 // = {v, s0, s1, s2, s3, s4} 307 // subAlias(v, 2, 2) = {s2, s3, v} // only s2&s3 overlaps V[2:3] 308 // dessaCC(s0) = { values in the same dessa CC } 309 // 310 // [todo] add Algo here. 311 // 312 AliasMapTy m_aliasMap; 313 314 // Function argument cannot be made a sub-part of another bigger 315 // value as it has been assigned a fixed physical GRF. The following 316 // map is used for checking if a value is an arg or coalesced with 317 // an argument by dessa. 318 std::list<llvm::Value*> m_ArgDeSSARoot; isOrCoalescedWithArg(llvm::Value * V)319 bool isOrCoalescedWithArg(llvm::Value* V) 320 { 321 if (llvm::isa<llvm::Argument>(V)) 322 return true; 323 if (m_DeSSA) { 324 if (llvm::Value * R = m_DeSSA->getRootValue(V)) { 325 auto IE = m_ArgDeSSARoot.end(); 326 auto it = std::find(m_ArgDeSSARoot.begin(), IE, R); 327 return it != IE; 328 } 329 } 330 return false; 331 } 332 333 // Add an entry V->itself if not existing yet. 334 void addVecAlias(llvm::Value* Aliaser, llvm::Value* Aliasee, int Idx); 335 SSubVecDesc* getOrCreateSubVecDesc(llvm::Value* V); 336 void getAllAliasVals( 337 ValueVectorTy& AliasVals, 338 llvm::Value* Aliaser, 339 llvm::Value* VecAliasee, 340 int Idx); 341 342 // No need to emit code for instructions in this map due to aliasing 343 llvm::DenseMap <llvm::Instruction*, int> m_HasBecomeNoopInsts; 344 345 // For emitting livetime start to visa to assist liveness analysis 346 // 1. m_LifetimeAt1stDefInBB : aliasee -> BB 347 // Once a first def is encounted, add lifetime start and clear 348 // this map entry afterwards. 349 // 2. m_LifetimeAtEndOfBB : BB -> set of values 350 // Add lifetime start for all values in the set at the end of BB. 351 llvm::DenseMap<llvm::Value*, llvm::BasicBlock*> m_LifetimeAt1stDefOfBB; 352 llvm::DenseMap<llvm::BasicBlock*, llvm::TinyPtrVector<llvm::Value*> > m_LifetimeAtEndOfBB; 353 354 private: reset()355 void reset() { 356 m_SimdSize = 0; 357 m_IsFunctionPressureLow = Status::Undef; 358 m_IsBlockPressureLow = Status::Undef; 359 m_aliasMap.clear(); 360 m_root2AliasMap.clear(); 361 m_HasBecomeNoopInsts.clear(); 362 m_LifetimeAt1stDefOfBB.clear(); 363 m_LifetimeAtEndOfBB.clear(); 364 365 } 366 367 // Initialize per-block states. In particular, check if the entire block has a 368 // low pressure. BeginBlock(llvm::BasicBlock * BB)369 void BeginBlock(llvm::BasicBlock* BB) { 370 IGC_ASSERT(m_SimdSize != 0); 371 if (m_RPE) { 372 CodeGenContext* context = nullptr; 373 context = getAnalysis<CodeGenContextWrapper>().getCodeGenContext(); 374 uint32_t BBPresure = m_RPE->getMaxLiveGRFAtBB(BB, m_SimdSize); 375 if (BBPresure <= context->getNumGRFPerThread()) 376 m_IsBlockPressureLow = Status::True; 377 else 378 m_IsBlockPressureLow = Status::False; 379 } 380 } 381 382 // Cleanup per-block states. EndBlock()383 void EndBlock() { m_IsBlockPressureLow = Status::Undef; } 384 385 void visitLiveInstructions(llvm::Function* F); 386 387 void setLifeTimeStartPos( 388 llvm::Value* RootVal, 389 ValueVectorTy& AllVals, 390 BlockCoalescing* theBC); 391 void postProcessing(); 392 393 // Return true if this instruction can be converted to an alias 394 bool canBeAlias(llvm::CastInst* I); 395 396 // If V has been payload-coalesced, return true. hasBeenPayloadCoalesced(llvm::Value * V)397 bool hasBeenPayloadCoalesced(llvm::Value* V) { 398 return (m_coalescingEngine->GetValueCCTupleMapping(V) != nullptr); 399 } 400 401 void mergeVariables(llvm::Function* F); 402 void InsertElementAliasing(llvm::Function* F); 403 404 llvm::Value* traceAliasValue(llvm::Value* V); 405 bool getElementValue( 406 llvm::InsertElementInst* IEI, int& IEI_ix, 407 llvm::Value*& S, 408 llvm::Value*& V, int& V_ix); 409 bool getAllInsEltsIfAvailable( 410 llvm::InsertElementInst* FirstIEI, 411 VecInsEltInfoTy& AllIEIs); 412 413 bool processExtractFrom(VecInsEltInfoTy& AllIEIs); 414 bool processInsertTo(VecInsEltInfoTy& AllIEIs); 415 416 // Check if sub can be aliased to Base[Base_ix:size(sub)-1] 417 bool aliasInterfere(llvm::Value* Sub, llvm::Value* Base, int Base_ix); 418 419 // DCC: DeSSA congruent class 420 // If any of V's DCC is an aliaser, return true. 421 bool hasAnyOfDCCAsAliaser(llvm::Value* V) const; 422 bool hasAnotherInDCCAsAliasee(llvm::Value* V) const; 423 bool isAliased(llvm::Value* V) const; 424 425 // Returns true for the following pattern: 426 // a = extractElement <vectorType> EEI_Vec, <constant EEI_ix> 427 // b = insertElement <vectorType> V1, E, <constant IEI_ix> 428 // where EEI_ix and IEI_ix are constants; Return false otherwise. 429 bool getVectorIndicesIfConstant( 430 llvm::InsertElementInst* IEI, 431 int& IEI_ix, 432 llvm::Value*& EEI_Vec, 433 int& EEI_ix); 434 435 436 CodeGenContext* m_pCtx; 437 WIAnalysis* m_WIA; 438 LiveVars* m_LV; 439 DeSSA* m_DeSSA; 440 CodeGenPatternMatch* m_PatternMatch; 441 CoalescingEngine* m_coalescingEngine; 442 llvm::DominatorTree* m_DT; 443 const llvm::DataLayout* m_DL; 444 445 llvm::BumpPtrAllocator Allocator; 446 447 /// Current Function; set on entry to runOnFunction 448 /// and unset on exit to runOnFunction 449 llvm::Function* m_F; 450 451 // The register pressure estimator (optional). 452 RegisterEstimator* m_RPE; 453 454 // Results may be cached at kernel level or basic block level. Use the 455 // following enum to indicate cached flag status. 456 enum class Status : int8_t { 457 Undef = -1, 458 False = 0, 459 True = 1 460 }; 461 462 // Per SIMD-compilation constant. Each compilation needs to initialize the 463 // SIMD mode. 464 uint16_t m_SimdSize; 465 466 // When this function has low register pressure, reuse can be applied 467 // aggressively without checking each individual def-use pair. 468 Status m_IsFunctionPressureLow; 469 470 // When this block has low register pressure, reuse can be applied 471 // aggressively without checking each individual def-use pair. 472 Status m_IsBlockPressureLow; 473 474 // Temporaries under VATemp 475 // if a value V is in a dessa CC and V is aliased, 476 // add <V's root, V> into the map. This is a quick 477 // way to check if any of values in a dessa CC has 478 // been aliased (either aliaser or aliasee) 479 Val2ValMapTy m_root2AliasMap; 480 }; 481 482 llvm::FunctionPass* createVariableReuseAnalysisPass(); 483 484 } // namespace IGC 485