1 //===-BlockGenerators.h - Helper to generate code for statements-*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file defines the BlockGenerator and VectorBlockGenerator classes, which 10 // generate sequential code and vectorized code for a polyhedral statement, 11 // respectively. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #ifndef POLLY_BLOCK_GENERATORS_H 16 #define POLLY_BLOCK_GENERATORS_H 17 18 #include "polly/CodeGen/IRBuilder.h" 19 #include "polly/Support/ScopHelper.h" 20 #include "llvm/Analysis/ScalarEvolutionExpressions.h" 21 #include "isl/isl-noexceptions.h" 22 23 namespace polly { 24 using llvm::AllocaInst; 25 using llvm::ArrayRef; 26 using llvm::AssertingVH; 27 using llvm::BasicBlock; 28 using llvm::BinaryOperator; 29 using llvm::CmpInst; 30 using llvm::DataLayout; 31 using llvm::DenseMap; 32 using llvm::DominatorTree; 33 using llvm::Function; 34 using llvm::Instruction; 35 using llvm::LoadInst; 36 using llvm::Loop; 37 using llvm::LoopInfo; 38 using llvm::LoopToScevMapT; 39 using llvm::MapVector; 40 using llvm::PHINode; 41 using llvm::ScalarEvolution; 42 using llvm::SetVector; 43 using llvm::SmallVector; 44 using llvm::StoreInst; 45 using llvm::StringRef; 46 using llvm::Type; 47 using llvm::UnaryInstruction; 48 using llvm::Value; 49 50 class MemoryAccess; 51 class ScopArrayInfo; 52 class IslExprBuilder; 53 54 /// Generate a new basic block for a polyhedral statement. 55 class BlockGenerator { 56 public: 57 typedef llvm::SmallVector<ValueMapT, 8> VectorValueMapT; 58 59 /// Map types to resolve scalar dependences. 60 /// 61 ///@{ 62 using AllocaMapTy = DenseMap<const ScopArrayInfo *, AssertingVH<AllocaInst>>; 63 64 /// Simple vector of instructions to store escape users. 65 using EscapeUserVectorTy = SmallVector<Instruction *, 4>; 66 67 /// Map type to resolve escaping users for scalar instructions. 68 /// 69 /// @see The EscapeMap member. 70 using EscapeUsersAllocaMapTy = 71 MapVector<Instruction *, 72 std::pair<AssertingVH<Value>, EscapeUserVectorTy>>; 73 74 ///@} 75 76 /// Create a generator for basic blocks. 77 /// 78 /// @param Builder The LLVM-IR Builder used to generate the statement. The 79 /// code is generated at the location, the Builder points 80 /// to. 81 /// @param LI The loop info for the current function 82 /// @param SE The scalar evolution info for the current function 83 /// @param DT The dominator tree of this function. 84 /// @param ScalarMap Map from scalars to their demoted location. 85 /// @param EscapeMap Map from scalars to their escape users and locations. 86 /// @param GlobalMap A mapping from llvm::Values used in the original scop 87 /// region to a new set of llvm::Values. Each reference to 88 /// an original value appearing in this mapping is replaced 89 /// with the new value it is mapped to. 90 /// @param ExprBuilder An expression builder to generate new access functions. 91 /// @param StartBlock The first basic block after the RTC. 92 BlockGenerator(PollyIRBuilder &Builder, LoopInfo &LI, ScalarEvolution &SE, 93 DominatorTree &DT, AllocaMapTy &ScalarMap, 94 EscapeUsersAllocaMapTy &EscapeMap, ValueMapT &GlobalMap, 95 IslExprBuilder *ExprBuilder, BasicBlock *StartBlock); 96 97 /// Copy the basic block. 98 /// 99 /// This copies the entire basic block and updates references to old values 100 /// with references to new values, as defined by GlobalMap. 101 /// 102 /// @param Stmt The block statement to code generate. 103 /// @param LTS A map from old loops to new induction variables as 104 /// SCEVs. 105 /// @param NewAccesses A map from memory access ids to new ast expressions, 106 /// which may contain new access expressions for certain 107 /// memory accesses. 108 void copyStmt(ScopStmt &Stmt, LoopToScevMapT <S, 109 isl_id_to_ast_expr *NewAccesses); 110 111 /// Remove a ScopArrayInfo's allocation from the ScalarMap. 112 /// 113 /// This function allows to remove values from the ScalarMap. This is useful 114 /// if the corresponding alloca instruction will be deleted (or moved into 115 /// another module), as without removing these values the underlying 116 /// AssertingVH will trigger due to us still keeping reference to this 117 /// scalar. 118 /// 119 /// @param Array The array for which the alloca was generated. freeScalarAlloc(ScopArrayInfo * Array)120 void freeScalarAlloc(ScopArrayInfo *Array) { ScalarMap.erase(Array); } 121 122 /// Return the alloca for @p Access. 123 /// 124 /// If no alloca was mapped for @p Access a new one is created. 125 /// 126 /// @param Access The memory access for which to generate the alloca. 127 /// 128 /// @returns The alloca for @p Access or a replacement value taken from 129 /// GlobalMap. 130 Value *getOrCreateAlloca(const MemoryAccess &Access); 131 132 /// Return the alloca for @p Array. 133 /// 134 /// If no alloca was mapped for @p Array a new one is created. 135 /// 136 /// @param Array The array for which to generate the alloca. 137 /// 138 /// @returns The alloca for @p Array or a replacement value taken from 139 /// GlobalMap. 140 Value *getOrCreateAlloca(const ScopArrayInfo *Array); 141 142 /// Finalize the code generation for the SCoP @p S. 143 /// 144 /// This will initialize and finalize the scalar variables we demoted during 145 /// the code generation. 146 /// 147 /// @see createScalarInitialization(Scop &) 148 /// @see createScalarFinalization(Region &) 149 void finalizeSCoP(Scop &S); 150 151 /// An empty destructor ~BlockGenerator()152 virtual ~BlockGenerator() {} 153 154 BlockGenerator(const BlockGenerator &) = default; 155 156 protected: 157 PollyIRBuilder &Builder; 158 LoopInfo &LI; 159 ScalarEvolution &SE; 160 IslExprBuilder *ExprBuilder; 161 162 /// The dominator tree of this function. 163 DominatorTree &DT; 164 165 /// The entry block of the current function. 166 BasicBlock *EntryBB; 167 168 /// Map to resolve scalar dependences for PHI operands and scalars. 169 /// 170 /// When translating code that contains scalar dependences as they result from 171 /// inter-block scalar dependences (including the use of data carrying PHI 172 /// nodes), we do not directly regenerate in-register SSA code, but instead 173 /// allocate some stack memory through which these scalar values are passed. 174 /// Only a later pass of -mem2reg will then (re)introduce in-register 175 /// computations. 176 /// 177 /// To keep track of the memory location(s) used to store the data computed by 178 /// a given SSA instruction, we use the map 'ScalarMap'. ScalarMap maps a 179 /// given ScopArrayInfo to the junk of stack allocated memory, that is 180 /// used for code generation. 181 /// 182 /// Up to two different ScopArrayInfo objects are associated with each 183 /// llvm::Value: 184 /// 185 /// MemoryType::Value objects are used for normal scalar dependences that go 186 /// from a scalar definition to its use. Such dependences are lowered by 187 /// directly writing the value an instruction computes into the corresponding 188 /// chunk of memory and reading it back from this chunk of memory right before 189 /// every use of this original scalar value. The memory allocations for 190 /// MemoryType::Value objects end with '.s2a'. 191 /// 192 /// MemoryType::PHI (and MemoryType::ExitPHI) objects are used to model PHI 193 /// nodes. For each PHI nodes we introduce, besides the Array of type 194 /// MemoryType::Value, a second chunk of memory into which we write at the end 195 /// of each basic block preceding the PHI instruction the value passed 196 /// through this basic block. At the place where the PHI node is executed, we 197 /// replace the PHI node with a load from the corresponding MemoryType::PHI 198 /// memory location. The memory allocations for MemoryType::PHI end with 199 /// '.phiops'. 200 /// 201 /// Example: 202 /// 203 /// Input C Code 204 /// ============ 205 /// 206 /// S1: x1 = ... 207 /// for (i=0...N) { 208 /// S2: x2 = phi(x1, add) 209 /// S3: add = x2 + 42; 210 /// } 211 /// S4: print(x1) 212 /// print(x2) 213 /// print(add) 214 /// 215 /// 216 /// Unmodified IR IR After expansion 217 /// ============= ================== 218 /// 219 /// S1: x1 = ... S1: x1 = ... 220 /// x1.s2a = s1 221 /// x2.phiops = s1 222 /// | | 223 /// | <--<--<--<--< | <--<--<--<--< 224 /// | / \ | / \ . 225 /// V V \ V V \ . 226 /// S2: x2 = phi (x1, add) | S2: x2 = x2.phiops | 227 /// | x2.s2a = x2 | 228 /// | | 229 /// S3: add = x2 + 42 | S3: add = x2 + 42 | 230 /// | add.s2a = add | 231 /// | x2.phiops = add | 232 /// | \ / | \ / 233 /// | \ / | \ / 234 /// | >-->-->-->--> | >-->-->-->--> 235 /// V V 236 /// 237 /// S4: x1 = x1.s2a 238 /// S4: ... = x1 ... = x1 239 /// x2 = x2.s2a 240 /// ... = x2 ... = x2 241 /// add = add.s2a 242 /// ... = add ... = add 243 /// 244 /// ScalarMap = { x1:Value -> x1.s2a, x2:Value -> x2.s2a, 245 /// add:Value -> add.s2a, x2:PHI -> x2.phiops } 246 /// 247 /// ??? Why does a PHI-node require two memory chunks ??? 248 /// 249 /// One may wonder why a PHI node requires two memory chunks and not just 250 /// all data is stored in a single location. The following example tries 251 /// to store all data in .s2a and drops the .phiops location: 252 /// 253 /// S1: x1 = ... 254 /// x1.s2a = s1 255 /// x2.s2a = s1 // use .s2a instead of .phiops 256 /// | 257 /// | <--<--<--<--< 258 /// | / \ . 259 /// V V \ . 260 /// S2: x2 = x2.s2a | // value is same as above, but read 261 /// | // from .s2a 262 /// | 263 /// x2.s2a = x2 | // store into .s2a as normal 264 /// | 265 /// S3: add = x2 + 42 | 266 /// add.s2a = add | 267 /// x2.s2a = add | // use s2a instead of .phiops 268 /// | \ / // !!! This is wrong, as x2.s2a now 269 /// | >-->-->-->--> // contains add instead of x2. 270 /// V 271 /// 272 /// S4: x1 = x1.s2a 273 /// ... = x1 274 /// x2 = x2.s2a // !!! We now read 'add' instead of 275 /// ... = x2 // 'x2' 276 /// add = add.s2a 277 /// ... = add 278 /// 279 /// As visible in the example, the SSA value of the PHI node may still be 280 /// needed _after_ the basic block, which could conceptually branch to the 281 /// PHI node, has been run and has overwritten the PHI's old value. Hence, a 282 /// single memory location is not enough to code-generate a PHI node. 283 /// 284 /// Memory locations used for the special PHI node modeling. 285 AllocaMapTy &ScalarMap; 286 287 /// Map from instructions to their escape users as well as the alloca. 288 EscapeUsersAllocaMapTy &EscapeMap; 289 290 /// A map from llvm::Values referenced in the old code to a new set of 291 /// llvm::Values, which is used to replace these old values during 292 /// code generation. 293 ValueMapT &GlobalMap; 294 295 /// The first basic block after the RTC. 296 BasicBlock *StartBlock; 297 298 /// Split @p BB to create a new one we can use to clone @p BB in. 299 BasicBlock *splitBB(BasicBlock *BB); 300 301 /// Copy the given basic block. 302 /// 303 /// @param Stmt The statement to code generate. 304 /// @param BB The basic block to code generate. 305 /// @param BBMap A mapping from old values to their new values in this 306 /// block. 307 /// @param LTS A map from old loops to new induction variables as 308 /// SCEVs. 309 /// @param NewAccesses A map from memory access ids to new ast expressions, 310 /// which may contain new access expressions for certain 311 /// memory accesses. 312 /// 313 /// @returns The copy of the basic block. 314 BasicBlock *copyBB(ScopStmt &Stmt, BasicBlock *BB, ValueMapT &BBMap, 315 LoopToScevMapT <S, isl_id_to_ast_expr *NewAccesses); 316 317 /// Copy the given basic block. 318 /// 319 /// @param Stmt The statement to code generate. 320 /// @param BB The basic block to code generate. 321 /// @param BBCopy The new basic block to generate code in. 322 /// @param BBMap A mapping from old values to their new values in this 323 /// block. 324 /// @param LTS A map from old loops to new induction variables as 325 /// SCEVs. 326 /// @param NewAccesses A map from memory access ids to new ast expressions, 327 /// which may contain new access expressions for certain 328 /// memory accesses. 329 void copyBB(ScopStmt &Stmt, BasicBlock *BB, BasicBlock *BBCopy, 330 ValueMapT &BBMap, LoopToScevMapT <S, 331 isl_id_to_ast_expr *NewAccesses); 332 333 /// Generate reload of scalars demoted to memory and needed by @p Stmt. 334 /// 335 /// @param Stmt The statement we generate code for. 336 /// @param LTS A mapping from loops virtual canonical induction 337 /// variable to their new values. 338 /// @param BBMap A mapping from old values to their new values in this block. 339 /// @param NewAccesses A map from memory access ids to new ast expressions. 340 void generateScalarLoads(ScopStmt &Stmt, LoopToScevMapT <S, 341 ValueMapT &BBMap, 342 __isl_keep isl_id_to_ast_expr *NewAccesses); 343 344 /// When statement tracing is enabled, build the print instructions for 345 /// printing the current statement instance. 346 /// 347 /// The printed output looks like: 348 /// 349 /// Stmt1(0) 350 /// 351 /// If printing of scalars is enabled, it also appends the value of each 352 /// scalar to the line: 353 /// 354 /// Stmt1(0) %i=1 %sum=5 355 /// 356 /// @param Stmt The statement we generate code for. 357 /// @param LTS A mapping from loops virtual canonical induction 358 /// variable to their new values. 359 /// @param BBMap A mapping from old values to their new values in this block. 360 void generateBeginStmtTrace(ScopStmt &Stmt, LoopToScevMapT <S, 361 ValueMapT &BBMap); 362 363 /// Generate instructions that compute whether one instance of @p Set is 364 /// executed. 365 /// 366 /// @param Stmt The statement we generate code for. 367 /// @param Subdomain A set in the space of @p Stmt's domain. Elements not in 368 /// @p Stmt's domain are ignored. 369 /// 370 /// @return An expression of type i1, generated into the current builder 371 /// position, that evaluates to 1 if the executed instance is part of 372 /// @p Set. 373 Value *buildContainsCondition(ScopStmt &Stmt, const isl::set &Subdomain); 374 375 /// Generate code that executes in a subset of @p Stmt's domain. 376 /// 377 /// @param Stmt The statement we generate code for. 378 /// @param Subdomain The condition for some code to be executed. 379 /// @param Subject A name for the code that is executed 380 /// conditionally. Used to name new basic blocks and 381 /// instructions. 382 /// @param GenThenFunc Callback which generates the code to be executed 383 /// when the current executed instance is in @p Set. The 384 /// IRBuilder's position is moved to within the block that 385 /// executes conditionally for this callback. 386 void generateConditionalExecution(ScopStmt &Stmt, const isl::set &Subdomain, 387 StringRef Subject, 388 const std::function<void()> &GenThenFunc); 389 390 /// Generate the scalar stores for the given statement. 391 /// 392 /// After the statement @p Stmt was copied all inner-SCoP scalar dependences 393 /// starting in @p Stmt (hence all scalar write accesses in @p Stmt) need to 394 /// be demoted to memory. 395 /// 396 /// @param Stmt The statement we generate code for. 397 /// @param LTS A mapping from loops virtual canonical induction 398 /// variable to their new values 399 /// (for values recalculated in the new ScoP, but not 400 /// within this basic block) 401 /// @param BBMap A mapping from old values to their new values in this block. 402 /// @param NewAccesses A map from memory access ids to new ast expressions. 403 virtual void generateScalarStores(ScopStmt &Stmt, LoopToScevMapT <S, 404 ValueMapT &BBMap, 405 __isl_keep isl_id_to_ast_expr *NewAccesses); 406 407 /// Handle users of @p Array outside the SCoP. 408 /// 409 /// @param S The current SCoP. 410 /// @param Inst The ScopArrayInfo to handle. 411 void handleOutsideUsers(const Scop &S, ScopArrayInfo *Array); 412 413 /// Find scalar statements that have outside users. 414 /// 415 /// We register these scalar values to later update subsequent scalar uses of 416 /// these values to either use the newly computed value from within the scop 417 /// (if the scop was executed) or the unchanged original code (if the run-time 418 /// check failed). 419 /// 420 /// @param S The scop for which to find the outside users. 421 void findOutsideUsers(Scop &S); 422 423 /// Initialize the memory of demoted scalars. 424 /// 425 /// @param S The scop for which to generate the scalar initializers. 426 void createScalarInitialization(Scop &S); 427 428 /// Create exit PHI node merges for PHI nodes with more than two edges 429 /// from inside the scop. 430 /// 431 /// For scops which have a PHI node in the exit block that has more than two 432 /// incoming edges from inside the scop region, we require some special 433 /// handling to understand which of the possible values will be passed to the 434 /// PHI node from inside the optimized version of the scop. To do so ScopInfo 435 /// models the possible incoming values as write accesses of the ScopStmts. 436 /// 437 /// This function creates corresponding code to reload the computed outgoing 438 /// value from the stack slot it has been stored into and to pass it on to the 439 /// PHI node in the original exit block. 440 /// 441 /// @param S The scop for which to generate the exiting PHI nodes. 442 void createExitPHINodeMerges(Scop &S); 443 444 /// Promote the values of demoted scalars after the SCoP. 445 /// 446 /// If a scalar value was used outside the SCoP we need to promote the value 447 /// stored in the memory cell allocated for that scalar and combine it with 448 /// the original value in the non-optimized SCoP. 449 void createScalarFinalization(Scop &S); 450 451 /// Try to synthesize a new value 452 /// 453 /// Given an old value, we try to synthesize it in a new context from its 454 /// original SCEV expression. We start from the original SCEV expression, 455 /// then replace outdated parameter and loop references, and finally 456 /// expand it to code that computes this updated expression. 457 /// 458 /// @param Stmt The statement to code generate 459 /// @param Old The old Value 460 /// @param BBMap A mapping from old values to their new values 461 /// (for values recalculated within this basic block) 462 /// @param LTS A mapping from loops virtual canonical induction 463 /// variable to their new values 464 /// (for values recalculated in the new ScoP, but not 465 /// within this basic block) 466 /// @param L The loop that surrounded the instruction that referenced 467 /// this value in the original code. This loop is used to 468 /// evaluate the scalar evolution at the right scope. 469 /// 470 /// @returns o A newly synthesized value. 471 /// o NULL, if synthesizing the value failed. 472 Value *trySynthesizeNewValue(ScopStmt &Stmt, Value *Old, ValueMapT &BBMap, 473 LoopToScevMapT <S, Loop *L) const; 474 475 /// Get the new version of a value. 476 /// 477 /// Given an old value, we first check if a new version of this value is 478 /// available in the BBMap or GlobalMap. In case it is not and the value can 479 /// be recomputed using SCEV, we do so. If we can not recompute a value 480 /// using SCEV, but we understand that the value is constant within the scop, 481 /// we return the old value. If the value can still not be derived, this 482 /// function will assert. 483 /// 484 /// @param Stmt The statement to code generate. 485 /// @param Old The old Value. 486 /// @param BBMap A mapping from old values to their new values 487 /// (for values recalculated within this basic block). 488 /// @param LTS A mapping from loops virtual canonical induction 489 /// variable to their new values 490 /// (for values recalculated in the new ScoP, but not 491 /// within this basic block). 492 /// @param L The loop that surrounded the instruction that referenced 493 /// this value in the original code. This loop is used to 494 /// evaluate the scalar evolution at the right scope. 495 /// 496 /// @returns o The old value, if it is still valid. 497 /// o The new value, if available. 498 /// o NULL, if no value is found. 499 Value *getNewValue(ScopStmt &Stmt, Value *Old, ValueMapT &BBMap, 500 LoopToScevMapT <S, Loop *L) const; 501 502 void copyInstScalar(ScopStmt &Stmt, Instruction *Inst, ValueMapT &BBMap, 503 LoopToScevMapT <S); 504 505 /// Get the innermost loop that surrounds the statement @p Stmt. 506 Loop *getLoopForStmt(const ScopStmt &Stmt) const; 507 508 /// Generate the operand address 509 /// @param NewAccesses A map from memory access ids to new ast expressions, 510 /// which may contain new access expressions for certain 511 /// memory accesses. 512 Value *generateLocationAccessed(ScopStmt &Stmt, MemAccInst Inst, 513 ValueMapT &BBMap, LoopToScevMapT <S, 514 isl_id_to_ast_expr *NewAccesses); 515 516 /// Generate the operand address. 517 /// 518 /// @param Stmt The statement to generate code for. 519 /// @param L The innermost loop that surrounds the statement. 520 /// @param Pointer If the access expression is not changed (ie. not found 521 /// in @p LTS), use this Pointer from the original code 522 /// instead. 523 /// @param BBMap A mapping from old values to their new values. 524 /// @param LTS A mapping from loops virtual canonical induction 525 /// variable to their new values. 526 /// @param NewAccesses Ahead-of-time generated access expressions. 527 /// @param Id Identifier of the MemoryAccess to generate. 528 /// @param ExpectedType The type the returned value should have. 529 /// 530 /// @return The generated address. 531 Value *generateLocationAccessed(ScopStmt &Stmt, Loop *L, Value *Pointer, 532 ValueMapT &BBMap, LoopToScevMapT <S, 533 isl_id_to_ast_expr *NewAccesses, 534 __isl_take isl_id *Id, Type *ExpectedType); 535 536 /// Generate the pointer value that is accesses by @p Access. 537 /// 538 /// For write accesses, generate the target address. For read accesses, 539 /// generate the source address. 540 /// The access can be either an array access or a scalar access. In the first 541 /// case, the returned address will point to an element into that array. In 542 /// the scalar case, an alloca is used. 543 /// If a new AccessRelation is set for the MemoryAccess, the new relation will 544 /// be used. 545 /// 546 /// @param Access The access to generate a pointer for. 547 /// @param L The innermost loop that surrounds the statement. 548 /// @param LTS A mapping from loops virtual canonical induction 549 /// variable to their new values. 550 /// @param BBMap A mapping from old values to their new values. 551 /// @param NewAccesses A map from memory access ids to new ast expressions. 552 /// 553 /// @return The generated address. 554 Value *getImplicitAddress(MemoryAccess &Access, Loop *L, LoopToScevMapT <S, 555 ValueMapT &BBMap, 556 __isl_keep isl_id_to_ast_expr *NewAccesses); 557 558 /// @param NewAccesses A map from memory access ids to new ast expressions, 559 /// which may contain new access expressions for certain 560 /// memory accesses. 561 Value *generateArrayLoad(ScopStmt &Stmt, LoadInst *load, ValueMapT &BBMap, 562 LoopToScevMapT <S, 563 isl_id_to_ast_expr *NewAccesses); 564 565 /// @param NewAccesses A map from memory access ids to new ast expressions, 566 /// which may contain new access expressions for certain 567 /// memory accesses. 568 void generateArrayStore(ScopStmt &Stmt, StoreInst *store, ValueMapT &BBMap, 569 LoopToScevMapT <S, isl_id_to_ast_expr *NewAccesses); 570 571 /// Copy a single PHI instruction. 572 /// 573 /// The implementation in the BlockGenerator is trivial, however it allows 574 /// subclasses to handle PHIs different. copyPHIInstruction(ScopStmt &,PHINode *,ValueMapT &,LoopToScevMapT &)575 virtual void copyPHIInstruction(ScopStmt &, PHINode *, ValueMapT &, 576 LoopToScevMapT &) {} 577 578 /// Copy a single Instruction. 579 /// 580 /// This copies a single Instruction and updates references to old values 581 /// with references to new values, as defined by GlobalMap and BBMap. 582 /// 583 /// @param Stmt The statement to code generate. 584 /// @param Inst The instruction to copy. 585 /// @param BBMap A mapping from old values to their new values 586 /// (for values recalculated within this basic block). 587 /// @param GlobalMap A mapping from old values to their new values 588 /// (for values recalculated in the new ScoP, but not 589 /// within this basic block). 590 /// @param LTS A mapping from loops virtual canonical induction 591 /// variable to their new values 592 /// (for values recalculated in the new ScoP, but not 593 /// within this basic block). 594 /// @param NewAccesses A map from memory access ids to new ast expressions, 595 /// which may contain new access expressions for certain 596 /// memory accesses. 597 void copyInstruction(ScopStmt &Stmt, Instruction *Inst, ValueMapT &BBMap, 598 LoopToScevMapT <S, isl_id_to_ast_expr *NewAccesses); 599 600 /// Helper to determine if @p Inst can be synthesized in @p Stmt. 601 /// 602 /// @returns false, iff @p Inst can be synthesized in @p Stmt. 603 bool canSyntheziseInStmt(ScopStmt &Stmt, Instruction *Inst); 604 605 /// Remove dead instructions generated for BB 606 /// 607 /// @param BB The basic block code for which code has been generated. 608 /// @param BBMap A local map from old to new instructions. 609 void removeDeadInstructions(BasicBlock *BB, ValueMapT &BBMap); 610 611 /// Invalidate the scalar evolution expressions for a scop. 612 /// 613 /// This function invalidates the scalar evolution results for all 614 /// instructions that are part of a given scop, and the loops 615 /// surrounding the users of merge blocks. This is necessary to ensure that 616 /// later scops do not obtain scalar evolution expressions that reference 617 /// values that earlier dominated the later scop, but have been moved in the 618 /// conditional part of an earlier scop and consequently do not any more 619 /// dominate the later scop. 620 /// 621 /// @param S The scop to invalidate. 622 void invalidateScalarEvolution(Scop &S); 623 }; 624 625 /// Generate a new vector basic block for a polyhedral statement. 626 /// 627 /// The only public function exposed is generate(). 628 class VectorBlockGenerator : BlockGenerator { 629 public: 630 /// Generate a new vector basic block for a ScoPStmt. 631 /// 632 /// This code generation is similar to the normal, scalar code generation, 633 /// except that each instruction is code generated for several vector lanes 634 /// at a time. If possible instructions are issued as actual vector 635 /// instructions, but e.g. for address calculation instructions we currently 636 /// generate scalar instructions for each vector lane. 637 /// 638 /// @param BlockGen A block generator object used as parent. 639 /// @param Stmt The statement to code generate. 640 /// @param VLTS A mapping from loops virtual canonical induction 641 /// variable to their new values 642 /// (for values recalculated in the new ScoP, but not 643 /// within this basic block), one for each lane. 644 /// @param Schedule A map from the statement to a schedule where the 645 /// innermost dimension is the dimension of the innermost 646 /// loop containing the statement. 647 /// @param NewAccesses A map from memory access ids to new ast expressions, 648 /// which may contain new access expressions for certain 649 /// memory accesses. generate(BlockGenerator & BlockGen,ScopStmt & Stmt,std::vector<LoopToScevMapT> & VLTS,__isl_keep isl_map * Schedule,__isl_keep isl_id_to_ast_expr * NewAccesses)650 static void generate(BlockGenerator &BlockGen, ScopStmt &Stmt, 651 std::vector<LoopToScevMapT> &VLTS, 652 __isl_keep isl_map *Schedule, 653 __isl_keep isl_id_to_ast_expr *NewAccesses) { 654 VectorBlockGenerator Generator(BlockGen, VLTS, Schedule); 655 Generator.copyStmt(Stmt, NewAccesses); 656 } 657 658 private: 659 // This is a vector of loop->scev maps. The first map is used for the first 660 // vector lane, ... 661 // Each map, contains information about Instructions in the old ScoP, which 662 // are recalculated in the new SCoP. When copying the basic block, we replace 663 // all references to the old instructions with their recalculated values. 664 // 665 // For example, when the code generator produces this AST: 666 // 667 // for (int c1 = 0; c1 <= 1023; c1 += 1) 668 // for (int c2 = 0; c2 <= 1023; c2 += VF) 669 // for (int lane = 0; lane <= VF; lane += 1) 670 // Stmt(c2 + lane + 3, c1); 671 // 672 // VLTS[lane] contains a map: 673 // "outer loop in the old loop nest" -> SCEV("c2 + lane + 3"), 674 // "inner loop in the old loop nest" -> SCEV("c1"). 675 std::vector<LoopToScevMapT> &VLTS; 676 677 // A map from the statement to a schedule where the innermost dimension is the 678 // dimension of the innermost loop containing the statement. 679 isl_map *Schedule; 680 681 VectorBlockGenerator(BlockGenerator &BlockGen, 682 std::vector<LoopToScevMapT> &VLTS, 683 __isl_keep isl_map *Schedule); 684 685 int getVectorWidth(); 686 687 Value *getVectorValue(ScopStmt &Stmt, Value *Old, ValueMapT &VectorMap, 688 VectorValueMapT &ScalarMaps, Loop *L); 689 690 /// Load a vector from a set of adjacent scalars 691 /// 692 /// In case a set of scalars is known to be next to each other in memory, 693 /// create a vector load that loads those scalars 694 /// 695 /// %vector_ptr= bitcast double* %p to <4 x double>* 696 /// %vec_full = load <4 x double>* %vector_ptr 697 /// 698 /// @param Stmt The statement to code generate. 699 /// @param NegativeStride This is used to indicate a -1 stride. In such 700 /// a case we load the end of a base address and 701 /// shuffle the accesses in reverse order into the 702 /// vector. By default we would do only positive 703 /// strides. 704 /// 705 /// @param NewAccesses A map from memory access ids to new ast 706 /// expressions, which may contain new access 707 /// expressions for certain memory accesses. 708 Value *generateStrideOneLoad(ScopStmt &Stmt, LoadInst *Load, 709 VectorValueMapT &ScalarMaps, 710 __isl_keep isl_id_to_ast_expr *NewAccesses, 711 bool NegativeStride); 712 713 /// Load a vector initialized from a single scalar in memory 714 /// 715 /// In case all elements of a vector are initialized to the same 716 /// scalar value, this value is loaded and shuffled into all elements 717 /// of the vector. 718 /// 719 /// %splat_one = load <1 x double>* %p 720 /// %splat = shufflevector <1 x double> %splat_one, <1 x 721 /// double> %splat_one, <4 x i32> zeroinitializer 722 /// 723 /// @param NewAccesses A map from memory access ids to new ast expressions, 724 /// which may contain new access expressions for certain 725 /// memory accesses. 726 Value *generateStrideZeroLoad(ScopStmt &Stmt, LoadInst *Load, 727 ValueMapT &BBMap, 728 __isl_keep isl_id_to_ast_expr *NewAccesses); 729 730 /// Load a vector from scalars distributed in memory 731 /// 732 /// In case some scalars a distributed randomly in memory. Create a vector 733 /// by loading each scalar and by inserting one after the other into the 734 /// vector. 735 /// 736 /// %scalar_1= load double* %p_1 737 /// %vec_1 = insertelement <2 x double> undef, double %scalar_1, i32 0 738 /// %scalar 2 = load double* %p_2 739 /// %vec_2 = insertelement <2 x double> %vec_1, double %scalar_1, i32 1 740 /// 741 /// @param NewAccesses A map from memory access ids to new ast expressions, 742 /// which may contain new access expressions for certain 743 /// memory accesses. 744 Value *generateUnknownStrideLoad(ScopStmt &Stmt, LoadInst *Load, 745 VectorValueMapT &ScalarMaps, 746 __isl_keep isl_id_to_ast_expr *NewAccesses); 747 748 /// @param NewAccesses A map from memory access ids to new ast expressions, 749 /// which may contain new access expressions for certain 750 /// memory accesses. 751 void generateLoad(ScopStmt &Stmt, LoadInst *Load, ValueMapT &VectorMap, 752 VectorValueMapT &ScalarMaps, 753 __isl_keep isl_id_to_ast_expr *NewAccesses); 754 755 void copyUnaryInst(ScopStmt &Stmt, UnaryInstruction *Inst, 756 ValueMapT &VectorMap, VectorValueMapT &ScalarMaps); 757 758 void copyBinaryInst(ScopStmt &Stmt, BinaryOperator *Inst, 759 ValueMapT &VectorMap, VectorValueMapT &ScalarMaps); 760 761 /// @param NewAccesses A map from memory access ids to new ast expressions, 762 /// which may contain new access expressions for certain 763 /// memory accesses. 764 void copyStore(ScopStmt &Stmt, StoreInst *Store, ValueMapT &VectorMap, 765 VectorValueMapT &ScalarMaps, 766 __isl_keep isl_id_to_ast_expr *NewAccesses); 767 768 /// @param NewAccesses A map from memory access ids to new ast expressions, 769 /// which may contain new access expressions for certain 770 /// memory accesses. 771 void copyInstScalarized(ScopStmt &Stmt, Instruction *Inst, 772 ValueMapT &VectorMap, VectorValueMapT &ScalarMaps, 773 __isl_keep isl_id_to_ast_expr *NewAccesses); 774 775 bool extractScalarValues(const Instruction *Inst, ValueMapT &VectorMap, 776 VectorValueMapT &ScalarMaps); 777 778 bool hasVectorOperands(const Instruction *Inst, ValueMapT &VectorMap); 779 780 /// Generate vector loads for scalars. 781 /// 782 /// @param Stmt The scop statement for which to generate the loads. 783 /// @param VectorBlockMap A map that will be updated to relate the original 784 /// values with the newly generated vector loads. 785 void generateScalarVectorLoads(ScopStmt &Stmt, ValueMapT &VectorBlockMap); 786 787 /// Verify absence of scalar stores. 788 /// 789 /// @param Stmt The scop statement to check for scalar stores. 790 void verifyNoScalarStores(ScopStmt &Stmt); 791 792 /// @param NewAccesses A map from memory access ids to new ast expressions, 793 /// which may contain new access expressions for certain 794 /// memory accesses. 795 void copyInstruction(ScopStmt &Stmt, Instruction *Inst, ValueMapT &VectorMap, 796 VectorValueMapT &ScalarMaps, 797 __isl_keep isl_id_to_ast_expr *NewAccesses); 798 799 /// @param NewAccesses A map from memory access ids to new ast expressions, 800 /// which may contain new access expressions for certain 801 /// memory accesses. 802 void copyStmt(ScopStmt &Stmt, __isl_keep isl_id_to_ast_expr *NewAccesses); 803 }; 804 805 /// Generator for new versions of polyhedral region statements. 806 class RegionGenerator : public BlockGenerator { 807 public: 808 /// Create a generator for regions. 809 /// 810 /// @param BlockGen A generator for basic blocks. RegionGenerator(BlockGenerator & BlockGen)811 RegionGenerator(BlockGenerator &BlockGen) : BlockGenerator(BlockGen) {} 812 ~RegionGenerator()813 virtual ~RegionGenerator() {} 814 815 /// Copy the region statement @p Stmt. 816 /// 817 /// This copies the entire region represented by @p Stmt and updates 818 /// references to old values with references to new values, as defined by 819 /// GlobalMap. 820 /// 821 /// @param Stmt The statement to code generate. 822 /// @param LTS A map from old loops to new induction variables as SCEVs. 823 void copyStmt(ScopStmt &Stmt, LoopToScevMapT <S, 824 __isl_keep isl_id_to_ast_expr *IdToAstExp); 825 826 private: 827 /// A map from old to the first new block in the region, that was created to 828 /// model the old basic block. 829 DenseMap<BasicBlock *, BasicBlock *> StartBlockMap; 830 831 /// A map from old to the last new block in the region, that was created to 832 /// model the old basic block. 833 DenseMap<BasicBlock *, BasicBlock *> EndBlockMap; 834 835 /// The "BBMaps" for the whole region (one for each block). In case a basic 836 /// block is code generated to multiple basic blocks (e.g., for partial 837 /// writes), the StartBasic is used as index for the RegionMap. 838 DenseMap<BasicBlock *, ValueMapT> RegionMaps; 839 840 /// Mapping to remember PHI nodes that still need incoming values. 841 using PHINodePairTy = std::pair<PHINode *, PHINode *>; 842 DenseMap<BasicBlock *, SmallVector<PHINodePairTy, 4>> IncompletePHINodeMap; 843 844 /// Repair the dominance tree after we created a copy block for @p BB. 845 /// 846 /// @returns The immediate dominator in the DT for @p BBCopy if in the region. 847 BasicBlock *repairDominance(BasicBlock *BB, BasicBlock *BBCopy); 848 849 /// Add the new operand from the copy of @p IncomingBB to @p PHICopy. 850 /// 851 /// PHI nodes, which may have (multiple) edges that enter from outside the 852 /// non-affine subregion and even from outside the scop, are code generated as 853 /// follows: 854 /// 855 /// # Original 856 /// 857 /// Region: %A-> %exit 858 /// NonAffine Stmt: %nonaffB -> %D (includes %nonaffB, %nonaffC) 859 /// 860 /// pre: 861 /// %val = add i64 1, 1 862 /// 863 /// A: 864 /// br label %nonaff 865 /// 866 /// nonaffB: 867 /// %phi = phi i64 [%val, %A], [%valC, %nonAffC], [%valD, %D] 868 /// %cmp = <nonaff> 869 /// br i1 %cmp, label %C, label %nonaffC 870 /// 871 /// nonaffC: 872 /// %valC = add i64 1, 1 873 /// br i1 undef, label %D, label %nonaffB 874 /// 875 /// D: 876 /// %valD = ... 877 /// %exit_cond = <loopexit> 878 /// br i1 %exit_cond, label %nonaffB, label %exit 879 /// 880 /// exit: 881 /// ... 882 /// 883 /// - %start and %C enter from outside the non-affine region. 884 /// - %nonaffC enters from within the non-affine region. 885 /// 886 /// # New 887 /// 888 /// polly.A: 889 /// store i64 %val, i64* %phi.phiops 890 /// br label %polly.nonaffA.entry 891 /// 892 /// polly.nonaffB.entry: 893 /// %phi.phiops.reload = load i64, i64* %phi.phiops 894 /// br label %nonaffB 895 /// 896 /// polly.nonaffB: 897 /// %polly.phi = [%phi.phiops.reload, %nonaffB.entry], 898 /// [%p.valC, %polly.nonaffC] 899 /// 900 /// polly.nonaffC: 901 /// %p.valC = add i64 1, 1 902 /// br i1 undef, label %polly.D, label %polly.nonaffB 903 /// 904 /// polly.D: 905 /// %p.valD = ... 906 /// store i64 %p.valD, i64* %phi.phiops 907 /// %p.exit_cond = <loopexit> 908 /// br i1 %p.exit_cond, label %polly.nonaffB, label %exit 909 /// 910 /// Values that enter the PHI from outside the non-affine region are stored 911 /// into the stack slot %phi.phiops by statements %polly.A and %polly.D and 912 /// reloaded in %polly.nonaffB.entry, a basic block generated before the 913 /// actual non-affine region. 914 /// 915 /// When generating the PHI node of the non-affine region in %polly.nonaffB, 916 /// incoming edges from outside the region are combined into a single branch 917 /// from %polly.nonaffB.entry which has as incoming value the value reloaded 918 /// from the %phi.phiops stack slot. Incoming edges from within the region 919 /// refer to the copied instructions (%p.valC) and basic blocks 920 /// (%polly.nonaffC) of the non-affine region. 921 /// 922 /// @param Stmt The statement to code generate. 923 /// @param PHI The original PHI we copy. 924 /// @param PHICopy The copy of @p PHI. 925 /// @param IncomingBB An incoming block of @p PHI. 926 /// @param LTS A map from old loops to new induction variables as 927 /// SCEVs. 928 void addOperandToPHI(ScopStmt &Stmt, PHINode *PHI, PHINode *PHICopy, 929 BasicBlock *IncomingBB, LoopToScevMapT <S); 930 931 /// Create a PHI that combines the incoming values from all incoming blocks 932 /// that are in the subregion. 933 /// 934 /// PHIs in the subregion's exit block can have incoming edges from within and 935 /// outside the subregion. This function combines the incoming values from 936 /// within the subregion to appear as if there is only one incoming edge from 937 /// the subregion (an additional exit block is created by RegionGenerator). 938 /// This is to avoid that a value is written to the .phiops location without 939 /// leaving the subregion because the exiting block as an edge back into the 940 /// subregion. 941 /// 942 /// @param MA The WRITE of MemoryKind::PHI/MemoryKind::ExitPHI for a PHI in 943 /// the subregion's exit block. 944 /// @param LTS Virtual induction variable mapping. 945 /// @param BBMap A mapping from old values to their new values in this block. 946 /// @param L Loop surrounding this region statement. 947 /// 948 /// @returns The constructed PHI node. 949 PHINode *buildExitPHI(MemoryAccess *MA, LoopToScevMapT <S, ValueMapT &BBMap, 950 Loop *L); 951 952 /// @param Return the new value of a scalar write, creating a PHINode if 953 /// necessary. 954 /// 955 /// @param MA A scalar WRITE MemoryAccess. 956 /// @param LTS Virtual induction variable mapping. 957 /// @param BBMap A mapping from old values to their new values in this block. 958 /// 959 /// @returns The effective value of @p MA's written value when leaving the 960 /// subregion. 961 /// @see buildExitPHI 962 Value *getExitScalar(MemoryAccess *MA, LoopToScevMapT <S, ValueMapT &BBMap); 963 964 /// Generate the scalar stores for the given statement. 965 /// 966 /// After the statement @p Stmt was copied all inner-SCoP scalar dependences 967 /// starting in @p Stmt (hence all scalar write accesses in @p Stmt) need to 968 /// be demoted to memory. 969 /// 970 /// @param Stmt The statement we generate code for. 971 /// @param LTS A mapping from loops virtual canonical induction variable to 972 /// their new values (for values recalculated in the new ScoP, 973 /// but not within this basic block) 974 /// @param BBMap A mapping from old values to their new values in this block. 975 /// @param LTS A mapping from loops virtual canonical induction variable to 976 /// their new values. 977 virtual void 978 generateScalarStores(ScopStmt &Stmt, LoopToScevMapT <S, ValueMapT &BBMAp, 979 __isl_keep isl_id_to_ast_expr *NewAccesses) override; 980 981 /// Copy a single PHI instruction. 982 /// 983 /// This copies a single PHI instruction and updates references to old values 984 /// with references to new values, as defined by GlobalMap and BBMap. 985 /// 986 /// @param Stmt The statement to code generate. 987 /// @param PHI The PHI instruction to copy. 988 /// @param BBMap A mapping from old values to their new values 989 /// (for values recalculated within this basic block). 990 /// @param LTS A map from old loops to new induction variables as SCEVs. 991 virtual void copyPHIInstruction(ScopStmt &Stmt, PHINode *Inst, 992 ValueMapT &BBMap, 993 LoopToScevMapT <S) override; 994 }; 995 } // namespace polly 996 #endif 997