//===- IR/OpenMPIRBuilder.h - OpenMP encoding builder for LLVM IR - C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the OpenMPIRBuilder class and helpers used as a convenient
// way to create LLVM instructions for OpenMP directives.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_FRONTEND_OPENMP_OMPIRBUILDER_H
#define LLVM_FRONTEND_OPENMP_OMPIRBUILDER_H

#include "llvm/Frontend/OpenMP/OMPConstants.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/Support/Allocator.h"
#include <forward_list>

namespace llvm {
class CanonicalLoopInfo;

/// Move the instructions after an InsertPoint to the beginning of another
/// BasicBlock.
///
/// The instructions after \p IP are moved to the beginning of \p New which must
/// not have any PHINodes. If \p CreateBranch is true, a branch instruction to
/// \p New will be added such that there is no semantic change. Otherwise, the
/// \p IP insert block remains degenerate and it is up to the caller to insert a
/// terminator.
void spliceBB(IRBuilderBase::InsertPoint IP, BasicBlock *New,
              bool CreateBranch);

/// Splice a BasicBlock at an IRBuilder's current insertion point. The
/// builder's new insert location will stick to after the instruction before
/// the insertion point (instead of moving with the instruction the InsertPoint
/// stores internally).
void spliceBB(IRBuilder<> &Builder, BasicBlock *New, bool CreateBranch);

/// Split a BasicBlock at an InsertPoint, even if the block is degenerate
/// (missing the terminator).
///
/// llvm::SplitBasicBlock and BasicBlock::splitBasicBlock require a well-formed
/// BasicBlock. \p Name is used for the new successor block. If \p CreateBranch
/// is true, a branch to the new successor will be created such that
/// semantically there is no change; otherwise the block of the insertion point
/// remains degenerate and it is the caller's responsibility to insert a
/// terminator. Returns the new successor block.
BasicBlock *splitBB(IRBuilderBase::InsertPoint IP, bool CreateBranch,
                    llvm::Twine Name = {});

/// Split a BasicBlock at \p Builder's insertion point, even if the block is
/// degenerate (missing the terminator). The builder's new insert location will
/// stick to after the instruction before the insertion point (instead of
/// moving with the instruction the InsertPoint stores internally).
BasicBlock *splitBB(IRBuilderBase &Builder, bool CreateBranch,
                    llvm::Twine Name = {});

/// Split a BasicBlock at \p Builder's insertion point, even if the block is
/// degenerate (missing the terminator). The builder's new insert location will
/// stick to after the instruction before the insertion point (instead of
/// moving with the instruction the InsertPoint stores internally).
// NOTE(review): unlike the IRBuilderBase overload above, \p Name has no
// default argument here; presumably this avoids an ambiguous overload when the
// name is omitted. Confirm before adding a default.
BasicBlock *splitBB(IRBuilder<> &Builder, bool CreateBranch, llvm::Twine Name);

/// Like splitBB, but derives the new block's name from the current block's
/// name and \p Suffix.
BasicBlock *splitBBWithSuffix(IRBuilderBase &Builder, bool CreateBranch,
                              llvm::Twine Suffix = ".split");

/// An interface to create LLVM-IR for OpenMP directives.
///
/// Each OpenMP directive has a corresponding public generator method.
class OpenMPIRBuilder {
public:
  /// Create a new OpenMPIRBuilder operating on the given module \p M. This will
  /// not have an effect on \p M (see initialize).
OpenMPIRBuilder(Module &M) : M(M), Builder(M.getContext()) {}
~OpenMPIRBuilder();

/// Initialize the internal state. This will put structure types and
/// potentially other helpers into the underlying module. Must be called
/// before any other method and only once!
void initialize();

/// Finalize the underlying module, e.g., by outlining regions.
/// \param Fn The function to be finalized. If not used, all functions are
///           finalized.
void finalize(Function *Fn = nullptr);

/// Add attributes known for \p FnID to \p Fn.
void addAttributes(omp::RuntimeFunction FnID, Function &Fn);

/// Type used throughout for insertion points.
using InsertPointTy = IRBuilder<>::InsertPoint;

/// Callback type for variable finalization (think destructors).
///
/// \param CodeGenIP is the insertion point at which the finalization code
///                  should be placed.
///
/// A finalize callback knows about all objects that need finalization, e.g.
/// destruction, when the scope of the currently generated construct is left
/// at the time, and location, the callback is invoked.
using FinalizeCallbackTy = std::function<void(InsertPointTy CodeGenIP)>;

/// Entry of the finalization stack: a finalization callback together with the
/// kind of the directive it belongs to and whether that directive can be
/// cancelled.
struct FinalizationInfo {
  /// The finalization callback provided by the last in-flight invocation of
  /// createXXXX for the directive of kind DK.
  FinalizeCallbackTy FiniCB;

  /// The directive kind of the innermost directive that has an associated
  /// region which might require finalization when it is left.
  omp::Directive DK;

  /// Flag to indicate if the directive is cancellable.
  bool IsCancellable;
};

/// Push a finalization callback on the finalization stack.
///
/// \param FI The finalization info to push; it is copied onto the stack.
///
/// NOTE: Temporary solution until Clang CG is gone.
void pushFinalizationCB(const FinalizationInfo &FI) {
  FinalizationStack.push_back(FI);
}

/// Pop the last finalization callback from the finalization stack.
///
/// NOTE: Temporary solution until Clang CG is gone.
void popFinalizationCB() { FinalizationStack.pop_back(); }

/// Callback type for body (=inner region) code generation.
///
/// The callback takes code locations as arguments, each describing a
/// location where additional instructions can be inserted.
///
/// The CodeGenIP may be in the middle of a basic block or point to the end of
/// it. The basic block may have a terminator or be degenerate. The callback
/// function may just insert instructions at that position, but also split the
/// block (without the Before argument of BasicBlock::splitBasicBlock such
/// that the identity of the split predecessor block is preserved) and insert
/// additional control flow, including branches that do not lead back to what
/// follows the CodeGenIP. Note that since the callback is allowed to split
/// the block, callers must assume that InsertPoints to positions in the
/// BasicBlock after CodeGenIP including CodeGenIP itself are invalidated. If
/// such InsertPoints need to be preserved, the caller can split the block
/// itself before calling the callback.
///
/// AllocaIP and CodeGenIP must not point to the same position.
///
/// \param AllocaIP is the insertion point at which new alloca instructions
///                 should be placed. The BasicBlock it is pointing to must
///                 not be split.
/// \param CodeGenIP is the insertion point at which the body code should be
///                  placed.
using BodyGenCallbackTy =
    function_ref<void(InsertPointTy AllocaIP, InsertPointTy CodeGenIP)>;

// This is created primarily for the sections construct, as llvm::function_ref
// (BodyGenCallbackTy) is not storable (as described in the comments of the
// function_ref class, function_ref contains a non-owning reference to the
// callable).
using StorableBodyGenCallbackTy =
    std::function<void(InsertPointTy AllocaIP, InsertPointTy CodeGenIP)>;

/// Callback type for loop body code generation.
///
/// \param CodeGenIP is the insertion point where the loop's body code must be
///                  placed. This will be a dedicated BasicBlock with a
///                  conditional branch from the loop condition check and
///                  terminated with an unconditional branch to the loop
///                  latch.
/// \param IndVar    is the induction variable usable at the insertion point.
using LoopBodyGenCallbackTy =
    function_ref<void(InsertPointTy CodeGenIP, Value *IndVar)>;

/// Callback type for variable privatization (think copy & default
/// constructor).
///
/// \param AllocaIP  is the insertion point at which new alloca instructions
///                  should be placed.
/// \param CodeGenIP is the insertion point at which the privatization code
///                  should be placed.
/// \param Original  The value being copied/created, should not be used in the
///                  generated IR.
/// \param Inner     The equivalent of \p Original that should be used in the
///                  generated IR; this is equal to \p Original if the value is
///                  a pointer and can thus be passed directly, otherwise it is
///                  an equivalent but different value.
/// \param ReplVal   The replacement value, thus a copy or newly created
///                  version of \p Inner.
///
/// \returns The new insertion point where code generation continues and
///          \p ReplVal the replacement value.
using PrivatizeCallbackTy = function_ref<InsertPointTy(
    InsertPointTy AllocaIP, InsertPointTy CodeGenIP, Value &Original,
    Value &Inner, Value *&ReplVal)>;

/// Description of a LLVM-IR insertion point (IP) and a debug/source location
/// (filename, line, column, ...).
struct LocationDescription {
  /// Capture the current insertion point and current debug location of
  /// \p IRB.
  LocationDescription(const IRBuilderBase &IRB)
      : IP(IRB.saveIP()), DL(IRB.getCurrentDebugLocation()) {}
  /// Use \p IP as insertion point; the debug location is left
  /// default-constructed.
  LocationDescription(const InsertPointTy &IP) : IP(IP) {}
  /// Use \p IP as insertion point and \p DL as debug location.
  LocationDescription(const InsertPointTy &IP, const DebugLoc &DL)
      : IP(IP), DL(DL) {}
  /// The insertion point.
  InsertPointTy IP;
  /// The debug/source location.
  DebugLoc DL;
};

/// Emitter methods for OpenMP directives.
///
///{

/// Generator for '#omp barrier'
///
/// \param Loc The location where the barrier directive was encountered.
/// \param DK The kind of directive that caused the barrier.
/// \param ForceSimpleCall Flag to force a simple (=non-cancellation) barrier.
/// \param CheckCancelFlag Flag to indicate a cancel barrier return value
///                        should be checked and acted upon.
///
/// \returns The insertion point after the barrier.
InsertPointTy createBarrier(const LocationDescription &Loc, omp::Directive DK,
                            bool ForceSimpleCall = false,
                            bool CheckCancelFlag = true);

/// Generator for '#omp cancel'
///
/// \param Loc The location where the directive was encountered.
/// \param IfCondition The evaluated 'if' clause expression, if any.
/// \param CanceledDirective The kind of directive that is canceled.
///
/// \returns The insertion point after the cancel.
InsertPointTy createCancel(const LocationDescription &Loc, Value *IfCondition,
                           omp::Directive CanceledDirective);

/// Generator for '#omp parallel'
///
/// \param Loc The insert and source location description.
/// \param AllocaIP The insertion point to be used for alloca instructions.
/// \param BodyGenCB Callback that will generate the region code.
/// \param PrivCB Callback to copy a given variable (think copy constructor).
/// \param FiniCB Callback to finalize variable copies.
/// \param IfCondition The evaluated 'if' clause expression, if any.
/// \param NumThreads The evaluated 'num_threads' clause expression, if any.
/// \param ProcBind The value of the 'proc_bind' clause (see ProcBindKind).
/// \param IsCancellable Flag to indicate a cancellable parallel region.
///
/// \returns The insertion position *after* the parallel region. The return
///          type is the same type as InsertPointTy.
IRBuilder<>::InsertPoint
createParallel(const LocationDescription &Loc, InsertPointTy AllocaIP,
               BodyGenCallbackTy BodyGenCB, PrivatizeCallbackTy PrivCB,
               FinalizeCallbackTy FiniCB, Value *IfCondition,
               Value *NumThreads, omp::ProcBindKind ProcBind,
               bool IsCancellable);

/// Generator for the control flow structure of an OpenMP canonical loop.
///
/// This generator operates on the logical iteration space of the loop, i.e.
/// the caller only has to provide a loop trip count of the loop as defined by
/// base language semantics. The trip count is interpreted as an unsigned
/// integer. The induction variable passed to \p BodyGenCB will be of the same
/// type and run from 0 to \p TripCount - 1. It is up to the callback to
/// convert the logical iteration variable to the loop counter variable in the
/// loop body.
///
/// \param Loc       The insert and source location description. The insert
///                  location can be between two instructions or the end of a
///                  degenerate block (e.g. a BB under construction).
/// \param BodyGenCB Callback that will generate the loop body code.
/// \param TripCount Number of iterations the loop body is executed.
/// \param Name      Base name used to derive BB and instruction names.
///
/// \returns An object representing the created control flow structure which
///          can be used for loop-associated directives.
CanonicalLoopInfo *createCanonicalLoop(const LocationDescription &Loc,
                                       LoopBodyGenCallbackTy BodyGenCB,
                                       Value *TripCount,
                                       const Twine &Name = "loop");

/// Generator for the control flow structure of an OpenMP canonical loop.
///
/// Instead of a logical iteration space, this allows specifying user-defined
/// loop counter values using increment, upper- and lower bounds. To
/// disambiguate the terminology when counting downwards, instead of lower
/// bounds we use \p Start for the loop counter value in the first body
/// iteration.
///
/// Consider the following limitations:
///
///  * A loop counter space over all integer values of its bit-width cannot be
///    represented (e.g. using uint8_t, the loop trip count of 256 cannot be
///    stored into an 8 bit integer):
///
///      DO I = 0, 255, 1
///
///  * Unsigned wrapping is only supported when wrapping only "once"; e.g.
///    effectively counting downwards:
///
///      for (uint8_t i = 100u; i > 0; i += 127u)
///
///
/// TODO: May need to add additional parameters to represent:
///
///  * Allow representing downcounting with unsigned integers.
///
///  * Sign of the step and the comparison operator might disagree:
///
///      for (int i = 0; i < 42; i -= 1u)
///
///
/// \param Loc       The insert and source location description.
/// \param BodyGenCB Callback that will generate the loop body code.
/// \param Start     Value of the loop counter for the first iteration.
/// \param Stop      Loop counter values past this will stop the loop.
/// \param Step      Loop counter increment after each iteration; negative
///                  means counting down.
/// \param IsSigned  Whether Start, Stop and Step are signed integers.
/// \param InclusiveStop Whether \p Stop itself is a valid value for the loop
///                      counter.
/// \param ComputeIP Insertion point for instructions computing the trip
///                  count. Can be used to ensure the trip count is available
///                  at the outermost loop of a loop nest. If not set,
///                  defaults to the preheader of the generated loop.
/// \param Name      Base name used to derive BB and instruction names.
///
/// \returns An object representing the created control flow structure which
///          can be used for loop-associated directives.
CanonicalLoopInfo *createCanonicalLoop(const LocationDescription &Loc,
                                       LoopBodyGenCallbackTy BodyGenCB,
                                       Value *Start, Value *Stop, Value *Step,
                                       bool IsSigned, bool InclusiveStop,
                                       InsertPointTy ComputeIP = {},
                                       const Twine &Name = "loop");

/// Collapse a loop nest into a single loop.
///
/// Merges loops of a loop nest into a single CanonicalLoopInfo representation
/// that has the same number of innermost loop iterations as the original loop
/// nest. The induction variables of the input loops are derived from the
/// collapsed loop's induction variable. This is intended to be used to
/// implement OpenMP's collapse clause. Before applying a directive,
/// collapseLoops normalizes a loop nest to contain only a single loop and the
/// directive's implementation does not need to handle multiple loops itself.
/// This does not remove the need for all loop nest handling by directives,
/// such as the ordered(<n>) clause or the simd schedule-clause modifier of the
/// worksharing-loop directive.
///
/// Example:
/// \code
///   for (int i = 0; i < 7; ++i) // Canonical loop "i"
///     for (int j = 0; j < 9; ++j) // Canonical loop "j"
///       body(i, j);
/// \endcode
///
/// After collapsing with Loops={i,j}, the loop is changed to
/// \code
///   for (int ij = 0; ij < 63; ++ij) {
///     int i = ij / 9;
///     int j = ij % 9;
///     body(i, j);
///   }
/// \endcode
///
/// In the current implementation, the following limitations apply:
///
///  * All input loops have an induction variable of the same type.
///
///  * The collapsed loop will have the same trip count integer type as the
///    input loops. Therefore it is possible that the collapsed loop cannot
///    represent all iterations of the input loops. For instance, assuming a
///    32 bit integer type, and two input loops both iterating 2^16 times, the
///    theoretical trip count of the collapsed loop would be 2^32 iterations,
///    which cannot be represented in a 32-bit integer. Behavior is undefined
///    in this case.
///
///  * The trip counts of every input loop must be available at \p ComputeIP.
///    Non-rectangular loops are not yet supported.
///
///  * At each nest level, code between a surrounding loop and its nested loop
///    is hoisted into the loop body, and such code will be executed more
///    often than before collapsing (or not at all if any inner loop has a
///    trip count of 0). This is permitted by the OpenMP specification.
///
/// \param DL        Debug location for instructions added for collapsing,
///                  such as instructions to compute/derive the input loop's
///                  induction variables.
/// \param Loops     Loops in the loop nest to collapse. Loops are specified
///                  from outermost-to-innermost and every control flow of a
///                  loop's body must pass through its directly nested loop.
/// \param ComputeIP Where additional instructions that compute the collapsed
///                  trip count are inserted. If not set, defaults to before
///                  the generated loop.
///
/// \returns The CanonicalLoopInfo object representing the collapsed loop.
CanonicalLoopInfo *collapseLoops(DebugLoc DL,
                                 ArrayRef<CanonicalLoopInfo *> Loops,
                                 InsertPointTy ComputeIP);

private:
/// Modifies the canonical loop to be a statically-scheduled workshare loop.
///
/// This takes a \p CLI representing a canonical loop, such as the one
/// created by createCanonicalLoop and emits additional instructions to
/// turn it into a workshare loop. In particular, it calls to an OpenMP
/// runtime function in the preheader to obtain the loop bounds to be used in
/// the current thread, updates the relevant instructions in the canonical
/// loop and calls to an OpenMP runtime finalization function after the loop.
///
/// \param DL           Debug location for instructions added for the
///                     workshare-loop construct itself.
/// \param CLI          A descriptor of the canonical loop to workshare.
/// \param AllocaIP     An insertion point for Alloca instructions usable in
///                     the preheader of the loop.
/// \param NeedsBarrier Indicates whether a barrier must be inserted after
///                     the loop.
///
/// \returns Point where to insert code after the workshare construct.
InsertPointTy applyStaticWorkshareLoop(DebugLoc DL, CanonicalLoopInfo *CLI,
                                       InsertPointTy AllocaIP,
                                       bool NeedsBarrier);

/// Modifies the canonical loop to be a statically-scheduled workshare loop
/// with a user-specified chunk size.
///
/// \param DL           Debug location for instructions added for the
///                     workshare-loop construct itself.
/// \param CLI          A descriptor of the canonical loop to workshare.
/// \param AllocaIP     An insertion point for Alloca instructions usable in
///                     the preheader of the loop.
/// \param NeedsBarrier Indicates whether a barrier must be inserted after the
///                     loop.
/// \param ChunkSize    The user-specified chunk size.
///
/// \returns Point where to insert code after the workshare construct.
InsertPointTy applyStaticChunkedWorkshareLoop(DebugLoc DL,
                                              CanonicalLoopInfo *CLI,
                                              InsertPointTy AllocaIP,
                                              bool NeedsBarrier,
                                              Value *ChunkSize);

/// Modifies the canonical loop to be a dynamically-scheduled workshare loop.
///
/// This takes a \p CLI representing a canonical loop, such as the one
/// created by createCanonicalLoop and emits additional instructions to
/// turn it into a workshare loop. In particular, it calls to an OpenMP
/// runtime function in the preheader to obtain, and then in each iteration
/// to update the loop counter.
///
/// \param DL           Debug location for instructions added for the
///                     workshare-loop construct itself.
/// \param CLI          A descriptor of the canonical loop to workshare.
/// \param AllocaIP     An insertion point for Alloca instructions usable in
///                     the preheader of the loop.
/// \param SchedType    Type of scheduling to be passed to the init function.
/// \param NeedsBarrier Indicates whether a barrier must be inserted after
///                     the loop.
/// \param Chunk        The size of loop chunk considered as a unit when
///                     scheduling. If \p nullptr, defaults to 1.
///
/// \returns Point where to insert code after the workshare construct.
InsertPointTy applyDynamicWorkshareLoop(DebugLoc DL, CanonicalLoopInfo *CLI,
                                        InsertPointTy AllocaIP,
                                        omp::OMPScheduleType SchedType,
                                        bool NeedsBarrier,
                                        Value *Chunk = nullptr);

public:
/// Modifies the canonical loop to be a workshare loop.
///
/// This takes a \p CLI representing a canonical loop, such as the one
/// created by createCanonicalLoop and emits additional instructions to
/// turn it into a workshare loop. In particular, it calls to an OpenMP
/// runtime function in the preheader to obtain the loop bounds to be used in
/// the current thread, updates the relevant instructions in the canonical
/// loop and calls to an OpenMP runtime finalization function after the loop.
///
/// The concrete transformation is done by applyStaticWorkshareLoop,
/// applyStaticChunkedWorkshareLoop, or applyDynamicWorkshareLoop, depending
/// on the value of \p SchedKind and \p ChunkSize.
///
/// \param DL       Debug location for instructions added for the
///                 workshare-loop construct itself.
/// \param CLI      A descriptor of the canonical loop to workshare.
/// \param AllocaIP An insertion point for Alloca instructions usable in the
///                 preheader of the loop.
/// \param NeedsBarrier Indicates whether a barrier must be inserted after
///                     the loop.
/// \param SchedKind Scheduling algorithm to use.
/// \param ChunkSize The chunk size for the inner loop.
/// \param HasSimdModifier Whether the simd modifier is present in the
///                        schedule clause.
/// \param HasMonotonicModifier Whether the monotonic modifier is present in
///                             the schedule clause.
/// \param HasNonmonotonicModifier Whether the nonmonotonic modifier is
///                                present in the schedule clause.
/// \param HasOrderedClause Whether the (parameterless) ordered clause is
///                         present.
///
/// \returns Point where to insert code after the workshare construct.
InsertPointTy applyWorkshareLoop(
    DebugLoc DL, CanonicalLoopInfo *CLI, InsertPointTy AllocaIP,
    bool NeedsBarrier,
    llvm::omp::ScheduleKind SchedKind = llvm::omp::OMP_SCHEDULE_Default,
    Value *ChunkSize = nullptr, bool HasSimdModifier = false,
    bool HasMonotonicModifier = false, bool HasNonmonotonicModifier = false,
    bool HasOrderedClause = false);

/// Tile a loop nest.
///
/// Tiles the loops of \p Loops by the tile sizes in \p TileSizes.
/// Loops in \p Loops must be perfectly nested, from outermost to innermost
/// loop (i.e. Loops.front() is the outermost loop). The trip count llvm::Value
/// of every loop and every tile size must be usable in the outermost
/// loop's preheader. This implies that the loop nest is rectangular.
///
/// Example:
/// \code
///   for (int i = 0; i < 15; ++i) // Canonical loop "i"
///     for (int j = 0; j < 14; ++j) // Canonical loop "j"
///       body(i, j);
/// \endcode
///
/// After tiling with Loops={i,j} and TileSizes={5,7}, the loop is changed to
/// \code
///   for (int i1 = 0; i1 < 3; ++i1)
///     for (int j1 = 0; j1 < 2; ++j1)
///       for (int i2 = 0; i2 < 5; ++i2)
///         for (int j2 = 0; j2 < 7; ++j2)
///           body(i1*5+i2, j1*7+j2);
/// \endcode
///
/// The returned vector contains the loops {i1,j1,i2,j2}. The loops i1 and j1
/// are referred to as the floor loops, and the loops i2 and j2 are the tile
/// loops. Tiling also handles non-constant trip counts, non-constant tile
/// sizes and trip counts that are not multiples of the tile size. In the
/// latter case the tile loop of the last floor-loop iteration will have fewer
/// iterations than specified as its tile size.
///
///
/// \param DL        Debug location for instructions added by tiling, for
///                  instance the floor- and tile trip count computation.
/// \param Loops     Loops to tile. The CanonicalLoopInfo objects are
///                  invalidated by this method, i.e. should not be used after
///                  tiling.
/// \param TileSizes For each loop in \p Loops, the tile size for that
///                  dimension.
///
/// \returns A list of generated loops. Contains twice as many loops as the
///          input loop nest; the first half are the floor loops and the
///          second half are the tile loops.
std::vector<CanonicalLoopInfo *>
tileLoops(DebugLoc DL, ArrayRef<CanonicalLoopInfo *> Loops,
          ArrayRef<Value *> TileSizes);

/// Fully unroll a loop.
///
/// Instead of unrolling the loop immediately (and duplicating its body
/// instructions), it is deferred to LLVM's LoopUnrollPass by adding loop
/// metadata.
///
/// \param DL   Debug location for instructions added by unrolling.
/// \param Loop The loop to unroll. The loop will be invalidated.
void unrollLoopFull(DebugLoc DL, CanonicalLoopInfo *Loop);

/// Fully or partially unroll a loop. How the loop is unrolled is determined
/// using LLVM's LoopUnrollPass.
///
/// \param DL   Debug location for instructions added by unrolling.
/// \param Loop The loop to unroll. The loop will be invalidated.
void unrollLoopHeuristic(DebugLoc DL, CanonicalLoopInfo *Loop);

/// Partially unroll a loop.
///
/// The CanonicalLoopInfo of the unrolled loop for use with chained
/// loop-associated directives can be requested using \p UnrolledCLI. Not
/// needing the CanonicalLoopInfo allows more efficient code generation by
/// deferring the actual unrolling to the LoopUnrollPass using loop metadata.
/// A loop-associated directive applied to the unrolled loop needs to know the
/// new trip count which means that if using a heuristically determined unroll
/// factor (\p Factor == 0), that factor must be computed immediately. We are
/// using the same logic as the LoopUnrollPass to derive the unroll factor,
/// but which assumes that some canonicalization has taken place (e.g.
/// Mem2Reg, LICM, GVN, Inlining, etc.). That is, the heuristic will perform
/// better when the unrolled loop's CanonicalLoopInfo is not needed.
///
/// \param DL          Debug location for instructions added by unrolling.
/// \param Loop        The loop to unroll. The loop will be invalidated.
/// \param Factor      The factor to unroll the loop by. A factor of 0
///                    indicates that a heuristic should be used to determine
///                    the unroll-factor.
/// \param UnrolledCLI If non-null, receives the CanonicalLoopInfo of the
///                    partially unrolled loop. Otherwise, uses loop metadata
///                    to defer unrolling to the LoopUnrollPass.
void unrollLoopPartial(DebugLoc DL, CanonicalLoopInfo *Loop, int32_t Factor,
                       CanonicalLoopInfo **UnrolledCLI);

/// Add metadata to simd-ize a loop.
///
/// \param Loop    The loop to simd-ize.
/// \param Simdlen The simdlen value to apply to the simd loop.
void applySimd(CanonicalLoopInfo *Loop, ConstantInt *Simdlen);

/// Generator for '#omp flush'
///
/// \param Loc The location where the flush directive was encountered.
void createFlush(const LocationDescription &Loc);

/// Generator for '#omp taskwait'
///
/// \param Loc The location where the taskwait directive was encountered.
void createTaskwait(const LocationDescription &Loc);

/// Generator for '#omp taskyield'
///
/// \param Loc The location where the taskyield directive was encountered.
void createTaskyield(const LocationDescription &Loc);

/// Generator for `#omp task`
///
/// \param Loc       The location where the task construct was encountered.
/// \param AllocaIP  The insertion point to be used for alloca instructions.
/// \param BodyGenCB Callback that will generate the region code.
/// \param Tied      True if the task is tied, false if the task is untied.
/// \param Final     i1 value which is `true` if the task is final, `false` if
///                  the task is not final.
InsertPointTy createTask(const LocationDescription &Loc,
                         InsertPointTy AllocaIP, BodyGenCallbackTy BodyGenCB,
                         bool Tied = true, Value *Final = nullptr);

/// Generator for the taskgroup construct
///
/// \param Loc       The location where the taskgroup construct was
///                  encountered.
/// \param AllocaIP  The insertion point to be used for alloca instructions.
/// \param BodyGenCB Callback that will generate the region code.
InsertPointTy createTaskgroup(const LocationDescription &Loc,
                              InsertPointTy AllocaIP,
                              BodyGenCallbackTy BodyGenCB);

/// Functions used to generate reductions. Such functions take two Values
/// representing LHS and RHS of the reduction, respectively, and a reference
/// to the value that is updated to refer to the reduction result.
using ReductionGenTy =
    function_ref<InsertPointTy(InsertPointTy, Value *, Value *, Value *&)>;

/// Functions used to generate atomic reductions. Such functions take two
/// Values representing pointers to LHS and RHS of the reduction, as well as
/// the element type of these pointers. They are expected to atomically
/// update the LHS to the reduced value.
using AtomicReductionGenTy =
    function_ref<InsertPointTy(InsertPointTy, Type *, Value *, Value *)>;

/// Information about an OpenMP reduction.
struct ReductionInfo {
  /// Store the description of one reduction. The arguments are stored as-is
  /// in the members of the same name.
  ReductionInfo(Type *ElementType, Value *Variable, Value *PrivateVariable,
                ReductionGenTy ReductionGen,
                AtomicReductionGenTy AtomicReductionGen)
      : ElementType(ElementType), Variable(Variable),
        PrivateVariable(PrivateVariable), ReductionGen(ReductionGen),
        AtomicReductionGen(AtomicReductionGen) {
    // Assertion-only sanity check: \p Variable must have pointer type, and
    // that pointer type must be opaque or have \p ElementType as pointee.
    assert(cast<PointerType>(Variable->getType())
        ->isOpaqueOrPointeeTypeMatches(ElementType) && "Invalid elem type");
  }

  /// Reduction element type, must match pointee type of variable.
  Type *ElementType;

  /// Reduction variable of pointer type.
  Value *Variable;

  /// Thread-private partial reduction variable.
  Value *PrivateVariable;

  /// Callback for generating the reduction body. The IR produced by this will
  /// be used to combine two values in a thread-safe context, e.g., under
  /// lock or within the same thread, and therefore need not be atomic.
  ReductionGenTy ReductionGen;

  /// Callback for generating the atomic reduction body, may be null. The IR
  /// produced by this will be used to atomically combine two values during
  /// reduction. If null, the implementation will use the non-atomic version
  /// along with the appropriate synchronization mechanisms.
  AtomicReductionGenTy AtomicReductionGen;
};

// TODO: provide atomic and non-atomic reduction generators for reduction
// operators defined by the OpenMP specification.

/// Generator for '#omp reduction'.
///
/// Emits the IR instructing the runtime to perform the specific kind of
/// reductions. Expects reduction variables to have been privatized and
/// initialized to reduction-neutral values separately. Emits the calls to
/// runtime functions as well as the reduction function and the basic blocks
/// performing the reduction atomically and non-atomically.
///
/// The code emitted for the following:
///
/// \code
///   type var_1;
///   type var_2;
///   #pragma omp <directive> reduction(reduction-op:var_1,var_2)
///   /* body */;
/// \endcode
///
/// corresponds to the following sketch.
///
/// \code
/// void _outlined_par() {
///   // N is the number of different reductions.
713 /// void *red_array[] = {privatized_var_1, privatized_var_2, ...}; 714 /// switch(__kmpc_reduce(..., N, /*size of data in red array*/, red_array, 715 /// _omp_reduction_func, 716 /// _gomp_critical_user.reduction.var)) { 717 /// case 1: { 718 /// var_1 = var_1 <reduction-op> privatized_var_1; 719 /// var_2 = var_2 <reduction-op> privatized_var_2; 720 /// // ... 721 /// __kmpc_end_reduce(...); 722 /// break; 723 /// } 724 /// case 2: { 725 /// _Atomic<ReductionOp>(var_1, privatized_var_1); 726 /// _Atomic<ReductionOp>(var_2, privatized_var_2); 727 /// // ... 728 /// break; 729 /// } 730 /// default: break; 731 /// } 732 /// } 733 /// 734 /// void _omp_reduction_func(void **lhs, void **rhs) { 735 /// *(type *)lhs[0] = *(type *)lhs[0] <reduction-op> *(type *)rhs[0]; 736 /// *(type *)lhs[1] = *(type *)lhs[1] <reduction-op> *(type *)rhs[1]; 737 /// // ... 738 /// } 739 /// \endcode 740 /// 741 /// \param Loc The location where the reduction was 742 /// encountered. Must be within the associate 743 /// directive and after the last local access to the 744 /// reduction variables. 745 /// \param AllocaIP An insertion point suitable for allocas usable 746 /// in reductions. 747 /// \param ReductionInfos A list of info on each reduction variable. 748 /// \param IsNoWait A flag set if the reduction is marked as nowait. 749 InsertPointTy createReductions(const LocationDescription &Loc, 750 InsertPointTy AllocaIP, 751 ArrayRef<ReductionInfo> ReductionInfos, 752 bool IsNoWait = false); 753 754 ///} 755 756 /// Return the insertion point used by the underlying IRBuilder. 757 InsertPointTy getInsertionPoint() { return Builder.saveIP(); } 758 759 /// Update the internal location to \p Loc. 760 bool updateToLocation(const LocationDescription &Loc) { 761 Builder.restoreIP(Loc.IP); 762 Builder.SetCurrentDebugLocation(Loc.DL); 763 return Loc.IP.getBlock() != nullptr; 764 } 765 766 /// Return the function declaration for the runtime function with \p FnID. 
FunctionCallee getOrCreateRuntimeFunction(Module &M,
                                          omp::RuntimeFunction FnID);

/// Return the runtime function identified by \p FnID as a Function pointer.
/// NOTE(review): takes no Module argument, so presumably it resolves the
/// function in the builder's own module -- confirm against the definition.
Function *getOrCreateRuntimeFunctionPtr(omp::RuntimeFunction FnID);

/// Return the (LLVM-IR) string describing the source location \p LocStr.
Constant *getOrCreateSrcLocStr(StringRef LocStr, uint32_t &SrcLocStrSize);

/// Return the (LLVM-IR) string describing the default source location.
Constant *getOrCreateDefaultSrcLocStr(uint32_t &SrcLocStrSize);

/// Return the (LLVM-IR) string describing the source location identified by
/// the arguments.
Constant *getOrCreateSrcLocStr(StringRef FunctionName, StringRef FileName,
                               unsigned Line, unsigned Column,
                               uint32_t &SrcLocStrSize);

/// Return the (LLVM-IR) string describing the DebugLoc \p DL. Use \p F as
/// fallback if \p DL does not specify the function name.
Constant *getOrCreateSrcLocStr(DebugLoc DL, uint32_t &SrcLocStrSize,
                               Function *F = nullptr);

/// Return the (LLVM-IR) string describing the source location \p Loc.
Constant *getOrCreateSrcLocStr(const LocationDescription &Loc,
                               uint32_t &SrcLocStrSize);

/// Return an ident_t* encoding the source location \p SrcLocStr and \p Flags.
/// TODO: Create an enum class for the Reserve2Flags
Constant *getOrCreateIdent(Constant *SrcLocStr, uint32_t SrcLocStrSize,
                           omp::IdentFlag Flags = omp::IdentFlag(0),
                           unsigned Reserve2Flags = 0);

/// Create a hidden global flag \p Name in the module with initial value \p
/// Value.
GlobalValue *createGlobalFlag(unsigned Value, StringRef Name);

/// Create an offloading section struct used to register this global at
/// runtime.
///
/// Type struct __tgt_offload_entry{
///   void    *addr;      // Pointer to the offload entry info.
///                       // (function or global)
///   char    *name;      // Name of the function or global.
///   size_t  size;       // Size of the entry info (0 if it is a function).
///   int32_t flags;
///   int32_t reserved;
/// };
///
/// \param Addr The pointer to the global being registered.
/// \param Name The symbol name associated with the global.
/// \param Size The size in bytes of the global (0 for functions).
/// \param Flags Flags associated with the entry.
/// \param SectionName The section this entry will be placed at.
void emitOffloadingEntry(Constant *Addr, StringRef Name, uint64_t Size,
                         int32_t Flags,
                         StringRef SectionName = "omp_offloading_entries");

/// Generate control flow and cleanup for cancellation.
///
/// \param CancelFlag Flag indicating if the cancellation is performed.
/// \param CanceledDirective The kind of directive that is canceled.
/// \param ExitCB Extra code to be generated in the exit block.
void emitCancelationCheckImpl(Value *CancelFlag,
                              omp::Directive CanceledDirective,
                              FinalizeCallbackTy ExitCB = {});

/// Generate a target region entry call.
///
/// \param Loc The location at which the request originated and is fulfilled.
/// \param Return Return value of the created function returned by reference.
/// \param Ident NOTE(review): undocumented in the original; presumably the
///        ident_t* describing the source location of the call -- confirm.
/// \param DeviceID Identifier for the device via the 'device' clause.
/// \param NumTeams Number of teams for the region via the 'num_teams' clause
///                 or 0 if unspecified and -1 if there is no 'teams' clause.
/// \param NumThreads Number of threads via the 'thread_limit' clause.
/// \param HostPtr Pointer to the host-side pointer of the target kernel.
/// \param KernelArgs Array of arguments to the kernel.
/// \param NoWaitArgs Optional array of arguments to the nowait kernel.
InsertPointTy emitTargetKernel(const LocationDescription &Loc, Value *&Return,
                               Value *Ident, Value *DeviceID, Value *NumTeams,
                               Value *NumThreads, Value *HostPtr,
                               ArrayRef<Value *> KernelArgs,
                               ArrayRef<Value *> NoWaitArgs = {});

/// Generate a barrier runtime call.
851 /// 852 /// \param Loc The location at which the request originated and is fulfilled. 853 /// \param DK The directive which caused the barrier 854 /// \param ForceSimpleCall Flag to force a simple (=non-cancellation) barrier. 855 /// \param CheckCancelFlag Flag to indicate a cancel barrier return value 856 /// should be checked and acted upon. 857 /// 858 /// \returns The insertion point after the barrier. 859 InsertPointTy emitBarrierImpl(const LocationDescription &Loc, 860 omp::Directive DK, bool ForceSimpleCall, 861 bool CheckCancelFlag); 862 863 /// Generate a flush runtime call. 864 /// 865 /// \param Loc The location at which the request originated and is fulfilled. 866 void emitFlush(const LocationDescription &Loc); 867 868 /// The finalization stack made up of finalize callbacks currently in-flight, 869 /// wrapped into FinalizationInfo objects that reference also the finalization 870 /// target block and the kind of cancellable directive. 871 SmallVector<FinalizationInfo, 8> FinalizationStack; 872 873 /// Return true if the last entry in the finalization stack is of kind \p DK 874 /// and cancellable. 875 bool isLastFinalizationInfoCancellable(omp::Directive DK) { 876 return !FinalizationStack.empty() && 877 FinalizationStack.back().IsCancellable && 878 FinalizationStack.back().DK == DK; 879 } 880 881 /// Generate a taskwait runtime call. 882 /// 883 /// \param Loc The location at which the request originated and is fulfilled. 884 void emitTaskwaitImpl(const LocationDescription &Loc); 885 886 /// Generate a taskyield runtime call. 887 /// 888 /// \param Loc The location at which the request originated and is fulfilled. 889 void emitTaskyieldImpl(const LocationDescription &Loc); 890 891 /// Return the current thread ID. 892 /// 893 /// \param Ident The ident (ident_t*) describing the query origin. 894 Value *getOrCreateThreadID(Value *Ident); 895 896 /// The underlying LLVM-IR module 897 Module &M; 898 899 /// The LLVM-IR Builder used to create IR. 
900 IRBuilder<> Builder; 901 902 /// Map to remember source location strings 903 StringMap<Constant *> SrcLocStrMap; 904 905 /// Map to remember existing ident_t*. 906 DenseMap<std::pair<Constant *, uint64_t>, Constant *> IdentMap; 907 908 /// Helper that contains information about regions we need to outline 909 /// during finalization. 910 struct OutlineInfo { 911 using PostOutlineCBTy = std::function<void(Function &)>; 912 PostOutlineCBTy PostOutlineCB; 913 BasicBlock *EntryBB, *ExitBB, *OuterAllocaBB; 914 SmallVector<Value *, 2> ExcludeArgsFromAggregate; 915 916 /// Collect all blocks in between EntryBB and ExitBB in both the given 917 /// vector and set. 918 void collectBlocks(SmallPtrSetImpl<BasicBlock *> &BlockSet, 919 SmallVectorImpl<BasicBlock *> &BlockVector); 920 921 /// Return the function that contains the region to be outlined. 922 Function *getFunction() const { return EntryBB->getParent(); } 923 }; 924 925 /// Collection of regions that need to be outlined during finalization. 926 SmallVector<OutlineInfo, 16> OutlineInfos; 927 928 /// Collection of owned canonical loop objects that eventually need to be 929 /// free'd. 930 std::forward_list<CanonicalLoopInfo> LoopInfos; 931 932 /// Add a new region that will be outlined later. 933 void addOutlineInfo(OutlineInfo &&OI) { OutlineInfos.emplace_back(OI); } 934 935 /// An ordered map of auto-generated variables to their unique names. 936 /// It stores variables with the following names: 1) ".gomp_critical_user_" + 937 /// <critical_section_name> + ".var" for "omp critical" directives; 2) 938 /// <mangled_name_for_global_var> + ".cache." for cache for threadprivate 939 /// variables. 940 StringMap<AssertingVH<Constant>, BumpPtrAllocator> InternalVars; 941 942 /// Create the global variable holding the offload mappings information. 
GlobalVariable *createOffloadMaptypes(SmallVectorImpl<uint64_t> &Mappings,
                                      std::string VarName);

/// Create the global variable holding the offload names information.
GlobalVariable *
createOffloadMapnames(SmallVectorImpl<llvm::Constant *> &Names,
                      std::string VarName);

/// The three allocas handed to mapper calls; all start out null.
/// NOTE(review): presumably filled in by createMapperAllocas -- confirm.
struct MapperAllocas {
  AllocaInst *ArgsBase = nullptr;
  AllocaInst *Args = nullptr;
  AllocaInst *ArgSizes = nullptr;
};

/// Create the allocas instruction used in call to mapper functions.
void createMapperAllocas(const LocationDescription &Loc,
                         InsertPointTy AllocaIP, unsigned NumOperands,
                         struct MapperAllocas &MapperAllocas);

/// Create the call for the target mapper function.
/// \param Loc The source location description.
/// \param MapperFunc Function to be called.
/// \param SrcLocInfo Source location information global.
/// \param MaptypesArg The argument types.
/// \param MapnamesArg The argument names.
/// \param MapperAllocas The AllocaInst used for the call.
/// \param DeviceID Device ID for the call.
/// \param NumOperands Number of operands in the call.
void emitMapperCall(const LocationDescription &Loc, Function *MapperFunc,
                    Value *SrcLocInfo, Value *MaptypesArg, Value *MapnamesArg,
                    struct MapperAllocas &MapperAllocas, int64_t DeviceID,
                    unsigned NumOperands);

public:
/// Generator for __kmpc_copyprivate
///
/// \param Loc The source location description.
/// \param BufSize Number of elements in the buffer.
/// \param CpyBuf List of pointers to data to be copied.
/// \param CpyFn function to call for copying data.
/// \param DidIt flag variable; 1 for 'single' thread, 0 otherwise.
///
/// \return The insertion position *after* the CopyPrivate call.
InsertPointTy createCopyPrivate(const LocationDescription &Loc,
                                llvm::Value *BufSize, llvm::Value *CpyBuf,
                                llvm::Value *CpyFn, llvm::Value *DidIt);

/// Generator for '#omp single'
///
/// \param Loc The source location description.
/// \param BodyGenCB Callback that will generate the region code.
/// \param FiniCB Callback to finalize variable copies.
/// \param IsNowait If false, a barrier is emitted.
/// \param DidIt Local variable used as a flag to indicate 'single' thread
///
/// \returns The insertion position *after* the single call.
InsertPointTy createSingle(const LocationDescription &Loc,
                           BodyGenCallbackTy BodyGenCB,
                           FinalizeCallbackTy FiniCB, bool IsNowait,
                           llvm::Value *DidIt);

/// Generator for '#omp master'
///
/// \param Loc The insert and source location description.
/// \param BodyGenCB Callback that will generate the region code.
/// \param FiniCB Callback to finalize variable copies.
///
/// \returns The insertion position *after* the master.
InsertPointTy createMaster(const LocationDescription &Loc,
                           BodyGenCallbackTy BodyGenCB,
                           FinalizeCallbackTy FiniCB);

/// Generator for '#omp masked'
///
/// \param Loc The insert and source location description.
/// \param BodyGenCB Callback that will generate the region code.
/// \param FiniCB Callback to finalize variable copies.
/// \param Filter NOTE(review): undocumented in the original; presumably the
///        value of the 'filter' clause selecting the executing thread --
///        confirm.
///
/// \returns The insertion position *after* the masked.
InsertPointTy createMasked(const LocationDescription &Loc,
                           BodyGenCallbackTy BodyGenCB,
                           FinalizeCallbackTy FiniCB, Value *Filter);

/// Generator for '#omp critical'
///
/// \param Loc The insert and source location description.
/// \param BodyGenCB Callback that will generate the region body code.
/// \param FiniCB Callback to finalize variable copies.
/// \param CriticalName name of the lock used by the critical directive
/// \param HintInst Hint Instruction for hint clause associated with critical
///
/// \returns The insertion position *after* the critical.
InsertPointTy createCritical(const LocationDescription &Loc,
                             BodyGenCallbackTy BodyGenCB,
                             FinalizeCallbackTy FiniCB,
                             StringRef CriticalName, Value *HintInst);

/// Generator for '#omp ordered depend (source | sink)'
///
/// \param Loc The insert and source location description.
/// \param AllocaIP The insertion point to be used for alloca instructions.
/// \param NumLoops The number of loops in depend clause.
/// \param StoreValues The value will be stored in vector address.
/// \param Name The name of alloca instruction.
/// \param IsDependSource If true, depend source; otherwise, depend sink.
///
/// \return The insertion position *after* the ordered.
InsertPointTy createOrderedDepend(const LocationDescription &Loc,
                                  InsertPointTy AllocaIP, unsigned NumLoops,
                                  ArrayRef<llvm::Value *> StoreValues,
                                  const Twine &Name, bool IsDependSource);

/// Generator for '#omp ordered [threads | simd]'
///
/// \param Loc The insert and source location description.
/// \param BodyGenCB Callback that will generate the region code.
/// \param FiniCB Callback to finalize variable copies.
/// \param IsThreads If true, with threads clause or without clause;
/// otherwise, with simd clause.
///
/// \returns The insertion position *after* the ordered.
InsertPointTy createOrderedThreadsSimd(const LocationDescription &Loc,
                                       BodyGenCallbackTy BodyGenCB,
                                       FinalizeCallbackTy FiniCB,
                                       bool IsThreads);

/// Generator for '#omp sections'
///
/// \param Loc The insert and source location description.
/// \param AllocaIP The insertion points to be used for alloca instructions.
/// \param SectionCBs Callbacks that will generate body of each section.
/// \param PrivCB Callback to copy a given variable (think copy constructor).
/// \param FiniCB Callback to finalize variable copies.
/// \param IsCancellable Flag to indicate a cancellable parallel region.
/// \param IsNowait If true, barrier - to ensure all sections are executed
/// before moving forward will not be generated.
/// \returns The insertion position *after* the sections.
InsertPointTy createSections(const LocationDescription &Loc,
                             InsertPointTy AllocaIP,
                             ArrayRef<StorableBodyGenCallbackTy> SectionCBs,
                             PrivatizeCallbackTy PrivCB,
                             FinalizeCallbackTy FiniCB, bool IsCancellable,
                             bool IsNowait);

/// Generator for '#omp section'
///
/// \param Loc The insert and source location description.
/// \param BodyGenCB Callback that will generate the region body code.
/// \param FiniCB Callback to finalize variable copies.
/// \returns The insertion position *after* the section.
InsertPointTy createSection(const LocationDescription &Loc,
                            BodyGenCallbackTy BodyGenCB,
                            FinalizeCallbackTy FiniCB);

/// Generate conditional branch and relevant BasicBlocks through which private
/// threads copy the 'copyin' variables from Master copy to threadprivate
/// copies.
///
/// \param IP insertion block for copyin conditional
/// \param MasterAddr a pointer to the master variable
/// \param PrivateAddr a pointer to the threadprivate variable
/// \param IntPtrTy Pointer size type
/// \param BranchtoEnd Create a branch between the copyin.not.master blocks
///                    and copy.in.end block
///
/// \returns The insertion point where copying operation to be emitted.
InsertPointTy createCopyinClauseBlocks(InsertPointTy IP, Value *MasterAddr,
                                       Value *PrivateAddr,
                                       llvm::IntegerType *IntPtrTy,
                                       bool BranchtoEnd = true);

/// Create a runtime call for kmpc_Alloc
///
/// \param Loc The insert and source location description.
/// \param Size Size of allocated memory space
/// \param Allocator Allocator information instruction
/// \param Name Name of call Instruction for OMP_alloc
///
/// \returns CallInst to the OMP_Alloc call
CallInst *createOMPAlloc(const LocationDescription &Loc, Value *Size,
                         Value *Allocator, std::string Name = "");

/// Create a runtime call for kmpc_free
///
/// \param Loc The insert and source location description.
/// \param Addr Address of memory space to be freed
/// \param Allocator Allocator information instruction
/// \param Name Name of call Instruction for OMP_Free
///
/// \returns CallInst to the OMP_Free call
CallInst *createOMPFree(const LocationDescription &Loc, Value *Addr,
                        Value *Allocator, std::string Name = "");

/// Create a runtime call for kmpc_threadprivate_cached
///
/// \param Loc The insert and source location description.
/// \param Pointer pointer to data to be cached
/// \param Size size of data to be cached
/// \param Name Name of call Instruction for callinst
///
/// \returns CallInst to the thread private cache call.
CallInst *createCachedThreadPrivate(const LocationDescription &Loc,
                                    llvm::Value *Pointer,
                                    llvm::ConstantInt *Size,
                                    const llvm::Twine &Name = Twine(""));

/// Create a runtime call for __tgt_interop_init
///
/// \param Loc The insert and source location description.
/// \param InteropVar variable to be allocated
/// \param InteropType type of interop operation
/// \param Device device to which offloading will occur
/// \param NumDependences number of dependence variables
/// \param DependenceAddress pointer to dependence variables
/// \param HaveNowaitClause does nowait clause exist
///
/// \returns CallInst to the __tgt_interop_init call
CallInst *createOMPInteropInit(const LocationDescription &Loc,
                               Value *InteropVar,
                               omp::OMPInteropType InteropType, Value *Device,
                               Value *NumDependences,
                               Value *DependenceAddress,
                               bool HaveNowaitClause);

/// Create a runtime call for __tgt_interop_destroy
///
/// \param Loc The insert and source location description.
/// \param InteropVar variable to be allocated
/// \param Device device to which offloading will occur
/// \param NumDependences number of dependence variables
/// \param DependenceAddress pointer to dependence variables
/// \param HaveNowaitClause does nowait clause exist
///
/// \returns CallInst to the __tgt_interop_destroy call
CallInst *createOMPInteropDestroy(const LocationDescription &Loc,
                                  Value *InteropVar, Value *Device,
                                  Value *NumDependences,
                                  Value *DependenceAddress,
                                  bool HaveNowaitClause);

/// Create a runtime call for __tgt_interop_use
///
/// \param Loc The insert and source location description.
/// \param InteropVar variable to be allocated
/// \param Device device to which offloading will occur
/// \param NumDependences number of dependence variables
/// \param DependenceAddress pointer to dependence variables
/// \param HaveNowaitClause does nowait clause exist
///
/// \returns CallInst to the __tgt_interop_use call
CallInst *createOMPInteropUse(const LocationDescription &Loc,
                              Value *InteropVar, Value *Device,
                              Value *NumDependences, Value *DependenceAddress,
                              bool HaveNowaitClause);

/// The `omp target` interface
///
/// For more information about the usage of this interface,
/// \see openmp/libomptarget/deviceRTLs/common/include/target.h
///
///{

/// Create a runtime call for kmpc_target_init
///
/// \param Loc The insert and source location description.
/// \param IsSPMD Flag to indicate if the kernel is an SPMD kernel or not.
/// \param RequiresFullRuntime Indicate if a full device runtime is necessary.
InsertPointTy createTargetInit(const LocationDescription &Loc, bool IsSPMD,
                               bool RequiresFullRuntime);

/// Create a runtime call for kmpc_target_deinit
///
/// \param Loc The insert and source location description.
/// \param IsSPMD Flag to indicate if the kernel is an SPMD kernel or not.
/// \param RequiresFullRuntime Indicate if a full device runtime is necessary.
void createTargetDeinit(const LocationDescription &Loc, bool IsSPMD,
                        bool RequiresFullRuntime);

///}

/// Declarations for LLVM-IR types (simple, array, function and structure) are
/// generated below. Their names are defined and used in OpenMPKinds.def. Here
/// we provide the declarations, the initializeTypes function will provide the
/// values.
///
///{
// Each macro below expands, for every entry of the matching kind in
// OMPKinds.def, to null-initialized member declarations.
#define OMP_TYPE(VarName, InitValue) Type *VarName = nullptr;
#define OMP_ARRAY_TYPE(VarName, ElemTy, ArraySize)                             \
  ArrayType *VarName##Ty = nullptr;                                            \
  PointerType *VarName##PtrTy = nullptr;
#define OMP_FUNCTION_TYPE(VarName, IsVarArg, ReturnType, ...)                  \
  FunctionType *VarName = nullptr;                                             \
  PointerType *VarName##Ptr = nullptr;
#define OMP_STRUCT_TYPE(VarName, StrName, ...)                                 \
  StructType *VarName = nullptr;                                               \
  PointerType *VarName##Ptr = nullptr;
#include "llvm/Frontend/OpenMP/OMPKinds.def"

///}

private:
/// Create all simple and struct types exposed by the runtime and remember
/// the llvm::PointerTypes of them for easy access later.
void initializeTypes(Module &M);

/// Common interface for generating entry calls for OMP Directives.
/// If the directive has a region/body, it will set the insertion
/// point to the body.
///
/// \param OMPD Directive to generate entry blocks for
/// \param EntryCall Call to the entry OMP Runtime Function
/// \param ExitBB block where the region ends.
/// \param Conditional indicate if the entry call result will be used
///        to evaluate a conditional of whether a thread will execute
///        body code or not.
///
/// \return The insertion position in exit block
InsertPointTy emitCommonDirectiveEntry(omp::Directive OMPD, Value *EntryCall,
                                       BasicBlock *ExitBB,
                                       bool Conditional = false);

/// Common interface to finalize the region
///
/// \param OMPD Directive to generate exiting code for
/// \param FinIP Insertion point for emitting Finalization code and exit call
/// \param ExitCall Call to the ending OMP Runtime Function
/// \param HasFinalize indicate if the directive will require finalization
///         and has a finalization callback in the stack that
///        should be called.
///
/// \return The insertion position in exit block
InsertPointTy emitCommonDirectiveExit(omp::Directive OMPD,
                                      InsertPointTy FinIP,
                                      Instruction *ExitCall,
                                      bool HasFinalize = true);

/// Common Interface to generate OMP inlined regions
///
/// \param OMPD Directive to generate inlined region for
/// \param EntryCall Call to the entry OMP Runtime Function
/// \param ExitCall Call to the ending OMP Runtime Function
/// \param BodyGenCB Body code generation callback.
/// \param FiniCB Finalization Callback. Will be called when finalizing region
/// \param Conditional indicate if the entry call result will be used
///        to evaluate a conditional of whether a thread will execute
///        body code or not.
/// \param HasFinalize indicate if the directive will require finalization
///        and has a finalization callback in the stack that
///        should be called.
/// \param IsCancellable if HasFinalize is set to true, indicate if the
///        directive should be cancellable.
/// \return The insertion point after the region
InsertPointTy
EmitOMPInlinedRegion(omp::Directive OMPD, Instruction *EntryCall,
                     Instruction *ExitCall, BodyGenCallbackTy BodyGenCB,
                     FinalizeCallbackTy FiniCB, bool Conditional = false,
                     bool HasFinalize = true, bool IsCancellable = false);

/// Get the platform-specific name separator.
/// \param Parts different parts of the final name that needs separation
/// \param FirstSeparator First separator used between the initial two
///        parts of the name.
/// \param Separator separator used between all of the rest consecutive
///        parts of the name
static std::string getNameWithSeparators(ArrayRef<StringRef> Parts,
                                         StringRef FirstSeparator,
                                         StringRef Separator);

/// Gets (if a variable with the given name already exists) or creates an
/// internal global variable with the specified Name. The created variable has
/// linkage CommonLinkage by default and is initialized by null value.
/// \param Ty Type of the global variable. If it already exists, the type
/// must be the same.
/// \param Name Name of the variable.
/// \param AddressSpace NOTE(review): undocumented in the original; presumably
/// the address space the global is created in (defaults to 0) -- confirm.
Constant *getOrCreateOMPInternalVariable(Type *Ty, const Twine &Name,
                                         unsigned AddressSpace = 0);

/// Returns corresponding lock object for the specified critical region
/// name. If the lock object does not exist it is created, otherwise the
/// reference to the existing copy is returned.
/// \param CriticalName Name of the critical region.
///
Value *getOMPCriticalRegionLock(StringRef CriticalName);

/// Callback type for Atomic Expression update
/// ex:
/// \code{.cpp}
/// unsigned x = 0;
/// #pragma omp atomic update
/// x = Expr(x_old);  //Expr() is any legal operation
/// \endcode
///
/// \param XOld the value of the atomic memory address to use for update
/// \param IRB reference to the IRBuilder to use
///
/// \returns Value to update X to.
using AtomicUpdateCallbackTy =
    const function_ref<Value *(Value *XOld, IRBuilder<> &IRB)>;

private:
/// The kinds of OpenMP atomic operation distinguished by the builder.
enum AtomicKind { Read, Write, Update, Capture, Compare };

/// Determine whether to emit flush or not
///
/// \param Loc    The insert and source location description.
/// \param AO     The required atomic ordering
/// \param AK     The OpenMP atomic operation kind used.
///
/// \returns whether a flush was emitted or not
bool checkAndEmitFlushAfterAtomic(const LocationDescription &Loc,
                                  AtomicOrdering AO, AtomicKind AK);

/// Emit atomic update for constructs: X = X BinOp Expr ,or X = Expr BinOp X
/// For complex Operations: X = UpdateOp(X) => CmpExch X, old_X, UpdateOp(X)
/// Only Scalar data types.
///
/// \param AllocaIP     The insertion point to be used for alloca
///                     instructions.
/// \param X            The target atomic pointer to be updated
/// \param XElemTy      The element type of the atomic pointer.
/// \param Expr         The value to update X with.
/// \param AO           Atomic ordering of the generated atomic
///                     instructions.
/// \param RMWOp        The binary operation used for update. If
///                     operation is not supported by atomicRMW,
///                     or belong to {FADD, FSUB, BAD_BINOP}.
///                     Then a `cmpExch` based atomic will be generated.
/// \param UpdateOp     Code generator for complex expressions that cannot be
///                     expressed through atomicrmw instruction.
/// \param VolatileX    true if \a X is volatile.
/// \param IsXBinopExpr true if \a X is Left H.S. in Right H.S. part of the
///                     update expression, false otherwise.
///                     (e.g. true for X = X BinOp Expr)
///
/// \returns A pair of the old value of X before the update, and the value
///          used for the update.
std::pair<Value *, Value *>
emitAtomicUpdate(InsertPointTy AllocaIP, Value *X, Type *XElemTy, Value *Expr,
                 AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp,
                 AtomicUpdateCallbackTy &UpdateOp, bool VolatileX,
                 bool IsXBinopExpr);

/// Emit the binary op. described by \p RMWOp, using \p Src1 and \p Src2 .
///
/// \returns The instruction
Value *emitRMWOpAsInstruction(Value *Src1, Value *Src2,
                              AtomicRMWInst::BinOp RMWOp);

public:
/// A struct to pack relevant information while generating atomic Ops.
/// All members default to null / false.
struct AtomicOpValue {
  Value *Var = nullptr;
  Type *ElemTy = nullptr;
  bool IsSigned = false;
  bool IsVolatile = false;
};

/// Emit atomic Read for : V = X --- Only Scalar data types.
///
/// \param Loc    The insert and source location description.
/// \param X      The target pointer to be atomically read
/// \param V      Memory address where to store atomically read
///               value
/// \param AO     Atomic ordering of the generated atomic
///               instructions.
///
/// \return Insertion point after generated atomic read IR.
InsertPointTy createAtomicRead(const LocationDescription &Loc,
                               AtomicOpValue &X, AtomicOpValue &V,
                               AtomicOrdering AO);

/// Emit atomic write for : X = Expr --- Only Scalar data types.
///
/// \param Loc    The insert and source location description.
/// \param X      The target pointer to be atomically written to
/// \param Expr   The value to store.
/// \param AO     Atomic ordering of the generated atomic
///               instructions.
///
/// \return Insertion point after generated atomic Write IR.
InsertPointTy createAtomicWrite(const LocationDescription &Loc,
                                AtomicOpValue &X, Value *Expr,
                                AtomicOrdering AO);

/// Emit atomic update for constructs: X = X BinOp Expr ,or X = Expr BinOp X
/// For complex Operations: X = UpdateOp(X) => CmpExch X, old_X, UpdateOp(X)
/// Only Scalar data types.
///
/// \param Loc      The insert and source location description.
/// \param AllocaIP The insertion point to be used for alloca instructions.
/// \param X        The target atomic pointer to be updated
/// \param Expr     The value to update X with.
1437 /// \param AO Atomic ordering of the generated atomic instructions. 1438 /// \param RMWOp The binary operation used for update. If operation 1439 /// is not supported by atomicRMW, or belong to 1440 /// {FADD, FSUB, BAD_BINOP}. Then a `cmpExch` based 1441 /// atomic will be generated. 1442 /// \param UpdateOp Code generator for complex expressions that cannot be 1443 /// expressed through atomicrmw instruction. 1444 /// \param IsXBinopExpr true if \a X is Left H.S. in Right H.S. part of the 1445 /// update expression, false otherwise. 1446 /// (e.g. true for X = X BinOp Expr) 1447 /// 1448 /// \return Insertion point after generated atomic update IR. 1449 InsertPointTy createAtomicUpdate(const LocationDescription &Loc, 1450 InsertPointTy AllocaIP, AtomicOpValue &X, 1451 Value *Expr, AtomicOrdering AO, 1452 AtomicRMWInst::BinOp RMWOp, 1453 AtomicUpdateCallbackTy &UpdateOp, 1454 bool IsXBinopExpr); 1455 1456 /// Emit atomic update for constructs: --- Only Scalar data types 1457 /// V = X; X = X BinOp Expr , 1458 /// X = X BinOp Expr; V = X, 1459 /// V = X; X = Expr BinOp X, 1460 /// X = Expr BinOp X; V = X, 1461 /// V = X; X = UpdateOp(X), 1462 /// X = UpdateOp(X); V = X, 1463 /// 1464 /// \param Loc The insert and source location description. 1465 /// \param AllocaIP The insertion point to be used for alloca instructions. 1466 /// \param X The target atomic pointer to be updated 1467 /// \param V Memory address where to store captured value 1468 /// \param Expr The value to update X with. 1469 /// \param AO Atomic ordering of the generated atomic instructions 1470 /// \param RMWOp The binary operation used for update. If 1471 /// operation is not supported by atomicRMW, or belong to 1472 /// {FADD, FSUB, BAD_BINOP}. Then a cmpExch based 1473 /// atomic will be generated. 1474 /// \param UpdateOp Code generator for complex expressions that cannot be 1475 /// expressed through atomicrmw instruction. 
1476 /// \param UpdateExpr true if X is an in place update of the form 1477 /// X = X BinOp Expr or X = Expr BinOp X 1478 /// \param IsXBinopExpr true if X is Left H.S. in Right H.S. part of the 1479 /// update expression, false otherwise. 1480 /// (e.g. true for X = X BinOp Expr) 1481 /// \param IsPostfixUpdate true if original value of 'x' must be stored in 1482 /// 'v', not an updated one. 1483 /// 1484 /// \return Insertion point after generated atomic capture IR. 1485 InsertPointTy 1486 createAtomicCapture(const LocationDescription &Loc, InsertPointTy AllocaIP, 1487 AtomicOpValue &X, AtomicOpValue &V, Value *Expr, 1488 AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp, 1489 AtomicUpdateCallbackTy &UpdateOp, bool UpdateExpr, 1490 bool IsPostfixUpdate, bool IsXBinopExpr); 1491 1492 /// Emit atomic compare for constructs: --- Only scalar data types 1493 /// cond-expr-stmt: 1494 /// x = x ordop expr ? expr : x; 1495 /// x = expr ordop x ? expr : x; 1496 /// x = x == e ? d : x; 1497 /// x = e == x ? d : x; (this one is not in the spec) 1498 /// cond-update-stmt: 1499 /// if (x ordop expr) { x = expr; } 1500 /// if (expr ordop x) { x = expr; } 1501 /// if (x == e) { x = d; } 1502 /// if (e == x) { x = d; } (this one is not in the spec) 1503 /// conditional-update-capture-atomic: 1504 /// v = x; cond-update-stmt; (IsPostfixUpdate=true, IsFailOnly=false) 1505 /// cond-update-stmt; v = x; (IsPostfixUpdate=false, IsFailOnly=false) 1506 /// if (x == e) { x = d; } else { v = x; } (IsPostfixUpdate=false, 1507 /// IsFailOnly=true) 1508 /// r = x == e; if (r) { x = d; } (IsPostfixUpdate=false, IsFailOnly=false) 1509 /// r = x == e; if (r) { x = d; } else { v = x; } (IsPostfixUpdate=false, 1510 /// IsFailOnly=true) 1511 /// 1512 /// \param Loc The insert and source location description. 1513 /// \param X The target atomic pointer to be updated. 1514 /// \param V Memory address where to store captured value (for 1515 /// compare capture only). 
1516 /// \param R Memory address where to store comparison result 1517 /// (for compare capture with '==' only). 1518 /// \param E The expected value ('e') for forms that use an 1519 /// equality comparison or an expression ('expr') for 1520 /// forms that use 'ordop' (logically an atomic maximum or 1521 /// minimum). 1522 /// \param D The desired value for forms that use an equality 1523 /// comparison. If forms that use 'ordop', it should be 1524 /// \p nullptr. 1525 /// \param AO Atomic ordering of the generated atomic instructions. 1526 /// \param Op Atomic compare operation. It can only be ==, <, or >. 1527 /// \param IsXBinopExpr True if the conditional statement is in the form where 1528 /// x is on LHS. It only matters for < or >. 1529 /// \param IsPostfixUpdate True if original value of 'x' must be stored in 1530 /// 'v', not an updated one (for compare capture 1531 /// only). 1532 /// \param IsFailOnly True if the original value of 'x' is stored to 'v' 1533 /// only when the comparison fails. This is only valid for 1534 /// the case the comparison is '=='. 1535 /// 1536 /// \return Insertion point after generated atomic capture IR. 1537 InsertPointTy 1538 createAtomicCompare(const LocationDescription &Loc, AtomicOpValue &X, 1539 AtomicOpValue &V, AtomicOpValue &R, Value *E, Value *D, 1540 AtomicOrdering AO, omp::OMPAtomicCompareOp Op, 1541 bool IsXBinopExpr, bool IsPostfixUpdate, bool IsFailOnly); 1542 1543 /// Create the control flow structure of a canonical OpenMP loop. 1544 /// 1545 /// The emitted loop will be disconnected, i.e. no edge to the loop's 1546 /// preheader and no terminator in the AfterBB. The OpenMPIRBuilder's 1547 /// IRBuilder location is not preserved. 1548 /// 1549 /// \param DL DebugLoc used for the instructions in the skeleton. 1550 /// \param TripCount Value to be used for the trip count. 1551 /// \param F Function in which to insert the BasicBlocks. 
1552 /// \param PreInsertBefore Where to insert BBs that execute before the body, 1553 /// typically the body itself. 1554 /// \param PostInsertBefore Where to insert BBs that execute after the body. 1555 /// \param Name Base name used to derive BB 1556 /// and instruction names. 1557 /// 1558 /// \returns The CanonicalLoopInfo that represents the emitted loop. 1559 CanonicalLoopInfo *createLoopSkeleton(DebugLoc DL, Value *TripCount, 1560 Function *F, 1561 BasicBlock *PreInsertBefore, 1562 BasicBlock *PostInsertBefore, 1563 const Twine &Name = {}); 1564 }; 1565 1566 /// Class to represented the control flow structure of an OpenMP canonical loop. 1567 /// 1568 /// The control-flow structure is standardized for easy consumption by 1569 /// directives associated with loops. For instance, the worksharing-loop 1570 /// construct may change this control flow such that each loop iteration is 1571 /// executed on only one thread. The constraints of a canonical loop in brief 1572 /// are: 1573 /// 1574 /// * The number of loop iterations must have been computed before entering the 1575 /// loop. 1576 /// 1577 /// * Has an (unsigned) logical induction variable that starts at zero and 1578 /// increments by one. 1579 /// 1580 /// * The loop's CFG itself has no side-effects. The OpenMP specification 1581 /// itself allows side-effects, but the order in which they happen, including 1582 /// how often or whether at all, is unspecified. We expect that the frontend 1583 /// will emit those side-effect instructions somewhere (e.g. before the loop) 1584 /// such that the CanonicalLoopInfo itself can be side-effect free. 1585 /// 1586 /// Keep in mind that CanonicalLoopInfo is meant to only describe a repeated 1587 /// execution of a loop body that satifies these constraints. It does NOT 1588 /// represent arbitrary SESE regions that happen to contain a loop. Do not use 1589 /// CanonicalLoopInfo for such purposes. 
///
/// The control flow can be described as follows:
///
///     Preheader
///        |
///  /-> Header
///  |     |
///  |    Cond---\
///  |     |     |
///  |    Body   |
///  |    | |    |
///  |   <...>   |
///  |    | |    |
///   \--Latch   |
///              |
///             Exit
///              |
///            After
///
/// The loop is thought to start at PreheaderIP (at the Preheader's terminator,
/// including) and end at AfterIP (at the After's first instruction, excluding).
/// That is, instructions in the Preheader and After blocks (except the
/// Preheader's terminator) are out of CanonicalLoopInfo's control and may have
/// side-effects. Typically, the Preheader is used to compute the loop's trip
/// count. The instructions from BodyIP (at the Body block's first instruction,
/// excluding) until the Latch are also considered outside CanonicalLoopInfo's
/// control and thus can have side-effects. The body block is the single entry
/// point into the loop body, which may contain arbitrary control flow as long
/// as all control paths eventually branch to the Latch block.
///
/// TODO: Consider adding another standardized BasicBlock between Body CFG and
/// Latch to guarantee that there is only a single edge to the latch. It would
/// make loop transformations easier by not needing to consider multiple
/// predecessors of the latch (See redirectAllPredecessorsTo) and would give us
/// an equivalent to PreheaderIP, AfterIP and BodyIP for inserting code that
/// executes after each body iteration.
///
/// There must be no loop-carried dependencies through llvm::Values. This is
/// equivalent to requiring that the Latch has no PHINode and the Header's only
/// PHINode is for the induction variable.
///
/// All code in Header, Cond, Latch and Exit (plus the terminator of the
/// Preheader) is CanonicalLoopInfo's responsibility and its build-up is checked
/// by assertOK(). These blocks are expected to not be modified unless
/// explicitly modifying the CanonicalLoopInfo through a method that applies an
/// OpenMP loop-associated construct such as applyWorkshareLoop, tileLoops,
/// unrollLoop, etc. These methods usually invalidate the CanonicalLoopInfo and
/// re-use its basic blocks. After invalidation, the CanonicalLoopInfo must not
/// be used anymore as its underlying control flow may not exist anymore.
/// Loop-transformation methods such as tileLoops, collapseLoops and unrollLoop
/// may also return a new CanonicalLoopInfo that can be passed to other
/// loop-associated construct implementing methods. These loop-transforming
/// methods may either create a new CanonicalLoopInfo usually using
/// createLoopSkeleton and invalidate the input CanonicalLoopInfo, or reuse and
/// modify one of the input CanonicalLoopInfo and return it as representing the
/// modified loop. What is done is an implementation detail of the
/// transformation-implementing method and callers should always assume that the
/// CanonicalLoopInfo passed to it is invalidated and a new object is returned.
/// Returned CanonicalLoopInfo have the same structure and guarantees as the one
/// created by createCanonicalLoop, such that transforming methods do not have
/// to special case where the CanonicalLoopInfo originated from.
///
/// Generally, methods consuming CanonicalLoopInfo do not need an
/// OpenMPIRBuilder::InsertPointTy as argument, but use the locations of the
/// CanonicalLoopInfo to insert new or modify existing instructions.
Unless 1655 /// documented otherwise, methods consuming CanonicalLoopInfo do not invalidate 1656 /// any InsertPoint that is outside CanonicalLoopInfo's control. Specifically, 1657 /// any InsertPoint in the Preheader, After or Block can still be used after 1658 /// calling such a method. 1659 /// 1660 /// TODO: Provide mechanisms for exception handling and cancellation points. 1661 /// 1662 /// Defined outside OpenMPIRBuilder because nested classes cannot be 1663 /// forward-declared, e.g. to avoid having to include the entire OMPIRBuilder.h. 1664 class CanonicalLoopInfo { 1665 friend class OpenMPIRBuilder; 1666 1667 private: 1668 BasicBlock *Header = nullptr; 1669 BasicBlock *Cond = nullptr; 1670 BasicBlock *Latch = nullptr; 1671 BasicBlock *Exit = nullptr; 1672 1673 /// Add the control blocks of this loop to \p BBs. 1674 /// 1675 /// This does not include any block from the body, including the one returned 1676 /// by getBody(). 1677 /// 1678 /// FIXME: This currently includes the Preheader and After blocks even though 1679 /// their content is (mostly) not under CanonicalLoopInfo's control. 1680 /// Re-evaluated whether this makes sense. 1681 void collectControlBlocks(SmallVectorImpl<BasicBlock *> &BBs); 1682 1683 /// Sets the number of loop iterations to the given value. This value must be 1684 /// valid in the condition block (i.e., defined in the preheader) and is 1685 /// interpreted as an unsigned integer. 1686 void setTripCount(Value *TripCount); 1687 1688 /// Replace all uses of the canonical induction variable in the loop body with 1689 /// a new one. 1690 /// 1691 /// The intended use case is to update the induction variable for an updated 1692 /// iteration space such that it can stay normalized in the 0...tripcount-1 1693 /// range. 
1694 /// 1695 /// The \p Updater is called with the (presumable updated) current normalized 1696 /// induction variable and is expected to return the value that uses of the 1697 /// pre-updated induction values should use instead, typically dependent on 1698 /// the new induction variable. This is a lambda (instead of e.g. just passing 1699 /// the new value) to be able to distinguish the uses of the pre-updated 1700 /// induction variable and uses of the induction varible to compute the 1701 /// updated induction variable value. 1702 void mapIndVar(llvm::function_ref<Value *(Instruction *)> Updater); 1703 1704 public: 1705 /// Returns whether this object currently represents the IR of a loop. If 1706 /// returning false, it may have been consumed by a loop transformation or not 1707 /// been intialized. Do not use in this case; 1708 bool isValid() const { return Header; } 1709 1710 /// The preheader ensures that there is only a single edge entering the loop. 1711 /// Code that must be execute before any loop iteration can be emitted here, 1712 /// such as computing the loop trip count and begin lifetime markers. Code in 1713 /// the preheader is not considered part of the canonical loop. 1714 BasicBlock *getPreheader() const; 1715 1716 /// The header is the entry for each iteration. In the canonical control flow, 1717 /// it only contains the PHINode for the induction variable. 1718 BasicBlock *getHeader() const { 1719 assert(isValid() && "Requires a valid canonical loop"); 1720 return Header; 1721 } 1722 1723 /// The condition block computes whether there is another loop iteration. If 1724 /// yes, branches to the body; otherwise to the exit block. 1725 BasicBlock *getCond() const { 1726 assert(isValid() && "Requires a valid canonical loop"); 1727 return Cond; 1728 } 1729 1730 /// The body block is the single entry for a loop iteration and not controlled 1731 /// by CanonicalLoopInfo. 
It can contain arbitrary control flow but must 1732 /// eventually branch to the \p Latch block. 1733 BasicBlock *getBody() const { 1734 assert(isValid() && "Requires a valid canonical loop"); 1735 return cast<BranchInst>(Cond->getTerminator())->getSuccessor(0); 1736 } 1737 1738 /// Reaching the latch indicates the end of the loop body code. In the 1739 /// canonical control flow, it only contains the increment of the induction 1740 /// variable. 1741 BasicBlock *getLatch() const { 1742 assert(isValid() && "Requires a valid canonical loop"); 1743 return Latch; 1744 } 1745 1746 /// Reaching the exit indicates no more iterations are being executed. 1747 BasicBlock *getExit() const { 1748 assert(isValid() && "Requires a valid canonical loop"); 1749 return Exit; 1750 } 1751 1752 /// The after block is intended for clean-up code such as lifetime end 1753 /// markers. It is separate from the exit block to ensure, analogous to the 1754 /// preheader, it having just a single entry edge and being free from PHI 1755 /// nodes should there be multiple loop exits (such as from break 1756 /// statements/cancellations). 1757 BasicBlock *getAfter() const { 1758 assert(isValid() && "Requires a valid canonical loop"); 1759 return Exit->getSingleSuccessor(); 1760 } 1761 1762 /// Returns the llvm::Value containing the number of loop iterations. It must 1763 /// be valid in the preheader and always interpreted as an unsigned integer of 1764 /// any bit-width. 1765 Value *getTripCount() const { 1766 assert(isValid() && "Requires a valid canonical loop"); 1767 Instruction *CmpI = &Cond->front(); 1768 assert(isa<CmpInst>(CmpI) && "First inst must compare IV with TripCount"); 1769 return CmpI->getOperand(1); 1770 } 1771 1772 /// Returns the instruction representing the current logical induction 1773 /// variable. Always unsigned, always starting at 0 with an increment of one. 
1774 Instruction *getIndVar() const { 1775 assert(isValid() && "Requires a valid canonical loop"); 1776 Instruction *IndVarPHI = &Header->front(); 1777 assert(isa<PHINode>(IndVarPHI) && "First inst must be the IV PHI"); 1778 return IndVarPHI; 1779 } 1780 1781 /// Return the type of the induction variable (and the trip count). 1782 Type *getIndVarType() const { 1783 assert(isValid() && "Requires a valid canonical loop"); 1784 return getIndVar()->getType(); 1785 } 1786 1787 /// Return the insertion point for user code before the loop. 1788 OpenMPIRBuilder::InsertPointTy getPreheaderIP() const { 1789 assert(isValid() && "Requires a valid canonical loop"); 1790 BasicBlock *Preheader = getPreheader(); 1791 return {Preheader, std::prev(Preheader->end())}; 1792 }; 1793 1794 /// Return the insertion point for user code in the body. 1795 OpenMPIRBuilder::InsertPointTy getBodyIP() const { 1796 assert(isValid() && "Requires a valid canonical loop"); 1797 BasicBlock *Body = getBody(); 1798 return {Body, Body->begin()}; 1799 }; 1800 1801 /// Return the insertion point for user code after the loop. 1802 OpenMPIRBuilder::InsertPointTy getAfterIP() const { 1803 assert(isValid() && "Requires a valid canonical loop"); 1804 BasicBlock *After = getAfter(); 1805 return {After, After->begin()}; 1806 }; 1807 1808 Function *getFunction() const { 1809 assert(isValid() && "Requires a valid canonical loop"); 1810 return Header->getParent(); 1811 } 1812 1813 /// Consistency self-check. 1814 void assertOK() const; 1815 1816 /// Invalidate this loop. That is, the underlying IR does not fulfill the 1817 /// requirements of an OpenMP canonical loop anymore. 1818 void invalidate(); 1819 }; 1820 1821 } // end namespace llvm 1822 1823 #endif // LLVM_FRONTEND_OPENMP_OMPIRBUILDER_H 1824