1 //===- IR/OpenMPIRBuilder.h - OpenMP encoding builder for LLVM IR - C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file defines the OpenMPIRBuilder class and helpers used as a convenient 10 // way to create LLVM instructions for OpenMP directives. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #ifndef LLVM_FRONTEND_OPENMP_OMPIRBUILDER_H 15 #define LLVM_FRONTEND_OPENMP_OMPIRBUILDER_H 16 17 #include "llvm/Analysis/MemorySSAUpdater.h" 18 #include "llvm/Frontend/OpenMP/OMPConstants.h" 19 #include "llvm/IR/DebugLoc.h" 20 #include "llvm/IR/IRBuilder.h" 21 #include "llvm/Support/Allocator.h" 22 #include <forward_list> 23 #include <map> 24 #include <optional> 25 26 namespace llvm { 27 class CanonicalLoopInfo; 28 struct TargetRegionEntryInfo; 29 class OffloadEntriesInfoManager; 30 31 /// Move the instruction after an InsertPoint to the beginning of another 32 /// BasicBlock. 33 /// 34 /// The instructions after \p IP are moved to the beginning of \p New which must 35 /// not have any PHINodes. If \p CreateBranch is true, a branch instruction to 36 /// \p New will be added such that there is no semantic change. Otherwise, the 37 /// \p IP insert block remains degenerate and it is up to the caller to insert a 38 /// terminator. 39 void spliceBB(IRBuilderBase::InsertPoint IP, BasicBlock *New, 40 bool CreateBranch); 41 42 /// Splice a BasicBlock at an IRBuilder's current insertion point. Its new 43 /// insert location will stick to after the instruction before the insertion 44 /// point (instead of moving with the instruction the InsertPoint stores 45 /// internally). 
46 void spliceBB(IRBuilder<> &Builder, BasicBlock *New, bool CreateBranch); 47 48 /// Split a BasicBlock at an InsertPoint, even if the block is degenerate 49 /// (missing the terminator). 50 /// 51 /// llvm::SplitBasicBlock and BasicBlock::splitBasicBlock require a well-formed 52 /// BasicBlock. \p Name is used for the new successor block. If \p CreateBranch 53 /// is true, a branch to the new successor will new created such that 54 /// semantically there is no change; otherwise the block of the insertion point 55 /// remains degenerate and it is the caller's responsibility to insert a 56 /// terminator. Returns the new successor block. 57 BasicBlock *splitBB(IRBuilderBase::InsertPoint IP, bool CreateBranch, 58 llvm::Twine Name = {}); 59 60 /// Split a BasicBlock at \p Builder's insertion point, even if the block is 61 /// degenerate (missing the terminator). Its new insert location will stick to 62 /// after the instruction before the insertion point (instead of moving with the 63 /// instruction the InsertPoint stores internally). 64 BasicBlock *splitBB(IRBuilderBase &Builder, bool CreateBranch, 65 llvm::Twine Name = {}); 66 67 /// Split a BasicBlock at \p Builder's insertion point, even if the block is 68 /// degenerate (missing the terminator). Its new insert location will stick to 69 /// after the instruction before the insertion point (instead of moving with the 70 /// instruction the InsertPoint stores internally). 71 BasicBlock *splitBB(IRBuilder<> &Builder, bool CreateBranch, llvm::Twine Name); 72 73 /// Like splitBB, but reuses the current block's name for the new name. 74 BasicBlock *splitBBWithSuffix(IRBuilderBase &Builder, bool CreateBranch, 75 llvm::Twine Suffix = ".split"); 76 77 /// Captures attributes that affect generating LLVM-IR using the 78 /// OpenMPIRBuilder and related classes. Note that not all attributes are 79 /// required for all classes or functions. 
In some use cases the configuration 80 /// is not necessary at all, because because the only functions that are called 81 /// are ones that are not dependent on the configuration. 82 class OpenMPIRBuilderConfig { 83 public: 84 /// Flag for specifying if the compilation is done for embedded device code 85 /// or host code. 86 std::optional<bool> IsEmbedded; 87 88 /// Flag for specifying if the compilation is done for an offloading target, 89 /// like GPU. 90 std::optional<bool> IsTargetCodegen; 91 92 /// Flag for specifying weather a requires unified_shared_memory 93 /// directive is present or not. 94 std::optional<bool> HasRequiresUnifiedSharedMemory; 95 96 // Flag for specifying if offloading is mandatory. 97 std::optional<bool> OpenMPOffloadMandatory; 98 99 /// First separator used between the initial two parts of a name. 100 std::optional<StringRef> FirstSeparator; 101 /// Separator used between all of the rest consecutive parts of s name 102 std::optional<StringRef> Separator; 103 OpenMPIRBuilderConfig()104 OpenMPIRBuilderConfig() {} OpenMPIRBuilderConfig(bool IsEmbedded,bool IsTargetCodegen,bool HasRequiresUnifiedSharedMemory,bool OpenMPOffloadMandatory)105 OpenMPIRBuilderConfig(bool IsEmbedded, bool IsTargetCodegen, 106 bool HasRequiresUnifiedSharedMemory, 107 bool OpenMPOffloadMandatory) 108 : IsEmbedded(IsEmbedded), IsTargetCodegen(IsTargetCodegen), 109 HasRequiresUnifiedSharedMemory(HasRequiresUnifiedSharedMemory), 110 OpenMPOffloadMandatory(OpenMPOffloadMandatory) {} 111 112 // Getters functions that assert if the required values are not present. 
isEmbedded()113 bool isEmbedded() const { 114 assert(IsEmbedded.has_value() && "IsEmbedded is not set"); 115 return *IsEmbedded; 116 } 117 isTargetCodegen()118 bool isTargetCodegen() const { 119 assert(IsTargetCodegen.has_value() && "IsTargetCodegen is not set"); 120 return *IsTargetCodegen; 121 } 122 hasRequiresUnifiedSharedMemory()123 bool hasRequiresUnifiedSharedMemory() const { 124 assert(HasRequiresUnifiedSharedMemory.has_value() && 125 "HasUnifiedSharedMemory is not set"); 126 return *HasRequiresUnifiedSharedMemory; 127 } 128 openMPOffloadMandatory()129 bool openMPOffloadMandatory() const { 130 assert(OpenMPOffloadMandatory.has_value() && 131 "OpenMPOffloadMandatory is not set"); 132 return *OpenMPOffloadMandatory; 133 } 134 // Returns the FirstSeparator if set, otherwise use the default 135 // separator depending on isTargetCodegen firstSeparator()136 StringRef firstSeparator() const { 137 if (FirstSeparator.has_value()) 138 return *FirstSeparator; 139 if (isTargetCodegen()) 140 return "_"; 141 return "."; 142 } 143 144 // Returns the Separator if set, otherwise use the default 145 // separator depending on isTargetCodegen separator()146 StringRef separator() const { 147 if (Separator.has_value()) 148 return *Separator; 149 if (isTargetCodegen()) 150 return "$"; 151 return "."; 152 } 153 setIsEmbedded(bool Value)154 void setIsEmbedded(bool Value) { IsEmbedded = Value; } setIsTargetCodegen(bool Value)155 void setIsTargetCodegen(bool Value) { IsTargetCodegen = Value; } setHasRequiresUnifiedSharedMemory(bool Value)156 void setHasRequiresUnifiedSharedMemory(bool Value) { 157 HasRequiresUnifiedSharedMemory = Value; 158 } setFirstSeparator(StringRef FS)159 void setFirstSeparator(StringRef FS) { FirstSeparator = FS; } setSeparator(StringRef S)160 void setSeparator(StringRef S) { Separator = S; } 161 }; 162 163 /// An interface to create LLVM-IR for OpenMP directives. 164 /// 165 /// Each OpenMP directive has a corresponding public generator method. 
166 class OpenMPIRBuilder { 167 public: 168 /// Create a new OpenMPIRBuilder operating on the given module \p M. This will 169 /// not have an effect on \p M (see initialize) OpenMPIRBuilder(Module & M)170 OpenMPIRBuilder(Module &M) : M(M), Builder(M.getContext()) {} 171 ~OpenMPIRBuilder(); 172 173 /// Initialize the internal state, this will put structures types and 174 /// potentially other helpers into the underlying module. Must be called 175 /// before any other method and only once! 176 void initialize(); 177 setConfig(OpenMPIRBuilderConfig C)178 void setConfig(OpenMPIRBuilderConfig C) { Config = C; } 179 180 /// Finalize the underlying module, e.g., by outlining regions. 181 /// \param Fn The function to be finalized. If not used, 182 /// all functions are finalized. 183 void finalize(Function *Fn = nullptr); 184 185 /// Add attributes known for \p FnID to \p Fn. 186 void addAttributes(omp::RuntimeFunction FnID, Function &Fn); 187 188 /// Type used throughout for insertion points. 189 using InsertPointTy = IRBuilder<>::InsertPoint; 190 191 /// Get the create a name using the platform specific separators. 192 /// \param Parts parts of the final name that needs separation 193 /// The created name has a first separator between the first and second part 194 /// and a second separator between all other parts. 195 /// E.g. with FirstSeparator "$" and Separator "." and 196 /// parts: "p1", "p2", "p3", "p4" 197 /// The resulting name is "p1$p2.p3.p4" 198 /// The separators are retrieved from the OpenMPIRBuilderConfig. 199 std::string createPlatformSpecificName(ArrayRef<StringRef> Parts) const; 200 201 /// Callback type for variable finalization (think destructors). 202 /// 203 /// \param CodeGenIP is the insertion point at which the finalization code 204 /// should be placed. 205 /// 206 /// A finalize callback knows about all objects that need finalization, e.g. 
207 /// destruction, when the scope of the currently generated construct is left 208 /// at the time, and location, the callback is invoked. 209 using FinalizeCallbackTy = std::function<void(InsertPointTy CodeGenIP)>; 210 211 struct FinalizationInfo { 212 /// The finalization callback provided by the last in-flight invocation of 213 /// createXXXX for the directive of kind DK. 214 FinalizeCallbackTy FiniCB; 215 216 /// The directive kind of the innermost directive that has an associated 217 /// region which might require finalization when it is left. 218 omp::Directive DK; 219 220 /// Flag to indicate if the directive is cancellable. 221 bool IsCancellable; 222 }; 223 224 /// Push a finalization callback on the finalization stack. 225 /// 226 /// NOTE: Temporary solution until Clang CG is gone. pushFinalizationCB(const FinalizationInfo & FI)227 void pushFinalizationCB(const FinalizationInfo &FI) { 228 FinalizationStack.push_back(FI); 229 } 230 231 /// Pop the last finalization callback from the finalization stack. 232 /// 233 /// NOTE: Temporary solution until Clang CG is gone. popFinalizationCB()234 void popFinalizationCB() { FinalizationStack.pop_back(); } 235 236 /// Callback type for body (=inner region) code generation 237 /// 238 /// The callback takes code locations as arguments, each describing a 239 /// location where additional instructions can be inserted. 240 /// 241 /// The CodeGenIP may be in the middle of a basic block or point to the end of 242 /// it. The basic block may have a terminator or be degenerate. The callback 243 /// function may just insert instructions at that position, but also split the 244 /// block (without the Before argument of BasicBlock::splitBasicBlock such 245 /// that the identify of the split predecessor block is preserved) and insert 246 /// additional control flow, including branches that do not lead back to what 247 /// follows the CodeGenIP. 
Note that since the callback is allowed to split 248 /// the block, callers must assume that InsertPoints to positions in the 249 /// BasicBlock after CodeGenIP including CodeGenIP itself are invalidated. If 250 /// such InsertPoints need to be preserved, it can split the block itself 251 /// before calling the callback. 252 /// 253 /// AllocaIP and CodeGenIP must not point to the same position. 254 /// 255 /// \param AllocaIP is the insertion point at which new alloca instructions 256 /// should be placed. The BasicBlock it is pointing to must 257 /// not be split. 258 /// \param CodeGenIP is the insertion point at which the body code should be 259 /// placed. 260 using BodyGenCallbackTy = 261 function_ref<void(InsertPointTy AllocaIP, InsertPointTy CodeGenIP)>; 262 263 // This is created primarily for sections construct as llvm::function_ref 264 // (BodyGenCallbackTy) is not storable (as described in the comments of 265 // function_ref class - function_ref contains non-ownable reference 266 // to the callable. 267 using StorableBodyGenCallbackTy = 268 std::function<void(InsertPointTy AllocaIP, InsertPointTy CodeGenIP)>; 269 270 /// Callback type for loop body code generation. 271 /// 272 /// \param CodeGenIP is the insertion point where the loop's body code must be 273 /// placed. This will be a dedicated BasicBlock with a 274 /// conditional branch from the loop condition check and 275 /// terminated with an unconditional branch to the loop 276 /// latch. 277 /// \param IndVar is the induction variable usable at the insertion point. 278 using LoopBodyGenCallbackTy = 279 function_ref<void(InsertPointTy CodeGenIP, Value *IndVar)>; 280 281 /// Callback type for variable privatization (think copy & default 282 /// constructor). 283 /// 284 /// \param AllocaIP is the insertion point at which new alloca instructions 285 /// should be placed. 286 /// \param CodeGenIP is the insertion point at which the privatization code 287 /// should be placed. 
288 /// \param Original The value being copied/created, should not be used in the 289 /// generated IR. 290 /// \param Inner The equivalent of \p Original that should be used in the 291 /// generated IR; this is equal to \p Original if the value is 292 /// a pointer and can thus be passed directly, otherwise it is 293 /// an equivalent but different value. 294 /// \param ReplVal The replacement value, thus a copy or new created version 295 /// of \p Inner. 296 /// 297 /// \returns The new insertion point where code generation continues and 298 /// \p ReplVal the replacement value. 299 using PrivatizeCallbackTy = function_ref<InsertPointTy( 300 InsertPointTy AllocaIP, InsertPointTy CodeGenIP, Value &Original, 301 Value &Inner, Value *&ReplVal)>; 302 303 /// Description of a LLVM-IR insertion point (IP) and a debug/source location 304 /// (filename, line, column, ...). 305 struct LocationDescription { LocationDescriptionLocationDescription306 LocationDescription(const IRBuilderBase &IRB) 307 : IP(IRB.saveIP()), DL(IRB.getCurrentDebugLocation()) {} LocationDescriptionLocationDescription308 LocationDescription(const InsertPointTy &IP) : IP(IP) {} LocationDescriptionLocationDescription309 LocationDescription(const InsertPointTy &IP, const DebugLoc &DL) 310 : IP(IP), DL(DL) {} 311 InsertPointTy IP; 312 DebugLoc DL; 313 }; 314 315 /// Emitter methods for OpenMP directives. 316 /// 317 ///{ 318 319 /// Generator for '#omp barrier' 320 /// 321 /// \param Loc The location where the barrier directive was encountered. 322 /// \param DK The kind of directive that caused the barrier. 323 /// \param ForceSimpleCall Flag to force a simple (=non-cancellation) barrier. 324 /// \param CheckCancelFlag Flag to indicate a cancel barrier return value 325 /// should be checked and acted upon. 326 /// 327 /// \returns The insertion point after the barrier. 
328 InsertPointTy createBarrier(const LocationDescription &Loc, omp::Directive DK, 329 bool ForceSimpleCall = false, 330 bool CheckCancelFlag = true); 331 332 /// Generator for '#omp cancel' 333 /// 334 /// \param Loc The location where the directive was encountered. 335 /// \param IfCondition The evaluated 'if' clause expression, if any. 336 /// \param CanceledDirective The kind of directive that is cancled. 337 /// 338 /// \returns The insertion point after the barrier. 339 InsertPointTy createCancel(const LocationDescription &Loc, Value *IfCondition, 340 omp::Directive CanceledDirective); 341 342 /// Generator for '#omp parallel' 343 /// 344 /// \param Loc The insert and source location description. 345 /// \param AllocaIP The insertion points to be used for alloca instructions. 346 /// \param BodyGenCB Callback that will generate the region code. 347 /// \param PrivCB Callback to copy a given variable (think copy constructor). 348 /// \param FiniCB Callback to finalize variable copies. 349 /// \param IfCondition The evaluated 'if' clause expression, if any. 350 /// \param NumThreads The evaluated 'num_threads' clause expression, if any. 351 /// \param ProcBind The value of the 'proc_bind' clause (see ProcBindKind). 352 /// \param IsCancellable Flag to indicate a cancellable parallel region. 353 /// 354 /// \returns The insertion position *after* the parallel. 355 IRBuilder<>::InsertPoint 356 createParallel(const LocationDescription &Loc, InsertPointTy AllocaIP, 357 BodyGenCallbackTy BodyGenCB, PrivatizeCallbackTy PrivCB, 358 FinalizeCallbackTy FiniCB, Value *IfCondition, 359 Value *NumThreads, omp::ProcBindKind ProcBind, 360 bool IsCancellable); 361 362 /// Generator for the control flow structure of an OpenMP canonical loop. 363 /// 364 /// This generator operates on the logical iteration space of the loop, i.e. 365 /// the caller only has to provide a loop trip count of the loop as defined by 366 /// base language semantics. 
The trip count is interpreted as an unsigned 367 /// integer. The induction variable passed to \p BodyGenCB will be of the same 368 /// type and run from 0 to \p TripCount - 1. It is up to the callback to 369 /// convert the logical iteration variable to the loop counter variable in the 370 /// loop body. 371 /// 372 /// \param Loc The insert and source location description. The insert 373 /// location can be between two instructions or the end of a 374 /// degenerate block (e.g. a BB under construction). 375 /// \param BodyGenCB Callback that will generate the loop body code. 376 /// \param TripCount Number of iterations the loop body is executed. 377 /// \param Name Base name used to derive BB and instruction names. 378 /// 379 /// \returns An object representing the created control flow structure which 380 /// can be used for loop-associated directives. 381 CanonicalLoopInfo *createCanonicalLoop(const LocationDescription &Loc, 382 LoopBodyGenCallbackTy BodyGenCB, 383 Value *TripCount, 384 const Twine &Name = "loop"); 385 386 /// Generator for the control flow structure of an OpenMP canonical loop. 387 /// 388 /// Instead of a logical iteration space, this allows specifying user-defined 389 /// loop counter values using increment, upper- and lower bounds. To 390 /// disambiguate the terminology when counting downwards, instead of lower 391 /// bounds we use \p Start for the loop counter value in the first body 392 /// iteration. 393 /// 394 /// Consider the following limitations: 395 /// 396 /// * A loop counter space over all integer values of its bit-width cannot be 397 /// represented. E.g using uint8_t, its loop trip count of 256 cannot be 398 /// stored into an 8 bit integer): 399 /// 400 /// DO I = 0, 255, 1 401 /// 402 /// * Unsigned wrapping is only supported when wrapping only "once"; E.g. 
403 /// effectively counting downwards: 404 /// 405 /// for (uint8_t i = 100u; i > 0; i += 127u) 406 /// 407 /// 408 /// TODO: May need to add additional parameters to represent: 409 /// 410 /// * Allow representing downcounting with unsigned integers. 411 /// 412 /// * Sign of the step and the comparison operator might disagree: 413 /// 414 /// for (int i = 0; i < 42; i -= 1u) 415 /// 416 // 417 /// \param Loc The insert and source location description. 418 /// \param BodyGenCB Callback that will generate the loop body code. 419 /// \param Start Value of the loop counter for the first iterations. 420 /// \param Stop Loop counter values past this will stop the loop. 421 /// \param Step Loop counter increment after each iteration; negative 422 /// means counting down. 423 /// \param IsSigned Whether Start, Stop and Step are signed integers. 424 /// \param InclusiveStop Whether \p Stop itself is a valid value for the loop 425 /// counter. 426 /// \param ComputeIP Insertion point for instructions computing the trip 427 /// count. Can be used to ensure the trip count is available 428 /// at the outermost loop of a loop nest. If not set, 429 /// defaults to the preheader of the generated loop. 430 /// \param Name Base name used to derive BB and instruction names. 431 /// 432 /// \returns An object representing the created control flow structure which 433 /// can be used for loop-associated directives. 434 CanonicalLoopInfo *createCanonicalLoop(const LocationDescription &Loc, 435 LoopBodyGenCallbackTy BodyGenCB, 436 Value *Start, Value *Stop, Value *Step, 437 bool IsSigned, bool InclusiveStop, 438 InsertPointTy ComputeIP = {}, 439 const Twine &Name = "loop"); 440 441 /// Collapse a loop nest into a single loop. 442 /// 443 /// Merges loops of a loop nest into a single CanonicalLoopNest representation 444 /// that has the same number of innermost loop iterations as the origin loop 445 /// nest. 
The induction variables of the input loops are derived from the 446 /// collapsed loop's induction variable. This is intended to be used to 447 /// implement OpenMP's collapse clause. Before applying a directive, 448 /// collapseLoops normalizes a loop nest to contain only a single loop and the 449 /// directive's implementation does not need to handle multiple loops itself. 450 /// This does not remove the need to handle all loop nest handling by 451 /// directives, such as the ordered(<n>) clause or the simd schedule-clause 452 /// modifier of the worksharing-loop directive. 453 /// 454 /// Example: 455 /// \code 456 /// for (int i = 0; i < 7; ++i) // Canonical loop "i" 457 /// for (int j = 0; j < 9; ++j) // Canonical loop "j" 458 /// body(i, j); 459 /// \endcode 460 /// 461 /// After collapsing with Loops={i,j}, the loop is changed to 462 /// \code 463 /// for (int ij = 0; ij < 63; ++ij) { 464 /// int i = ij / 9; 465 /// int j = ij % 9; 466 /// body(i, j); 467 /// } 468 /// \endcode 469 /// 470 /// In the current implementation, the following limitations apply: 471 /// 472 /// * All input loops have an induction variable of the same type. 473 /// 474 /// * The collapsed loop will have the same trip count integer type as the 475 /// input loops. Therefore it is possible that the collapsed loop cannot 476 /// represent all iterations of the input loops. For instance, assuming a 477 /// 32 bit integer type, and two input loops both iterating 2^16 times, the 478 /// theoretical trip count of the collapsed loop would be 2^32 iteration, 479 /// which cannot be represented in an 32-bit integer. Behavior is undefined 480 /// in this case. 481 /// 482 /// * The trip counts of every input loop must be available at \p ComputeIP. 483 /// Non-rectangular loops are not yet supported. 
484 /// 485 /// * At each nest level, code between a surrounding loop and its nested loop 486 /// is hoisted into the loop body, and such code will be executed more 487 /// often than before collapsing (or not at all if any inner loop iteration 488 /// has a trip count of 0). This is permitted by the OpenMP specification. 489 /// 490 /// \param DL Debug location for instructions added for collapsing, 491 /// such as instructions to compute/derive the input loop's 492 /// induction variables. 493 /// \param Loops Loops in the loop nest to collapse. Loops are specified 494 /// from outermost-to-innermost and every control flow of a 495 /// loop's body must pass through its directly nested loop. 496 /// \param ComputeIP Where additional instruction that compute the collapsed 497 /// trip count. If not set, defaults to before the generated 498 /// loop. 499 /// 500 /// \returns The CanonicalLoopInfo object representing the collapsed loop. 501 CanonicalLoopInfo *collapseLoops(DebugLoc DL, 502 ArrayRef<CanonicalLoopInfo *> Loops, 503 InsertPointTy ComputeIP); 504 505 private: 506 /// Modifies the canonical loop to be a statically-scheduled workshare loop. 507 /// 508 /// This takes a \p LoopInfo representing a canonical loop, such as the one 509 /// created by \p createCanonicalLoop and emits additional instructions to 510 /// turn it into a workshare loop. In particular, it calls to an OpenMP 511 /// runtime function in the preheader to obtain the loop bounds to be used in 512 /// the current thread, updates the relevant instructions in the canonical 513 /// loop and calls to an OpenMP runtime finalization function after the loop. 514 /// 515 /// \param DL Debug location for instructions added for the 516 /// workshare-loop construct itself. 517 /// \param CLI A descriptor of the canonical loop to workshare. 518 /// \param AllocaIP An insertion point for Alloca instructions usable in the 519 /// preheader of the loop. 
520 /// \param NeedsBarrier Indicates whether a barrier must be inserted after 521 /// the loop. 522 /// 523 /// \returns Point where to insert code after the workshare construct. 524 InsertPointTy applyStaticWorkshareLoop(DebugLoc DL, CanonicalLoopInfo *CLI, 525 InsertPointTy AllocaIP, 526 bool NeedsBarrier); 527 528 /// Modifies the canonical loop a statically-scheduled workshare loop with a 529 /// user-specified chunk size. 530 /// 531 /// \param DL Debug location for instructions added for the 532 /// workshare-loop construct itself. 533 /// \param CLI A descriptor of the canonical loop to workshare. 534 /// \param AllocaIP An insertion point for Alloca instructions usable in 535 /// the preheader of the loop. 536 /// \param NeedsBarrier Indicates whether a barrier must be inserted after the 537 /// loop. 538 /// \param ChunkSize The user-specified chunk size. 539 /// 540 /// \returns Point where to insert code after the workshare construct. 541 InsertPointTy applyStaticChunkedWorkshareLoop(DebugLoc DL, 542 CanonicalLoopInfo *CLI, 543 InsertPointTy AllocaIP, 544 bool NeedsBarrier, 545 Value *ChunkSize); 546 547 /// Modifies the canonical loop to be a dynamically-scheduled workshare loop. 548 /// 549 /// This takes a \p LoopInfo representing a canonical loop, such as the one 550 /// created by \p createCanonicalLoop and emits additional instructions to 551 /// turn it into a workshare loop. In particular, it calls to an OpenMP 552 /// runtime function in the preheader to obtain, and then in each iteration 553 /// to update the loop counter. 554 /// 555 /// \param DL Debug location for instructions added for the 556 /// workshare-loop construct itself. 557 /// \param CLI A descriptor of the canonical loop to workshare. 558 /// \param AllocaIP An insertion point for Alloca instructions usable in the 559 /// preheader of the loop. 560 /// \param SchedType Type of scheduling to be passed to the init function. 
561 /// \param NeedsBarrier Indicates whether a barrier must be insterted after 562 /// the loop. 563 /// \param Chunk The size of loop chunk considered as a unit when 564 /// scheduling. If \p nullptr, defaults to 1. 565 /// 566 /// \returns Point where to insert code after the workshare construct. 567 InsertPointTy applyDynamicWorkshareLoop(DebugLoc DL, CanonicalLoopInfo *CLI, 568 InsertPointTy AllocaIP, 569 omp::OMPScheduleType SchedType, 570 bool NeedsBarrier, 571 Value *Chunk = nullptr); 572 573 /// Create alternative version of the loop to support if clause 574 /// 575 /// OpenMP if clause can require to generate second loop. This loop 576 /// will be executed when if clause condition is not met. createIfVersion 577 /// adds branch instruction to the copied loop if \p ifCond is not met. 578 /// 579 /// \param Loop Original loop which should be versioned. 580 /// \param IfCond Value which corresponds to if clause condition 581 /// \param VMap Value to value map to define relation between 582 /// original and copied loop values and loop blocks. 583 /// \param NamePrefix Optional name prefix for if.then if.else blocks. 584 void createIfVersion(CanonicalLoopInfo *Loop, Value *IfCond, 585 ValueToValueMapTy &VMap, const Twine &NamePrefix = ""); 586 587 public: 588 /// Modifies the canonical loop to be a workshare loop. 589 /// 590 /// This takes a \p LoopInfo representing a canonical loop, such as the one 591 /// created by \p createCanonicalLoop and emits additional instructions to 592 /// turn it into a workshare loop. In particular, it calls to an OpenMP 593 /// runtime function in the preheader to obtain the loop bounds to be used in 594 /// the current thread, updates the relevant instructions in the canonical 595 /// loop and calls to an OpenMP runtime finalization function after the loop. 
596 /// 597 /// The concrete transformation is done by applyStaticWorkshareLoop, 598 /// applyStaticChunkedWorkshareLoop, or applyDynamicWorkshareLoop, depending 599 /// on the value of \p SchedKind and \p ChunkSize. 600 /// 601 /// \param DL Debug location for instructions added for the 602 /// workshare-loop construct itself. 603 /// \param CLI A descriptor of the canonical loop to workshare. 604 /// \param AllocaIP An insertion point for Alloca instructions usable in the 605 /// preheader of the loop. 606 /// \param NeedsBarrier Indicates whether a barrier must be insterted after 607 /// the loop. 608 /// \param SchedKind Scheduling algorithm to use. 609 /// \param ChunkSize The chunk size for the inner loop. 610 /// \param HasSimdModifier Whether the simd modifier is present in the 611 /// schedule clause. 612 /// \param HasMonotonicModifier Whether the monotonic modifier is present in 613 /// the schedule clause. 614 /// \param HasNonmonotonicModifier Whether the nonmonotonic modifier is 615 /// present in the schedule clause. 616 /// \param HasOrderedClause Whether the (parameterless) ordered clause is 617 /// present. 618 /// 619 /// \returns Point where to insert code after the workshare construct. 620 InsertPointTy applyWorkshareLoop( 621 DebugLoc DL, CanonicalLoopInfo *CLI, InsertPointTy AllocaIP, 622 bool NeedsBarrier, 623 llvm::omp::ScheduleKind SchedKind = llvm::omp::OMP_SCHEDULE_Default, 624 Value *ChunkSize = nullptr, bool HasSimdModifier = false, 625 bool HasMonotonicModifier = false, bool HasNonmonotonicModifier = false, 626 bool HasOrderedClause = false); 627 628 /// Tile a loop nest. 629 /// 630 /// Tiles the loops of \p Loops by the tile sizes in \p TileSizes. Loops in 631 /// \p/ Loops must be perfectly nested, from outermost to innermost loop 632 /// (i.e. Loops.front() is the outermost loop). The trip count llvm::Value 633 /// of every loop and every tile sizes must be usable in the outermost 634 /// loop's preheader. 
This implies that the loop nest is rectangular. 635 /// 636 /// Example: 637 /// \code 638 /// for (int i = 0; i < 15; ++i) // Canonical loop "i" 639 /// for (int j = 0; j < 14; ++j) // Canonical loop "j" 640 /// body(i, j); 641 /// \endcode 642 /// 643 /// After tiling with Loops={i,j} and TileSizes={5,7}, the loop is changed to 644 /// \code 645 /// for (int i1 = 0; i1 < 3; ++i1) 646 /// for (int j1 = 0; j1 < 2; ++j1) 647 /// for (int i2 = 0; i2 < 5; ++i2) 648 /// for (int j2 = 0; j2 < 7; ++j2) 649 /// body(i1*3+i2, j1*3+j2); 650 /// \endcode 651 /// 652 /// The returned vector are the loops {i1,j1,i2,j2}. The loops i1 and j1 are 653 /// referred to the floor, and the loops i2 and j2 are the tiles. Tiling also 654 /// handles non-constant trip counts, non-constant tile sizes and trip counts 655 /// that are not multiples of the tile size. In the latter case the tile loop 656 /// of the last floor-loop iteration will have fewer iterations than specified 657 /// as its tile size. 658 /// 659 /// 660 /// @param DL Debug location for instructions added by tiling, for 661 /// instance the floor- and tile trip count computation. 662 /// @param Loops Loops to tile. The CanonicalLoopInfo objects are 663 /// invalidated by this method, i.e. should not used after 664 /// tiling. 665 /// @param TileSizes For each loop in \p Loops, the tile size for that 666 /// dimensions. 667 /// 668 /// \returns A list of generated loops. Contains twice as many loops as the 669 /// input loop nest; the first half are the floor loops and the 670 /// second half are the tile loops. 671 std::vector<CanonicalLoopInfo *> 672 tileLoops(DebugLoc DL, ArrayRef<CanonicalLoopInfo *> Loops, 673 ArrayRef<Value *> TileSizes); 674 675 /// Fully unroll a loop. 676 /// 677 /// Instead of unrolling the loop immediately (and duplicating its body 678 /// instructions), it is deferred to LLVM's LoopUnrollPass by adding loop 679 /// metadata. 
  ///
  /// \param DL   Debug location for instructions added by unrolling.
  /// \param Loop The loop to unroll. The loop will be invalidated.
  void unrollLoopFull(DebugLoc DL, CanonicalLoopInfo *Loop);

  /// Fully or partially unroll a loop. How the loop is unrolled is determined
  /// using LLVM's LoopUnrollPass.
  ///
  /// \param DL   Debug location for instructions added by unrolling.
  /// \param Loop The loop to unroll. The loop will be invalidated.
  void unrollLoopHeuristic(DebugLoc DL, CanonicalLoopInfo *Loop);

  /// Partially unroll a loop.
  ///
  /// The CanonicalLoopInfo of the unrolled loop for use with chained
  /// loop-associated directive can be requested using \p UnrolledCLI. Not
  /// needing the CanonicalLoopInfo allows more efficient code generation by
  /// deferring the actual unrolling to the LoopUnrollPass using loop metadata.
  /// A loop-associated directive applied to the unrolled loop needs to know the
  /// new trip count which means that if using a heuristically determined unroll
  /// factor (\p Factor == 0), that factor must be computed immediately. We are
  /// using the same logic as the LoopUnrollPass to derive the unroll factor,
  /// but which assumes that some canonicalization has taken place (e.g.
  /// Mem2Reg, LICM, GVN, Inlining, etc.). That is, the heuristic will perform
  /// better when the unrolled loop's CanonicalLoopInfo is not needed.
  ///
  /// \param DL          Debug location for instructions added by unrolling.
  /// \param Loop        The loop to unroll. The loop will be invalidated.
  /// \param Factor      The factor to unroll the loop by. A factor of 0
  ///                    indicates that a heuristic should be used to determine
  ///                    the unroll-factor.
  /// \param UnrolledCLI If non-null, receives the CanonicalLoopInfo of the
  ///                    partially unrolled loop. Otherwise, uses loop metadata
  ///                    to defer unrolling to the LoopUnrollPass.
  void unrollLoopPartial(DebugLoc DL, CanonicalLoopInfo *Loop, int32_t Factor,
                         CanonicalLoopInfo **UnrolledCLI);

  /// Add metadata to simd-ize a loop. If \p IfCond is not nullptr, the loop
  /// is cloned. The metadata which prevents vectorization is added to
  /// the cloned loop. The cloned loop is executed when \p IfCond is evaluated
  /// to false.
  ///
  /// \param Loop        The loop to simd-ize.
  /// \param AlignedVars The map which contains pairs of the pointer
  ///                    and its corresponding alignment.
  /// \param IfCond      The value which corresponds to the if clause
  ///                    condition.
  /// \param Order       The enum to map order clause.
  /// \param Simdlen     The Simdlen length to apply to the simd loop.
  /// \param Safelen     The Safelen length to apply to the simd loop.
  void applySimd(CanonicalLoopInfo *Loop,
                 MapVector<Value *, Value *> AlignedVars, Value *IfCond,
                 omp::OrderKind Order, ConstantInt *Simdlen,
                 ConstantInt *Safelen);

  /// Generator for '#omp flush'
  ///
  /// \param Loc The location where the flush directive was encountered.
  void createFlush(const LocationDescription &Loc);

  /// Generator for '#omp taskwait'
  ///
  /// \param Loc The location where the taskwait directive was encountered.
  void createTaskwait(const LocationDescription &Loc);

  /// Generator for '#omp taskyield'
  ///
  /// \param Loc The location where the taskyield directive was encountered.
  void createTaskyield(const LocationDescription &Loc);

  /// A struct to pack the relevant information for an OpenMP depend clause.
751 struct DependData { 752 omp::RTLDependenceKindTy DepKind = omp::RTLDependenceKindTy::DepUnknown; 753 Type *DepValueType; 754 Value *DepVal; 755 explicit DependData() = default; DependDataDependData756 DependData(omp::RTLDependenceKindTy DepKind, Type *DepValueType, 757 Value *DepVal) 758 : DepKind(DepKind), DepValueType(DepValueType), DepVal(DepVal) {} 759 }; 760 761 /// Generator for `#omp task` 762 /// 763 /// \param Loc The location where the task construct was encountered. 764 /// \param AllocaIP The insertion point to be used for alloca instructions. 765 /// \param BodyGenCB Callback that will generate the region code. 766 /// \param Tied True if the task is tied, false if the task is untied. 767 /// \param Final i1 value which is `true` if the task is final, `false` if the 768 /// task is not final. 769 /// \param IfCondition i1 value. If it evaluates to `false`, an undeferred 770 /// task is generated, and the encountering thread must 771 /// suspend the current task region, for which execution 772 /// cannot be resumed until execution of the structured 773 /// block that is associated with the generated task is 774 /// completed. 775 InsertPointTy createTask(const LocationDescription &Loc, 776 InsertPointTy AllocaIP, BodyGenCallbackTy BodyGenCB, 777 bool Tied = true, Value *Final = nullptr, 778 Value *IfCondition = nullptr, 779 SmallVector<DependData> Dependencies = {}); 780 781 /// Generator for the taskgroup construct 782 /// 783 /// \param Loc The location where the taskgroup construct was encountered. 784 /// \param AllocaIP The insertion point to be used for alloca instructions. 785 /// \param BodyGenCB Callback that will generate the region code. 786 InsertPointTy createTaskgroup(const LocationDescription &Loc, 787 InsertPointTy AllocaIP, 788 BodyGenCallbackTy BodyGenCB); 789 790 /// Functions used to generate reductions. 
  /// Such functions take two Values
  /// representing LHS and RHS of the reduction, respectively, and a reference
  /// to the value that is updated to refer to the reduction result.
  using ReductionGenTy =
      function_ref<InsertPointTy(InsertPointTy, Value *, Value *, Value *&)>;

  /// Functions used to generate atomic reductions. Such functions take two
  /// Values representing pointers to LHS and RHS of the reduction, as well as
  /// the element type of these pointers. They are expected to atomically
  /// update the LHS to the reduced value.
  using AtomicReductionGenTy =
      function_ref<InsertPointTy(InsertPointTy, Type *, Value *, Value *)>;

  /// Information about an OpenMP reduction.
  struct ReductionInfo {
    /// Store all components of the reduction description. Asserts that
    /// \p Variable has pointer type and that \p ElementType is compatible
    /// with it.
    ReductionInfo(Type *ElementType, Value *Variable, Value *PrivateVariable,
                  ReductionGenTy ReductionGen,
                  AtomicReductionGenTy AtomicReductionGen)
        : ElementType(ElementType), Variable(Variable),
          PrivateVariable(PrivateVariable), ReductionGen(ReductionGen),
          AtomicReductionGen(AtomicReductionGen) {
      assert(cast<PointerType>(Variable->getType())
                 ->isOpaqueOrPointeeTypeMatches(ElementType) &&
             "Invalid elem type");
    }

    /// Reduction element type, must match pointee type of variable.
    Type *ElementType;

    /// Reduction variable of pointer type.
    Value *Variable;

    /// Thread-private partial reduction variable.
    Value *PrivateVariable;

    /// Callback for generating the reduction body. The IR produced by this will
    /// be used to combine two values in a thread-safe context, e.g., under
    /// lock or within the same thread, and therefore need not be atomic.
    ReductionGenTy ReductionGen;

    /// Callback for generating the atomic reduction body, may be null. The IR
    /// produced by this will be used to atomically combine two values during
    /// reduction. If null, the implementation will use the non-atomic version
    /// along with the appropriate synchronization mechanisms.
    AtomicReductionGenTy AtomicReductionGen;
  };

  // TODO: provide atomic and non-atomic reduction generators for reduction
  // operators defined by the OpenMP specification.

  /// Generator for '#omp reduction'.
  ///
  /// Emits the IR instructing the runtime to perform the specific kind of
  /// reductions. Expects reduction variables to have been privatized and
  /// initialized to reduction-neutral values separately. Emits the calls to
  /// runtime functions as well as the reduction function and the basic blocks
  /// performing the reduction atomically and non-atomically.
  ///
  /// The code emitted for the following:
  ///
  /// \code
  ///   type var_1;
  ///   type var_2;
  ///   #pragma omp <directive> reduction(reduction-op:var_1,var_2)
  ///   /* body */;
  /// \endcode
  ///
  /// corresponds to the following sketch.
  ///
  /// \code
  /// void _outlined_par() {
  ///   // N is the number of different reductions.
  ///   void *red_array[] = {privatized_var_1, privatized_var_2, ...};
  ///   switch(__kmpc_reduce(..., N, /*size of data in red array*/, red_array,
  ///                        _omp_reduction_func,
  ///                        _gomp_critical_user.reduction.var)) {
  ///   case 1: {
  ///     var_1 = var_1 <reduction-op> privatized_var_1;
  ///     var_2 = var_2 <reduction-op> privatized_var_2;
  ///     // ...
  ///    __kmpc_end_reduce(...);
  ///     break;
  ///   }
  ///   case 2: {
  ///     _Atomic<ReductionOp>(var_1, privatized_var_1);
  ///     _Atomic<ReductionOp>(var_2, privatized_var_2);
  ///     // ...
  ///     break;
  ///   }
  ///   default: break;
  ///   }
  /// }
  ///
  /// void _omp_reduction_func(void **lhs, void **rhs) {
  ///   *(type *)lhs[0] = *(type *)lhs[0] <reduction-op> *(type *)rhs[0];
  ///   *(type *)lhs[1] = *(type *)lhs[1] <reduction-op> *(type *)rhs[1];
  ///   // ...
886 /// } 887 /// \endcode 888 /// 889 /// \param Loc The location where the reduction was 890 /// encountered. Must be within the associate 891 /// directive and after the last local access to the 892 /// reduction variables. 893 /// \param AllocaIP An insertion point suitable for allocas usable 894 /// in reductions. 895 /// \param ReductionInfos A list of info on each reduction variable. 896 /// \param IsNoWait A flag set if the reduction is marked as nowait. 897 InsertPointTy createReductions(const LocationDescription &Loc, 898 InsertPointTy AllocaIP, 899 ArrayRef<ReductionInfo> ReductionInfos, 900 bool IsNoWait = false); 901 902 ///} 903 904 /// Return the insertion point used by the underlying IRBuilder. getInsertionPoint()905 InsertPointTy getInsertionPoint() { return Builder.saveIP(); } 906 907 /// Update the internal location to \p Loc. updateToLocation(const LocationDescription & Loc)908 bool updateToLocation(const LocationDescription &Loc) { 909 Builder.restoreIP(Loc.IP); 910 Builder.SetCurrentDebugLocation(Loc.DL); 911 return Loc.IP.getBlock() != nullptr; 912 } 913 914 /// Return the function declaration for the runtime function with \p FnID. 915 FunctionCallee getOrCreateRuntimeFunction(Module &M, 916 omp::RuntimeFunction FnID); 917 918 Function *getOrCreateRuntimeFunctionPtr(omp::RuntimeFunction FnID); 919 920 /// Return the (LLVM-IR) string describing the source location \p LocStr. 921 Constant *getOrCreateSrcLocStr(StringRef LocStr, uint32_t &SrcLocStrSize); 922 923 /// Return the (LLVM-IR) string describing the default source location. 924 Constant *getOrCreateDefaultSrcLocStr(uint32_t &SrcLocStrSize); 925 926 /// Return the (LLVM-IR) string describing the source location identified by 927 /// the arguments. 928 Constant *getOrCreateSrcLocStr(StringRef FunctionName, StringRef FileName, 929 unsigned Line, unsigned Column, 930 uint32_t &SrcLocStrSize); 931 932 /// Return the (LLVM-IR) string describing the DebugLoc \p DL. 
  /// Use \p F as
  /// fallback if \p DL does not specify the function name.
  Constant *getOrCreateSrcLocStr(DebugLoc DL, uint32_t &SrcLocStrSize,
                                 Function *F = nullptr);

  /// Return the (LLVM-IR) string describing the source location \p Loc.
  Constant *getOrCreateSrcLocStr(const LocationDescription &Loc,
                                 uint32_t &SrcLocStrSize);

  /// Return an ident_t* encoding the source location \p SrcLocStr and \p Flags.
  /// TODO: Create an enum class for the Reserve2Flags
  Constant *getOrCreateIdent(Constant *SrcLocStr, uint32_t SrcLocStrSize,
                             omp::IdentFlag Flags = omp::IdentFlag(0),
                             unsigned Reserve2Flags = 0);

  /// Create a hidden global flag \p Name in the module with initial value \p
  /// Value.
  GlobalValue *createGlobalFlag(unsigned Value, StringRef Name);

  /// Create an offloading section struct used to register this global at
  /// runtime.
  ///
  /// Type struct __tgt_offload_entry{
  ///   void    *addr;      // Pointer to the offload entry info.
  ///                       // (function or global)
  ///   char    *name;      // Name of the function or global.
  ///   size_t  size;       // Size of the entry info (0 if it is a function).
  ///   int32_t flags;
  ///   int32_t reserved;
  /// };
  ///
  /// \param Addr The pointer to the global being registered.
  /// \param Name The symbol name associated with the global.
  /// \param Size The size in bytes of the global (0 for functions).
  /// \param Flags Flags associated with the entry.
  /// \param SectionName The section this entry will be placed at.
  void emitOffloadingEntry(Constant *Addr, StringRef Name, uint64_t Size,
                           int32_t Flags,
                           StringRef SectionName = "omp_offloading_entries");

  /// Generate control flow and cleanup for cancellation.
  ///
  /// \param CancelFlag Flag indicating if the cancellation is performed.
  /// \param CanceledDirective The kind of directive that is canceled.
  /// \param ExitCB Extra code to be generated in the exit block.
  void emitCancelationCheckImpl(Value *CancelFlag,
                                omp::Directive CanceledDirective,
                                FinalizeCallbackTy ExitCB = {});

  /// Generate a target region entry call.
  ///
  /// \param Loc The location at which the request originated and is fulfilled.
  /// \param Return Return value of the created function returned by reference.
  /// \param Ident Source location identifier for the call (presumably an
  ///        ident_t* value; TODO confirm against the implementation).
  /// \param DeviceID Identifier for the device via the 'device' clause.
  /// \param NumTeams Number of teams for the region via the 'num_teams' clause
  ///        or 0 if unspecified and -1 if there is no 'teams' clause.
  /// \param NumThreads Number of threads via the 'thread_limit' clause.
  /// \param HostPtr Pointer to the host-side pointer of the target kernel.
  /// \param KernelArgs Array of arguments to the kernel.
  InsertPointTy emitTargetKernel(const LocationDescription &Loc, Value *&Return,
                                 Value *Ident, Value *DeviceID, Value *NumTeams,
                                 Value *NumThreads, Value *HostPtr,
                                 ArrayRef<Value *> KernelArgs);

  /// Generate a barrier runtime call.
  ///
  /// \param Loc The location at which the request originated and is fulfilled.
  /// \param DK The directive which caused the barrier
  /// \param ForceSimpleCall Flag to force a simple (=non-cancellation) barrier.
  /// \param CheckCancelFlag Flag to indicate a cancel barrier return value
  ///                        should be checked and acted upon.
  ///
  /// \returns The insertion point after the barrier.
  InsertPointTy emitBarrierImpl(const LocationDescription &Loc,
                                omp::Directive DK, bool ForceSimpleCall,
                                bool CheckCancelFlag);

  /// Generate a flush runtime call.
  ///
  /// \param Loc The location at which the request originated and is fulfilled.
1012 void emitFlush(const LocationDescription &Loc); 1013 1014 /// The finalization stack made up of finalize callbacks currently in-flight, 1015 /// wrapped into FinalizationInfo objects that reference also the finalization 1016 /// target block and the kind of cancellable directive. 1017 SmallVector<FinalizationInfo, 8> FinalizationStack; 1018 1019 /// Return true if the last entry in the finalization stack is of kind \p DK 1020 /// and cancellable. isLastFinalizationInfoCancellable(omp::Directive DK)1021 bool isLastFinalizationInfoCancellable(omp::Directive DK) { 1022 return !FinalizationStack.empty() && 1023 FinalizationStack.back().IsCancellable && 1024 FinalizationStack.back().DK == DK; 1025 } 1026 1027 /// Generate a taskwait runtime call. 1028 /// 1029 /// \param Loc The location at which the request originated and is fulfilled. 1030 void emitTaskwaitImpl(const LocationDescription &Loc); 1031 1032 /// Generate a taskyield runtime call. 1033 /// 1034 /// \param Loc The location at which the request originated and is fulfilled. 1035 void emitTaskyieldImpl(const LocationDescription &Loc); 1036 1037 /// Return the current thread ID. 1038 /// 1039 /// \param Ident The ident (ident_t*) describing the query origin. 1040 Value *getOrCreateThreadID(Value *Ident); 1041 1042 /// The OpenMPIRBuilder Configuration 1043 OpenMPIRBuilderConfig Config; 1044 1045 /// The underlying LLVM-IR module 1046 Module &M; 1047 1048 /// The LLVM-IR Builder used to create IR. 1049 IRBuilder<> Builder; 1050 1051 /// Map to remember source location strings 1052 StringMap<Constant *> SrcLocStrMap; 1053 1054 /// Map to remember existing ident_t*. 1055 DenseMap<std::pair<Constant *, uint64_t>, Constant *> IdentMap; 1056 1057 /// Helper that contains information about regions we need to outline 1058 /// during finalization. 
1059 struct OutlineInfo { 1060 using PostOutlineCBTy = std::function<void(Function &)>; 1061 PostOutlineCBTy PostOutlineCB; 1062 BasicBlock *EntryBB, *ExitBB, *OuterAllocaBB; 1063 SmallVector<Value *, 2> ExcludeArgsFromAggregate; 1064 1065 /// Collect all blocks in between EntryBB and ExitBB in both the given 1066 /// vector and set. 1067 void collectBlocks(SmallPtrSetImpl<BasicBlock *> &BlockSet, 1068 SmallVectorImpl<BasicBlock *> &BlockVector); 1069 1070 /// Return the function that contains the region to be outlined. getFunctionOutlineInfo1071 Function *getFunction() const { return EntryBB->getParent(); } 1072 }; 1073 1074 /// Collection of regions that need to be outlined during finalization. 1075 SmallVector<OutlineInfo, 16> OutlineInfos; 1076 1077 /// Collection of owned canonical loop objects that eventually need to be 1078 /// free'd. 1079 std::forward_list<CanonicalLoopInfo> LoopInfos; 1080 1081 /// Add a new region that will be outlined later. addOutlineInfo(OutlineInfo && OI)1082 void addOutlineInfo(OutlineInfo &&OI) { OutlineInfos.emplace_back(OI); } 1083 1084 /// An ordered map of auto-generated variables to their unique names. 1085 /// It stores variables with the following names: 1) ".gomp_critical_user_" + 1086 /// <critical_section_name> + ".var" for "omp critical" directives; 2) 1087 /// <mangled_name_for_global_var> + ".cache." for cache for threadprivate 1088 /// variables. 1089 StringMap<Constant*, BumpPtrAllocator> InternalVars; 1090 1091 /// Create the global variable holding the offload mappings information. 1092 GlobalVariable *createOffloadMaptypes(SmallVectorImpl<uint64_t> &Mappings, 1093 std::string VarName); 1094 1095 /// Create the global variable holding the offload names information. 
1096 GlobalVariable * 1097 createOffloadMapnames(SmallVectorImpl<llvm::Constant *> &Names, 1098 std::string VarName); 1099 1100 struct MapperAllocas { 1101 AllocaInst *ArgsBase = nullptr; 1102 AllocaInst *Args = nullptr; 1103 AllocaInst *ArgSizes = nullptr; 1104 }; 1105 1106 /// Create the allocas instruction used in call to mapper functions. 1107 void createMapperAllocas(const LocationDescription &Loc, 1108 InsertPointTy AllocaIP, unsigned NumOperands, 1109 struct MapperAllocas &MapperAllocas); 1110 1111 /// Create the call for the target mapper function. 1112 /// \param Loc The source location description. 1113 /// \param MapperFunc Function to be called. 1114 /// \param SrcLocInfo Source location information global. 1115 /// \param MaptypesArg The argument types. 1116 /// \param MapnamesArg The argument names. 1117 /// \param MapperAllocas The AllocaInst used for the call. 1118 /// \param DeviceID Device ID for the call. 1119 /// \param NumOperands Number of operands in the call. 1120 void emitMapperCall(const LocationDescription &Loc, Function *MapperFunc, 1121 Value *SrcLocInfo, Value *MaptypesArg, Value *MapnamesArg, 1122 struct MapperAllocas &MapperAllocas, int64_t DeviceID, 1123 unsigned NumOperands); 1124 1125 /// Container for the arguments used to pass data to the runtime library. 1126 struct TargetDataRTArgs { TargetDataRTArgsTargetDataRTArgs1127 explicit TargetDataRTArgs() {} 1128 /// The array of base pointer passed to the runtime library. 1129 Value *BasePointersArray = nullptr; 1130 /// The array of section pointers passed to the runtime library. 1131 Value *PointersArray = nullptr; 1132 /// The array of sizes passed to the runtime library. 1133 Value *SizesArray = nullptr; 1134 /// The array of map types passed to the runtime library for the beginning 1135 /// of the region or for the entire region if there are no separate map 1136 /// types for the region end. 
1137 Value *MapTypesArray = nullptr; 1138 /// The array of map types passed to the runtime library for the end of the 1139 /// region, or nullptr if there are no separate map types for the region 1140 /// end. 1141 Value *MapTypesArrayEnd = nullptr; 1142 /// The array of user-defined mappers passed to the runtime library. 1143 Value *MappersArray = nullptr; 1144 /// The array of original declaration names of mapped pointers sent to the 1145 /// runtime library for debugging 1146 Value *MapNamesArray = nullptr; 1147 }; 1148 1149 /// Struct that keeps the information that should be kept throughout 1150 /// a 'target data' region. 1151 class TargetDataInfo { 1152 /// Set to true if device pointer information have to be obtained. 1153 bool RequiresDevicePointerInfo = false; 1154 /// Set to true if Clang emits separate runtime calls for the beginning and 1155 /// end of the region. These calls might have separate map type arrays. 1156 bool SeparateBeginEndCalls = false; 1157 1158 public: 1159 TargetDataRTArgs RTArgs; 1160 1161 /// Indicate whether any user-defined mapper exists. 1162 bool HasMapper = false; 1163 /// The total number of pointers passed to the runtime library. 1164 unsigned NumberOfPtrs = 0u; 1165 TargetDataInfo()1166 explicit TargetDataInfo() {} TargetDataInfo(bool RequiresDevicePointerInfo,bool SeparateBeginEndCalls)1167 explicit TargetDataInfo(bool RequiresDevicePointerInfo, 1168 bool SeparateBeginEndCalls) 1169 : RequiresDevicePointerInfo(RequiresDevicePointerInfo), 1170 SeparateBeginEndCalls(SeparateBeginEndCalls) {} 1171 /// Clear information about the data arrays. clearArrayInfo()1172 void clearArrayInfo() { 1173 RTArgs = TargetDataRTArgs(); 1174 HasMapper = false; 1175 NumberOfPtrs = 0u; 1176 } 1177 /// Return true if the current target data information has valid arrays. 
isValid()1178 bool isValid() { 1179 return RTArgs.BasePointersArray && RTArgs.PointersArray && 1180 RTArgs.SizesArray && RTArgs.MapTypesArray && 1181 (!HasMapper || RTArgs.MappersArray) && NumberOfPtrs; 1182 } requiresDevicePointerInfo()1183 bool requiresDevicePointerInfo() { return RequiresDevicePointerInfo; } separateBeginEndCalls()1184 bool separateBeginEndCalls() { return SeparateBeginEndCalls; } 1185 }; 1186 1187 /// Emit the arguments to be passed to the runtime library based on the 1188 /// arrays of base pointers, pointers, sizes, map types, and mappers. If 1189 /// ForEndCall, emit map types to be passed for the end of the region instead 1190 /// of the beginning. 1191 void emitOffloadingArraysArgument(IRBuilderBase &Builder, 1192 OpenMPIRBuilder::TargetDataRTArgs &RTArgs, 1193 OpenMPIRBuilder::TargetDataInfo &Info, 1194 bool EmitDebug = false, 1195 bool ForEndCall = false); 1196 1197 /// Creates offloading entry for the provided entry ID \a ID, address \a 1198 /// Addr, size \a Size, and flags \a Flags. 1199 void createOffloadEntry(Constant *ID, Constant *Addr, uint64_t Size, 1200 int32_t Flags, GlobalValue::LinkageTypes); 1201 1202 /// The kind of errors that can occur when emitting the offload entries and 1203 /// metadata. 1204 enum EmitMetadataErrorKind { 1205 EMIT_MD_TARGET_REGION_ERROR, 1206 EMIT_MD_DECLARE_TARGET_ERROR, 1207 EMIT_MD_GLOBAL_VAR_LINK_ERROR 1208 }; 1209 1210 /// Callback function type 1211 using EmitMetadataErrorReportFunctionTy = 1212 std::function<void(EmitMetadataErrorKind, TargetRegionEntryInfo)>; 1213 1214 // Emit the offloading entries and metadata so that the device codegen side 1215 // can easily figure out what to emit. The produced metadata looks like 1216 // this: 1217 // 1218 // !omp_offload.info = !{!1, ...} 1219 // 1220 // We only generate metadata for function that contain target regions. 
  void createOffloadEntriesAndInfoMetadata(
      OffloadEntriesInfoManager &OffloadEntriesInfoManager,
      EmitMetadataErrorReportFunctionTy &ErrorReportFunction);

public:
  /// Generator for __kmpc_copyprivate
  ///
  /// \param Loc The source location description.
  /// \param BufSize Number of elements in the buffer.
  /// \param CpyBuf List of pointers to data to be copied.
  /// \param CpyFn Function to call for copying data.
  /// \param DidIt Flag variable; 1 for 'single' thread, 0 otherwise.
  ///
  /// \return The insertion position *after* the CopyPrivate call.

  InsertPointTy createCopyPrivate(const LocationDescription &Loc,
                                  llvm::Value *BufSize, llvm::Value *CpyBuf,
                                  llvm::Value *CpyFn, llvm::Value *DidIt);

  /// Generator for '#omp single'
  ///
  /// \param Loc The source location description.
  /// \param BodyGenCB Callback that will generate the region code.
  /// \param FiniCB Callback to finalize variable copies.
  /// \param IsNowait If false, a barrier is emitted.
  /// \param DidIt Local variable used as a flag to indicate 'single' thread
  ///
  /// \returns The insertion position *after* the single call.
  InsertPointTy createSingle(const LocationDescription &Loc,
                             BodyGenCallbackTy BodyGenCB,
                             FinalizeCallbackTy FiniCB, bool IsNowait,
                             llvm::Value *DidIt);

  /// Generator for '#omp master'
  ///
  /// \param Loc The insert and source location description.
  /// \param BodyGenCB Callback that will generate the region code.
  /// \param FiniCB Callback to finalize variable copies.
  ///
  /// \returns The insertion position *after* the master.
  InsertPointTy createMaster(const LocationDescription &Loc,
                             BodyGenCallbackTy BodyGenCB,
                             FinalizeCallbackTy FiniCB);

  /// Generator for '#omp masked'
  ///
  /// \param Loc The insert and source location description.
  /// \param BodyGenCB Callback that will generate the region code.
  /// \param FiniCB Callback to finalize variable copies.
  /// \param Filter Value for the 'filter' clause (presumably the thread
  ///        number selected to execute the region; TODO confirm).
  ///
  /// \returns The insertion position *after* the masked.
  InsertPointTy createMasked(const LocationDescription &Loc,
                             BodyGenCallbackTy BodyGenCB,
                             FinalizeCallbackTy FiniCB, Value *Filter);

  /// Generator for '#omp critical'
  ///
  /// \param Loc The insert and source location description.
  /// \param BodyGenCB Callback that will generate the region body code.
  /// \param FiniCB Callback to finalize variable copies.
  /// \param CriticalName Name of the lock used by the critical directive.
  /// \param HintInst Hint Instruction for hint clause associated with critical
  ///
  /// \returns The insertion position *after* the critical.
  InsertPointTy createCritical(const LocationDescription &Loc,
                               BodyGenCallbackTy BodyGenCB,
                               FinalizeCallbackTy FiniCB,
                               StringRef CriticalName, Value *HintInst);

  /// Generator for '#omp ordered depend (source | sink)'
  ///
  /// \param Loc The insert and source location description.
  /// \param AllocaIP The insertion point to be used for alloca instructions.
  /// \param NumLoops The number of loops in depend clause.
  /// \param StoreValues The value will be stored in vector address.
  /// \param Name The name of alloca instruction.
  /// \param IsDependSource If true, depend source; otherwise, depend sink.
  ///
  /// \return The insertion position *after* the ordered.
  InsertPointTy createOrderedDepend(const LocationDescription &Loc,
                                    InsertPointTy AllocaIP, unsigned NumLoops,
                                    ArrayRef<llvm::Value *> StoreValues,
                                    const Twine &Name, bool IsDependSource);

  /// Generator for '#omp ordered [threads | simd]'
  ///
  /// \param Loc The insert and source location description.
  /// \param BodyGenCB Callback that will generate the region code.
  /// \param FiniCB Callback to finalize variable copies.
  /// \param IsThreads If true, with threads clause or without clause;
  /// otherwise, with simd clause.
  ///
  /// \returns The insertion position *after* the ordered.
  InsertPointTy createOrderedThreadsSimd(const LocationDescription &Loc,
                                         BodyGenCallbackTy BodyGenCB,
                                         FinalizeCallbackTy FiniCB,
                                         bool IsThreads);

  /// Generator for '#omp sections'
  ///
  /// \param Loc The insert and source location description.
  /// \param AllocaIP The insertion points to be used for alloca instructions.
  /// \param SectionCBs Callbacks that will generate body of each section.
  /// \param PrivCB Callback to copy a given variable (think copy constructor).
  /// \param FiniCB Callback to finalize variable copies.
  /// \param IsCancellable Flag to indicate a cancellable parallel region.
  /// \param IsNowait If true, the barrier that ensures all sections are
  ///                 executed before moving forward will not be generated.
  /// \returns The insertion position *after* the sections.
  InsertPointTy createSections(const LocationDescription &Loc,
                               InsertPointTy AllocaIP,
                               ArrayRef<StorableBodyGenCallbackTy> SectionCBs,
                               PrivatizeCallbackTy PrivCB,
                               FinalizeCallbackTy FiniCB, bool IsCancellable,
                               bool IsNowait);

  /// Generator for '#omp section'
  ///
  /// \param Loc The insert and source location description.
  /// \param BodyGenCB Callback that will generate the region body code.
  /// \param FiniCB Callback to finalize variable copies.
  /// \returns The insertion position *after* the section.
  InsertPointTy createSection(const LocationDescription &Loc,
                              BodyGenCallbackTy BodyGenCB,
                              FinalizeCallbackTy FiniCB);

  /// Generate conditional branch and relevant BasicBlocks through which private
  /// threads copy the 'copyin' variables from Master copy to threadprivate
  /// copies.
  ///
  /// \param IP insertion block for copyin conditional
  /// \param MasterAddr a pointer to the master variable
  /// \param PrivateAddr a pointer to the threadprivate variable
  /// \param IntPtrTy Pointer size type
  /// \param BranchtoEnd Create a branch between the copyin.not.master blocks
  ///                    and copy.in.end block
  ///
  /// \returns The insertion point where copying operation to be emitted.
  InsertPointTy createCopyinClauseBlocks(InsertPointTy IP, Value *MasterAddr,
                                         Value *PrivateAddr,
                                         llvm::IntegerType *IntPtrTy,
                                         bool BranchtoEnd = true);

  /// Create a runtime call for kmpc_Alloc
  ///
  /// \param Loc The insert and source location description.
  /// \param Size Size of allocated memory space
  /// \param Allocator Allocator information instruction
  /// \param Name Name of call Instruction for OMP_alloc
  ///
  /// \returns CallInst to the OMP_Alloc call
  CallInst *createOMPAlloc(const LocationDescription &Loc, Value *Size,
                           Value *Allocator, std::string Name = "");

  /// Create a runtime call for kmpc_free
  ///
  /// \param Loc The insert and source location description.
  /// \param Addr Address of memory space to be freed
  /// \param Allocator Allocator information instruction
  /// \param Name Name of call Instruction for OMP_Free
  ///
  /// \returns CallInst to the OMP_Free call
  CallInst *createOMPFree(const LocationDescription &Loc, Value *Addr,
                          Value *Allocator, std::string Name = "");

  /// Create a runtime call for kmpc_threadprivate_cached
  ///
  /// \param Loc The insert and source location description.
  /// \param Pointer pointer to data to be cached
  /// \param Size size of data to be cached
  /// \param Name Name of call Instruction for callinst
  ///
  /// \returns CallInst to the thread private cache call.
CallInst *createCachedThreadPrivate(const LocationDescription &Loc,
                                    llvm::Value *Pointer,
                                    llvm::ConstantInt *Size,
                                    const llvm::Twine &Name = Twine(""));

/// Create a runtime call for __tgt_interop_init
///
/// \param Loc The insert and source location description.
/// \param InteropVar variable to be allocated
/// \param InteropType type of interop operation
/// \param Device device to which offloading will occur
/// \param NumDependences number of dependence variables
/// \param DependenceAddress pointer to dependence variables
/// \param HaveNowaitClause does nowait clause exist
///
/// \returns CallInst to the __tgt_interop_init call
CallInst *createOMPInteropInit(const LocationDescription &Loc,
                               Value *InteropVar,
                               omp::OMPInteropType InteropType, Value *Device,
                               Value *NumDependences,
                               Value *DependenceAddress,
                               bool HaveNowaitClause);

/// Create a runtime call for __tgt_interop_destroy
///
/// \param Loc The insert and source location description.
/// \param InteropVar variable to be allocated
/// \param Device device to which offloading will occur
/// \param NumDependences number of dependence variables
/// \param DependenceAddress pointer to dependence variables
/// \param HaveNowaitClause does nowait clause exist
///
/// \returns CallInst to the __tgt_interop_destroy call
CallInst *createOMPInteropDestroy(const LocationDescription &Loc,
                                  Value *InteropVar, Value *Device,
                                  Value *NumDependences,
                                  Value *DependenceAddress,
                                  bool HaveNowaitClause);

/// Create a runtime call for __tgt_interop_use
///
/// \param Loc The insert and source location description.
/// \param InteropVar variable to be allocated
/// \param Device device to which offloading will occur
/// \param NumDependences number of dependence variables
/// \param DependenceAddress pointer to dependence variables
/// \param HaveNowaitClause does nowait clause exist
///
/// \returns CallInst to the __tgt_interop_use call
CallInst *createOMPInteropUse(const LocationDescription &Loc,
                              Value *InteropVar, Value *Device,
                              Value *NumDependences, Value *DependenceAddress,
                              bool HaveNowaitClause);

/// The `omp target` interface
///
/// For more information about the usage of this interface,
/// \see openmp/libomptarget/deviceRTLs/common/include/target.h
///
///{

/// Create a runtime call for kmpc_target_init
///
/// \param Loc The insert and source location description.
/// \param IsSPMD Flag to indicate if the kernel is an SPMD kernel or not.
InsertPointTy createTargetInit(const LocationDescription &Loc, bool IsSPMD);

/// Create a runtime call for kmpc_target_deinit
///
/// \param Loc The insert and source location description.
/// \param IsSPMD Flag to indicate if the kernel is an SPMD kernel or not.
void createTargetDeinit(const LocationDescription &Loc, bool IsSPMD);

///}

private:
/// Sets the function attributes expected for the outlined function.
void setOutlinedTargetRegionFunctionAttributes(Function *OutlinedFn,
                                               int32_t NumTeams,
                                               int32_t NumThreads);

/// Creates the function ID/Address for the given outlined function.
/// In the case of an embedded device function the address of the function is
/// used, in the case of a non-offload function a constant is created.
Constant *createOutlinedFunctionID(Function *OutlinedFn,
                                   StringRef EntryFnIDName);

/// Creates the region entry address for the outlined function.
Constant *createTargetRegionEntryAddr(Function *OutlinedFunction,
                                      StringRef EntryFnName);

public:
/// Callback type used to generate a function with the given name.
using FunctionGenCallback = std::function<Function *(StringRef FunctionName)>;

/// Create a unique name for the entry function using the source location
/// information of the current target region. The name will be something like:
///
/// __omp_offloading_DD_FFFF_PP_lBB[_CC]
///
/// where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
/// mangled name of the function that encloses the target region and BB is the
/// line number of the target region. CC is a count added when more than one
/// region is located at the same location.
///
/// If this target outline function is not an offload entry, we don't need to
/// register it. This may happen if it is guarded by an if clause that is
/// false at compile time, or no target archs have been specified.
///
/// The created target region ID is used by the runtime library to identify
/// the current target region, so it only has to be unique and not
/// necessarily point to anything. It could be the pointer to the outlined
/// function that implements the target region, but we aren't using that so
/// that the compiler doesn't need to keep that, and could therefore inline
/// the host function if proven worthwhile during optimization. On the other
/// hand, if emitting code for the device, the ID has to be the function
/// address so that it can be retrieved from the offloading entry and launched
/// by the runtime library.
/// We also mark the outlined function to have
/// external linkage in case we are emitting code for the device, because
/// these functions will be entry points to the device.
///
/// \param InfoManager The info manager keeping track of the offload entries
/// \param EntryInfo The entry information about the function
/// \param GenerateFunctionCallback The callback function to generate the code
/// \param NumTeams Default number of teams
/// \param NumThreads Default number of threads
/// \param IsOffloadEntry Whether the outlined function is an offload entry
///        (per the description above, non-offload entries presumably are
///        not registered - confirm against the definition)
/// \param OutlinedFn Pointer to the outlined function, passed by reference
///        (presumably assigned by this call - confirm against the
///        definition)
/// \param OutlinedFnID The ID to be created, passed by reference
///        (presumably assigned by this call - confirm against the
///        definition)
void emitTargetRegionFunction(OffloadEntriesInfoManager &InfoManager,
                              TargetRegionEntryInfo &EntryInfo,
                              FunctionGenCallback &GenerateFunctionCallback,
                              int32_t NumTeams, int32_t NumThreads,
                              bool IsOffloadEntry, Function *&OutlinedFn,
                              Constant *&OutlinedFnID);

/// Registers the given function and sets up the attributes of the function.
/// Returns the FunctionID.
///
/// \param InfoManager The info manager keeping track of the offload entries
/// \param EntryInfo The entry information about the function
/// \param OutlinedFunction Pointer to the outlined function
/// \param EntryFnName Name of the outlined function
/// \param EntryFnIDName Name of the ID to be created
/// \param NumTeams Default number of teams
/// \param NumThreads Default number of threads
Constant *registerTargetRegionFunction(OffloadEntriesInfoManager &InfoManager,
                                       TargetRegionEntryInfo &EntryInfo,
                                       Function *OutlinedFunction,
                                       StringRef EntryFnName,
                                       StringRef EntryFnIDName,
                                       int32_t NumTeams, int32_t NumThreads);

/// Declarations for LLVM-IR types (simple, array, function and structure) are
/// generated below. Their names are defined and used in OMPKinds.def.
/// Here we provide the declarations; the initializeTypes function will
/// provide the values.
///
///{
#define OMP_TYPE(VarName, InitValue) Type *VarName = nullptr;
#define OMP_ARRAY_TYPE(VarName, ElemTy, ArraySize)                             \
  ArrayType *VarName##Ty = nullptr;                                            \
  PointerType *VarName##PtrTy = nullptr;
#define OMP_FUNCTION_TYPE(VarName, IsVarArg, ReturnType, ...)                  \
  FunctionType *VarName = nullptr;                                             \
  PointerType *VarName##Ptr = nullptr;
#define OMP_STRUCT_TYPE(VarName, StrName, ...)                                 \
  StructType *VarName = nullptr;                                               \
  PointerType *VarName##Ptr = nullptr;
#include "llvm/Frontend/OpenMP/OMPKinds.def"

///}

private:
/// Create all simple and struct types exposed by the runtime and remember
/// the llvm::PointerTypes of them for easy access later.
void initializeTypes(Module &M);

/// Common interface for generating entry calls for OMP Directives.
/// If the directive has a region/body, it will set the insertion
/// point to the body.
///
/// \param OMPD Directive to generate entry blocks for
/// \param EntryCall Call to the entry OMP Runtime Function
/// \param ExitBB block where the region ends.
/// \param Conditional indicate if the entry call result will be used
///        to evaluate a conditional of whether a thread will execute
///        body code or not.
///
/// \return The insertion position in exit block
InsertPointTy emitCommonDirectiveEntry(omp::Directive OMPD, Value *EntryCall,
                                       BasicBlock *ExitBB,
                                       bool Conditional = false);

/// Common interface to finalize the region
///
/// \param OMPD Directive to generate exiting code for
/// \param FinIP Insertion point for emitting Finalization code and exit call
/// \param ExitCall Call to the ending OMP Runtime Function
/// \param HasFinalize indicate if the directive will require finalization
///        and has a finalization callback in the stack that
///        should be called.
///
/// \return The insertion position in exit block
InsertPointTy emitCommonDirectiveExit(omp::Directive OMPD,
                                      InsertPointTy FinIP,
                                      Instruction *ExitCall,
                                      bool HasFinalize = true);

/// Common Interface to generate OMP inlined regions
///
/// \param OMPD Directive to generate inlined region for
/// \param EntryCall Call to the entry OMP Runtime Function
/// \param ExitCall Call to the ending OMP Runtime Function
/// \param BodyGenCB Body code generation callback.
/// \param FiniCB Finalization Callback. Will be called when finalizing region
/// \param Conditional indicate if the entry call result will be used
///        to evaluate a conditional of whether a thread will execute
///        body code or not.
/// \param HasFinalize indicate if the directive will require finalization
///        and has a finalization callback in the stack that
///        should be called.
/// \param IsCancellable if HasFinalize is set to true, indicate if the
///        directive should be cancellable.
/// \return The insertion point after the region
InsertPointTy
EmitOMPInlinedRegion(omp::Directive OMPD, Instruction *EntryCall,
                     Instruction *ExitCall, BodyGenCallbackTy BodyGenCB,
                     FinalizeCallbackTy FiniCB, bool Conditional = false,
                     bool HasFinalize = true, bool IsCancellable = false);

/// Get the name built from \p Parts using the platform-specific name
/// separators.
/// \param Parts different parts of the final name that needs separation
/// \param FirstSeparator First separator used between the initial two
///        parts of the name.
/// \param Separator separator used between all of the rest consecutive
///        parts of the name
static std::string getNameWithSeparators(ArrayRef<StringRef> Parts,
                                         StringRef FirstSeparator,
                                         StringRef Separator);

/// Returns corresponding lock object for the specified critical region
/// name. If the lock object does not exist it is created, otherwise the
/// reference to the existing copy is returned.
/// \param CriticalName Name of the critical region.
///
Value *getOMPCriticalRegionLock(StringRef CriticalName);

/// Callback type for Atomic Expression update
/// ex:
/// \code{.cpp}
/// unsigned x = 0;
/// #pragma omp atomic update
/// x = Expr(x_old);  //Expr() is any legal operation
/// \endcode
///
/// \param XOld the value of the atomic memory address to use for update
/// \param IRB reference to the IRBuilder to use
///
/// \returns Value to update X to.
using AtomicUpdateCallbackTy =
    const function_ref<Value *(Value *XOld, IRBuilder<> &IRB)>;

private:
/// Kinds of OpenMP atomic operation.
enum AtomicKind { Read, Write, Update, Capture, Compare };

/// Determine whether to emit flush or not
///
/// \param Loc The insert and source location description.
/// \param AO The required atomic ordering
/// \param AK The OpenMP atomic operation kind used.
///
/// \returns whether a flush was emitted or not
bool checkAndEmitFlushAfterAtomic(const LocationDescription &Loc,
                                  AtomicOrdering AO, AtomicKind AK);

/// Emit atomic update for constructs: X = X BinOp Expr ,or X = Expr BinOp X
/// For complex Operations: X = UpdateOp(X) => CmpExch X, old_X, UpdateOp(X)
/// Only Scalar data types.
///
/// \param AllocaIP The insertion point to be used for alloca
///        instructions.
/// \param X The target atomic pointer to be updated
/// \param XElemTy The element type of the atomic pointer.
/// \param Expr The value to update X with.
/// \param AO Atomic ordering of the generated atomic
///        instructions.
/// \param RMWOp The binary operation used for update. If
///        operation is not supported by atomicRMW,
///        or belong to {FADD, FSUB, BAD_BINOP}.
///        Then a `cmpExch` based atomic will be generated.
/// \param UpdateOp Code generator for complex expressions that cannot be
///        expressed through atomicrmw instruction.
/// \param VolatileX true if \a X is volatile.
/// \param IsXBinopExpr true if \a X is Left H.S. in Right H.S. part of the
///        update expression, false otherwise.
///        (e.g. true for X = X BinOp Expr)
///
/// \returns A pair of the old value of X before the update, and the value
///          used for the update.
std::pair<Value *, Value *>
emitAtomicUpdate(InsertPointTy AllocaIP, Value *X, Type *XElemTy, Value *Expr,
                 AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp,
                 AtomicUpdateCallbackTy &UpdateOp, bool VolatileX,
                 bool IsXBinopExpr);

/// Emit the binary op. described by \p RMWOp, using \p Src1 and \p Src2 .
///
/// \return The instruction
Value *emitRMWOpAsInstruction(Value *Src1, Value *Src2,
                              AtomicRMWInst::BinOp RMWOp);

public:
/// A struct to pack relevant information while generating atomic Ops.
struct AtomicOpValue {
  Value *Var = nullptr;
  Type *ElemTy = nullptr;
  bool IsSigned = false;
  bool IsVolatile = false;
};

/// Emit atomic Read for : V = X --- Only Scalar data types.
///
/// \param Loc The insert and source location description.
/// \param X The target pointer to be atomically read
/// \param V Memory address where to store atomically read
///        value
/// \param AO Atomic ordering of the generated atomic
///        instructions.
///
/// \return Insertion point after generated atomic read IR.
InsertPointTy createAtomicRead(const LocationDescription &Loc,
                               AtomicOpValue &X, AtomicOpValue &V,
                               AtomicOrdering AO);

/// Emit atomic write for : X = Expr --- Only Scalar data types.
///
/// \param Loc The insert and source location description.
/// \param X The target pointer to be atomically written to
/// \param Expr The value to store.
/// \param AO Atomic ordering of the generated atomic
///        instructions.
///
/// \return Insertion point after generated atomic Write IR.
InsertPointTy createAtomicWrite(const LocationDescription &Loc,
                                AtomicOpValue &X, Value *Expr,
                                AtomicOrdering AO);

/// Emit atomic update for constructs: X = X BinOp Expr ,or X = Expr BinOp X
/// For complex Operations: X = UpdateOp(X) => CmpExch X, old_X, UpdateOp(X)
/// Only Scalar data types.
///
/// \param Loc The insert and source location description.
/// \param AllocaIP The insertion point to be used for alloca instructions.
/// \param X The target atomic pointer to be updated
/// \param Expr The value to update X with.
/// \param AO Atomic ordering of the generated atomic instructions.
/// \param RMWOp The binary operation used for update. If operation
///        is not supported by atomicRMW, or belong to
///        {FADD, FSUB, BAD_BINOP}. Then a `cmpExch` based
///        atomic will be generated.
/// \param UpdateOp Code generator for complex expressions that cannot be
///        expressed through atomicrmw instruction.
/// \param IsXBinopExpr true if \a X is Left H.S. in Right H.S. part of the
///        update expression, false otherwise.
///        (e.g. true for X = X BinOp Expr)
///
/// \return Insertion point after generated atomic update IR.
InsertPointTy createAtomicUpdate(const LocationDescription &Loc,
                                 InsertPointTy AllocaIP, AtomicOpValue &X,
                                 Value *Expr, AtomicOrdering AO,
                                 AtomicRMWInst::BinOp RMWOp,
                                 AtomicUpdateCallbackTy &UpdateOp,
                                 bool IsXBinopExpr);

/// Emit atomic update for constructs: --- Only Scalar data types
/// V = X; X = X BinOp Expr ,
/// X = X BinOp Expr; V = X,
/// V = X; X = Expr BinOp X,
/// X = Expr BinOp X; V = X,
/// V = X; X = UpdateOp(X),
/// X = UpdateOp(X); V = X,
///
/// \param Loc The insert and source location description.
/// \param AllocaIP The insertion point to be used for alloca instructions.
/// \param X The target atomic pointer to be updated
/// \param V Memory address where to store captured value
/// \param Expr The value to update X with.
/// \param AO Atomic ordering of the generated atomic instructions
/// \param RMWOp The binary operation used for update. If
///        operation is not supported by atomicRMW, or belong to
///        {FADD, FSUB, BAD_BINOP}. Then a cmpExch based
///        atomic will be generated.
/// \param UpdateOp Code generator for complex expressions that cannot be
///        expressed through atomicrmw instruction.
/// \param UpdateExpr true if X is an in place update of the form
///        X = X BinOp Expr or X = Expr BinOp X
/// \param IsPostfixUpdate true if original value of 'x' must be stored in
///        'v', not an updated one.
/// \param IsXBinopExpr true if X is Left H.S. in Right H.S. part of the
///        update expression, false otherwise.
///        (e.g. true for X = X BinOp Expr)
///
/// \return Insertion point after generated atomic capture IR.
InsertPointTy
createAtomicCapture(const LocationDescription &Loc, InsertPointTy AllocaIP,
                    AtomicOpValue &X, AtomicOpValue &V, Value *Expr,
                    AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp,
                    AtomicUpdateCallbackTy &UpdateOp, bool UpdateExpr,
                    bool IsPostfixUpdate, bool IsXBinopExpr);

/// Emit atomic compare for constructs: --- Only scalar data types
/// cond-expr-stmt:
/// x = x ordop expr ? expr : x;
/// x = expr ordop x ? expr : x;
/// x = x == e ? d : x;
/// x = e == x ? d : x; (this one is not in the spec)
/// cond-update-stmt:
/// if (x ordop expr) { x = expr; }
/// if (expr ordop x) { x = expr; }
/// if (x == e) { x = d; }
/// if (e == x) { x = d; } (this one is not in the spec)
/// conditional-update-capture-atomic:
/// v = x; cond-update-stmt; (IsPostfixUpdate=true, IsFailOnly=false)
/// cond-update-stmt; v = x; (IsPostfixUpdate=false, IsFailOnly=false)
/// if (x == e) { x = d; } else { v = x; } (IsPostfixUpdate=false,
///                                         IsFailOnly=true)
/// r = x == e; if (r) { x = d; } (IsPostfixUpdate=false, IsFailOnly=false)
/// r = x == e; if (r) { x = d; } else { v = x; } (IsPostfixUpdate=false,
///                                                IsFailOnly=true)
///
/// \param Loc The insert and source location description.
/// \param X The target atomic pointer to be updated.
/// \param V Memory address where to store captured value (for
///        compare capture only).
/// \param R Memory address where to store comparison result
///        (for compare capture with '==' only).
/// \param E The expected value ('e') for forms that use an
///        equality comparison or an expression ('expr') for
///        forms that use 'ordop' (logically an atomic maximum or
///        minimum).
/// \param D The desired value for forms that use an equality
///        comparison. For forms that use 'ordop', it should be
///        \p nullptr.
/// \param AO Atomic ordering of the generated atomic instructions.
/// \param Op Atomic compare operation. It can only be ==, <, or >.
/// \param IsXBinopExpr True if the conditional statement is in the form where
///        x is on LHS. It only matters for < or >.
/// \param IsPostfixUpdate True if original value of 'x' must be stored in
///        'v', not an updated one (for compare capture
///        only).
/// \param IsFailOnly True if the original value of 'x' is stored to 'v'
///        only when the comparison fails. This is only valid for
///        the case the comparison is '=='.
///
/// \return Insertion point after generated atomic capture IR.
InsertPointTy
createAtomicCompare(const LocationDescription &Loc, AtomicOpValue &X,
                    AtomicOpValue &V, AtomicOpValue &R, Value *E, Value *D,
                    AtomicOrdering AO, omp::OMPAtomicCompareOp Op,
                    bool IsXBinopExpr, bool IsPostfixUpdate, bool IsFailOnly);

/// Create the control flow structure of a canonical OpenMP loop.
///
/// The emitted loop will be disconnected, i.e. no edge to the loop's
/// preheader and no terminator in the AfterBB. The OpenMPIRBuilder's
/// IRBuilder location is not preserved.
///
/// \param DL DebugLoc used for the instructions in the skeleton.
/// \param TripCount Value to be used for the trip count.
/// \param F Function in which to insert the BasicBlocks.
/// \param PreInsertBefore Where to insert BBs that execute before the body,
///        typically the body itself.
/// \param PostInsertBefore Where to insert BBs that execute after the body.
/// \param Name Base name used to derive BB
///        and instruction names.
///
/// \returns The CanonicalLoopInfo that represents the emitted loop.
CanonicalLoopInfo *createLoopSkeleton(DebugLoc DL, Value *TripCount,
                                      Function *F,
                                      BasicBlock *PreInsertBefore,
                                      BasicBlock *PostInsertBefore,
                                      const Twine &Name = {});

/// OMP Offload Info Metadata name string
const std::string ompOffloadInfoName = "omp_offload.info";

/// Loads all the offload entries information from the host IR
/// metadata. This function is only meant to be used with device code
/// generation.
///
/// \param M Module to load Metadata info from. The module passed may be
///        loaded from a bitcode file, i.e., different from the
///        OpenMPIRBuilder::M module.
/// \param OffloadEntriesInfoManager Initialize Offload Entry information.
void
loadOffloadInfoMetadata(Module &M,
                        OffloadEntriesInfoManager &OffloadEntriesInfoManager);

/// Gets (if a variable with the given name already exists) or creates an
/// internal global variable with the specified Name. The created variable has
/// linkage CommonLinkage by default and is initialized by null value.
/// \param Ty Type of the global variable. If it already exists the type
///        must be the same.
/// \param Name Name of the variable.
GlobalVariable *getOrCreateInternalVariable(Type *Ty, const StringRef &Name,
                                            unsigned AddressSpace = 0);
};

/// Data structure to contain the information needed to uniquely identify
/// a target entry.
1903 struct TargetRegionEntryInfo { 1904 std::string ParentName; 1905 unsigned DeviceID; 1906 unsigned FileID; 1907 unsigned Line; 1908 unsigned Count; 1909 TargetRegionEntryInfoTargetRegionEntryInfo1910 TargetRegionEntryInfo() 1911 : ParentName(""), DeviceID(0), FileID(0), Line(0), Count(0) {} 1912 TargetRegionEntryInfo(StringRef ParentName, unsigned DeviceID, 1913 unsigned FileID, unsigned Line, unsigned Count = 0) ParentNameTargetRegionEntryInfo1914 : ParentName(ParentName), DeviceID(DeviceID), FileID(FileID), Line(Line), 1915 Count(Count) {} 1916 1917 static void getTargetRegionEntryFnName(SmallVectorImpl<char> &Name, 1918 StringRef ParentName, 1919 unsigned DeviceID, unsigned FileID, 1920 unsigned Line, unsigned Count); 1921 1922 bool operator<(const TargetRegionEntryInfo RHS) const { 1923 return std::make_tuple(ParentName, DeviceID, FileID, Line, Count) < 1924 std::make_tuple(RHS.ParentName, RHS.DeviceID, RHS.FileID, RHS.Line, 1925 RHS.Count); 1926 } 1927 }; 1928 1929 /// Class that manages information about offload code regions and data 1930 class OffloadEntriesInfoManager { 1931 /// Number of entries registered so far. 1932 OpenMPIRBuilderConfig Config; 1933 unsigned OffloadingEntriesNum = 0; 1934 1935 public: setConfig(OpenMPIRBuilderConfig C)1936 void setConfig(OpenMPIRBuilderConfig C) { Config = C; } 1937 1938 /// Base class of the entries info. 1939 class OffloadEntryInfo { 1940 public: 1941 /// Kind of a given entry. 1942 enum OffloadingEntryInfoKinds : unsigned { 1943 /// Entry is a target region. 1944 OffloadingEntryInfoTargetRegion = 0, 1945 /// Entry is a declare target variable. 1946 OffloadingEntryInfoDeviceGlobalVar = 1, 1947 /// Invalid entry info. 
1948 OffloadingEntryInfoInvalid = ~0u 1949 }; 1950 1951 protected: 1952 OffloadEntryInfo() = delete; OffloadEntryInfo(OffloadingEntryInfoKinds Kind)1953 explicit OffloadEntryInfo(OffloadingEntryInfoKinds Kind) : Kind(Kind) {} OffloadEntryInfo(OffloadingEntryInfoKinds Kind,unsigned Order,uint32_t Flags)1954 explicit OffloadEntryInfo(OffloadingEntryInfoKinds Kind, unsigned Order, 1955 uint32_t Flags) 1956 : Flags(Flags), Order(Order), Kind(Kind) {} 1957 ~OffloadEntryInfo() = default; 1958 1959 public: isValid()1960 bool isValid() const { return Order != ~0u; } getOrder()1961 unsigned getOrder() const { return Order; } getKind()1962 OffloadingEntryInfoKinds getKind() const { return Kind; } getFlags()1963 uint32_t getFlags() const { return Flags; } setFlags(uint32_t NewFlags)1964 void setFlags(uint32_t NewFlags) { Flags = NewFlags; } getAddress()1965 Constant *getAddress() const { return cast_or_null<Constant>(Addr); } setAddress(Constant * V)1966 void setAddress(Constant *V) { 1967 assert(!Addr.pointsToAliveValue() && "Address has been set before!"); 1968 Addr = V; 1969 } classof(const OffloadEntryInfo * Info)1970 static bool classof(const OffloadEntryInfo *Info) { return true; } 1971 1972 private: 1973 /// Address of the entity that has to be mapped for offloading. 1974 WeakTrackingVH Addr; 1975 1976 /// Flags associated with the device global. 1977 uint32_t Flags = 0u; 1978 1979 /// Order this entry was emitted. 1980 unsigned Order = ~0u; 1981 1982 OffloadingEntryInfoKinds Kind = OffloadingEntryInfoInvalid; 1983 }; 1984 1985 /// Return true if a there are no entries defined. 1986 bool empty() const; 1987 /// Return number of entries defined so far. size()1988 unsigned size() const { return OffloadingEntriesNum; } 1989 OffloadEntriesInfoManager()1990 OffloadEntriesInfoManager() : Config() {} 1991 1992 // 1993 // Target region entries related. 1994 // 1995 1996 /// Kind of the target registry entry. 
1997 enum OMPTargetRegionEntryKind : uint32_t { 1998 /// Mark the entry as target region. 1999 OMPTargetRegionEntryTargetRegion = 0x0, 2000 /// Mark the entry as a global constructor. 2001 OMPTargetRegionEntryCtor = 0x02, 2002 /// Mark the entry as a global destructor. 2003 OMPTargetRegionEntryDtor = 0x04, 2004 }; 2005 2006 /// Target region entries info. 2007 class OffloadEntryInfoTargetRegion final : public OffloadEntryInfo { 2008 /// Address that can be used as the ID of the entry. 2009 Constant *ID = nullptr; 2010 2011 public: OffloadEntryInfoTargetRegion()2012 OffloadEntryInfoTargetRegion() 2013 : OffloadEntryInfo(OffloadingEntryInfoTargetRegion) {} OffloadEntryInfoTargetRegion(unsigned Order,Constant * Addr,Constant * ID,OMPTargetRegionEntryKind Flags)2014 explicit OffloadEntryInfoTargetRegion(unsigned Order, Constant *Addr, 2015 Constant *ID, 2016 OMPTargetRegionEntryKind Flags) 2017 : OffloadEntryInfo(OffloadingEntryInfoTargetRegion, Order, Flags), 2018 ID(ID) { 2019 setAddress(Addr); 2020 } 2021 getID()2022 Constant *getID() const { return ID; } setID(Constant * V)2023 void setID(Constant *V) { 2024 assert(!ID && "ID has been set before!"); 2025 ID = V; 2026 } classof(const OffloadEntryInfo * Info)2027 static bool classof(const OffloadEntryInfo *Info) { 2028 return Info->getKind() == OffloadingEntryInfoTargetRegion; 2029 } 2030 }; 2031 2032 /// Initialize target region entry. 2033 /// This is ONLY needed for DEVICE compilation. 2034 void initializeTargetRegionEntryInfo(const TargetRegionEntryInfo &EntryInfo, 2035 unsigned Order); 2036 /// Register target region entry. 2037 void registerTargetRegionEntryInfo(TargetRegionEntryInfo EntryInfo, 2038 Constant *Addr, Constant *ID, 2039 OMPTargetRegionEntryKind Flags); 2040 /// Return true if a target region entry with the provided information 2041 /// exists. 
2042 bool hasTargetRegionEntryInfo(TargetRegionEntryInfo EntryInfo, 2043 bool IgnoreAddressId = false) const; 2044 2045 // Return the Name based on \a EntryInfo using the next available Count. 2046 void getTargetRegionEntryFnName(SmallVectorImpl<char> &Name, 2047 const TargetRegionEntryInfo &EntryInfo); 2048 2049 /// brief Applies action \a Action on all registered entries. 2050 typedef function_ref<void(const TargetRegionEntryInfo &EntryInfo, 2051 const OffloadEntryInfoTargetRegion &)> 2052 OffloadTargetRegionEntryInfoActTy; 2053 void 2054 actOnTargetRegionEntriesInfo(const OffloadTargetRegionEntryInfoActTy &Action); 2055 2056 // 2057 // Device global variable entries related. 2058 // 2059 2060 /// Kind of the global variable entry.. 2061 enum OMPTargetGlobalVarEntryKind : uint32_t { 2062 /// Mark the entry as a to declare target. 2063 OMPTargetGlobalVarEntryTo = 0x0, 2064 /// Mark the entry as a to declare target link. 2065 OMPTargetGlobalVarEntryLink = 0x1, 2066 }; 2067 2068 /// Device global variable entries info. 2069 class OffloadEntryInfoDeviceGlobalVar final : public OffloadEntryInfo { 2070 /// Type of the global variable. 
2071 int64_t VarSize; 2072 GlobalValue::LinkageTypes Linkage; 2073 2074 public: OffloadEntryInfoDeviceGlobalVar()2075 OffloadEntryInfoDeviceGlobalVar() 2076 : OffloadEntryInfo(OffloadingEntryInfoDeviceGlobalVar) {} OffloadEntryInfoDeviceGlobalVar(unsigned Order,OMPTargetGlobalVarEntryKind Flags)2077 explicit OffloadEntryInfoDeviceGlobalVar(unsigned Order, 2078 OMPTargetGlobalVarEntryKind Flags) 2079 : OffloadEntryInfo(OffloadingEntryInfoDeviceGlobalVar, Order, Flags) {} OffloadEntryInfoDeviceGlobalVar(unsigned Order,Constant * Addr,int64_t VarSize,OMPTargetGlobalVarEntryKind Flags,GlobalValue::LinkageTypes Linkage)2080 explicit OffloadEntryInfoDeviceGlobalVar(unsigned Order, Constant *Addr, 2081 int64_t VarSize, 2082 OMPTargetGlobalVarEntryKind Flags, 2083 GlobalValue::LinkageTypes Linkage) 2084 : OffloadEntryInfo(OffloadingEntryInfoDeviceGlobalVar, Order, Flags), 2085 VarSize(VarSize), Linkage(Linkage) { 2086 setAddress(Addr); 2087 } 2088 getVarSize()2089 int64_t getVarSize() const { return VarSize; } setVarSize(int64_t Size)2090 void setVarSize(int64_t Size) { VarSize = Size; } getLinkage()2091 GlobalValue::LinkageTypes getLinkage() const { return Linkage; } setLinkage(GlobalValue::LinkageTypes LT)2092 void setLinkage(GlobalValue::LinkageTypes LT) { Linkage = LT; } classof(const OffloadEntryInfo * Info)2093 static bool classof(const OffloadEntryInfo *Info) { 2094 return Info->getKind() == OffloadingEntryInfoDeviceGlobalVar; 2095 } 2096 }; 2097 2098 /// Initialize device global variable entry. 2099 /// This is ONLY used for DEVICE compilation. 2100 void initializeDeviceGlobalVarEntryInfo(StringRef Name, 2101 OMPTargetGlobalVarEntryKind Flags, 2102 unsigned Order); 2103 2104 /// Register device global variable entry. 
2105 void registerDeviceGlobalVarEntryInfo(StringRef VarName, Constant *Addr, 2106 int64_t VarSize, 2107 OMPTargetGlobalVarEntryKind Flags, 2108 GlobalValue::LinkageTypes Linkage); 2109 /// Checks if the variable with the given name has been registered already. hasDeviceGlobalVarEntryInfo(StringRef VarName)2110 bool hasDeviceGlobalVarEntryInfo(StringRef VarName) const { 2111 return OffloadEntriesDeviceGlobalVar.count(VarName) > 0; 2112 } 2113 /// Applies action \a Action on all registered entries. 2114 typedef function_ref<void(StringRef, const OffloadEntryInfoDeviceGlobalVar &)> 2115 OffloadDeviceGlobalVarEntryInfoActTy; 2116 void actOnDeviceGlobalVarEntriesInfo( 2117 const OffloadDeviceGlobalVarEntryInfoActTy &Action); 2118 2119 private: 2120 /// Return the count of entries at a particular source location. 2121 unsigned 2122 getTargetRegionEntryInfoCount(const TargetRegionEntryInfo &EntryInfo) const; 2123 2124 /// Update the count of entries at a particular source location. 2125 void 2126 incrementTargetRegionEntryInfoCount(const TargetRegionEntryInfo &EntryInfo); 2127 2128 static TargetRegionEntryInfo getTargetRegionEntryCountKey(const TargetRegionEntryInfo & EntryInfo)2129 getTargetRegionEntryCountKey(const TargetRegionEntryInfo &EntryInfo) { 2130 return TargetRegionEntryInfo(EntryInfo.ParentName, EntryInfo.DeviceID, 2131 EntryInfo.FileID, EntryInfo.Line, 0); 2132 } 2133 2134 // Count of entries at a location. 2135 std::map<TargetRegionEntryInfo, unsigned> OffloadEntriesTargetRegionCount; 2136 2137 // Storage for target region entries kind. 2138 typedef std::map<TargetRegionEntryInfo, OffloadEntryInfoTargetRegion> 2139 OffloadEntriesTargetRegionTy; 2140 OffloadEntriesTargetRegionTy OffloadEntriesTargetRegion; 2141 /// Storage for device global variable entries kind. The storage is to be 2142 /// indexed by mangled name. 
2143 typedef StringMap<OffloadEntryInfoDeviceGlobalVar> 2144 OffloadEntriesDeviceGlobalVarTy; 2145 OffloadEntriesDeviceGlobalVarTy OffloadEntriesDeviceGlobalVar; 2146 }; 2147 2148 /// Class to represented the control flow structure of an OpenMP canonical loop. 2149 /// 2150 /// The control-flow structure is standardized for easy consumption by 2151 /// directives associated with loops. For instance, the worksharing-loop 2152 /// construct may change this control flow such that each loop iteration is 2153 /// executed on only one thread. The constraints of a canonical loop in brief 2154 /// are: 2155 /// 2156 /// * The number of loop iterations must have been computed before entering the 2157 /// loop. 2158 /// 2159 /// * Has an (unsigned) logical induction variable that starts at zero and 2160 /// increments by one. 2161 /// 2162 /// * The loop's CFG itself has no side-effects. The OpenMP specification 2163 /// itself allows side-effects, but the order in which they happen, including 2164 /// how often or whether at all, is unspecified. We expect that the frontend 2165 /// will emit those side-effect instructions somewhere (e.g. before the loop) 2166 /// such that the CanonicalLoopInfo itself can be side-effect free. 2167 /// 2168 /// Keep in mind that CanonicalLoopInfo is meant to only describe a repeated 2169 /// execution of a loop body that satifies these constraints. It does NOT 2170 /// represent arbitrary SESE regions that happen to contain a loop. Do not use 2171 /// CanonicalLoopInfo for such purposes. 
///
/// The control flow can be described as follows:
///
///     Preheader
///        |
///  /-> Header
///  |     |
///  |    Cond---\
///  |     |     |
///  |    Body   |
///  |    | |    |
///  |   <...>   |
///  |    | |    |
///   \--Latch   |
///              |
///             Exit
///              |
///            After
///
/// The loop is thought to start at PreheaderIP (at the Preheader's terminator,
/// including) and end at AfterIP (at the After's first instruction, excluding).
/// That is, instructions in the Preheader and After blocks (except the
/// Preheader's terminator) are out of CanonicalLoopInfo's control and may have
/// side-effects. Typically, the Preheader is used to compute the loop's trip
/// count. The instructions from BodyIP (at the Body block's first instruction,
/// excluding) until the Latch are also considered outside CanonicalLoopInfo's
/// control and thus can have side-effects. The body block is the single entry
/// point into the loop body, which may contain arbitrary control flow as long
/// as all control paths eventually branch to the Latch block.
///
/// TODO: Consider adding another standardized BasicBlock between Body CFG and
/// Latch to guarantee that there is only a single edge to the latch. It would
/// make loop transformations easier by not needing to consider multiple
/// predecessors of the latch (See redirectAllPredecessorsTo) and would give us
/// an equivalent to PreheaderIP, AfterIP and BodyIP for inserting code that
/// executes after each body iteration.
///
/// There must be no loop-carried dependencies through llvm::Values. This is
/// equivalent to requiring that the Latch has no PHINode and the Header's only
/// PHINode is for the induction variable.
///
/// All code in Header, Cond, Latch and Exit (plus the terminator of the
/// Preheader) are CanonicalLoopInfo's responsibility and their build-up checked
/// by assertOK(). They are expected to not be modified unless explicitly
/// modifying the CanonicalLoopInfo through a method that applies an OpenMP
/// loop-associated construct such as applyWorkshareLoop, tileLoops, unrollLoop,
/// etc. These methods usually invalidate the CanonicalLoopInfo and re-use its
/// basic blocks. After invalidation, the CanonicalLoopInfo must not be used
/// anymore as its underlying control flow may not exist anymore.
/// Loop-transformation methods such as tileLoops, collapseLoops and unrollLoop
/// may also return a new CanonicalLoopInfo that can be passed to other
/// loop-associated construct implementing methods. These loop-transforming
/// methods may either create a new CanonicalLoopInfo usually using
/// createLoopSkeleton and invalidate the input CanonicalLoopInfo, or reuse and
/// modify one of the input CanonicalLoopInfo and return it as representing the
/// modified loop. What is done is an implementation detail of the
/// transformation-implementing method and callers should always assume that the
/// CanonicalLoopInfo passed to it is invalidated and a new object is returned.
/// Returned CanonicalLoopInfo have the same structure and guarantees as the one
/// created by createCanonicalLoop, such that transforming methods do not have
/// to special case where the CanonicalLoopInfo originated from.
///
/// Generally, methods consuming CanonicalLoopInfo do not need an
/// OpenMPIRBuilder::InsertPointTy as argument, but use the locations of the
/// CanonicalLoopInfo to insert new or modify existing instructions.
Unless 2237 /// documented otherwise, methods consuming CanonicalLoopInfo do not invalidate 2238 /// any InsertPoint that is outside CanonicalLoopInfo's control. Specifically, 2239 /// any InsertPoint in the Preheader, After or Block can still be used after 2240 /// calling such a method. 2241 /// 2242 /// TODO: Provide mechanisms for exception handling and cancellation points. 2243 /// 2244 /// Defined outside OpenMPIRBuilder because nested classes cannot be 2245 /// forward-declared, e.g. to avoid having to include the entire OMPIRBuilder.h. 2246 class CanonicalLoopInfo { 2247 friend class OpenMPIRBuilder; 2248 2249 private: 2250 BasicBlock *Header = nullptr; 2251 BasicBlock *Cond = nullptr; 2252 BasicBlock *Latch = nullptr; 2253 BasicBlock *Exit = nullptr; 2254 2255 /// Add the control blocks of this loop to \p BBs. 2256 /// 2257 /// This does not include any block from the body, including the one returned 2258 /// by getBody(). 2259 /// 2260 /// FIXME: This currently includes the Preheader and After blocks even though 2261 /// their content is (mostly) not under CanonicalLoopInfo's control. 2262 /// Re-evaluated whether this makes sense. 2263 void collectControlBlocks(SmallVectorImpl<BasicBlock *> &BBs); 2264 2265 /// Sets the number of loop iterations to the given value. This value must be 2266 /// valid in the condition block (i.e., defined in the preheader) and is 2267 /// interpreted as an unsigned integer. 2268 void setTripCount(Value *TripCount); 2269 2270 /// Replace all uses of the canonical induction variable in the loop body with 2271 /// a new one. 2272 /// 2273 /// The intended use case is to update the induction variable for an updated 2274 /// iteration space such that it can stay normalized in the 0...tripcount-1 2275 /// range. 
2276 /// 2277 /// The \p Updater is called with the (presumable updated) current normalized 2278 /// induction variable and is expected to return the value that uses of the 2279 /// pre-updated induction values should use instead, typically dependent on 2280 /// the new induction variable. This is a lambda (instead of e.g. just passing 2281 /// the new value) to be able to distinguish the uses of the pre-updated 2282 /// induction variable and uses of the induction varible to compute the 2283 /// updated induction variable value. 2284 void mapIndVar(llvm::function_ref<Value *(Instruction *)> Updater); 2285 2286 public: 2287 /// Returns whether this object currently represents the IR of a loop. If 2288 /// returning false, it may have been consumed by a loop transformation or not 2289 /// been intialized. Do not use in this case; isValid()2290 bool isValid() const { return Header; } 2291 2292 /// The preheader ensures that there is only a single edge entering the loop. 2293 /// Code that must be execute before any loop iteration can be emitted here, 2294 /// such as computing the loop trip count and begin lifetime markers. Code in 2295 /// the preheader is not considered part of the canonical loop. 2296 BasicBlock *getPreheader() const; 2297 2298 /// The header is the entry for each iteration. In the canonical control flow, 2299 /// it only contains the PHINode for the induction variable. getHeader()2300 BasicBlock *getHeader() const { 2301 assert(isValid() && "Requires a valid canonical loop"); 2302 return Header; 2303 } 2304 2305 /// The condition block computes whether there is another loop iteration. If 2306 /// yes, branches to the body; otherwise to the exit block. getCond()2307 BasicBlock *getCond() const { 2308 assert(isValid() && "Requires a valid canonical loop"); 2309 return Cond; 2310 } 2311 2312 /// The body block is the single entry for a loop iteration and not controlled 2313 /// by CanonicalLoopInfo. 
It can contain arbitrary control flow but must 2314 /// eventually branch to the \p Latch block. getBody()2315 BasicBlock *getBody() const { 2316 assert(isValid() && "Requires a valid canonical loop"); 2317 return cast<BranchInst>(Cond->getTerminator())->getSuccessor(0); 2318 } 2319 2320 /// Reaching the latch indicates the end of the loop body code. In the 2321 /// canonical control flow, it only contains the increment of the induction 2322 /// variable. getLatch()2323 BasicBlock *getLatch() const { 2324 assert(isValid() && "Requires a valid canonical loop"); 2325 return Latch; 2326 } 2327 2328 /// Reaching the exit indicates no more iterations are being executed. getExit()2329 BasicBlock *getExit() const { 2330 assert(isValid() && "Requires a valid canonical loop"); 2331 return Exit; 2332 } 2333 2334 /// The after block is intended for clean-up code such as lifetime end 2335 /// markers. It is separate from the exit block to ensure, analogous to the 2336 /// preheader, it having just a single entry edge and being free from PHI 2337 /// nodes should there be multiple loop exits (such as from break 2338 /// statements/cancellations). getAfter()2339 BasicBlock *getAfter() const { 2340 assert(isValid() && "Requires a valid canonical loop"); 2341 return Exit->getSingleSuccessor(); 2342 } 2343 2344 /// Returns the llvm::Value containing the number of loop iterations. It must 2345 /// be valid in the preheader and always interpreted as an unsigned integer of 2346 /// any bit-width. getTripCount()2347 Value *getTripCount() const { 2348 assert(isValid() && "Requires a valid canonical loop"); 2349 Instruction *CmpI = &Cond->front(); 2350 assert(isa<CmpInst>(CmpI) && "First inst must compare IV with TripCount"); 2351 return CmpI->getOperand(1); 2352 } 2353 2354 /// Returns the instruction representing the current logical induction 2355 /// variable. Always unsigned, always starting at 0 with an increment of one. 
getIndVar()2356 Instruction *getIndVar() const { 2357 assert(isValid() && "Requires a valid canonical loop"); 2358 Instruction *IndVarPHI = &Header->front(); 2359 assert(isa<PHINode>(IndVarPHI) && "First inst must be the IV PHI"); 2360 return IndVarPHI; 2361 } 2362 2363 /// Return the type of the induction variable (and the trip count). getIndVarType()2364 Type *getIndVarType() const { 2365 assert(isValid() && "Requires a valid canonical loop"); 2366 return getIndVar()->getType(); 2367 } 2368 2369 /// Return the insertion point for user code before the loop. getPreheaderIP()2370 OpenMPIRBuilder::InsertPointTy getPreheaderIP() const { 2371 assert(isValid() && "Requires a valid canonical loop"); 2372 BasicBlock *Preheader = getPreheader(); 2373 return {Preheader, std::prev(Preheader->end())}; 2374 }; 2375 2376 /// Return the insertion point for user code in the body. getBodyIP()2377 OpenMPIRBuilder::InsertPointTy getBodyIP() const { 2378 assert(isValid() && "Requires a valid canonical loop"); 2379 BasicBlock *Body = getBody(); 2380 return {Body, Body->begin()}; 2381 }; 2382 2383 /// Return the insertion point for user code after the loop. getAfterIP()2384 OpenMPIRBuilder::InsertPointTy getAfterIP() const { 2385 assert(isValid() && "Requires a valid canonical loop"); 2386 BasicBlock *After = getAfter(); 2387 return {After, After->begin()}; 2388 }; 2389 getFunction()2390 Function *getFunction() const { 2391 assert(isValid() && "Requires a valid canonical loop"); 2392 return Header->getParent(); 2393 } 2394 2395 /// Consistency self-check. 2396 void assertOK() const; 2397 2398 /// Invalidate this loop. That is, the underlying IR does not fulfill the 2399 /// requirements of an OpenMP canonical loop anymore. 2400 void invalidate(); 2401 }; 2402 2403 } // end namespace llvm 2404 2405 #endif // LLVM_FRONTEND_OPENMP_OMPIRBUILDER_H 2406