//===- IR/OpenMPIRBuilder.h - OpenMP encoding builder for LLVM IR - C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the OpenMPIRBuilder class and helpers used as a convenient
// way to create LLVM instructions for OpenMP directives.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_FRONTEND_OPENMP_OMPIRBUILDER_H
#define LLVM_FRONTEND_OPENMP_OMPIRBUILDER_H

#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Frontend/OpenMP/OMPConstants.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/Support/Allocator.h"
#include <forward_list>
#include <map>
#include <optional>

namespace llvm {
class CanonicalLoopInfo;
struct TargetRegionEntryInfo;
class OffloadEntriesInfoManager;
class OpenMPIRBuilder;

/// Move the instructions after an InsertPoint to the beginning of another
/// BasicBlock.
///
/// The instructions after \p IP are moved to the beginning of \p New which must
/// not have any PHINodes. If \p CreateBranch is true, a branch instruction to
/// \p New will be added such that there is no semantic change. Otherwise, the
/// \p IP insert block remains degenerate and it is up to the caller to insert a
/// terminator.
void spliceBB(IRBuilderBase::InsertPoint IP, BasicBlock *New,
              bool CreateBranch);

/// Splice a BasicBlock at an IRBuilder's current insertion point. Its new
/// insert location will stick to after the instruction before the insertion
/// point (instead of moving with the instruction the InsertPoint stores
/// internally).
void spliceBB(IRBuilder<> &Builder, BasicBlock *New, bool CreateBranch);

/// Split a BasicBlock at an InsertPoint, even if the block is degenerate
/// (missing the terminator).
///
/// llvm::SplitBasicBlock and BasicBlock::splitBasicBlock require a well-formed
/// BasicBlock. \p Name is used for the new successor block. If \p CreateBranch
/// is true, a branch to the new successor will be created such that
/// semantically there is no change; otherwise the block of the insertion point
/// remains degenerate and it is the caller's responsibility to insert a
/// terminator. Returns the new successor block.
BasicBlock *splitBB(IRBuilderBase::InsertPoint IP, bool CreateBranch,
                    llvm::Twine Name = {});

/// Split a BasicBlock at \p Builder's insertion point, even if the block is
/// degenerate (missing the terminator).  Its new insert location will stick to
/// after the instruction before the insertion point (instead of moving with the
/// instruction the InsertPoint stores internally).
BasicBlock *splitBB(IRBuilderBase &Builder, bool CreateBranch,
                    llvm::Twine Name = {});

/// Split a BasicBlock at \p Builder's insertion point, even if the block is
/// degenerate (missing the terminator).  Its new insert location will stick to
/// after the instruction before the insertion point (instead of moving with the
/// instruction the InsertPoint stores internally).
BasicBlock *splitBB(IRBuilder<> &Builder, bool CreateBranch, llvm::Twine Name);

/// Like splitBB, but reuses the current block's name for the new name.
BasicBlock *splitBBWithSuffix(IRBuilderBase &Builder, bool CreateBranch,
                              llvm::Twine Suffix = ".split");

/// Captures attributes that affect generating LLVM-IR using the
/// OpenMPIRBuilder and related classes. Note that not all attributes are
/// required for all classes or functions.
In some use cases the configuration 81 /// is not necessary at all, because because the only functions that are called 82 /// are ones that are not dependent on the configuration. 83 class OpenMPIRBuilderConfig { 84 public: 85 /// Flag for specifying if the compilation is done for embedded device code 86 /// or host code. 87 std::optional<bool> IsTargetDevice; 88 89 /// Flag for specifying if the compilation is done for an accelerator. 90 std::optional<bool> IsGPU; 91 92 /// Flag for specifying weather a requires unified_shared_memory 93 /// directive is present or not. 94 std::optional<bool> HasRequiresUnifiedSharedMemory; 95 96 // Flag for specifying if offloading is mandatory. 97 std::optional<bool> OpenMPOffloadMandatory; 98 99 /// First separator used between the initial two parts of a name. 100 std::optional<StringRef> FirstSeparator; 101 /// Separator used between all of the rest consecutive parts of s name 102 std::optional<StringRef> Separator; 103 104 OpenMPIRBuilderConfig() {} 105 OpenMPIRBuilderConfig(bool IsTargetDevice, bool IsGPU, 106 bool HasRequiresUnifiedSharedMemory, 107 bool OpenMPOffloadMandatory) 108 : IsTargetDevice(IsTargetDevice), IsGPU(IsGPU), 109 HasRequiresUnifiedSharedMemory(HasRequiresUnifiedSharedMemory), 110 OpenMPOffloadMandatory(OpenMPOffloadMandatory) {} 111 112 // Getters functions that assert if the required values are not present. 
113 bool isTargetDevice() const { 114 assert(IsTargetDevice.has_value() && "IsTargetDevice is not set"); 115 return *IsTargetDevice; 116 } 117 118 bool isGPU() const { 119 assert(IsGPU.has_value() && "IsGPU is not set"); 120 return *IsGPU; 121 } 122 123 bool hasRequiresUnifiedSharedMemory() const { 124 assert(HasRequiresUnifiedSharedMemory.has_value() && 125 "HasUnifiedSharedMemory is not set"); 126 return *HasRequiresUnifiedSharedMemory; 127 } 128 129 bool openMPOffloadMandatory() const { 130 assert(OpenMPOffloadMandatory.has_value() && 131 "OpenMPOffloadMandatory is not set"); 132 return *OpenMPOffloadMandatory; 133 } 134 // Returns the FirstSeparator if set, otherwise use the default separator 135 // depending on isGPU 136 StringRef firstSeparator() const { 137 if (FirstSeparator.has_value()) 138 return *FirstSeparator; 139 if (isGPU()) 140 return "_"; 141 return "."; 142 } 143 144 // Returns the Separator if set, otherwise use the default separator depending 145 // on isGPU 146 StringRef separator() const { 147 if (Separator.has_value()) 148 return *Separator; 149 if (isGPU()) 150 return "$"; 151 return "."; 152 } 153 154 void setIsTargetDevice(bool Value) { IsTargetDevice = Value; } 155 void setIsGPU(bool Value) { IsGPU = Value; } 156 void setHasRequiresUnifiedSharedMemory(bool Value) { 157 HasRequiresUnifiedSharedMemory = Value; 158 } 159 void setFirstSeparator(StringRef FS) { FirstSeparator = FS; } 160 void setSeparator(StringRef S) { Separator = S; } 161 }; 162 163 /// Data structure to contain the information needed to uniquely identify 164 /// a target entry. 
165 struct TargetRegionEntryInfo { 166 std::string ParentName; 167 unsigned DeviceID; 168 unsigned FileID; 169 unsigned Line; 170 unsigned Count; 171 172 TargetRegionEntryInfo() : DeviceID(0), FileID(0), Line(0), Count(0) {} 173 TargetRegionEntryInfo(StringRef ParentName, unsigned DeviceID, 174 unsigned FileID, unsigned Line, unsigned Count = 0) 175 : ParentName(ParentName), DeviceID(DeviceID), FileID(FileID), Line(Line), 176 Count(Count) {} 177 178 static void getTargetRegionEntryFnName(SmallVectorImpl<char> &Name, 179 StringRef ParentName, 180 unsigned DeviceID, unsigned FileID, 181 unsigned Line, unsigned Count); 182 183 bool operator<(const TargetRegionEntryInfo RHS) const { 184 return std::make_tuple(ParentName, DeviceID, FileID, Line, Count) < 185 std::make_tuple(RHS.ParentName, RHS.DeviceID, RHS.FileID, RHS.Line, 186 RHS.Count); 187 } 188 }; 189 190 /// Class that manages information about offload code regions and data 191 class OffloadEntriesInfoManager { 192 /// Number of entries registered so far. 193 OpenMPIRBuilder *OMPBuilder; 194 unsigned OffloadingEntriesNum = 0; 195 196 public: 197 /// Base class of the entries info. 198 class OffloadEntryInfo { 199 public: 200 /// Kind of a given entry. 201 enum OffloadingEntryInfoKinds : unsigned { 202 /// Entry is a target region. 203 OffloadingEntryInfoTargetRegion = 0, 204 /// Entry is a declare target variable. 205 OffloadingEntryInfoDeviceGlobalVar = 1, 206 /// Invalid entry info. 
207 OffloadingEntryInfoInvalid = ~0u 208 }; 209 210 protected: 211 OffloadEntryInfo() = delete; 212 explicit OffloadEntryInfo(OffloadingEntryInfoKinds Kind) : Kind(Kind) {} 213 explicit OffloadEntryInfo(OffloadingEntryInfoKinds Kind, unsigned Order, 214 uint32_t Flags) 215 : Flags(Flags), Order(Order), Kind(Kind) {} 216 ~OffloadEntryInfo() = default; 217 218 public: 219 bool isValid() const { return Order != ~0u; } 220 unsigned getOrder() const { return Order; } 221 OffloadingEntryInfoKinds getKind() const { return Kind; } 222 uint32_t getFlags() const { return Flags; } 223 void setFlags(uint32_t NewFlags) { Flags = NewFlags; } 224 Constant *getAddress() const { return cast_or_null<Constant>(Addr); } 225 void setAddress(Constant *V) { 226 assert(!Addr.pointsToAliveValue() && "Address has been set before!"); 227 Addr = V; 228 } 229 static bool classof(const OffloadEntryInfo *Info) { return true; } 230 231 private: 232 /// Address of the entity that has to be mapped for offloading. 233 WeakTrackingVH Addr; 234 235 /// Flags associated with the device global. 236 uint32_t Flags = 0u; 237 238 /// Order this entry was emitted. 239 unsigned Order = ~0u; 240 241 OffloadingEntryInfoKinds Kind = OffloadingEntryInfoInvalid; 242 }; 243 244 /// Return true if a there are no entries defined. 245 bool empty() const; 246 /// Return number of entries defined so far. 247 unsigned size() const { return OffloadingEntriesNum; } 248 249 OffloadEntriesInfoManager(OpenMPIRBuilder *builder) : OMPBuilder(builder) {} 250 251 // 252 // Target region entries related. 253 // 254 255 /// Kind of the target registry entry. 256 enum OMPTargetRegionEntryKind : uint32_t { 257 /// Mark the entry as target region. 258 OMPTargetRegionEntryTargetRegion = 0x0, 259 /// Mark the entry as a global constructor. 260 OMPTargetRegionEntryCtor = 0x02, 261 /// Mark the entry as a global destructor. 262 OMPTargetRegionEntryDtor = 0x04, 263 }; 264 265 /// Target region entries info. 
266 class OffloadEntryInfoTargetRegion final : public OffloadEntryInfo { 267 /// Address that can be used as the ID of the entry. 268 Constant *ID = nullptr; 269 270 public: 271 OffloadEntryInfoTargetRegion() 272 : OffloadEntryInfo(OffloadingEntryInfoTargetRegion) {} 273 explicit OffloadEntryInfoTargetRegion(unsigned Order, Constant *Addr, 274 Constant *ID, 275 OMPTargetRegionEntryKind Flags) 276 : OffloadEntryInfo(OffloadingEntryInfoTargetRegion, Order, Flags), 277 ID(ID) { 278 setAddress(Addr); 279 } 280 281 Constant *getID() const { return ID; } 282 void setID(Constant *V) { 283 assert(!ID && "ID has been set before!"); 284 ID = V; 285 } 286 static bool classof(const OffloadEntryInfo *Info) { 287 return Info->getKind() == OffloadingEntryInfoTargetRegion; 288 } 289 }; 290 291 /// Initialize target region entry. 292 /// This is ONLY needed for DEVICE compilation. 293 void initializeTargetRegionEntryInfo(const TargetRegionEntryInfo &EntryInfo, 294 unsigned Order); 295 /// Register target region entry. 296 void registerTargetRegionEntryInfo(TargetRegionEntryInfo EntryInfo, 297 Constant *Addr, Constant *ID, 298 OMPTargetRegionEntryKind Flags); 299 /// Return true if a target region entry with the provided information 300 /// exists. 301 bool hasTargetRegionEntryInfo(TargetRegionEntryInfo EntryInfo, 302 bool IgnoreAddressId = false) const; 303 304 // Return the Name based on \a EntryInfo using the next available Count. 305 void getTargetRegionEntryFnName(SmallVectorImpl<char> &Name, 306 const TargetRegionEntryInfo &EntryInfo); 307 308 /// brief Applies action \a Action on all registered entries. 309 typedef function_ref<void(const TargetRegionEntryInfo &EntryInfo, 310 const OffloadEntryInfoTargetRegion &)> 311 OffloadTargetRegionEntryInfoActTy; 312 void 313 actOnTargetRegionEntriesInfo(const OffloadTargetRegionEntryInfoActTy &Action); 314 315 // 316 // Device global variable entries related. 317 // 318 319 /// Kind of the global variable entry.. 
320 enum OMPTargetGlobalVarEntryKind : uint32_t { 321 /// Mark the entry as a to declare target. 322 OMPTargetGlobalVarEntryTo = 0x0, 323 /// Mark the entry as a to declare target link. 324 OMPTargetGlobalVarEntryLink = 0x1, 325 /// Mark the entry as a declare target enter. 326 OMPTargetGlobalVarEntryEnter = 0x2, 327 /// Mark the entry as having no declare target entry kind. 328 OMPTargetGlobalVarEntryNone = 0x3, 329 }; 330 331 /// Kind of device clause for declare target variables 332 /// and functions 333 /// NOTE: Currently not used as a part of a variable entry 334 /// used for Flang and Clang to interface with the variable 335 /// related registration functions 336 enum OMPTargetDeviceClauseKind : uint32_t { 337 /// The target is marked for all devices 338 OMPTargetDeviceClauseAny = 0x0, 339 /// The target is marked for non-host devices 340 OMPTargetDeviceClauseNoHost = 0x1, 341 /// The target is marked for host devices 342 OMPTargetDeviceClauseHost = 0x2, 343 /// The target is marked as having no clause 344 OMPTargetDeviceClauseNone = 0x3 345 }; 346 347 /// Device global variable entries info. 348 class OffloadEntryInfoDeviceGlobalVar final : public OffloadEntryInfo { 349 /// Type of the global variable. 
350 int64_t VarSize; 351 GlobalValue::LinkageTypes Linkage; 352 353 public: 354 OffloadEntryInfoDeviceGlobalVar() 355 : OffloadEntryInfo(OffloadingEntryInfoDeviceGlobalVar) {} 356 explicit OffloadEntryInfoDeviceGlobalVar(unsigned Order, 357 OMPTargetGlobalVarEntryKind Flags) 358 : OffloadEntryInfo(OffloadingEntryInfoDeviceGlobalVar, Order, Flags) {} 359 explicit OffloadEntryInfoDeviceGlobalVar(unsigned Order, Constant *Addr, 360 int64_t VarSize, 361 OMPTargetGlobalVarEntryKind Flags, 362 GlobalValue::LinkageTypes Linkage) 363 : OffloadEntryInfo(OffloadingEntryInfoDeviceGlobalVar, Order, Flags), 364 VarSize(VarSize), Linkage(Linkage) { 365 setAddress(Addr); 366 } 367 368 int64_t getVarSize() const { return VarSize; } 369 void setVarSize(int64_t Size) { VarSize = Size; } 370 GlobalValue::LinkageTypes getLinkage() const { return Linkage; } 371 void setLinkage(GlobalValue::LinkageTypes LT) { Linkage = LT; } 372 static bool classof(const OffloadEntryInfo *Info) { 373 return Info->getKind() == OffloadingEntryInfoDeviceGlobalVar; 374 } 375 }; 376 377 /// Initialize device global variable entry. 378 /// This is ONLY used for DEVICE compilation. 379 void initializeDeviceGlobalVarEntryInfo(StringRef Name, 380 OMPTargetGlobalVarEntryKind Flags, 381 unsigned Order); 382 383 /// Register device global variable entry. 384 void registerDeviceGlobalVarEntryInfo(StringRef VarName, Constant *Addr, 385 int64_t VarSize, 386 OMPTargetGlobalVarEntryKind Flags, 387 GlobalValue::LinkageTypes Linkage); 388 /// Checks if the variable with the given name has been registered already. 389 bool hasDeviceGlobalVarEntryInfo(StringRef VarName) const { 390 return OffloadEntriesDeviceGlobalVar.count(VarName) > 0; 391 } 392 /// Applies action \a Action on all registered entries. 
393 typedef function_ref<void(StringRef, const OffloadEntryInfoDeviceGlobalVar &)> 394 OffloadDeviceGlobalVarEntryInfoActTy; 395 void actOnDeviceGlobalVarEntriesInfo( 396 const OffloadDeviceGlobalVarEntryInfoActTy &Action); 397 398 private: 399 /// Return the count of entries at a particular source location. 400 unsigned 401 getTargetRegionEntryInfoCount(const TargetRegionEntryInfo &EntryInfo) const; 402 403 /// Update the count of entries at a particular source location. 404 void 405 incrementTargetRegionEntryInfoCount(const TargetRegionEntryInfo &EntryInfo); 406 407 static TargetRegionEntryInfo 408 getTargetRegionEntryCountKey(const TargetRegionEntryInfo &EntryInfo) { 409 return TargetRegionEntryInfo(EntryInfo.ParentName, EntryInfo.DeviceID, 410 EntryInfo.FileID, EntryInfo.Line, 0); 411 } 412 413 // Count of entries at a location. 414 std::map<TargetRegionEntryInfo, unsigned> OffloadEntriesTargetRegionCount; 415 416 // Storage for target region entries kind. 417 typedef std::map<TargetRegionEntryInfo, OffloadEntryInfoTargetRegion> 418 OffloadEntriesTargetRegionTy; 419 OffloadEntriesTargetRegionTy OffloadEntriesTargetRegion; 420 /// Storage for device global variable entries kind. The storage is to be 421 /// indexed by mangled name. 422 typedef StringMap<OffloadEntryInfoDeviceGlobalVar> 423 OffloadEntriesDeviceGlobalVarTy; 424 OffloadEntriesDeviceGlobalVarTy OffloadEntriesDeviceGlobalVar; 425 }; 426 427 /// An interface to create LLVM-IR for OpenMP directives. 428 /// 429 /// Each OpenMP directive has a corresponding public generator method. 430 class OpenMPIRBuilder { 431 public: 432 /// Create a new OpenMPIRBuilder operating on the given module \p M. 
This will 433 /// not have an effect on \p M (see initialize) 434 OpenMPIRBuilder(Module &M) 435 : M(M), Builder(M.getContext()), OffloadInfoManager(this) {} 436 ~OpenMPIRBuilder(); 437 438 /// Initialize the internal state, this will put structures types and 439 /// potentially other helpers into the underlying module. Must be called 440 /// before any other method and only once! This internal state includes 441 /// Types used in the OpenMPIRBuilder generated from OMPKinds.def as well 442 /// as loading offload metadata for device from the OpenMP host IR file 443 /// passed in as the HostFilePath argument. 444 /// \param HostFilePath The path to the host IR file, used to load in 445 /// offload metadata for the device, allowing host and device to 446 /// maintain the same metadata mapping. 447 void initialize(StringRef HostFilePath = {}); 448 449 void setConfig(OpenMPIRBuilderConfig C) { Config = C; } 450 451 /// Finalize the underlying module, e.g., by outlining regions. 452 /// \param Fn The function to be finalized. If not used, 453 /// all functions are finalized. 454 void finalize(Function *Fn = nullptr); 455 456 /// Add attributes known for \p FnID to \p Fn. 457 void addAttributes(omp::RuntimeFunction FnID, Function &Fn); 458 459 /// Type used throughout for insertion points. 460 using InsertPointTy = IRBuilder<>::InsertPoint; 461 462 /// Get the create a name using the platform specific separators. 463 /// \param Parts parts of the final name that needs separation 464 /// The created name has a first separator between the first and second part 465 /// and a second separator between all other parts. 466 /// E.g. with FirstSeparator "$" and Separator "." and 467 /// parts: "p1", "p2", "p3", "p4" 468 /// The resulting name is "p1$p2.p3.p4" 469 /// The separators are retrieved from the OpenMPIRBuilderConfig. 470 std::string createPlatformSpecificName(ArrayRef<StringRef> Parts) const; 471 472 /// Callback type for variable finalization (think destructors). 
473 /// 474 /// \param CodeGenIP is the insertion point at which the finalization code 475 /// should be placed. 476 /// 477 /// A finalize callback knows about all objects that need finalization, e.g. 478 /// destruction, when the scope of the currently generated construct is left 479 /// at the time, and location, the callback is invoked. 480 using FinalizeCallbackTy = std::function<void(InsertPointTy CodeGenIP)>; 481 482 struct FinalizationInfo { 483 /// The finalization callback provided by the last in-flight invocation of 484 /// createXXXX for the directive of kind DK. 485 FinalizeCallbackTy FiniCB; 486 487 /// The directive kind of the innermost directive that has an associated 488 /// region which might require finalization when it is left. 489 omp::Directive DK; 490 491 /// Flag to indicate if the directive is cancellable. 492 bool IsCancellable; 493 }; 494 495 /// Push a finalization callback on the finalization stack. 496 /// 497 /// NOTE: Temporary solution until Clang CG is gone. 498 void pushFinalizationCB(const FinalizationInfo &FI) { 499 FinalizationStack.push_back(FI); 500 } 501 502 /// Pop the last finalization callback from the finalization stack. 503 /// 504 /// NOTE: Temporary solution until Clang CG is gone. 505 void popFinalizationCB() { FinalizationStack.pop_back(); } 506 507 /// Callback type for body (=inner region) code generation 508 /// 509 /// The callback takes code locations as arguments, each describing a 510 /// location where additional instructions can be inserted. 511 /// 512 /// The CodeGenIP may be in the middle of a basic block or point to the end of 513 /// it. The basic block may have a terminator or be degenerate. 
The callback 514 /// function may just insert instructions at that position, but also split the 515 /// block (without the Before argument of BasicBlock::splitBasicBlock such 516 /// that the identify of the split predecessor block is preserved) and insert 517 /// additional control flow, including branches that do not lead back to what 518 /// follows the CodeGenIP. Note that since the callback is allowed to split 519 /// the block, callers must assume that InsertPoints to positions in the 520 /// BasicBlock after CodeGenIP including CodeGenIP itself are invalidated. If 521 /// such InsertPoints need to be preserved, it can split the block itself 522 /// before calling the callback. 523 /// 524 /// AllocaIP and CodeGenIP must not point to the same position. 525 /// 526 /// \param AllocaIP is the insertion point at which new alloca instructions 527 /// should be placed. The BasicBlock it is pointing to must 528 /// not be split. 529 /// \param CodeGenIP is the insertion point at which the body code should be 530 /// placed. 531 using BodyGenCallbackTy = 532 function_ref<void(InsertPointTy AllocaIP, InsertPointTy CodeGenIP)>; 533 534 // This is created primarily for sections construct as llvm::function_ref 535 // (BodyGenCallbackTy) is not storable (as described in the comments of 536 // function_ref class - function_ref contains non-ownable reference 537 // to the callable. 538 using StorableBodyGenCallbackTy = 539 std::function<void(InsertPointTy AllocaIP, InsertPointTy CodeGenIP)>; 540 541 /// Callback type for loop body code generation. 542 /// 543 /// \param CodeGenIP is the insertion point where the loop's body code must be 544 /// placed. This will be a dedicated BasicBlock with a 545 /// conditional branch from the loop condition check and 546 /// terminated with an unconditional branch to the loop 547 /// latch. 548 /// \param IndVar is the induction variable usable at the insertion point. 
549 using LoopBodyGenCallbackTy = 550 function_ref<void(InsertPointTy CodeGenIP, Value *IndVar)>; 551 552 /// Callback type for variable privatization (think copy & default 553 /// constructor). 554 /// 555 /// \param AllocaIP is the insertion point at which new alloca instructions 556 /// should be placed. 557 /// \param CodeGenIP is the insertion point at which the privatization code 558 /// should be placed. 559 /// \param Original The value being copied/created, should not be used in the 560 /// generated IR. 561 /// \param Inner The equivalent of \p Original that should be used in the 562 /// generated IR; this is equal to \p Original if the value is 563 /// a pointer and can thus be passed directly, otherwise it is 564 /// an equivalent but different value. 565 /// \param ReplVal The replacement value, thus a copy or new created version 566 /// of \p Inner. 567 /// 568 /// \returns The new insertion point where code generation continues and 569 /// \p ReplVal the replacement value. 570 using PrivatizeCallbackTy = function_ref<InsertPointTy( 571 InsertPointTy AllocaIP, InsertPointTy CodeGenIP, Value &Original, 572 Value &Inner, Value *&ReplVal)>; 573 574 /// Description of a LLVM-IR insertion point (IP) and a debug/source location 575 /// (filename, line, column, ...). 576 struct LocationDescription { 577 LocationDescription(const IRBuilderBase &IRB) 578 : IP(IRB.saveIP()), DL(IRB.getCurrentDebugLocation()) {} 579 LocationDescription(const InsertPointTy &IP) : IP(IP) {} 580 LocationDescription(const InsertPointTy &IP, const DebugLoc &DL) 581 : IP(IP), DL(DL) {} 582 InsertPointTy IP; 583 DebugLoc DL; 584 }; 585 586 /// Emitter methods for OpenMP directives. 587 /// 588 ///{ 589 590 /// Generator for '#omp barrier' 591 /// 592 /// \param Loc The location where the barrier directive was encountered. 593 /// \param DK The kind of directive that caused the barrier. 594 /// \param ForceSimpleCall Flag to force a simple (=non-cancellation) barrier. 
595 /// \param CheckCancelFlag Flag to indicate a cancel barrier return value 596 /// should be checked and acted upon. 597 /// 598 /// \returns The insertion point after the barrier. 599 InsertPointTy createBarrier(const LocationDescription &Loc, omp::Directive DK, 600 bool ForceSimpleCall = false, 601 bool CheckCancelFlag = true); 602 603 /// Generator for '#omp cancel' 604 /// 605 /// \param Loc The location where the directive was encountered. 606 /// \param IfCondition The evaluated 'if' clause expression, if any. 607 /// \param CanceledDirective The kind of directive that is cancled. 608 /// 609 /// \returns The insertion point after the barrier. 610 InsertPointTy createCancel(const LocationDescription &Loc, Value *IfCondition, 611 omp::Directive CanceledDirective); 612 613 /// Generator for '#omp parallel' 614 /// 615 /// \param Loc The insert and source location description. 616 /// \param AllocaIP The insertion points to be used for alloca instructions. 617 /// \param BodyGenCB Callback that will generate the region code. 618 /// \param PrivCB Callback to copy a given variable (think copy constructor). 619 /// \param FiniCB Callback to finalize variable copies. 620 /// \param IfCondition The evaluated 'if' clause expression, if any. 621 /// \param NumThreads The evaluated 'num_threads' clause expression, if any. 622 /// \param ProcBind The value of the 'proc_bind' clause (see ProcBindKind). 623 /// \param IsCancellable Flag to indicate a cancellable parallel region. 624 /// 625 /// \returns The insertion position *after* the parallel. 626 IRBuilder<>::InsertPoint 627 createParallel(const LocationDescription &Loc, InsertPointTy AllocaIP, 628 BodyGenCallbackTy BodyGenCB, PrivatizeCallbackTy PrivCB, 629 FinalizeCallbackTy FiniCB, Value *IfCondition, 630 Value *NumThreads, omp::ProcBindKind ProcBind, 631 bool IsCancellable); 632 633 /// Generator for the control flow structure of an OpenMP canonical loop. 
634 /// 635 /// This generator operates on the logical iteration space of the loop, i.e. 636 /// the caller only has to provide a loop trip count of the loop as defined by 637 /// base language semantics. The trip count is interpreted as an unsigned 638 /// integer. The induction variable passed to \p BodyGenCB will be of the same 639 /// type and run from 0 to \p TripCount - 1. It is up to the callback to 640 /// convert the logical iteration variable to the loop counter variable in the 641 /// loop body. 642 /// 643 /// \param Loc The insert and source location description. The insert 644 /// location can be between two instructions or the end of a 645 /// degenerate block (e.g. a BB under construction). 646 /// \param BodyGenCB Callback that will generate the loop body code. 647 /// \param TripCount Number of iterations the loop body is executed. 648 /// \param Name Base name used to derive BB and instruction names. 649 /// 650 /// \returns An object representing the created control flow structure which 651 /// can be used for loop-associated directives. 652 CanonicalLoopInfo *createCanonicalLoop(const LocationDescription &Loc, 653 LoopBodyGenCallbackTy BodyGenCB, 654 Value *TripCount, 655 const Twine &Name = "loop"); 656 657 /// Generator for the control flow structure of an OpenMP canonical loop. 658 /// 659 /// Instead of a logical iteration space, this allows specifying user-defined 660 /// loop counter values using increment, upper- and lower bounds. To 661 /// disambiguate the terminology when counting downwards, instead of lower 662 /// bounds we use \p Start for the loop counter value in the first body 663 /// iteration. 664 /// 665 /// Consider the following limitations: 666 /// 667 /// * A loop counter space over all integer values of its bit-width cannot be 668 /// represented. 
E.g using uint8_t, its loop trip count of 256 cannot be 669 /// stored into an 8 bit integer): 670 /// 671 /// DO I = 0, 255, 1 672 /// 673 /// * Unsigned wrapping is only supported when wrapping only "once"; E.g. 674 /// effectively counting downwards: 675 /// 676 /// for (uint8_t i = 100u; i > 0; i += 127u) 677 /// 678 /// 679 /// TODO: May need to add additional parameters to represent: 680 /// 681 /// * Allow representing downcounting with unsigned integers. 682 /// 683 /// * Sign of the step and the comparison operator might disagree: 684 /// 685 /// for (int i = 0; i < 42; i -= 1u) 686 /// 687 // 688 /// \param Loc The insert and source location description. 689 /// \param BodyGenCB Callback that will generate the loop body code. 690 /// \param Start Value of the loop counter for the first iterations. 691 /// \param Stop Loop counter values past this will stop the loop. 692 /// \param Step Loop counter increment after each iteration; negative 693 /// means counting down. 694 /// \param IsSigned Whether Start, Stop and Step are signed integers. 695 /// \param InclusiveStop Whether \p Stop itself is a valid value for the loop 696 /// counter. 697 /// \param ComputeIP Insertion point for instructions computing the trip 698 /// count. Can be used to ensure the trip count is available 699 /// at the outermost loop of a loop nest. If not set, 700 /// defaults to the preheader of the generated loop. 701 /// \param Name Base name used to derive BB and instruction names. 702 /// 703 /// \returns An object representing the created control flow structure which 704 /// can be used for loop-associated directives. 705 CanonicalLoopInfo *createCanonicalLoop(const LocationDescription &Loc, 706 LoopBodyGenCallbackTy BodyGenCB, 707 Value *Start, Value *Stop, Value *Step, 708 bool IsSigned, bool InclusiveStop, 709 InsertPointTy ComputeIP = {}, 710 const Twine &Name = "loop"); 711 712 /// Collapse a loop nest into a single loop. 
713 /// 714 /// Merges loops of a loop nest into a single CanonicalLoopNest representation 715 /// that has the same number of innermost loop iterations as the origin loop 716 /// nest. The induction variables of the input loops are derived from the 717 /// collapsed loop's induction variable. This is intended to be used to 718 /// implement OpenMP's collapse clause. Before applying a directive, 719 /// collapseLoops normalizes a loop nest to contain only a single loop and the 720 /// directive's implementation does not need to handle multiple loops itself. 721 /// This does not remove the need to handle all loop nest handling by 722 /// directives, such as the ordered(<n>) clause or the simd schedule-clause 723 /// modifier of the worksharing-loop directive. 724 /// 725 /// Example: 726 /// \code 727 /// for (int i = 0; i < 7; ++i) // Canonical loop "i" 728 /// for (int j = 0; j < 9; ++j) // Canonical loop "j" 729 /// body(i, j); 730 /// \endcode 731 /// 732 /// After collapsing with Loops={i,j}, the loop is changed to 733 /// \code 734 /// for (int ij = 0; ij < 63; ++ij) { 735 /// int i = ij / 9; 736 /// int j = ij % 9; 737 /// body(i, j); 738 /// } 739 /// \endcode 740 /// 741 /// In the current implementation, the following limitations apply: 742 /// 743 /// * All input loops have an induction variable of the same type. 744 /// 745 /// * The collapsed loop will have the same trip count integer type as the 746 /// input loops. Therefore it is possible that the collapsed loop cannot 747 /// represent all iterations of the input loops. For instance, assuming a 748 /// 32 bit integer type, and two input loops both iterating 2^16 times, the 749 /// theoretical trip count of the collapsed loop would be 2^32 iteration, 750 /// which cannot be represented in an 32-bit integer. Behavior is undefined 751 /// in this case. 752 /// 753 /// * The trip counts of every input loop must be available at \p ComputeIP. 754 /// Non-rectangular loops are not yet supported. 
755 /// 756 /// * At each nest level, code between a surrounding loop and its nested loop 757 /// is hoisted into the loop body, and such code will be executed more 758 /// often than before collapsing (or not at all if any inner loop 759 /// has a trip count of 0). This is permitted by the OpenMP specification. 760 /// 761 /// \param DL Debug location for instructions added for collapsing, 762 /// such as instructions to compute/derive the input loops' 763 /// induction variables. 764 /// \param Loops Loops in the loop nest to collapse. Loops are specified 765 /// from outermost-to-innermost and every control flow of a 766 /// loop's body must pass through its directly nested loop. 767 /// \param ComputeIP Where to insert additional instructions that compute the collapsed 768 /// trip count. If not set, defaults to before the generated 769 /// loop. 770 /// 771 /// \returns The CanonicalLoopInfo object representing the collapsed loop. 772 CanonicalLoopInfo *collapseLoops(DebugLoc DL, 773 ArrayRef<CanonicalLoopInfo *> Loops, 774 InsertPointTy ComputeIP); 775 776 /// Get the default simd alignment value for the given target. 777 /// 778 /// \param TargetTriple Target triple 779 /// \param Features StringMap which describes extra CPU features 780 static unsigned getOpenMPDefaultSimdAlign(const Triple &TargetTriple, 781 const StringMap<bool> &Features); 782 783 /// Retrieve (or create if non-existent) the address of a declare 784 /// target variable, used in conjunction with registerTargetGlobalVariable 785 /// to create declare target global variables. 786 /// 787 /// \param CaptureClause - enumerator corresponding to the OpenMP capture 788 /// clause used in conjunction with the variable being registered (link, 789 /// to, enter).
790 /// \param DeviceClause - enumerator corresponding to the OpenMP device 791 /// clause used in conjunction with the variable being registered (nohost, 792 /// host, any) 793 /// \param IsDeclaration - boolean stating if the variable being registered 794 /// is declaration-only and not a definition 795 /// \param IsExternallyVisible - boolean stating if the variable is externally 796 /// visible 797 /// \param EntryInfo - Unique entry information for the value generated 798 /// using getTargetEntryUniqueInfo, used to name generated pointer references 799 /// to the declare target variable 800 /// \param MangledName - the mangled name of the variable being registered 801 /// \param GeneratedRefs - references generated by invocations of 802 /// registerTargetGlobalVariable invoked from getAddrOfDeclareTargetVar; 803 /// these are required by Clang for bookkeeping. 804 /// \param OpenMPSIMD - if OpenMP SIMD mode is currently enabled 805 /// \param TargetTriple - The OpenMP device target triple(s) we are compiling 806 /// for 807 /// \param LlvmPtrTy - The type of the variable we are generating or 808 /// retrieving an address for 809 /// \param GlobalInitializer - a lambda function which creates a constant 810 /// used for initializing a pointer reference to the variable in certain 811 /// cases. If a nullptr is passed, it will default to utilising the original 812 /// variable to initialize the pointer reference. 813 /// \param VariableLinkage - a lambda function which returns the variable's 814 /// linkage type; if unspecified and a nullptr is given, it will instead 815 /// utilise the linkage stored on the existing global variable in the 816 /// LLVMModule.
817 Constant *getAddrOfDeclareTargetVar( 818 OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind CaptureClause, 819 OffloadEntriesInfoManager::OMPTargetDeviceClauseKind DeviceClause, 820 bool IsDeclaration, bool IsExternallyVisible, 821 TargetRegionEntryInfo EntryInfo, StringRef MangledName, 822 std::vector<GlobalVariable *> &GeneratedRefs, bool OpenMPSIMD, 823 std::vector<Triple> TargetTriple, Type *LlvmPtrTy, 824 std::function<Constant *()> GlobalInitializer, 825 std::function<GlobalValue::LinkageTypes()> VariableLinkage); 826 827 /// Registers a target variable for device or host. 828 /// 829 /// \param CaptureClause - enumerator corresponding to the OpenMP capture 830 /// clause used in conjunction with the variable being registered (link, 831 /// to, enter). 832 /// \param DeviceClause - enumerator corresponding to the OpenMP device 833 /// clause used in conjunction with the variable being registered (nohost, 834 /// host, any) 835 /// \param IsDeclaration - boolean stating if the variable being registered 836 /// is declaration-only and not a definition 837 /// \param IsExternallyVisible - boolean stating if the variable is externally 838 /// visible 839 /// \param EntryInfo - Unique entry information for the value generated 840 /// using getTargetEntryUniqueInfo, used to name generated pointer references 841 /// to the declare target variable 842 /// \param MangledName - the mangled name of the variable being registered 843 /// \param GeneratedRefs - references generated by invocations of 844 /// registerTargetGlobalVariable; these are required by Clang for 845 /// bookkeeping. 846 /// \param OpenMPSIMD - if OpenMP SIMD mode is currently enabled 847 /// \param TargetTriple - The OpenMP device target triple(s) we are compiling 848 /// for 849 /// \param GlobalInitializer - a lambda function which creates a constant 850 /// used for initializing a pointer reference to the variable in certain 851 /// cases.
If a nullptr is passed, it will default to utilising the original 852 /// variable to initialize the pointer reference. 853 /// \param VariableLinkage - a lambda function which returns the variable's 854 /// linkage type; if unspecified and a nullptr is given, it will instead 855 /// utilise the linkage stored on the existing global variable in the 856 /// LLVMModule. 857 /// \param LlvmPtrTy - The type of the variable we are generating or 858 /// retrieving an address for 859 /// \param Addr - the original llvm value (addr) of the variable to be 860 /// registered 861 void registerTargetGlobalVariable( 862 OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind CaptureClause, 863 OffloadEntriesInfoManager::OMPTargetDeviceClauseKind DeviceClause, 864 bool IsDeclaration, bool IsExternallyVisible, 865 TargetRegionEntryInfo EntryInfo, StringRef MangledName, 866 std::vector<GlobalVariable *> &GeneratedRefs, bool OpenMPSIMD, 867 std::vector<Triple> TargetTriple, 868 std::function<Constant *()> GlobalInitializer, 869 std::function<GlobalValue::LinkageTypes()> VariableLinkage, 870 Type *LlvmPtrTy, Constant *Addr); 871 872 private: 873 /// Modifies the canonical loop to be a statically-scheduled workshare loop. 874 /// 875 /// This takes \p CLI, representing a canonical loop such as the one 876 /// created by createCanonicalLoop, and emits additional instructions to 877 /// turn it into a workshare loop. In particular, it calls an OpenMP 878 /// runtime function in the preheader to obtain the loop bounds to be used in 879 /// the current thread, updates the relevant instructions in the canonical 880 /// loop and calls an OpenMP runtime finalization function after the loop. 881 /// 882 /// \param DL Debug location for instructions added for the 883 /// workshare-loop construct itself. 884 /// \param CLI A descriptor of the canonical loop to workshare. 885 /// \param AllocaIP An insertion point for Alloca instructions usable in the 886 /// preheader of the loop.
887 /// \param NeedsBarrier Indicates whether a barrier must be inserted after 888 /// the loop. 889 /// 890 /// \returns Point where to insert code after the workshare construct. 891 InsertPointTy applyStaticWorkshareLoop(DebugLoc DL, CanonicalLoopInfo *CLI, 892 InsertPointTy AllocaIP, 893 bool NeedsBarrier); 894 895 /// Modifies the canonical loop to be a statically-scheduled workshare loop with a 896 /// user-specified chunk size. 897 /// 898 /// \param DL Debug location for instructions added for the 899 /// workshare-loop construct itself. 900 /// \param CLI A descriptor of the canonical loop to workshare. 901 /// \param AllocaIP An insertion point for Alloca instructions usable in 902 /// the preheader of the loop. 903 /// \param NeedsBarrier Indicates whether a barrier must be inserted after the 904 /// loop. 905 /// \param ChunkSize The user-specified chunk size. 906 /// 907 /// \returns Point where to insert code after the workshare construct. 908 InsertPointTy applyStaticChunkedWorkshareLoop(DebugLoc DL, 909 CanonicalLoopInfo *CLI, 910 InsertPointTy AllocaIP, 911 bool NeedsBarrier, 912 Value *ChunkSize); 913 914 /// Modifies the canonical loop to be a dynamically-scheduled workshare loop. 915 /// 916 /// This takes \p CLI, representing a canonical loop such as the one 917 /// created by createCanonicalLoop, and emits additional instructions to 918 /// turn it into a workshare loop. In particular, it calls an OpenMP 919 /// runtime function in the preheader to obtain, and then in each iteration 920 /// to update, the loop counter. 921 /// 922 /// \param DL Debug location for instructions added for the 923 /// workshare-loop construct itself. 924 /// \param CLI A descriptor of the canonical loop to workshare. 925 /// \param AllocaIP An insertion point for Alloca instructions usable in the 926 /// preheader of the loop. 927 /// \param SchedType Type of scheduling to be passed to the init function.
928 /// \param NeedsBarrier Indicates whether a barrier must be inserted after 929 /// the loop. 930 /// \param Chunk The size of loop chunk considered as a unit when 931 /// scheduling. If nullptr, defaults to 1. 932 /// 933 /// \returns Point where to insert code after the workshare construct. 934 InsertPointTy applyDynamicWorkshareLoop(DebugLoc DL, CanonicalLoopInfo *CLI, 935 InsertPointTy AllocaIP, 936 omp::OMPScheduleType SchedType, 937 bool NeedsBarrier, 938 Value *Chunk = nullptr); 939 940 /// Create an alternative version of the loop to support the if clause. 941 /// 942 /// The OpenMP if clause can require generating a second loop. This loop 943 /// will be executed when the if clause condition is not met. createIfVersion 944 /// adds a branch instruction to the copied loop if \p IfCond is not met. 945 /// 946 /// \param Loop Original loop which should be versioned. 947 /// \param IfCond Value which corresponds to the if clause condition. 948 /// \param VMap Value to value map to define relation between 949 /// original and copied loop values and loop blocks. 950 /// \param NamePrefix Optional name prefix for if.then if.else blocks. 951 void createIfVersion(CanonicalLoopInfo *Loop, Value *IfCond, 952 ValueToValueMapTy &VMap, const Twine &NamePrefix = ""); 953 954 public: 955 /// Modifies the canonical loop to be a workshare loop. 956 /// 957 /// This takes \p CLI, representing a canonical loop such as the one 958 /// created by createCanonicalLoop, and emits additional instructions to 959 /// turn it into a workshare loop. In particular, it calls an OpenMP 960 /// runtime function in the preheader to obtain the loop bounds to be used in 961 /// the current thread, updates the relevant instructions in the canonical 962 /// loop and calls an OpenMP runtime finalization function after the loop.
963 /// 964 /// The concrete transformation is done by applyStaticWorkshareLoop, 965 /// applyStaticChunkedWorkshareLoop, or applyDynamicWorkshareLoop, depending 966 /// on the values of \p SchedKind and \p ChunkSize. 967 /// 968 /// \param DL Debug location for instructions added for the 969 /// workshare-loop construct itself. 970 /// \param CLI A descriptor of the canonical loop to workshare. 971 /// \param AllocaIP An insertion point for Alloca instructions usable in the 972 /// preheader of the loop. 973 /// \param NeedsBarrier Indicates whether a barrier must be inserted after 974 /// the loop. 975 /// \param SchedKind Scheduling algorithm to use. 976 /// \param ChunkSize The chunk size for the inner loop. 977 /// \param HasSimdModifier Whether the simd modifier is present in the 978 /// schedule clause. 979 /// \param HasMonotonicModifier Whether the monotonic modifier is present in 980 /// the schedule clause. 981 /// \param HasNonmonotonicModifier Whether the nonmonotonic modifier is 982 /// present in the schedule clause. 983 /// \param HasOrderedClause Whether the (parameterless) ordered clause is 984 /// present. 985 /// 986 /// \returns Point where to insert code after the workshare construct. 987 InsertPointTy applyWorkshareLoop( 988 DebugLoc DL, CanonicalLoopInfo *CLI, InsertPointTy AllocaIP, 989 bool NeedsBarrier, 990 llvm::omp::ScheduleKind SchedKind = llvm::omp::OMP_SCHEDULE_Default, 991 Value *ChunkSize = nullptr, bool HasSimdModifier = false, 992 bool HasMonotonicModifier = false, bool HasNonmonotonicModifier = false, 993 bool HasOrderedClause = false); 994 995 /// Tile a loop nest. 996 /// 997 /// Tiles the loops of \p Loops by the tile sizes in \p TileSizes. Loops in 998 /// \p Loops must be perfectly nested, from outermost to innermost loop 999 /// (i.e. Loops.front() is the outermost loop). The trip count llvm::Value 1000 /// of every loop and every tile size must be usable in the outermost 1001 /// loop's preheader.
This implies that the loop nest is rectangular. 1002 /// 1003 /// Example: 1004 /// \code 1005 /// for (int i = 0; i < 15; ++i) // Canonical loop "i" 1006 /// for (int j = 0; j < 14; ++j) // Canonical loop "j" 1007 /// body(i, j); 1008 /// \endcode 1009 /// 1010 /// After tiling with Loops={i,j} and TileSizes={5,7}, the loop is changed to 1011 /// \code 1012 /// for (int i1 = 0; i1 < 3; ++i1) 1013 /// for (int j1 = 0; j1 < 2; ++j1) 1014 /// for (int i2 = 0; i2 < 5; ++i2) 1015 /// for (int j2 = 0; j2 < 7; ++j2) 1016 /// body(i1*5+i2, j1*7+j2); 1017 /// \endcode 1018 /// 1019 /// The returned vector contains the loops {i1,j1,i2,j2}. The loops i1 and j1 are 1020 /// referred to as the floor loops, and the loops i2 and j2 are the tile loops. Tiling also 1021 /// handles non-constant trip counts, non-constant tile sizes and trip counts 1022 /// that are not multiples of the tile size. In the latter case the tile loop 1023 /// of the last floor-loop iteration will have fewer iterations than specified 1024 /// as its tile size. 1025 /// 1026 /// 1027 /// \param DL Debug location for instructions added by tiling, for 1028 /// instance the floor- and tile trip count computation. 1029 /// \param Loops Loops to tile. The CanonicalLoopInfo objects are 1030 /// invalidated by this method, i.e. should not be used after 1031 /// tiling. 1032 /// \param TileSizes For each loop in \p Loops, the tile size for that 1033 /// dimension. 1034 /// 1035 /// \returns A list of generated loops. Contains twice as many loops as the 1036 /// input loop nest; the first half are the floor loops and the 1037 /// second half are the tile loops. 1038 std::vector<CanonicalLoopInfo *> 1039 tileLoops(DebugLoc DL, ArrayRef<CanonicalLoopInfo *> Loops, 1040 ArrayRef<Value *> TileSizes); 1041 1042 /// Fully unroll a loop. 1043 /// 1044 /// Instead of unrolling the loop immediately (and duplicating its body 1045 /// instructions), it is deferred to LLVM's LoopUnrollPass by adding loop 1046 /// metadata.
1047 /// 1048 /// \param DL Debug location for instructions added by unrolling. 1049 /// \param Loop The loop to unroll. The loop will be invalidated. 1050 void unrollLoopFull(DebugLoc DL, CanonicalLoopInfo *Loop); 1051 1052 /// Fully or partially unroll a loop. How the loop is unrolled is determined 1053 /// using LLVM's LoopUnrollPass. 1054 /// 1055 /// \param DL Debug location for instructions added by unrolling. 1056 /// \param Loop The loop to unroll. The loop will be invalidated. 1057 void unrollLoopHeuristic(DebugLoc DL, CanonicalLoopInfo *Loop); 1058 1059 /// Partially unroll a loop. 1060 /// 1061 /// The CanonicalLoopInfo of the unrolled loop for use with chained 1062 /// loop-associated directives can be requested using \p UnrolledCLI. Not 1063 /// needing the CanonicalLoopInfo allows more efficient code generation by 1064 /// deferring the actual unrolling to the LoopUnrollPass using loop metadata. 1065 /// A loop-associated directive applied to the unrolled loop needs to know the 1066 /// new trip count which means that if using a heuristically determined unroll 1067 /// factor (\p Factor == 0), that factor must be computed immediately. We are 1068 /// using the same logic as the LoopUnrollPass to derive the unroll factor, 1069 /// which, however, assumes that some canonicalization has taken place (e.g. 1070 /// Mem2Reg, LICM, GVN, Inlining, etc.). That is, the heuristic will perform 1071 /// better when the unrolled loop's CanonicalLoopInfo is not needed. 1072 /// 1073 /// \param DL Debug location for instructions added by unrolling. 1074 /// \param Loop The loop to unroll. The loop will be invalidated. 1075 /// \param Factor The factor to unroll the loop by. A factor of 0 1076 /// indicates that a heuristic should be used to determine 1077 /// the unroll-factor. 1078 /// \param UnrolledCLI If non-null, receives the CanonicalLoopInfo of the 1079 /// partially unrolled loop. Otherwise, uses loop metadata 1080 /// to defer unrolling to the LoopUnrollPass.
1081 void unrollLoopPartial(DebugLoc DL, CanonicalLoopInfo *Loop, int32_t Factor, 1082 CanonicalLoopInfo **UnrolledCLI); 1083 1084 /// Add metadata to simd-ize a loop. If \p IfCond is not nullptr, the loop 1085 /// is cloned. The metadata which prevents vectorization is added 1086 /// to the cloned loop. The cloned loop is executed when \p IfCond evaluates 1087 /// to false. 1088 /// 1089 /// \param Loop The loop to simd-ize. 1090 /// \param AlignedVars The map which contains pairs of the pointer 1091 /// and its corresponding alignment. 1092 /// \param IfCond The value which corresponds to the if clause 1093 /// condition. 1094 /// \param Order The enum value corresponding to the order clause. 1095 /// \param Simdlen The Simdlen length to apply to the simd loop. 1096 /// \param Safelen The Safelen length to apply to the simd loop. 1097 void applySimd(CanonicalLoopInfo *Loop, 1098 MapVector<Value *, Value *> AlignedVars, Value *IfCond, 1099 omp::OrderKind Order, ConstantInt *Simdlen, 1100 ConstantInt *Safelen); 1101 1102 /// Generator for '#omp flush' 1103 /// 1104 /// \param Loc The location where the flush directive was encountered. 1105 void createFlush(const LocationDescription &Loc); 1106 1107 /// Generator for '#omp taskwait' 1108 /// 1109 /// \param Loc The location where the taskwait directive was encountered. 1110 void createTaskwait(const LocationDescription &Loc); 1111 1112 /// Generator for '#omp taskyield' 1113 /// 1114 /// \param Loc The location where the taskyield directive was encountered. 1115 void createTaskyield(const LocationDescription &Loc); 1116 1117 /// A struct to pack the relevant information for an OpenMP depend clause.
1118 struct DependData { 1119 omp::RTLDependenceKindTy DepKind = omp::RTLDependenceKindTy::DepUnknown; 1120 Type *DepValueType = nullptr; 1121 Value *DepVal = nullptr; 1122 explicit DependData() = default; 1123 DependData(omp::RTLDependenceKindTy DepKind, Type *DepValueType, 1124 Value *DepVal) 1125 : DepKind(DepKind), DepValueType(DepValueType), DepVal(DepVal) {} 1126 }; // Pointer members default to nullptr so that a default-constructed DependData never exposes indeterminate values. 1127 1128 /// Generator for `#omp task` 1129 /// 1130 /// \param Loc The location where the task construct was encountered. 1131 /// \param AllocaIP The insertion point to be used for alloca instructions. 1132 /// \param BodyGenCB Callback that will generate the region code. 1133 /// \param Tied True if the task is tied, false if the task is untied. 1134 /// \param Final i1 value which is `true` if the task is final, `false` if the 1135 /// task is not final. 1136 /// \param IfCondition i1 value. If it evaluates to `false`, an undeferred 1137 /// task is generated, and the encountering thread must 1138 /// suspend the current task region, for which execution 1139 /// cannot be resumed until execution of the structured 1140 /// block that is associated with the generated task is 1141 /// completed. /// \param Dependencies DependData entries packing the depend clause information for the task; empty by default. 1142 InsertPointTy createTask(const LocationDescription &Loc, 1143 InsertPointTy AllocaIP, BodyGenCallbackTy BodyGenCB, 1144 bool Tied = true, Value *Final = nullptr, 1145 Value *IfCondition = nullptr, 1146 SmallVector<DependData> Dependencies = {}); 1147 1148 /// Generator for the taskgroup construct 1149 /// 1150 /// \param Loc The location where the taskgroup construct was encountered. 1151 /// \param AllocaIP The insertion point to be used for alloca instructions. 1152 /// \param BodyGenCB Callback that will generate the region code.
1153 InsertPointTy createTaskgroup(const LocationDescription &Loc, 1154 InsertPointTy AllocaIP, 1155 BodyGenCallbackTy BodyGenCB); 1156 1157 1158 using FileIdentifierInfoCallbackTy = std::function<std::tuple<std::string, uint64_t>()>; 1159 1160 /// Creates a unique info for a target entry when provided a filename and 1161 /// line number. 1162 /// 1163 /// \param CallBack A callback function which should return the filename the entry 1164 /// resides in as well as the line number for the target entry 1165 /// \param ParentName The name of the parent the target entry resides in, if 1166 /// any. 1167 static TargetRegionEntryInfo 1168 getTargetEntryUniqueInfo(FileIdentifierInfoCallbackTy CallBack, 1169 StringRef ParentName = ""); 1170 1171 /// Functions used to generate reductions. Such functions take two Values 1172 /// representing LHS and RHS of the reduction, respectively, and a reference 1173 /// to the value that is updated to refer to the reduction result. 1174 using ReductionGenTy = 1175 function_ref<InsertPointTy(InsertPointTy, Value *, Value *, Value *&)>; 1176 1177 /// Functions used to generate atomic reductions. Such functions take two 1178 /// Values representing pointers to LHS and RHS of the reduction, as well as 1179 /// the element type of these pointers. They are expected to atomically 1180 /// update the LHS to the reduced value. 1181 using AtomicReductionGenTy = 1182 function_ref<InsertPointTy(InsertPointTy, Type *, Value *, Value *)>; 1183 1184 /// Information about an OpenMP reduction. 1185 struct ReductionInfo { 1186 ReductionInfo(Type *ElementType, Value *Variable, Value *PrivateVariable, 1187 ReductionGenTy ReductionGen, 1188 AtomicReductionGenTy AtomicReductionGen) 1189 : ElementType(ElementType), Variable(Variable), 1190 PrivateVariable(PrivateVariable), ReductionGen(ReductionGen), 1191 AtomicReductionGen(AtomicReductionGen) {} 1192 1193 /// Reduction element type, must match pointee type of variable.
1194 Type *ElementType; 1195 1196 /// Reduction variable of pointer type. 1197 Value *Variable; 1198 1199 /// Thread-private partial reduction variable. 1200 Value *PrivateVariable; 1201 1202 /// Callback for generating the reduction body. The IR produced by this will 1203 /// be used to combine two values in a thread-safe context, e.g., under 1204 /// lock or within the same thread, and therefore need not be atomic. NOTE: this is a function_ref, which does not own the callable; the callable must outlive this object. 1205 ReductionGenTy ReductionGen; 1206 1207 /// Callback for generating the atomic reduction body, may be null. The IR 1208 /// produced by this will be used to atomically combine two values during 1209 /// reduction. If null, the implementation will use the non-atomic version 1210 /// along with the appropriate synchronization mechanisms. NOTE: also a non-owning function_ref; the same lifetime requirement applies. 1211 AtomicReductionGenTy AtomicReductionGen; 1212 }; 1213 1214 // TODO: provide atomic and non-atomic reduction generators for reduction 1215 // operators defined by the OpenMP specification. 1216 1217 /// Generator for '#omp reduction'. 1218 /// 1219 /// Emits the IR instructing the runtime to perform the specific kind of 1220 /// reductions. Expects reduction variables to have been privatized and 1221 /// initialized to reduction-neutral values separately. Emits the calls to 1222 /// runtime functions as well as the reduction function and the basic blocks 1223 /// performing the reduction atomically and non-atomically. 1224 /// 1225 /// The code emitted for the following: 1226 /// 1227 /// \code 1228 /// type var_1; 1229 /// type var_2; 1230 /// #pragma omp <directive> reduction(reduction-op:var_1,var_2) 1231 /// /* body */; 1232 /// \endcode 1233 /// 1234 /// corresponds to the following sketch. 1235 /// 1236 /// \code 1237 /// void _outlined_par() { 1238 /// // N is the number of different reductions.
1239 /// void *red_array[] = {privatized_var_1, privatized_var_2, ...}; 1240 /// switch(__kmpc_reduce(..., N, /*size of data in red array*/, red_array, 1241 /// _omp_reduction_func, 1242 /// _gomp_critical_user.reduction.var)) { 1243 /// case 1: { 1244 /// var_1 = var_1 <reduction-op> privatized_var_1; 1245 /// var_2 = var_2 <reduction-op> privatized_var_2; 1246 /// // ... 1247 /// __kmpc_end_reduce(...); 1248 /// break; 1249 /// } 1250 /// case 2: { 1251 /// _Atomic<ReductionOp>(var_1, privatized_var_1); 1252 /// _Atomic<ReductionOp>(var_2, privatized_var_2); 1253 /// // ... 1254 /// break; 1255 /// } 1256 /// default: break; 1257 /// } 1258 /// } 1259 /// 1260 /// void _omp_reduction_func(void **lhs, void **rhs) { 1261 /// *(type *)lhs[0] = *(type *)lhs[0] <reduction-op> *(type *)rhs[0]; 1262 /// *(type *)lhs[1] = *(type *)lhs[1] <reduction-op> *(type *)rhs[1]; 1263 /// // ... 1264 /// } 1265 /// \endcode 1266 /// 1267 /// \param Loc The location where the reduction was 1268 /// encountered. Must be within the associated 1269 /// directive and after the last local access to the 1270 /// reduction variables. 1271 /// \param AllocaIP An insertion point suitable for allocas usable 1272 /// in reductions. 1273 /// \param ReductionInfos A list of info on each reduction variable. 1274 /// \param IsNoWait A flag set if the reduction is marked as nowait. 1275 InsertPointTy createReductions(const LocationDescription &Loc, 1276 InsertPointTy AllocaIP, 1277 ArrayRef<ReductionInfo> ReductionInfos, 1278 bool IsNoWait = false); 1279 1280 ///} 1281 1282 /// Return the insertion point used by the underlying IRBuilder. 1283 InsertPointTy getInsertionPoint() { return Builder.saveIP(); } 1284 1285 /// Update the internal location to \p Loc. Returns true if the insertion point of \p Loc has a block, false otherwise.
1286 bool updateToLocation(const LocationDescription &Loc) { 1287 Builder.restoreIP(Loc.IP); 1288 Builder.SetCurrentDebugLocation(Loc.DL); 1289 return Loc.IP.getBlock() != nullptr; 1290 } 1291 1292 /// Return the function declaration for the runtime function with \p FnID. 1293 FunctionCallee getOrCreateRuntimeFunction(Module &M, 1294 omp::RuntimeFunction FnID); 1295 /// NOTE(review): presumably the Function* counterpart of getOrCreateRuntimeFunction for \p FnID; confirm against the implementation. 1296 Function *getOrCreateRuntimeFunctionPtr(omp::RuntimeFunction FnID); 1297 1298 /// Return the (LLVM-IR) string describing the source location \p LocStr. 1299 Constant *getOrCreateSrcLocStr(StringRef LocStr, uint32_t &SrcLocStrSize); 1300 1301 /// Return the (LLVM-IR) string describing the default source location. 1302 Constant *getOrCreateDefaultSrcLocStr(uint32_t &SrcLocStrSize); 1303 1304 /// Return the (LLVM-IR) string describing the source location identified by 1305 /// the arguments. 1306 Constant *getOrCreateSrcLocStr(StringRef FunctionName, StringRef FileName, 1307 unsigned Line, unsigned Column, 1308 uint32_t &SrcLocStrSize); 1309 1310 /// Return the (LLVM-IR) string describing the DebugLoc \p DL. Use \p F as 1311 /// fallback if \p DL does not specify the function name. 1312 Constant *getOrCreateSrcLocStr(DebugLoc DL, uint32_t &SrcLocStrSize, 1313 Function *F = nullptr); 1314 1315 /// Return the (LLVM-IR) string describing the source location \p Loc. 1316 Constant *getOrCreateSrcLocStr(const LocationDescription &Loc, 1317 uint32_t &SrcLocStrSize); 1318 1319 /// Return an ident_t* encoding the source location \p SrcLocStr and \p Flags. 1320 /// TODO: Create an enum class for the Reserve2Flags 1321 Constant *getOrCreateIdent(Constant *SrcLocStr, uint32_t SrcLocStrSize, 1322 omp::IdentFlag Flags = omp::IdentFlag(0), 1323 unsigned Reserve2Flags = 0); 1324 1325 /// Create a hidden global flag \p Name in the module with initial value \p 1326 /// Value.
1327 GlobalValue *createGlobalFlag(unsigned Value, StringRef Name); 1328 1329 /// Create an offloading section struct used to register this global at 1330 /// runtime. 1331 /// 1332 /// Type struct __tgt_offload_entry{ 1333 /// void *addr; // Pointer to the offload entry info. 1334 /// // (function or global) 1335 /// char *name; // Name of the function or global. 1336 /// size_t size; // Size of the entry info (0 if it is a function). 1337 /// int32_t flags; 1338 /// int32_t reserved; 1339 /// }; 1340 /// 1341 /// \param Addr The pointer to the global being registered. 1342 /// \param Name The symbol name associated with the global. 1343 /// \param Size The size in bytes of the global (0 for functions). 1344 /// \param Flags Flags associated with the entry. 1345 /// \param SectionName The section this entry will be placed in. 1346 void emitOffloadingEntry(Constant *Addr, StringRef Name, uint64_t Size, 1347 int32_t Flags, 1348 StringRef SectionName = "omp_offloading_entries"); 1349 1350 /// Generate control flow and cleanup for cancellation. 1351 /// 1352 /// \param CancelFlag Flag indicating if the cancellation is performed. 1353 /// \param CanceledDirective The kind of directive that is canceled. 1354 /// \param ExitCB Extra code to be generated in the exit block. 1355 void emitCancelationCheckImpl(Value *CancelFlag, 1356 omp::Directive CanceledDirective, 1357 FinalizeCallbackTy ExitCB = {}); 1358 1359 /// Generate a target region entry call. 1360 /// 1361 /// \param Loc The location at which the request originated and is fulfilled. 1362 /// \param AllocaIP The insertion point to be used for alloca instructions. 1363 /// \param Return Return value of the created function returned by reference. /// \param Ident NOTE(review): presumably the ident_t* source location value for the call; confirm against the implementation. 1364 /// \param DeviceID Identifier for the device via the 'device' clause. 1365 /// \param NumTeams Number of teams for the region via the 'num_teams' clause 1366 /// or 0 if unspecified and -1 if there is no 'teams' clause.
1367 /// \param NumThreads Number of threads via the 'thread_limit' clause. 1368 /// \param HostPtr Pointer to the host-side pointer of the target kernel. 1369 /// \param KernelArgs Array of arguments to the kernel. 1370 InsertPointTy emitTargetKernel(const LocationDescription &Loc, 1371 InsertPointTy AllocaIP, Value *&Return, 1372 Value *Ident, Value *DeviceID, Value *NumTeams, 1373 Value *NumThreads, Value *HostPtr, 1374 ArrayRef<Value *> KernelArgs); 1375 1376 /// Generate a barrier runtime call. 1377 /// 1378 /// \param Loc The location at which the request originated and is fulfilled. 1379 /// \param DK The directive which caused the barrier. 1380 /// \param ForceSimpleCall Flag to force a simple (=non-cancellation) barrier. 1381 /// \param CheckCancelFlag Flag to indicate a cancel barrier return value 1382 /// should be checked and acted upon. 1383 /// 1384 /// \returns The insertion point after the barrier. 1385 InsertPointTy emitBarrierImpl(const LocationDescription &Loc, 1386 omp::Directive DK, bool ForceSimpleCall, 1387 bool CheckCancelFlag); 1388 1389 /// Generate a flush runtime call. 1390 /// 1391 /// \param Loc The location at which the request originated and is fulfilled. 1392 void emitFlush(const LocationDescription &Loc); 1393 1394 /// The finalization stack made up of finalize callbacks currently in-flight, 1395 /// wrapped into FinalizationInfo objects that also reference the finalization 1396 /// target block and the kind of cancellable directive. 1397 SmallVector<FinalizationInfo, 8> FinalizationStack; 1398 1399 /// Return true if the last entry in the finalization stack is of kind \p DK 1400 /// and cancellable. Returns false if the stack is empty. 1401 bool isLastFinalizationInfoCancellable(omp::Directive DK) { 1402 return !FinalizationStack.empty() && 1403 FinalizationStack.back().IsCancellable && 1404 FinalizationStack.back().DK == DK; 1405 } 1406 1407 /// Generate a taskwait runtime call.
1408 /// 1409 /// \param Loc The location at which the request originated and is fulfilled. 1410 void emitTaskwaitImpl(const LocationDescription &Loc); 1411 1412 /// Generate a taskyield runtime call. 1413 /// 1414 /// \param Loc The location at which the request originated and is fulfilled. 1415 void emitTaskyieldImpl(const LocationDescription &Loc); 1416 1417 /// Return the current thread ID. 1418 /// 1419 /// \param Ident The ident (ident_t*) describing the query origin. 1420 Value *getOrCreateThreadID(Value *Ident); 1421 1422 /// The OpenMPIRBuilder Configuration. 1423 OpenMPIRBuilderConfig Config; 1424 1425 /// The underlying LLVM-IR module. 1426 Module &M; 1427 1428 /// The LLVM-IR Builder used to create IR. 1429 IRBuilder<> Builder; 1430 1431 /// Map to remember source location strings. 1432 StringMap<Constant *> SrcLocStrMap; 1433 1434 /// Map to remember existing ident_t*. 1435 DenseMap<std::pair<Constant *, uint64_t>, Constant *> IdentMap; 1436 1437 /// Info manager to keep track of target regions. 1438 OffloadEntriesInfoManager OffloadInfoManager; 1439 1440 /// Helper that contains information about regions we need to outline 1441 /// during finalization. 1442 struct OutlineInfo { 1443 using PostOutlineCBTy = std::function<void(Function &)>; 1444 PostOutlineCBTy PostOutlineCB; 1445 BasicBlock *EntryBB = nullptr, *ExitBB = nullptr, *OuterAllocaBB = nullptr; // Null until assigned, so an unset block is never an indeterminate pointer. 1446 SmallVector<Value *, 2> ExcludeArgsFromAggregate; 1447 1448 /// Collect all blocks in between EntryBB and ExitBB in both the given 1449 /// vector and set. 1450 void collectBlocks(SmallPtrSetImpl<BasicBlock *> &BlockSet, 1451 SmallVectorImpl<BasicBlock *> &BlockVector); 1452 1453 /// Return the function that contains the region to be outlined. EntryBB must have been set before this is called. 1454 Function *getFunction() const { return EntryBB->getParent(); } 1455 }; 1456 1457 /// Collection of regions that need to be outlined during finalization.
1458 SmallVector<OutlineInfo, 16> OutlineInfos; 1459 1460 /// Collection of owned canonical loop objects that eventually need to be 1461 /// free'd. 1462 std::forward_list<CanonicalLoopInfo> LoopInfos; 1463 1464 /// Add a new region that will be outlined later. 1465 void addOutlineInfo(OutlineInfo &&OI) { OutlineInfos.emplace_back(OI); } 1466 1467 /// An ordered map of auto-generated variables to their unique names. 1468 /// It stores variables with the following names: 1) ".gomp_critical_user_" + 1469 /// <critical_section_name> + ".var" for "omp critical" directives; 2) 1470 /// <mangled_name_for_global_var> + ".cache." for cache for threadprivate 1471 /// variables. 1472 StringMap<GlobalVariable *, BumpPtrAllocator> InternalVars; 1473 1474 /// Computes the size of type in bytes. 1475 Value *getSizeInBytes(Value *BasePtr); 1476 1477 // Emit a branch from the current block to the Target block only if 1478 // the current block has a terminator. 1479 void emitBranch(BasicBlock *Target); 1480 1481 // If BB has no use then delete it and return. Else place BB after the current 1482 // block, if possible, or else at the end of the function. Also add a branch 1483 // from current block to BB if current block does not have a terminator. 1484 void emitBlock(BasicBlock *BB, Function *CurFn, bool IsFinished = false); 1485 1486 /// Emits code for OpenMP 'if' clause using specified \a BodyGenCallbackTy 1487 /// Here is the logic: 1488 /// if (Cond) { 1489 /// ThenGen(); 1490 /// } else { 1491 /// ElseGen(); 1492 /// } 1493 void emitIfClause(Value *Cond, BodyGenCallbackTy ThenGen, 1494 BodyGenCallbackTy ElseGen, InsertPointTy AllocaIP = {}); 1495 1496 /// Create the global variable holding the offload mappings information. 1497 GlobalVariable *createOffloadMaptypes(SmallVectorImpl<uint64_t> &Mappings, 1498 std::string VarName); 1499 1500 /// Create the global variable holding the offload names information. 
1501 GlobalVariable * 1502 createOffloadMapnames(SmallVectorImpl<llvm::Constant *> &Names, 1503 std::string VarName); 1504 1505 struct MapperAllocas { 1506 AllocaInst *ArgsBase = nullptr; 1507 AllocaInst *Args = nullptr; 1508 AllocaInst *ArgSizes = nullptr; 1509 }; 1510 1511 /// Create the allocas instruction used in call to mapper functions. 1512 void createMapperAllocas(const LocationDescription &Loc, 1513 InsertPointTy AllocaIP, unsigned NumOperands, 1514 struct MapperAllocas &MapperAllocas); 1515 1516 /// Create the call for the target mapper function. 1517 /// \param Loc The source location description. 1518 /// \param MapperFunc Function to be called. 1519 /// \param SrcLocInfo Source location information global. 1520 /// \param MaptypesArg The argument types. 1521 /// \param MapnamesArg The argument names. 1522 /// \param MapperAllocas The AllocaInst used for the call. 1523 /// \param DeviceID Device ID for the call. 1524 /// \param NumOperands Number of operands in the call. 1525 void emitMapperCall(const LocationDescription &Loc, Function *MapperFunc, 1526 Value *SrcLocInfo, Value *MaptypesArg, Value *MapnamesArg, 1527 struct MapperAllocas &MapperAllocas, int64_t DeviceID, 1528 unsigned NumOperands); 1529 1530 /// Container for the arguments used to pass data to the runtime library. 1531 struct TargetDataRTArgs { 1532 /// The array of base pointer passed to the runtime library. 1533 Value *BasePointersArray = nullptr; 1534 /// The array of section pointers passed to the runtime library. 1535 Value *PointersArray = nullptr; 1536 /// The array of sizes passed to the runtime library. 1537 Value *SizesArray = nullptr; 1538 /// The array of map types passed to the runtime library for the beginning 1539 /// of the region or for the entire region if there are no separate map 1540 /// types for the region end. 
1541 Value *MapTypesArray = nullptr; 1542 /// The array of map types passed to the runtime library for the end of the 1543 /// region, or nullptr if there are no separate map types for the region 1544 /// end. 1545 Value *MapTypesArrayEnd = nullptr; 1546 /// The array of user-defined mappers passed to the runtime library. 1547 Value *MappersArray = nullptr; 1548 /// The array of original declaration names of mapped pointers sent to the 1549 /// runtime library for debugging 1550 Value *MapNamesArray = nullptr; 1551 1552 explicit TargetDataRTArgs() {} 1553 explicit TargetDataRTArgs(Value *BasePointersArray, Value *PointersArray, 1554 Value *SizesArray, Value *MapTypesArray, 1555 Value *MapTypesArrayEnd, Value *MappersArray, 1556 Value *MapNamesArray) 1557 : BasePointersArray(BasePointersArray), PointersArray(PointersArray), 1558 SizesArray(SizesArray), MapTypesArray(MapTypesArray), 1559 MapTypesArrayEnd(MapTypesArrayEnd), MappersArray(MappersArray), 1560 MapNamesArray(MapNamesArray) {} 1561 }; 1562 1563 /// Data structure that contains the needed information to construct the 1564 /// kernel args vector. 1565 struct TargetKernelArgs { 1566 /// Number of arguments passed to the runtime library. 1567 unsigned NumTargetItems; 1568 /// Arguments passed to the runtime library 1569 TargetDataRTArgs RTArgs; 1570 /// The number of iterations 1571 Value *NumIterations; 1572 /// The number of teams. 1573 Value *NumTeams; 1574 /// The number of threads. 1575 Value *NumThreads; 1576 /// The size of the dynamic shared memory. 1577 Value *DynCGGroupMem; 1578 /// True if the kernel has 'no wait' clause. 
1579 bool HasNoWait; 1580 1581 /// Constructor for TargetKernelArgs 1582 TargetKernelArgs(unsigned NumTargetItems, TargetDataRTArgs RTArgs, 1583 Value *NumIterations, Value *NumTeams, Value *NumThreads, 1584 Value *DynCGGroupMem, bool HasNoWait) 1585 : NumTargetItems(NumTargetItems), RTArgs(RTArgs), 1586 NumIterations(NumIterations), NumTeams(NumTeams), 1587 NumThreads(NumThreads), DynCGGroupMem(DynCGGroupMem), 1588 HasNoWait(HasNoWait) {} 1589 }; 1590 1591 /// Create the kernel args vector used by emitTargetKernel. This function 1592 /// creates various constant values that are used in the resulting args 1593 /// vector. 1594 static void getKernelArgsVector(TargetKernelArgs &KernelArgs, 1595 IRBuilderBase &Builder, 1596 SmallVector<Value *> &ArgsVector); 1597 1598 /// Struct that keeps the information that should be kept throughout 1599 /// a 'target data' region. 1600 class TargetDataInfo { 1601 /// Set to true if device pointer information have to be obtained. 1602 bool RequiresDevicePointerInfo = false; 1603 /// Set to true if Clang emits separate runtime calls for the beginning and 1604 /// end of the region. These calls might have separate map type arrays. 1605 bool SeparateBeginEndCalls = false; 1606 1607 public: 1608 TargetDataRTArgs RTArgs; 1609 1610 SmallMapVector<const Value *, std::pair<Value *, Value *>, 4> 1611 DevicePtrInfoMap; 1612 1613 /// Indicate whether any user-defined mapper exists. 1614 bool HasMapper = false; 1615 /// The total number of pointers passed to the runtime library. 1616 unsigned NumberOfPtrs = 0u; 1617 1618 explicit TargetDataInfo() {} 1619 explicit TargetDataInfo(bool RequiresDevicePointerInfo, 1620 bool SeparateBeginEndCalls) 1621 : RequiresDevicePointerInfo(RequiresDevicePointerInfo), 1622 SeparateBeginEndCalls(SeparateBeginEndCalls) {} 1623 /// Clear information about the data arrays. 
1624 void clearArrayInfo() { 1625 RTArgs = TargetDataRTArgs(); 1626 HasMapper = false; 1627 NumberOfPtrs = 0u; 1628 } 1629 /// Return true if the current target data information has valid arrays. 1630 bool isValid() { 1631 return RTArgs.BasePointersArray && RTArgs.PointersArray && 1632 RTArgs.SizesArray && RTArgs.MapTypesArray && 1633 (!HasMapper || RTArgs.MappersArray) && NumberOfPtrs; 1634 } 1635 bool requiresDevicePointerInfo() { return RequiresDevicePointerInfo; } 1636 bool separateBeginEndCalls() { return SeparateBeginEndCalls; } 1637 }; 1638 1639 enum class DeviceInfoTy { None, Pointer, Address }; 1640 using MapValuesArrayTy = SmallVector<Value *, 4>; 1641 using MapDeviceInfoArrayTy = SmallVector<DeviceInfoTy, 4>; 1642 using MapFlagsArrayTy = SmallVector<omp::OpenMPOffloadMappingFlags, 4>; 1643 using MapNamesArrayTy = SmallVector<Constant *, 4>; 1644 using MapDimArrayTy = SmallVector<uint64_t, 4>; 1645 using MapNonContiguousArrayTy = SmallVector<MapValuesArrayTy, 4>; 1646 1647 /// This structure contains combined information generated for mappable 1648 /// clauses, including base pointers, pointers, sizes, map types, user-defined 1649 /// mappers, and non-contiguous information. 1650 struct MapInfosTy { 1651 struct StructNonContiguousInfo { 1652 bool IsNonContiguous = false; 1653 MapDimArrayTy Dims; 1654 MapNonContiguousArrayTy Offsets; 1655 MapNonContiguousArrayTy Counts; 1656 MapNonContiguousArrayTy Strides; 1657 }; 1658 MapValuesArrayTy BasePointers; 1659 MapValuesArrayTy Pointers; 1660 MapDeviceInfoArrayTy DevicePointers; 1661 MapValuesArrayTy Sizes; 1662 MapFlagsArrayTy Types; 1663 MapNamesArrayTy Names; 1664 StructNonContiguousInfo NonContigInfo; 1665 1666 /// Append arrays in \a CurInfo. 
1667 void append(MapInfosTy &CurInfo) { 1668 BasePointers.append(CurInfo.BasePointers.begin(), 1669 CurInfo.BasePointers.end()); 1670 Pointers.append(CurInfo.Pointers.begin(), CurInfo.Pointers.end()); 1671 DevicePointers.append(CurInfo.DevicePointers.begin(), 1672 CurInfo.DevicePointers.end()); 1673 Sizes.append(CurInfo.Sizes.begin(), CurInfo.Sizes.end()); 1674 Types.append(CurInfo.Types.begin(), CurInfo.Types.end()); 1675 Names.append(CurInfo.Names.begin(), CurInfo.Names.end()); 1676 NonContigInfo.Dims.append(CurInfo.NonContigInfo.Dims.begin(), 1677 CurInfo.NonContigInfo.Dims.end()); 1678 NonContigInfo.Offsets.append(CurInfo.NonContigInfo.Offsets.begin(), 1679 CurInfo.NonContigInfo.Offsets.end()); 1680 NonContigInfo.Counts.append(CurInfo.NonContigInfo.Counts.begin(), 1681 CurInfo.NonContigInfo.Counts.end()); 1682 NonContigInfo.Strides.append(CurInfo.NonContigInfo.Strides.begin(), 1683 CurInfo.NonContigInfo.Strides.end()); 1684 } 1685 }; 1686 1687 /// Callback function type for functions emitting the host fallback code that 1688 /// is executed when the kernel launch fails. It takes an insertion point as 1689 /// parameter where the code should be emitted. It returns an insertion point 1690 /// that points right after after the emitted code. 1691 using EmitFallbackCallbackTy = function_ref<InsertPointTy(InsertPointTy)>; 1692 1693 /// Generate a target region entry call and host fallback call. 1694 /// 1695 /// \param Loc The location at which the request originated and is fulfilled. 1696 /// \param OutlinedFn The outlined kernel function. 1697 /// \param OutlinedFnID The ooulined function ID. 1698 /// \param EmitTargetCallFallbackCB Call back function to generate host 1699 /// fallback code. 1700 /// \param Args Data structure holding information about the kernel arguments. 1701 /// \param DeviceID Identifier for the device via the 'device' clause. 
/// \param RTLoc Source location identifier
/// \param AllocaIP The insertion point to be used for alloca instructions.
///
/// \returns An insertion point (NOTE(review): presumably the point after the
///          emitted launch/fallback code - confirm against the definition).
InsertPointTy emitKernelLaunch(
    const LocationDescription &Loc, Function *OutlinedFn, Value *OutlinedFnID,
    EmitFallbackCallbackTy EmitTargetCallFallbackCB, TargetKernelArgs &Args,
    Value *DeviceID, Value *RTLoc, InsertPointTy AllocaIP);

/// Emit the arguments to be passed to the runtime library based on the
/// arrays of base pointers, pointers, sizes, map types, and mappers. If
/// ForEndCall, emit map types to be passed for the end of the region instead
/// of the beginning.
///
/// \param Builder The IR builder used to emit the arguments.
/// \param RTArgs The runtime argument container associated with the call.
/// \param Info The 'target data' region information.
/// \param EmitDebug Defaults to false. NOTE(review): presumably controls
///        whether the map names array is emitted for debugging - confirm.
/// \param ForEndCall If true, use the map types for the end of the region.
void emitOffloadingArraysArgument(IRBuilderBase &Builder,
                                  OpenMPIRBuilder::TargetDataRTArgs &RTArgs,
                                  OpenMPIRBuilder::TargetDataInfo &Info,
                                  bool EmitDebug = false,
                                  bool ForEndCall = false);

/// Emit an array of struct descriptors to be assigned to the offload args.
///
/// \param AllocaIP The insertion point to be used for alloca instructions.
/// \param CodeGenIP The insertion point for the generated code.
/// \param CombinedInfo The combined map information.
/// \param Info The 'target data' region information.
void emitNonContiguousDescriptor(InsertPointTy AllocaIP,
                                 InsertPointTy CodeGenIP,
                                 MapInfosTy &CombinedInfo,
                                 TargetDataInfo &Info);

/// Emit the arrays used to pass the captures and map information to the
/// offloading runtime library. If there is no map or capture information,
/// return nullptr by reference.
///
/// \param AllocaIP The insertion point to be used for alloca instructions.
/// \param CodeGenIP The insertion point for the generated code.
/// \param CombinedInfo The combined map information.
/// \param Info The 'target data' region information.
/// \param IsNonContiguous Defaults to false.
/// \param DeviceAddrCB Optional callback taking an index and a value.
/// \param CustomMapperCB Optional callback returning a value for an index.
void emitOffloadingArrays(
    InsertPointTy AllocaIP, InsertPointTy CodeGenIP, MapInfosTy &CombinedInfo,
    TargetDataInfo &Info, bool IsNonContiguous = false,
    function_ref<void(unsigned int, Value *)> DeviceAddrCB = nullptr,
    function_ref<Value *(unsigned int)> CustomMapperCB = nullptr);

/// Creates offloading entry for the provided entry ID \a ID, address \a
/// Addr, size \a Size, and flags \a Flags. The final (unnamed) parameter is
/// a linkage type.
void createOffloadEntry(Constant *ID, Constant *Addr, uint64_t Size,
                        int32_t Flags, GlobalValue::LinkageTypes);

/// The kind of errors that can occur when emitting the offload entries and
/// metadata.
enum EmitMetadataErrorKind {
  EMIT_MD_TARGET_REGION_ERROR,
  EMIT_MD_DECLARE_TARGET_ERROR,
  EMIT_MD_GLOBAL_VAR_LINK_ERROR
};

/// Callback function type used to report an error of the given kind for the
/// given target region entry.
using EmitMetadataErrorReportFunctionTy =
    std::function<void(EmitMetadataErrorKind, TargetRegionEntryInfo)>;

// Emit the offloading entries and metadata so that the device codegen side
// can easily figure out what to emit. The produced metadata looks like
// this:
//
// !omp_offload.info = !{!1, ...}
//
// We only generate metadata for functions that contain target regions.
//
// ErrorReportFunction is the callback used to report errors.
void createOffloadEntriesAndInfoMetadata(
    EmitMetadataErrorReportFunctionTy &ErrorReportFunction);

public:
/// Generator for __kmpc_copyprivate
///
/// \param Loc The source location description.
/// \param BufSize Number of elements in the buffer.
/// \param CpyBuf List of pointers to data to be copied.
/// \param CpyFn function to call for copying data.
/// \param DidIt flag variable; 1 for 'single' thread, 0 otherwise.
///
/// \return The insertion position *after* the CopyPrivate call.

InsertPointTy createCopyPrivate(const LocationDescription &Loc,
                                llvm::Value *BufSize, llvm::Value *CpyBuf,
                                llvm::Value *CpyFn, llvm::Value *DidIt);

/// Generator for '#omp single'
///
/// \param Loc The source location description.
/// \param BodyGenCB Callback that will generate the region code.
/// \param FiniCB Callback to finalize variable copies.
/// \param IsNowait If false, a barrier is emitted.
/// \param DidIt Local variable used as a flag to indicate 'single' thread
///
/// \returns The insertion position *after* the single call.
InsertPointTy createSingle(const LocationDescription &Loc,
                           BodyGenCallbackTy BodyGenCB,
                           FinalizeCallbackTy FiniCB, bool IsNowait,
                           llvm::Value *DidIt);

/// Generator for '#omp master'
///
/// \param Loc The insert and source location description.
/// \param BodyGenCB Callback that will generate the region code.
/// \param FiniCB Callback to finalize variable copies.
///
/// \returns The insertion position *after* the master.
InsertPointTy createMaster(const LocationDescription &Loc,
                           BodyGenCallbackTy BodyGenCB,
                           FinalizeCallbackTy FiniCB);

/// Generator for '#omp masked'
///
/// \param Loc The insert and source location description.
/// \param BodyGenCB Callback that will generate the region code.
/// \param FiniCB Callback to finalize variable copies.
/// \param Filter Value associated with the directive's filter.
///        NOTE(review): presumably the thread id selected by the 'filter'
///        clause - confirm against the definition.
///
/// \returns The insertion position *after* the masked.
InsertPointTy createMasked(const LocationDescription &Loc,
                           BodyGenCallbackTy BodyGenCB,
                           FinalizeCallbackTy FiniCB, Value *Filter);

/// Generator for '#omp critical'
///
/// \param Loc The insert and source location description.
/// \param BodyGenCB Callback that will generate the region body code.
/// \param FiniCB Callback to finalize variable copies.
/// \param CriticalName name of the lock used by the critical directive
/// \param HintInst Hint Instruction for hint clause associated with critical
///
/// \returns The insertion position *after* the critical.
InsertPointTy createCritical(const LocationDescription &Loc,
                             BodyGenCallbackTy BodyGenCB,
                             FinalizeCallbackTy FiniCB,
                             StringRef CriticalName, Value *HintInst);

/// Generator for '#omp ordered depend (source | sink)'
///
/// \param Loc The insert and source location description.
/// \param AllocaIP The insertion point to be used for alloca instructions.
/// \param NumLoops The number of loops in depend clause.
/// \param StoreValues The values that will be stored in the vector address.
/// \param Name The name of alloca instruction.
/// \param IsDependSource If true, depend source; otherwise, depend sink.
///
/// \return The insertion position *after* the ordered.
InsertPointTy createOrderedDepend(const LocationDescription &Loc,
                                  InsertPointTy AllocaIP, unsigned NumLoops,
                                  ArrayRef<llvm::Value *> StoreValues,
                                  const Twine &Name, bool IsDependSource);

/// Generator for '#omp ordered [threads | simd]'
///
/// \param Loc The insert and source location description.
/// \param BodyGenCB Callback that will generate the region code.
/// \param FiniCB Callback to finalize variable copies.
/// \param IsThreads If true, with threads clause or without clause;
///                  otherwise, with simd clause.
///
/// \returns The insertion position *after* the ordered.
InsertPointTy createOrderedThreadsSimd(const LocationDescription &Loc,
                                       BodyGenCallbackTy BodyGenCB,
                                       FinalizeCallbackTy FiniCB,
                                       bool IsThreads);

/// Generator for '#omp sections'
///
/// \param Loc The insert and source location description.
/// \param AllocaIP The insertion points to be used for alloca instructions.
/// \param SectionCBs Callbacks that will generate body of each section.
/// \param PrivCB Callback to copy a given variable (think copy constructor).
/// \param FiniCB Callback to finalize variable copies.
/// \param IsCancellable Flag to indicate a cancellable parallel region.
/// \param IsNowait If true, the barrier that ensures all sections are
///                 executed before moving forward will not be generated.
/// \returns The insertion position *after* the sections.
InsertPointTy createSections(const LocationDescription &Loc,
                             InsertPointTy AllocaIP,
                             ArrayRef<StorableBodyGenCallbackTy> SectionCBs,
                             PrivatizeCallbackTy PrivCB,
                             FinalizeCallbackTy FiniCB, bool IsCancellable,
                             bool IsNowait);

/// Generator for '#omp section'
///
/// \param Loc The insert and source location description.
/// \param BodyGenCB Callback that will generate the region body code.
/// \param FiniCB Callback to finalize variable copies.
/// \returns The insertion position *after* the section.
InsertPointTy createSection(const LocationDescription &Loc,
                            BodyGenCallbackTy BodyGenCB,
                            FinalizeCallbackTy FiniCB);

/// Generate conditional branch and relevant BasicBlocks through which private
/// threads copy the 'copyin' variables from Master copy to threadprivate
/// copies.
///
/// \param IP insertion block for copyin conditional
/// \param MasterAddr a pointer to the master variable
/// \param PrivateAddr a pointer to the threadprivate variable
/// \param IntPtrTy Pointer size type
/// \param BranchtoEnd Create a branch between the copyin.not.master blocks
///                    and copy.in.end block
///
/// \returns The insertion point where copying operation to be emitted.
InsertPointTy createCopyinClauseBlocks(InsertPointTy IP, Value *MasterAddr,
                                       Value *PrivateAddr,
                                       llvm::IntegerType *IntPtrTy,
                                       bool BranchtoEnd = true);

/// Create a runtime call for kmpc_Alloc
///
/// \param Loc The insert and source location description.
/// \param Size Size of allocated memory space
/// \param Allocator Allocator information instruction
/// \param Name Name of call Instruction for OMP_alloc (defaults to "")
///
/// \returns CallInst to the OMP_Alloc call
CallInst *createOMPAlloc(const LocationDescription &Loc, Value *Size,
                         Value *Allocator, std::string Name = "");

/// Create a runtime call for kmpc_free
///
/// \param Loc The insert and source location description.
/// \param Addr Address of memory space to be freed
/// \param Allocator Allocator information instruction
/// \param Name Name of call Instruction for OMP_Free (defaults to "")
///
/// \returns CallInst to the OMP_Free call
CallInst *createOMPFree(const LocationDescription &Loc, Value *Addr,
                        Value *Allocator, std::string Name = "");

/// Create a runtime call for kmpc_threadprivate_cached
///
/// \param Loc The insert and source location description.
/// \param Pointer pointer to data to be cached
/// \param Size size of data to be cached
/// \param Name Name of call Instruction for callinst (defaults to "")
///
/// \returns CallInst to the thread private cache call.
CallInst *createCachedThreadPrivate(const LocationDescription &Loc,
                                    llvm::Value *Pointer,
                                    llvm::ConstantInt *Size,
                                    const llvm::Twine &Name = Twine(""));

/// Create a runtime call for __tgt_interop_init
///
/// \param Loc The insert and source location description.
/// \param InteropVar variable to be allocated
/// \param InteropType type of interop operation
/// \param Device device to which offloading will occur
/// \param NumDependences number of dependence variables
/// \param DependenceAddress pointer to dependence variables
/// \param HaveNowaitClause does nowait clause exist
///
/// \returns CallInst to the __tgt_interop_init call
CallInst *createOMPInteropInit(const LocationDescription &Loc,
                               Value *InteropVar,
                               omp::OMPInteropType InteropType, Value *Device,
                               Value *NumDependences,
                               Value *DependenceAddress,
                               bool HaveNowaitClause);

/// Create a runtime call for __tgt_interop_destroy
///
/// \param Loc The insert and source location description.
/// \param InteropVar variable to be allocated
///        NOTE(review): wording is shared with the init call; for destroy
///        this is presumably the interop variable being destroyed - confirm.
/// \param Device device to which offloading will occur
/// \param NumDependences number of dependence variables
/// \param DependenceAddress pointer to dependence variables
/// \param HaveNowaitClause does nowait clause exist
///
/// \returns CallInst to the __tgt_interop_destroy call
CallInst *createOMPInteropDestroy(const LocationDescription &Loc,
                                  Value *InteropVar, Value *Device,
                                  Value *NumDependences,
                                  Value *DependenceAddress,
                                  bool HaveNowaitClause);

/// Create a runtime call for __tgt_interop_use
///
/// \param Loc The insert and source location description.
/// \param InteropVar variable to be allocated
///        NOTE(review): wording is shared with the init call; for use this
///        is presumably the interop variable being used - confirm.
/// \param Device device to which offloading will occur
/// \param NumDependences number of dependence variables
/// \param DependenceAddress pointer to dependence variables
/// \param HaveNowaitClause does nowait clause exist
///
/// \returns CallInst to the __tgt_interop_use call
CallInst *createOMPInteropUse(const LocationDescription &Loc,
                              Value *InteropVar, Value *Device,
                              Value *NumDependences, Value *DependenceAddress,
                              bool HaveNowaitClause);

/// The `omp target` interface
///
/// For more information about the usage of this interface,
/// \see openmp/libomptarget/deviceRTLs/common/include/target.h
///
///{

/// Create a runtime call for kmpc_target_init
///
/// \param Loc The insert and source location description.
/// \param IsSPMD Flag to indicate if the kernel is an SPMD kernel or not.
///
/// \returns An insertion point (NOTE(review): presumably where the kernel
///          body should be emitted - confirm against the definition).
InsertPointTy createTargetInit(const LocationDescription &Loc, bool IsSPMD);

/// Create a runtime call for kmpc_target_deinit
///
/// \param Loc The insert and source location description.
/// \param IsSPMD Flag to indicate if the kernel is an SPMD kernel or not.
void createTargetDeinit(const LocationDescription &Loc, bool IsSPMD);

///}

private:
// Sets the function attributes expected for the outlined function.
// NumTeams and NumThreads are passed alongside the function; how they map to
// attributes is defined in the implementation.
void setOutlinedTargetRegionFunctionAttributes(Function *OutlinedFn,
                                               int32_t NumTeams,
                                               int32_t NumThreads);

// Creates the function ID/Address for the given outlined function.
// In the case of an embedded device function the address of the function is
// used, in the case of a non-offload function a constant is created.
Constant *createOutlinedFunctionID(Function *OutlinedFn,
                                   StringRef EntryFnIDName);

// Creates the region entry address for the outlined function
Constant *createTargetRegionEntryAddr(Function *OutlinedFunction,
                                      StringRef EntryFnName);

public:
/// Functions used to generate a function with the given name.
using FunctionGenCallback = std::function<Function *(StringRef FunctionName)>;

/// Create a unique name for the entry function using the source location
/// information of the current target region. The name will be something like:
///
/// __omp_offloading_DD_FFFF_PP_lBB[_CC]
///
/// where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
/// mangled name of the function that encloses the target region and BB is the
/// line number of the target region. CC is a count added when more than one
/// region is located at the same location.
///
/// If this target outline function is not an offload entry, we don't need to
/// register it. This may happen if it is guarded by an if clause that is
/// false at compile time, or no target archs have been specified.
///
/// The created target region ID is used by the runtime library to identify
/// the current target region, so it only has to be unique and not
/// necessarily point to anything. It could be the pointer to the outlined
/// function that implements the target region, but we aren't using that so
/// that the compiler doesn't need to keep that, and could therefore inline
/// the host function if proven worthwhile during optimization. On the other
/// hand, if emitting code for the device, the ID has to be the function
/// address so that it can be retrieved from the offloading entry and launched
/// by the runtime library. We also mark the outlined function to have
/// external linkage in case we are emitting code for the device, because
/// these functions will be entry points to the device.
///
/// \param EntryInfo The entry information about the function
/// \param GenerateFunctionCallback The callback function to generate the code
/// \param NumTeams Default number of teams
/// \param NumThreads Default number of threads
/// \param IsOffloadEntry Whether the function is an offload entry (see the
///        registration note above).
/// \param OutlinedFn Pointer to the outlined function (passed by reference)
/// \param OutlinedFnID The ID to be created (passed by reference)
void emitTargetRegionFunction(TargetRegionEntryInfo &EntryInfo,
                              FunctionGenCallback &GenerateFunctionCallback,
                              int32_t NumTeams, int32_t NumThreads,
                              bool IsOffloadEntry, Function *&OutlinedFn,
                              Constant *&OutlinedFnID);

/// Registers the given function and sets up the attributes of the function.
/// Returns the FunctionID.
///
/// \param EntryInfo The entry information about the function
/// \param OutlinedFunction Pointer to the outlined function
/// \param EntryFnName Name of the outlined function
/// \param EntryFnIDName Name of the ID to be created
/// \param NumTeams Default number of teams
/// \param NumThreads Default number of threads
Constant *registerTargetRegionFunction(TargetRegionEntryInfo &EntryInfo,
                                       Function *OutlinedFunction,
                                       StringRef EntryFnName,
                                       StringRef EntryFnIDName,
                                       int32_t NumTeams, int32_t NumThreads);
/// Type of BodyGen to use for region codegen
///
/// Priv: If device pointer privatization is required, emit the body of the
/// region here. It will have to be duplicated: with and without
/// privatization.
/// DupNoPriv: If we need device pointer privatization, we need
/// to emit the body of the region with no privatization in the 'else' branch
/// of the conditional.
/// NoPriv: If we don't require privatization of device
/// pointers, we emit the body in between the runtime calls. This avoids
/// duplicating the body code.
enum BodyGenTy { Priv, DupNoPriv, NoPriv };

/// Generator for '#omp target data'
///
/// \param Loc The location where the target data construct was encountered.
/// \param AllocaIP The insertion points to be used for alloca instructions.
/// \param CodeGenIP The insertion point at which the target directive code
/// should be placed.
/// \param DeviceID Stores the DeviceID from the device clause.
/// \param IfCond Value which corresponds to the if clause condition.
/// \param Info Stores all information related to the Target Data directive.
/// \param GenMapInfoCB Callback that populates the MapInfos and returns.
/// \param MapperFunc Optional pointer to a runtime function identifier.
/// NOTE(review): presumably selects the mapper runtime call to emit (e.g.
/// begin vs. end) for standalone directives - confirm against the
/// definition. An earlier '\param IsBegin' entry did not match any parameter
/// of this signature.
/// \param BodyGenCB Optional Callback to generate the region code.
/// \param DeviceAddrCB Optional callback to generate code related to
/// use_device_ptr and use_device_addr.
/// \param CustomMapperCB Optional callback to generate code related to
/// custom mappers.
/// \param SrcLocInfo Optional source location information value.
OpenMPIRBuilder::InsertPointTy createTargetData(
    const LocationDescription &Loc, InsertPointTy AllocaIP,
    InsertPointTy CodeGenIP, Value *DeviceID, Value *IfCond,
    TargetDataInfo &Info,
    function_ref<MapInfosTy &(InsertPointTy CodeGenIP)> GenMapInfoCB,
    omp::RuntimeFunction *MapperFunc = nullptr,
    function_ref<InsertPointTy(InsertPointTy CodeGenIP,
                               BodyGenTy BodyGenType)>
        BodyGenCB = nullptr,
    function_ref<void(unsigned int, Value *)> DeviceAddrCB = nullptr,
    function_ref<Value *(unsigned int)> CustomMapperCB = nullptr,
    Value *SrcLocInfo = nullptr);

/// Callback type generating the body of a target region. It receives the
/// alloca insertion point and the code generation insertion point, and
/// returns an insertion point.
using TargetBodyGenCallbackTy = function_ref<InsertPointTy(
    InsertPointTy AllocaIP, InsertPointTy CodeGenIP)>;

/// Generator for '#omp target'
///
/// \param Loc where the target data construct was encountered.
/// \param CodeGenIP The insertion point where the call to the outlined
/// function should be emitted.
/// \param EntryInfo The entry information about the function.
/// \param NumTeams Number of teams specified in the num_teams clause.
/// \param NumThreads Number of threads specified in the thread_limit clause.
/// \param Inputs The input values to the region that will be passed
/// as arguments to the outlined function.
/// \param BodyGenCB Callback that will generate the region code.
InsertPointTy createTarget(const LocationDescription &Loc,
                           OpenMPIRBuilder::InsertPointTy CodeGenIP,
                           TargetRegionEntryInfo &EntryInfo, int32_t NumTeams,
                           int32_t NumThreads,
                           SmallVectorImpl<Value *> &Inputs,
                           TargetBodyGenCallbackTy BodyGenCB);

/// Declarations for LLVM-IR types (simple, array, function and structure) are
/// generated below. Their names are defined and used in OpenMPKinds.def. Here
/// we provide the declarations, the initializeTypes function will provide the
/// values.
2147 /// 2148 ///{ 2149 #define OMP_TYPE(VarName, InitValue) Type *VarName = nullptr; 2150 #define OMP_ARRAY_TYPE(VarName, ElemTy, ArraySize) \ 2151 ArrayType *VarName##Ty = nullptr; \ 2152 PointerType *VarName##PtrTy = nullptr; 2153 #define OMP_FUNCTION_TYPE(VarName, IsVarArg, ReturnType, ...) \ 2154 FunctionType *VarName = nullptr; \ 2155 PointerType *VarName##Ptr = nullptr; 2156 #define OMP_STRUCT_TYPE(VarName, StrName, ...) \ 2157 StructType *VarName = nullptr; \ 2158 PointerType *VarName##Ptr = nullptr; 2159 #include "llvm/Frontend/OpenMP/OMPKinds.def" 2160 2161 ///} 2162 2163 private: 2164 /// Create all simple and struct types exposed by the runtime and remember 2165 /// the llvm::PointerTypes of them for easy access later. 2166 void initializeTypes(Module &M); 2167 2168 /// Common interface for generating entry calls for OMP Directives. 2169 /// If the directive has a region/body, it will set the insertion 2170 /// point to the body. 2171 /// 2172 /// \param OMPD Directive to generate entry blocks for 2173 /// \param EntryCall Call to the entry OMP Runtime Function 2174 /// \param ExitBB block where the region ends. 2175 /// \param Conditional indicate if the entry call result will be used 2176 /// to evaluate a conditional of whether a thread will execute 2177 /// body code or not. 2178 /// 2179 /// \return The insertion position in exit block 2180 InsertPointTy emitCommonDirectiveEntry(omp::Directive OMPD, Value *EntryCall, 2181 BasicBlock *ExitBB, 2182 bool Conditional = false); 2183 2184 /// Common interface to finalize the region 2185 /// 2186 /// \param OMPD Directive to generate exiting code for 2187 /// \param FinIP Insertion point for emitting Finalization code and exit call 2188 /// \param ExitCall Call to the ending OMP Runtime Function 2189 /// \param HasFinalize indicate if the directive will require finalization 2190 /// and has a finalization callback in the stack that 2191 /// should be called.
2192 /// 2193 /// \return The insertion position in exit block 2194 InsertPointTy emitCommonDirectiveExit(omp::Directive OMPD, 2195 InsertPointTy FinIP, 2196 Instruction *ExitCall, 2197 bool HasFinalize = true); 2198 2199 /// Common Interface to generate OMP inlined regions 2200 /// 2201 /// \param OMPD Directive to generate inlined region for 2202 /// \param EntryCall Call to the entry OMP Runtime Function 2203 /// \param ExitCall Call to the ending OMP Runtime Function 2204 /// \param BodyGenCB Body code generation callback. 2205 /// \param FiniCB Finalization Callback. Will be called when finalizing region 2206 /// \param Conditional indicate if the entry call result will be used 2207 /// to evaluate a conditional of whether a thread will execute 2208 /// body code or not. 2209 /// \param HasFinalize indicate if the directive will require finalization 2210 /// and has a finalization callback in the stack that 2211 /// should be called. 2212 /// \param IsCancellable if HasFinalize is set to true, indicate if the 2213 /// directive should be cancellable. 2214 /// \return The insertion point after the region 2215 2216 InsertPointTy 2217 EmitOMPInlinedRegion(omp::Directive OMPD, Instruction *EntryCall, 2218 Instruction *ExitCall, BodyGenCallbackTy BodyGenCB, 2219 FinalizeCallbackTy FiniCB, bool Conditional = false, 2220 bool HasFinalize = true, bool IsCancellable = false); 2221 2222 /// Get the platform-specific name separator. 2223 /// \param Parts different parts of the final name that need separation 2224 /// \param FirstSeparator First separator used between the initial two 2225 /// parts of the name. 2226 /// \param Separator separator used between all of the rest consecutive 2227 /// parts of the name 2228 static std::string getNameWithSeparators(ArrayRef<StringRef> Parts, 2229 StringRef FirstSeparator, 2230 StringRef Separator); 2231 2232 /// Returns corresponding lock object for the specified critical region 2233 /// name.
If the lock object does not exist it is created, otherwise the 2234 /// reference to the existing copy is returned. 2235 /// \param CriticalName Name of the critical region. 2236 /// 2237 Value *getOMPCriticalRegionLock(StringRef CriticalName); 2238 2239 /// Callback type for Atomic Expression update 2240 /// ex: 2241 /// \code{.cpp} 2242 /// unsigned x = 0; 2243 /// #pragma omp atomic update 2244 /// x = Expr(x_old); //Expr() is any legal operation 2245 /// \endcode 2246 /// 2247 /// \param XOld the value of the atomic memory address to use for update 2248 /// \param IRB reference to the IRBuilder to use 2249 /// 2250 /// \returns Value to update X to. 2251 using AtomicUpdateCallbackTy = 2252 const function_ref<Value *(Value *XOld, IRBuilder<> &IRB)>; 2253 2254 private: 2255 enum AtomicKind { Read, Write, Update, Capture, Compare }; 2256 2257 /// Determine whether to emit flush or not 2258 /// 2259 /// \param Loc The insert and source location description. 2260 /// \param AO The required atomic ordering 2261 /// \param AK The OpenMP atomic operation kind used. 2262 /// 2263 /// \returns whether a flush was emitted or not 2264 bool checkAndEmitFlushAfterAtomic(const LocationDescription &Loc, 2265 AtomicOrdering AO, AtomicKind AK); 2266 2267 /// Emit atomic update for constructs: X = X BinOp Expr, or X = Expr BinOp X 2268 /// For complex Operations: X = UpdateOp(X) => CmpExch X, old_X, UpdateOp(X) 2269 /// Only Scalar data types. 2270 /// 2271 /// \param AllocaIP The insertion point to be used for alloca 2272 /// instructions. 2273 /// \param X The target atomic pointer to be updated 2274 /// \param XElemTy The element type of the atomic pointer. 2275 /// \param Expr The value to update X with. 2276 /// \param AO Atomic ordering of the generated atomic 2277 /// instructions. 2278 /// \param RMWOp The binary operation used for update. If 2279 /// operation is not supported by atomicRMW, 2280 /// or belong to {FADD, FSUB, BAD_BINOP}.
2281 /// Then a `cmpExch` based atomic will be generated. 2282 /// \param UpdateOp Code generator for complex expressions that cannot be 2283 /// expressed through atomicrmw instruction. 2284 /// \param VolatileX true if \a X is volatile. 2285 /// \param IsXBinopExpr true if \a X is Left H.S. in Right H.S. part of the 2286 /// update expression, false otherwise. 2287 /// (e.g. true for X = X BinOp Expr) 2288 /// 2289 /// \returns A pair of the old value of X before the update, and the value 2290 /// used for the update. 2291 std::pair<Value *, Value *> 2292 emitAtomicUpdate(InsertPointTy AllocaIP, Value *X, Type *XElemTy, Value *Expr, 2293 AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp, 2294 AtomicUpdateCallbackTy &UpdateOp, bool VolatileX, 2295 bool IsXBinopExpr); 2296 2297 /// Emit the binary op. described by \p RMWOp, using \p Src1 and \p Src2. 2298 /// 2299 /// \return The instruction 2300 Value *emitRMWOpAsInstruction(Value *Src1, Value *Src2, 2301 AtomicRMWInst::BinOp RMWOp); 2302 2303 public: 2304 /// A struct to pack relevant information while generating atomic Ops. 2305 struct AtomicOpValue { 2306 Value *Var = nullptr; 2307 Type *ElemTy = nullptr; 2308 bool IsSigned = false; 2309 bool IsVolatile = false; 2310 }; 2311 2312 /// Emit atomic Read for : V = X --- Only Scalar data types. 2313 /// 2314 /// \param Loc The insert and source location description. 2315 /// \param X The target pointer to be atomically read 2316 /// \param V Memory address where to store atomically read 2317 /// value 2318 /// \param AO Atomic ordering of the generated atomic 2319 /// instructions. 2320 /// 2321 /// \return Insertion point after generated atomic read IR. 2322 InsertPointTy createAtomicRead(const LocationDescription &Loc, 2323 AtomicOpValue &X, AtomicOpValue &V, 2324 AtomicOrdering AO); 2325 2326 /// Emit atomic write for : X = Expr --- Only Scalar data types. 2327 /// 2328 /// \param Loc The insert and source location description.
2329 /// \param X The target pointer to be atomically written to 2330 /// \param Expr The value to store. 2331 /// \param AO Atomic ordering of the generated atomic 2332 /// instructions. 2333 /// 2334 /// \return Insertion point after generated atomic Write IR. 2335 InsertPointTy createAtomicWrite(const LocationDescription &Loc, 2336 AtomicOpValue &X, Value *Expr, 2337 AtomicOrdering AO); 2338 2339 /// Emit atomic update for constructs: X = X BinOp Expr, or X = Expr BinOp X 2340 /// For complex Operations: X = UpdateOp(X) => CmpExch X, old_X, UpdateOp(X) 2341 /// Only Scalar data types. 2342 /// 2343 /// \param Loc The insert and source location description. 2344 /// \param AllocaIP The insertion point to be used for alloca instructions. 2345 /// \param X The target atomic pointer to be updated 2346 /// \param Expr The value to update X with. 2347 /// \param AO Atomic ordering of the generated atomic instructions. 2348 /// \param RMWOp The binary operation used for update. If the operation 2349 /// is not supported by atomicRMW, or belongs to 2350 /// {FADD, FSUB, BAD_BINOP}, then a `cmpExch` based 2351 /// atomic will be generated. 2352 /// \param UpdateOp Code generator for complex expressions that cannot be 2353 /// expressed through atomicrmw instruction. 2354 /// \param IsXBinopExpr true if \a X is Left H.S. in Right H.S. part of the 2355 /// update expression, false otherwise. 2356 /// (e.g. true for X = X BinOp Expr) 2357 /// 2358 /// \return Insertion point after generated atomic update IR.
2359 InsertPointTy createAtomicUpdate(const LocationDescription &Loc, 2360 InsertPointTy AllocaIP, AtomicOpValue &X, 2361 Value *Expr, AtomicOrdering AO, 2362 AtomicRMWInst::BinOp RMWOp, 2363 AtomicUpdateCallbackTy &UpdateOp, 2364 bool IsXBinopExpr); 2365 2366 /// Emit atomic update for constructs: --- Only Scalar data types 2367 /// V = X; X = X BinOp Expr, 2368 /// X = X BinOp Expr; V = X, 2369 /// V = X; X = Expr BinOp X, 2370 /// X = Expr BinOp X; V = X, 2371 /// V = X; X = UpdateOp(X), 2372 /// X = UpdateOp(X); V = X, 2373 /// 2374 /// \param Loc The insert and source location description. 2375 /// \param AllocaIP The insertion point to be used for alloca instructions. 2376 /// \param X The target atomic pointer to be updated 2377 /// \param V Memory address where to store captured value 2378 /// \param Expr The value to update X with. 2379 /// \param AO Atomic ordering of the generated atomic instructions 2380 /// \param RMWOp The binary operation used for update. If the 2381 /// operation is not supported by atomicRMW, or belongs to 2382 /// {FADD, FSUB, BAD_BINOP}, then a cmpExch based 2383 /// atomic will be generated. 2384 /// \param UpdateOp Code generator for complex expressions that cannot be 2385 /// expressed through atomicrmw instruction. 2386 /// \param UpdateExpr true if X is an in place update of the form 2387 /// X = X BinOp Expr or X = Expr BinOp X 2388 /// \param IsXBinopExpr true if X is Left H.S. in Right H.S. part of the 2389 /// update expression, false otherwise. 2390 /// (e.g. true for X = X BinOp Expr) 2391 /// \param IsPostfixUpdate true if original value of 'x' must be stored in 2392 /// 'v', not an updated one. 2393 /// 2394 /// \return Insertion point after generated atomic capture IR.
2395 InsertPointTy 2396 createAtomicCapture(const LocationDescription &Loc, InsertPointTy AllocaIP, 2397 AtomicOpValue &X, AtomicOpValue &V, Value *Expr, 2398 AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp, 2399 AtomicUpdateCallbackTy &UpdateOp, bool UpdateExpr, 2400 bool IsPostfixUpdate, bool IsXBinopExpr); 2401 2402 /// Emit atomic compare for constructs: --- Only scalar data types 2403 /// cond-expr-stmt: 2404 /// x = x ordop expr ? expr : x; 2405 /// x = expr ordop x ? expr : x; 2406 /// x = x == e ? d : x; 2407 /// x = e == x ? d : x; (this one is not in the spec) 2408 /// cond-update-stmt: 2409 /// if (x ordop expr) { x = expr; } 2410 /// if (expr ordop x) { x = expr; } 2411 /// if (x == e) { x = d; } 2412 /// if (e == x) { x = d; } (this one is not in the spec) 2413 /// conditional-update-capture-atomic: 2414 /// v = x; cond-update-stmt; (IsPostfixUpdate=true, IsFailOnly=false) 2415 /// cond-update-stmt; v = x; (IsPostfixUpdate=false, IsFailOnly=false) 2416 /// if (x == e) { x = d; } else { v = x; } (IsPostfixUpdate=false, 2417 /// IsFailOnly=true) 2418 /// r = x == e; if (r) { x = d; } (IsPostfixUpdate=false, IsFailOnly=false) 2419 /// r = x == e; if (r) { x = d; } else { v = x; } (IsPostfixUpdate=false, 2420 /// IsFailOnly=true) 2421 /// 2422 /// \param Loc The insert and source location description. 2423 /// \param X The target atomic pointer to be updated. 2424 /// \param V Memory address where to store captured value (for 2425 /// compare capture only). 2426 /// \param R Memory address where to store comparison result 2427 /// (for compare capture with '==' only). 2428 /// \param E The expected value ('e') for forms that use an 2429 /// equality comparison or an expression ('expr') for 2430 /// forms that use 'ordop' (logically an atomic maximum or 2431 /// minimum). 2432 /// \param D The desired value for forms that use an equality 2433 /// comparison. For forms that use 'ordop', it should be 2434 /// \p nullptr.
2435 /// \param AO Atomic ordering of the generated atomic instructions. 2436 /// \param Op Atomic compare operation. It can only be ==, <, or >. 2437 /// \param IsXBinopExpr True if the conditional statement is in the form where 2438 /// x is on LHS. It only matters for < or >. 2439 /// \param IsPostfixUpdate True if original value of 'x' must be stored in 2440 /// 'v', not an updated one (for compare capture 2441 /// only). 2442 /// \param IsFailOnly True if the original value of 'x' is stored to 'v' 2443 /// only when the comparison fails. This is only valid 2444 /// when the comparison is '=='. 2445 /// 2446 /// \return Insertion point after generated atomic compare IR. 2447 InsertPointTy 2448 createAtomicCompare(const LocationDescription &Loc, AtomicOpValue &X, 2449 AtomicOpValue &V, AtomicOpValue &R, Value *E, Value *D, 2450 AtomicOrdering AO, omp::OMPAtomicCompareOp Op, 2451 bool IsXBinopExpr, bool IsPostfixUpdate, bool IsFailOnly); 2452 2453 /// Create the control flow structure of a canonical OpenMP loop. 2454 /// 2455 /// The emitted loop will be disconnected, i.e. no edge to the loop's 2456 /// preheader and no terminator in the AfterBB. The OpenMPIRBuilder's 2457 /// IRBuilder location is not preserved. 2458 /// 2459 /// \param DL DebugLoc used for the instructions in the skeleton. 2460 /// \param TripCount Value to be used for the trip count. 2461 /// \param F Function in which to insert the BasicBlocks. 2462 /// \param PreInsertBefore Where to insert BBs that execute before the body, 2463 /// typically the body itself. 2464 /// \param PostInsertBefore Where to insert BBs that execute after the body. 2465 /// \param Name Base name used to derive BB 2466 /// and instruction names. 2467 /// 2468 /// \returns The CanonicalLoopInfo that represents the emitted loop.
2469 CanonicalLoopInfo *createLoopSkeleton(DebugLoc DL, Value *TripCount, 2470 Function *F, 2471 BasicBlock *PreInsertBefore, 2472 BasicBlock *PostInsertBefore, 2473 const Twine &Name = {}); 2474 /// OMP Offload Info Metadata name string 2475 const std::string ompOffloadInfoName = "omp_offload.info"; 2476 2477 /// Loads all the offload entries information from the host IR 2478 /// metadata. This function is only meant to be used with device code 2479 /// generation. 2480 /// 2481 /// \param M Module to load Metadata info from. Module passed may be 2482 /// loaded from bitcode file, i.e., different from OpenMPIRBuilder::M module. 2483 void loadOffloadInfoMetadata(Module &M); 2484 2485 /// Gets (if variable with the given name already exists) or creates 2486 /// internal global variable with the specified Name. The created variable has 2487 /// linkage CommonLinkage by default and is initialized by null value. 2488 /// \param Ty Type of the global variable. If it already exists, the type 2489 /// must be the same. 2490 /// \param Name Name of the variable. 2491 GlobalVariable *getOrCreateInternalVariable(Type *Ty, const StringRef &Name, 2492 unsigned AddressSpace = 0); 2493 }; 2494 2495 /// Class to represent the control flow structure of an OpenMP canonical loop. 2496 /// 2497 /// The control-flow structure is standardized for easy consumption by 2498 /// directives associated with loops. For instance, the worksharing-loop 2499 /// construct may change this control flow such that each loop iteration is 2500 /// executed on only one thread. The constraints of a canonical loop in brief 2501 /// are: 2502 /// 2503 /// * The number of loop iterations must have been computed before entering the 2504 /// loop. 2505 /// 2506 /// * Has an (unsigned) logical induction variable that starts at zero and 2507 /// increments by one. 2508 /// 2509 /// * The loop's CFG itself has no side-effects.
The OpenMP specification 2510 /// itself allows side-effects, but the order in which they happen, including 2511 /// how often or whether at all, is unspecified. We expect that the frontend 2512 /// will emit those side-effect instructions somewhere (e.g. before the loop) 2513 /// such that the CanonicalLoopInfo itself can be side-effect free. 2514 /// 2515 /// Keep in mind that CanonicalLoopInfo is meant to only describe a repeated 2516 /// execution of a loop body that satisfies these constraints. It does NOT 2517 /// represent arbitrary SESE regions that happen to contain a loop. Do not use 2518 /// CanonicalLoopInfo for such purposes. 2519 /// 2520 /// The control flow can be described as follows: 2521 /// 2522 /// Preheader 2523 /// | 2524 /// /-> Header 2525 /// | | 2526 /// | Cond---\ 2527 /// | | | 2528 /// | Body | 2529 /// | | | | 2530 /// | <...> | 2531 /// | | | | 2532 /// \--Latch | 2533 /// | 2534 /// Exit 2535 /// | 2536 /// After 2537 /// 2538 /// The loop is thought to start at PreheaderIP (at the Preheader's terminator, 2539 /// including) and end at AfterIP (at the After's first instruction, excluding). 2540 /// That is, instructions in the Preheader and After blocks (except the 2541 /// Preheader's terminator) are out of CanonicalLoopInfo's control and may have 2542 /// side-effects. Typically, the Preheader is used to compute the loop's trip 2543 /// count. The instructions from BodyIP (at the Body block's first instruction, 2544 /// excluding) until the Latch are also considered outside CanonicalLoopInfo's 2545 /// control and thus can have side-effects. The body block is the single entry 2546 /// point into the loop body, which may contain arbitrary control flow as long 2547 /// as all control paths eventually branch to the Latch block. 2548 /// 2549 /// TODO: Consider adding another standardized BasicBlock between Body CFG and 2550 /// Latch to guarantee that there is only a single edge to the latch.
It would 2551 /// make loop transformations easier by not needing to consider multiple 2552 /// predecessors of the latch (See redirectAllPredecessorsTo) and would give us 2553 /// an equivalent to PreheaderIP, AfterIP and BodyIP for inserting code that 2554 /// executes after each body iteration. 2555 /// 2556 /// There must be no loop-carried dependencies through llvm::Values. This is 2557 /// equivalent to requiring that the Latch has no PHINode and the Header's only PHINode is 2558 /// for the induction variable. 2559 /// 2560 /// All code in Header, Cond, Latch and Exit (plus the terminator of the 2561 /// Preheader) are CanonicalLoopInfo's responsibility and their build-up checked 2562 /// by assertOK(). They are expected to not be modified unless explicitly 2563 /// modifying the CanonicalLoopInfo through a method that applies an OpenMP 2564 /// loop-associated construct such as applyWorkshareLoop, tileLoops, unrollLoop, 2565 /// etc. These methods usually invalidate the CanonicalLoopInfo and re-use its 2566 /// basic blocks. After invalidation, the CanonicalLoopInfo must not be used 2567 /// anymore as its underlying control flow may not exist anymore. 2568 /// Loop-transformation methods such as tileLoops, collapseLoops and unrollLoop 2569 /// may also return a new CanonicalLoopInfo that can be passed to other 2570 /// loop-associated construct implementing methods. These loop-transforming 2571 /// methods may either create a new CanonicalLoopInfo usually using 2572 /// createLoopSkeleton and invalidate the input CanonicalLoopInfo, or reuse and 2573 /// modify one of the input CanonicalLoopInfo and return it as representing the 2574 /// modified loop. What is done is an implementation detail of the 2575 /// transformation-implementing method and callers should always assume that the 2576 /// CanonicalLoopInfo passed to it is invalidated and a new object is returned.
2577 /// A returned CanonicalLoopInfo has the same structure and guarantees as the one 2578 /// created by createCanonicalLoop, such that transforming methods do not have 2579 /// to special case where the CanonicalLoopInfo originated from. 2580 /// 2581 /// Generally, methods consuming CanonicalLoopInfo do not need an 2582 /// OpenMPIRBuilder::InsertPointTy as argument, but use the locations of the 2583 /// CanonicalLoopInfo to insert new or modify existing instructions. Unless 2584 /// documented otherwise, methods consuming CanonicalLoopInfo do not invalidate 2585 /// any InsertPoint that is outside CanonicalLoopInfo's control. Specifically, 2586 /// any InsertPoint in the Preheader, After or Body can still be used after 2587 /// calling such a method. 2588 /// 2589 /// TODO: Provide mechanisms for exception handling and cancellation points. 2590 /// 2591 /// Defined outside OpenMPIRBuilder because nested classes cannot be 2592 /// forward-declared, e.g. to avoid having to include the entire OMPIRBuilder.h. 2593 class CanonicalLoopInfo { 2594 friend class OpenMPIRBuilder; 2595 2596 private: 2597 BasicBlock *Header = nullptr; 2598 BasicBlock *Cond = nullptr; 2599 BasicBlock *Latch = nullptr; 2600 BasicBlock *Exit = nullptr; 2601 2602 /// Add the control blocks of this loop to \p BBs. 2603 /// 2604 /// This does not include any block from the body, including the one returned 2605 /// by getBody(). 2606 /// 2607 /// FIXME: This currently includes the Preheader and After blocks even though 2608 /// their content is (mostly) not under CanonicalLoopInfo's control. 2609 /// Re-evaluate whether this makes sense. 2610 void collectControlBlocks(SmallVectorImpl<BasicBlock *> &BBs); 2611 2612 /// Sets the number of loop iterations to the given value. This value must be 2613 /// valid in the condition block (i.e., defined in the preheader) and is 2614 /// interpreted as an unsigned integer.
2615 void setTripCount(Value *TripCount); 2616 2617 /// Replace all uses of the canonical induction variable in the loop body with 2618 /// a new one. 2619 /// 2620 /// The intended use case is to update the induction variable for an updated 2621 /// iteration space such that it can stay normalized in the 0...tripcount-1 2622 /// range. 2623 /// 2624 /// The \p Updater is called with the (presumably updated) current normalized 2625 /// induction variable and is expected to return the value that uses of the 2626 /// pre-updated induction values should use instead, typically dependent on 2627 /// the new induction variable. This is a lambda (instead of e.g. just passing 2628 /// the new value) to be able to distinguish the uses of the pre-updated 2629 /// induction variable and uses of the induction variable to compute the 2630 /// updated induction variable value. 2631 void mapIndVar(llvm::function_ref<Value *(Instruction *)> Updater); 2632 2633 public: 2634 /// Returns whether this object currently represents the IR of a loop. If 2635 /// returning false, it may have been consumed by a loop transformation or not 2636 /// been initialized. Do not use in this case. 2637 bool isValid() const { return Header; } 2638 2639 /// The preheader ensures that there is only a single edge entering the loop. 2640 /// Code that must be executed before any loop iteration can be emitted here, 2641 /// such as computing the loop trip count and begin lifetime markers. Code in 2642 /// the preheader is not considered part of the canonical loop. 2643 BasicBlock *getPreheader() const; 2644 2645 /// The header is the entry for each iteration. In the canonical control flow, 2646 /// it only contains the PHINode for the induction variable. 2647 BasicBlock *getHeader() const { 2648 assert(isValid() && "Requires a valid canonical loop"); 2649 return Header; 2650 } 2651 2652 /// The condition block computes whether there is another loop iteration.
If 2653 /// yes, branches to the body; otherwise to the exit block. 2654 BasicBlock *getCond() const { 2655 assert(isValid() && "Requires a valid canonical loop"); 2656 return Cond; 2657 } 2658 2659 /// The body block is the single entry for a loop iteration and not controlled 2660 /// by CanonicalLoopInfo. It can contain arbitrary control flow but must 2661 /// eventually branch to the Latch block. It is looked up as successor 0 of the condition block's terminating branch. 2662 BasicBlock *getBody() const { 2663 assert(isValid() && "Requires a valid canonical loop"); 2664 return cast<BranchInst>(Cond->getTerminator())->getSuccessor(0); 2665 } 2666 2667 /// Reaching the latch indicates the end of the loop body code. In the 2668 /// canonical control flow, it only contains the increment of the induction 2669 /// variable. 2670 BasicBlock *getLatch() const { 2671 assert(isValid() && "Requires a valid canonical loop"); 2672 return Latch; 2673 } 2674 2675 /// Reaching the exit indicates no more iterations are being executed. 2676 BasicBlock *getExit() const { 2677 assert(isValid() && "Requires a valid canonical loop"); 2678 return Exit; 2679 } 2680 2681 /// The after block is intended for clean-up code such as lifetime end 2682 /// markers. It is separate from the exit block to ensure that, analogous to the 2683 /// preheader, it has just a single entry edge and is free from PHI 2684 /// nodes should there be multiple loop exits (such as from break 2685 /// statements/cancellations). It is looked up as the single successor of the exit block. 2686 BasicBlock *getAfter() const { 2687 assert(isValid() && "Requires a valid canonical loop"); 2688 return Exit->getSingleSuccessor(); 2689 } 2690 2691 /// Returns the llvm::Value containing the number of loop iterations. It must 2692 /// be valid in the preheader and is always interpreted as an unsigned integer of 2693 /// any bit-width.
2694 Value *getTripCount() const { 2695 assert(isValid() && "Requires a valid canonical loop"); 2696 Instruction *CmpI = &Cond->front(); 2697 assert(isa<CmpInst>(CmpI) && "First inst must compare IV with TripCount"); 2698 return CmpI->getOperand(1); 2699 } 2700 2701 /// Returns the instruction representing the current logical induction 2702 /// variable. Always unsigned, always starting at 0 with an increment of one. It is read as the first instruction of the header, which must be a PHINode. 2703 Instruction *getIndVar() const { 2704 assert(isValid() && "Requires a valid canonical loop"); 2705 Instruction *IndVarPHI = &Header->front(); 2706 assert(isa<PHINode>(IndVarPHI) && "First inst must be the IV PHI"); 2707 return IndVarPHI; 2708 } 2709 2710 /// Return the type of the induction variable (and the trip count). 2711 Type *getIndVarType() const { 2712 assert(isValid() && "Requires a valid canonical loop"); 2713 return getIndVar()->getType(); 2714 } 2715 2716 /// Return the insertion point for user code before the loop, positioned at the last instruction of the preheader. 2717 OpenMPIRBuilder::InsertPointTy getPreheaderIP() const { 2718 assert(isValid() && "Requires a valid canonical loop"); 2719 BasicBlock *Preheader = getPreheader(); 2720 return {Preheader, std::prev(Preheader->end())}; 2721 }; 2722 2723 /// Return the insertion point for user code in the body, positioned at the beginning of the body block. 2724 OpenMPIRBuilder::InsertPointTy getBodyIP() const { 2725 assert(isValid() && "Requires a valid canonical loop"); 2726 BasicBlock *Body = getBody(); 2727 return {Body, Body->begin()}; 2728 }; 2729 2730 /// Return the insertion point for user code after the loop, positioned at the beginning of the after block. 2731 OpenMPIRBuilder::InsertPointTy getAfterIP() const { 2732 assert(isValid() && "Requires a valid canonical loop"); 2733 BasicBlock *After = getAfter(); 2734 return {After, After->begin()}; 2735 }; 2736 /// Return the parent of the loop's header block, i.e. the function this loop is emitted in. 2737 Function *getFunction() const { 2738 assert(isValid() && "Requires a valid canonical loop"); 2739 return Header->getParent(); 2740 } 2741 2742 /// Consistency self-check. 2743 void assertOK() const; 2744 2745 /// Invalidate this loop.
That is, the underlying IR does not fulfill the 2746 /// requirements of an OpenMP canonical loop anymore. 2747 void invalidate(); 2748 }; 2749 2750 } // end namespace llvm 2751 2752 #endif // LLVM_FRONTEND_OPENMP_OMPIRBUILDER_H 2753