1 //===- IR/OpenMPIRBuilder.h - OpenMP encoding builder for LLVM IR - C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file defines the OpenMPIRBuilder class and helpers used as a convenient 10 // way to create LLVM instructions for OpenMP directives. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #ifndef LLVM_FRONTEND_OPENMP_OMPIRBUILDER_H 15 #define LLVM_FRONTEND_OPENMP_OMPIRBUILDER_H 16 17 #include "llvm/Analysis/MemorySSAUpdater.h" 18 #include "llvm/Frontend/OpenMP/OMPConstants.h" 19 #include "llvm/IR/DebugLoc.h" 20 #include "llvm/IR/IRBuilder.h" 21 #include "llvm/Support/Allocator.h" 22 #include "llvm/TargetParser/Triple.h" 23 #include <forward_list> 24 #include <map> 25 #include <optional> 26 27 namespace llvm { 28 class CanonicalLoopInfo; 29 struct TargetRegionEntryInfo; 30 class OffloadEntriesInfoManager; 31 class OpenMPIRBuilder; 32 33 /// Move the instruction after an InsertPoint to the beginning of another 34 /// BasicBlock. 35 /// 36 /// The instructions after \p IP are moved to the beginning of \p New which must 37 /// not have any PHINodes. If \p CreateBranch is true, a branch instruction to 38 /// \p New will be added such that there is no semantic change. Otherwise, the 39 /// \p IP insert block remains degenerate and it is up to the caller to insert a 40 /// terminator. 41 void spliceBB(IRBuilderBase::InsertPoint IP, BasicBlock *New, 42 bool CreateBranch); 43 44 /// Splice a BasicBlock at an IRBuilder's current insertion point. Its new 45 /// insert location will stick to after the instruction before the insertion 46 /// point (instead of moving with the instruction the InsertPoint stores 47 /// internally). 
48 void spliceBB(IRBuilder<> &Builder, BasicBlock *New, bool CreateBranch); 49 50 /// Split a BasicBlock at an InsertPoint, even if the block is degenerate 51 /// (missing the terminator). 52 /// 53 /// llvm::SplitBasicBlock and BasicBlock::splitBasicBlock require a well-formed 54 /// BasicBlock. \p Name is used for the new successor block. If \p CreateBranch 55 /// is true, a branch to the new successor will new created such that 56 /// semantically there is no change; otherwise the block of the insertion point 57 /// remains degenerate and it is the caller's responsibility to insert a 58 /// terminator. Returns the new successor block. 59 BasicBlock *splitBB(IRBuilderBase::InsertPoint IP, bool CreateBranch, 60 llvm::Twine Name = {}); 61 62 /// Split a BasicBlock at \p Builder's insertion point, even if the block is 63 /// degenerate (missing the terminator). Its new insert location will stick to 64 /// after the instruction before the insertion point (instead of moving with the 65 /// instruction the InsertPoint stores internally). 66 BasicBlock *splitBB(IRBuilderBase &Builder, bool CreateBranch, 67 llvm::Twine Name = {}); 68 69 /// Split a BasicBlock at \p Builder's insertion point, even if the block is 70 /// degenerate (missing the terminator). Its new insert location will stick to 71 /// after the instruction before the insertion point (instead of moving with the 72 /// instruction the InsertPoint stores internally). 73 BasicBlock *splitBB(IRBuilder<> &Builder, bool CreateBranch, llvm::Twine Name); 74 75 /// Like splitBB, but reuses the current block's name for the new name. 76 BasicBlock *splitBBWithSuffix(IRBuilderBase &Builder, bool CreateBranch, 77 llvm::Twine Suffix = ".split"); 78 79 /// Captures attributes that affect generating LLVM-IR using the 80 /// OpenMPIRBuilder and related classes. Note that not all attributes are 81 /// required for all classes or functions. 
In some use cases the configuration 82 /// is not necessary at all, because because the only functions that are called 83 /// are ones that are not dependent on the configuration. 84 class OpenMPIRBuilderConfig { 85 public: 86 /// Flag for specifying if the compilation is done for embedded device code 87 /// or host code. 88 std::optional<bool> IsTargetDevice; 89 90 /// Flag for specifying if the compilation is done for an accelerator. 91 std::optional<bool> IsGPU; 92 93 // Flag for specifying if offloading is mandatory. 94 std::optional<bool> OpenMPOffloadMandatory; 95 96 /// First separator used between the initial two parts of a name. 97 std::optional<StringRef> FirstSeparator; 98 /// Separator used between all of the rest consecutive parts of s name 99 std::optional<StringRef> Separator; 100 101 OpenMPIRBuilderConfig(); 102 OpenMPIRBuilderConfig(bool IsTargetDevice, bool IsGPU, 103 bool OpenMPOffloadMandatory, 104 bool HasRequiresReverseOffload, 105 bool HasRequiresUnifiedAddress, 106 bool HasRequiresUnifiedSharedMemory, 107 bool HasRequiresDynamicAllocators); 108 109 // Getters functions that assert if the required values are not present. 
isTargetDevice()110 bool isTargetDevice() const { 111 assert(IsTargetDevice.has_value() && "IsTargetDevice is not set"); 112 return *IsTargetDevice; 113 } 114 isGPU()115 bool isGPU() const { 116 assert(IsGPU.has_value() && "IsGPU is not set"); 117 return *IsGPU; 118 } 119 openMPOffloadMandatory()120 bool openMPOffloadMandatory() const { 121 assert(OpenMPOffloadMandatory.has_value() && 122 "OpenMPOffloadMandatory is not set"); 123 return *OpenMPOffloadMandatory; 124 } 125 hasRequiresFlags()126 bool hasRequiresFlags() const { return RequiresFlags; } 127 bool hasRequiresReverseOffload() const; 128 bool hasRequiresUnifiedAddress() const; 129 bool hasRequiresUnifiedSharedMemory() const; 130 bool hasRequiresDynamicAllocators() const; 131 132 /// Returns requires directive clauses as flags compatible with those expected 133 /// by libomptarget. 134 int64_t getRequiresFlags() const; 135 136 // Returns the FirstSeparator if set, otherwise use the default separator 137 // depending on isGPU firstSeparator()138 StringRef firstSeparator() const { 139 if (FirstSeparator.has_value()) 140 return *FirstSeparator; 141 if (isGPU()) 142 return "_"; 143 return "."; 144 } 145 146 // Returns the Separator if set, otherwise use the default separator depending 147 // on isGPU separator()148 StringRef separator() const { 149 if (Separator.has_value()) 150 return *Separator; 151 if (isGPU()) 152 return "$"; 153 return "."; 154 } 155 setIsTargetDevice(bool Value)156 void setIsTargetDevice(bool Value) { IsTargetDevice = Value; } setIsGPU(bool Value)157 void setIsGPU(bool Value) { IsGPU = Value; } setOpenMPOffloadMandatory(bool Value)158 void setOpenMPOffloadMandatory(bool Value) { OpenMPOffloadMandatory = Value; } setFirstSeparator(StringRef FS)159 void setFirstSeparator(StringRef FS) { FirstSeparator = FS; } setSeparator(StringRef S)160 void setSeparator(StringRef S) { Separator = S; } 161 162 void setHasRequiresReverseOffload(bool Value); 163 void setHasRequiresUnifiedAddress(bool Value); 
164 void setHasRequiresUnifiedSharedMemory(bool Value); 165 void setHasRequiresDynamicAllocators(bool Value); 166 167 private: 168 /// Flags for specifying which requires directive clauses are present. 169 int64_t RequiresFlags; 170 }; 171 172 /// Data structure to contain the information needed to uniquely identify 173 /// a target entry. 174 struct TargetRegionEntryInfo { 175 std::string ParentName; 176 unsigned DeviceID; 177 unsigned FileID; 178 unsigned Line; 179 unsigned Count; 180 TargetRegionEntryInfoTargetRegionEntryInfo181 TargetRegionEntryInfo() : DeviceID(0), FileID(0), Line(0), Count(0) {} 182 TargetRegionEntryInfo(StringRef ParentName, unsigned DeviceID, 183 unsigned FileID, unsigned Line, unsigned Count = 0) ParentNameTargetRegionEntryInfo184 : ParentName(ParentName), DeviceID(DeviceID), FileID(FileID), Line(Line), 185 Count(Count) {} 186 187 static void getTargetRegionEntryFnName(SmallVectorImpl<char> &Name, 188 StringRef ParentName, 189 unsigned DeviceID, unsigned FileID, 190 unsigned Line, unsigned Count); 191 192 bool operator<(const TargetRegionEntryInfo RHS) const { 193 return std::make_tuple(ParentName, DeviceID, FileID, Line, Count) < 194 std::make_tuple(RHS.ParentName, RHS.DeviceID, RHS.FileID, RHS.Line, 195 RHS.Count); 196 } 197 }; 198 199 /// Class that manages information about offload code regions and data 200 class OffloadEntriesInfoManager { 201 /// Number of entries registered so far. 202 OpenMPIRBuilder *OMPBuilder; 203 unsigned OffloadingEntriesNum = 0; 204 205 public: 206 /// Base class of the entries info. 207 class OffloadEntryInfo { 208 public: 209 /// Kind of a given entry. 210 enum OffloadingEntryInfoKinds : unsigned { 211 /// Entry is a target region. 212 OffloadingEntryInfoTargetRegion = 0, 213 /// Entry is a declare target variable. 214 OffloadingEntryInfoDeviceGlobalVar = 1, 215 /// Invalid entry info. 
216 OffloadingEntryInfoInvalid = ~0u 217 }; 218 219 protected: 220 OffloadEntryInfo() = delete; OffloadEntryInfo(OffloadingEntryInfoKinds Kind)221 explicit OffloadEntryInfo(OffloadingEntryInfoKinds Kind) : Kind(Kind) {} OffloadEntryInfo(OffloadingEntryInfoKinds Kind,unsigned Order,uint32_t Flags)222 explicit OffloadEntryInfo(OffloadingEntryInfoKinds Kind, unsigned Order, 223 uint32_t Flags) 224 : Flags(Flags), Order(Order), Kind(Kind) {} 225 ~OffloadEntryInfo() = default; 226 227 public: isValid()228 bool isValid() const { return Order != ~0u; } getOrder()229 unsigned getOrder() const { return Order; } getKind()230 OffloadingEntryInfoKinds getKind() const { return Kind; } getFlags()231 uint32_t getFlags() const { return Flags; } setFlags(uint32_t NewFlags)232 void setFlags(uint32_t NewFlags) { Flags = NewFlags; } getAddress()233 Constant *getAddress() const { return cast_or_null<Constant>(Addr); } setAddress(Constant * V)234 void setAddress(Constant *V) { 235 assert(!Addr.pointsToAliveValue() && "Address has been set before!"); 236 Addr = V; 237 } classof(const OffloadEntryInfo * Info)238 static bool classof(const OffloadEntryInfo *Info) { return true; } 239 240 private: 241 /// Address of the entity that has to be mapped for offloading. 242 WeakTrackingVH Addr; 243 244 /// Flags associated with the device global. 245 uint32_t Flags = 0u; 246 247 /// Order this entry was emitted. 248 unsigned Order = ~0u; 249 250 OffloadingEntryInfoKinds Kind = OffloadingEntryInfoInvalid; 251 }; 252 253 /// Return true if a there are no entries defined. 254 bool empty() const; 255 /// Return number of entries defined so far. size()256 unsigned size() const { return OffloadingEntriesNum; } 257 OffloadEntriesInfoManager(OpenMPIRBuilder * builder)258 OffloadEntriesInfoManager(OpenMPIRBuilder *builder) : OMPBuilder(builder) {} 259 260 // 261 // Target region entries related. 262 // 263 264 /// Kind of the target registry entry. 
265 enum OMPTargetRegionEntryKind : uint32_t { 266 /// Mark the entry as target region. 267 OMPTargetRegionEntryTargetRegion = 0x0, 268 }; 269 270 /// Target region entries info. 271 class OffloadEntryInfoTargetRegion final : public OffloadEntryInfo { 272 /// Address that can be used as the ID of the entry. 273 Constant *ID = nullptr; 274 275 public: OffloadEntryInfoTargetRegion()276 OffloadEntryInfoTargetRegion() 277 : OffloadEntryInfo(OffloadingEntryInfoTargetRegion) {} OffloadEntryInfoTargetRegion(unsigned Order,Constant * Addr,Constant * ID,OMPTargetRegionEntryKind Flags)278 explicit OffloadEntryInfoTargetRegion(unsigned Order, Constant *Addr, 279 Constant *ID, 280 OMPTargetRegionEntryKind Flags) 281 : OffloadEntryInfo(OffloadingEntryInfoTargetRegion, Order, Flags), 282 ID(ID) { 283 setAddress(Addr); 284 } 285 getID()286 Constant *getID() const { return ID; } setID(Constant * V)287 void setID(Constant *V) { 288 assert(!ID && "ID has been set before!"); 289 ID = V; 290 } classof(const OffloadEntryInfo * Info)291 static bool classof(const OffloadEntryInfo *Info) { 292 return Info->getKind() == OffloadingEntryInfoTargetRegion; 293 } 294 }; 295 296 /// Initialize target region entry. 297 /// This is ONLY needed for DEVICE compilation. 298 void initializeTargetRegionEntryInfo(const TargetRegionEntryInfo &EntryInfo, 299 unsigned Order); 300 /// Register target region entry. 301 void registerTargetRegionEntryInfo(TargetRegionEntryInfo EntryInfo, 302 Constant *Addr, Constant *ID, 303 OMPTargetRegionEntryKind Flags); 304 /// Return true if a target region entry with the provided information 305 /// exists. 306 bool hasTargetRegionEntryInfo(TargetRegionEntryInfo EntryInfo, 307 bool IgnoreAddressId = false) const; 308 309 // Return the Name based on \a EntryInfo using the next available Count. 310 void getTargetRegionEntryFnName(SmallVectorImpl<char> &Name, 311 const TargetRegionEntryInfo &EntryInfo); 312 313 /// brief Applies action \a Action on all registered entries. 
314 typedef function_ref<void(const TargetRegionEntryInfo &EntryInfo, 315 const OffloadEntryInfoTargetRegion &)> 316 OffloadTargetRegionEntryInfoActTy; 317 void 318 actOnTargetRegionEntriesInfo(const OffloadTargetRegionEntryInfoActTy &Action); 319 320 // 321 // Device global variable entries related. 322 // 323 324 /// Kind of the global variable entry.. 325 enum OMPTargetGlobalVarEntryKind : uint32_t { 326 /// Mark the entry as a to declare target. 327 OMPTargetGlobalVarEntryTo = 0x0, 328 /// Mark the entry as a to declare target link. 329 OMPTargetGlobalVarEntryLink = 0x1, 330 /// Mark the entry as a declare target enter. 331 OMPTargetGlobalVarEntryEnter = 0x2, 332 /// Mark the entry as having no declare target entry kind. 333 OMPTargetGlobalVarEntryNone = 0x3, 334 /// Mark the entry as a declare target indirect global. 335 OMPTargetGlobalVarEntryIndirect = 0x8, 336 }; 337 338 /// Kind of device clause for declare target variables 339 /// and functions 340 /// NOTE: Currently not used as a part of a variable entry 341 /// used for Flang and Clang to interface with the variable 342 /// related registration functions 343 enum OMPTargetDeviceClauseKind : uint32_t { 344 /// The target is marked for all devices 345 OMPTargetDeviceClauseAny = 0x0, 346 /// The target is marked for non-host devices 347 OMPTargetDeviceClauseNoHost = 0x1, 348 /// The target is marked for host devices 349 OMPTargetDeviceClauseHost = 0x2, 350 /// The target is marked as having no clause 351 OMPTargetDeviceClauseNone = 0x3 352 }; 353 354 /// Device global variable entries info. 355 class OffloadEntryInfoDeviceGlobalVar final : public OffloadEntryInfo { 356 /// Type of the global variable. 
357 int64_t VarSize; 358 GlobalValue::LinkageTypes Linkage; 359 const std::string VarName; 360 361 public: OffloadEntryInfoDeviceGlobalVar()362 OffloadEntryInfoDeviceGlobalVar() 363 : OffloadEntryInfo(OffloadingEntryInfoDeviceGlobalVar) {} OffloadEntryInfoDeviceGlobalVar(unsigned Order,OMPTargetGlobalVarEntryKind Flags)364 explicit OffloadEntryInfoDeviceGlobalVar(unsigned Order, 365 OMPTargetGlobalVarEntryKind Flags) 366 : OffloadEntryInfo(OffloadingEntryInfoDeviceGlobalVar, Order, Flags) {} OffloadEntryInfoDeviceGlobalVar(unsigned Order,Constant * Addr,int64_t VarSize,OMPTargetGlobalVarEntryKind Flags,GlobalValue::LinkageTypes Linkage,const std::string & VarName)367 explicit OffloadEntryInfoDeviceGlobalVar(unsigned Order, Constant *Addr, 368 int64_t VarSize, 369 OMPTargetGlobalVarEntryKind Flags, 370 GlobalValue::LinkageTypes Linkage, 371 const std::string &VarName) 372 : OffloadEntryInfo(OffloadingEntryInfoDeviceGlobalVar, Order, Flags), 373 VarSize(VarSize), Linkage(Linkage), VarName(VarName) { 374 setAddress(Addr); 375 } 376 getVarSize()377 int64_t getVarSize() const { return VarSize; } getVarName()378 StringRef getVarName() const { return VarName; } setVarSize(int64_t Size)379 void setVarSize(int64_t Size) { VarSize = Size; } getLinkage()380 GlobalValue::LinkageTypes getLinkage() const { return Linkage; } setLinkage(GlobalValue::LinkageTypes LT)381 void setLinkage(GlobalValue::LinkageTypes LT) { Linkage = LT; } classof(const OffloadEntryInfo * Info)382 static bool classof(const OffloadEntryInfo *Info) { 383 return Info->getKind() == OffloadingEntryInfoDeviceGlobalVar; 384 } 385 }; 386 387 /// Initialize device global variable entry. 388 /// This is ONLY used for DEVICE compilation. 389 void initializeDeviceGlobalVarEntryInfo(StringRef Name, 390 OMPTargetGlobalVarEntryKind Flags, 391 unsigned Order); 392 393 /// Register device global variable entry. 
394 void registerDeviceGlobalVarEntryInfo(StringRef VarName, Constant *Addr, 395 int64_t VarSize, 396 OMPTargetGlobalVarEntryKind Flags, 397 GlobalValue::LinkageTypes Linkage); 398 /// Checks if the variable with the given name has been registered already. hasDeviceGlobalVarEntryInfo(StringRef VarName)399 bool hasDeviceGlobalVarEntryInfo(StringRef VarName) const { 400 return OffloadEntriesDeviceGlobalVar.count(VarName) > 0; 401 } 402 /// Applies action \a Action on all registered entries. 403 typedef function_ref<void(StringRef, const OffloadEntryInfoDeviceGlobalVar &)> 404 OffloadDeviceGlobalVarEntryInfoActTy; 405 void actOnDeviceGlobalVarEntriesInfo( 406 const OffloadDeviceGlobalVarEntryInfoActTy &Action); 407 408 private: 409 /// Return the count of entries at a particular source location. 410 unsigned 411 getTargetRegionEntryInfoCount(const TargetRegionEntryInfo &EntryInfo) const; 412 413 /// Update the count of entries at a particular source location. 414 void 415 incrementTargetRegionEntryInfoCount(const TargetRegionEntryInfo &EntryInfo); 416 417 static TargetRegionEntryInfo getTargetRegionEntryCountKey(const TargetRegionEntryInfo & EntryInfo)418 getTargetRegionEntryCountKey(const TargetRegionEntryInfo &EntryInfo) { 419 return TargetRegionEntryInfo(EntryInfo.ParentName, EntryInfo.DeviceID, 420 EntryInfo.FileID, EntryInfo.Line, 0); 421 } 422 423 // Count of entries at a location. 424 std::map<TargetRegionEntryInfo, unsigned> OffloadEntriesTargetRegionCount; 425 426 // Storage for target region entries kind. 427 typedef std::map<TargetRegionEntryInfo, OffloadEntryInfoTargetRegion> 428 OffloadEntriesTargetRegionTy; 429 OffloadEntriesTargetRegionTy OffloadEntriesTargetRegion; 430 /// Storage for device global variable entries kind. The storage is to be 431 /// indexed by mangled name. 
432 typedef StringMap<OffloadEntryInfoDeviceGlobalVar> 433 OffloadEntriesDeviceGlobalVarTy; 434 OffloadEntriesDeviceGlobalVarTy OffloadEntriesDeviceGlobalVar; 435 }; 436 437 /// An interface to create LLVM-IR for OpenMP directives. 438 /// 439 /// Each OpenMP directive has a corresponding public generator method. 440 class OpenMPIRBuilder { 441 public: 442 /// Create a new OpenMPIRBuilder operating on the given module \p M. This will 443 /// not have an effect on \p M (see initialize) OpenMPIRBuilder(Module & M)444 OpenMPIRBuilder(Module &M) 445 : M(M), Builder(M.getContext()), OffloadInfoManager(this), 446 T(Triple(M.getTargetTriple())) {} 447 ~OpenMPIRBuilder(); 448 449 /// Initialize the internal state, this will put structures types and 450 /// potentially other helpers into the underlying module. Must be called 451 /// before any other method and only once! This internal state includes types 452 /// used in the OpenMPIRBuilder generated from OMPKinds.def. 453 void initialize(); 454 setConfig(OpenMPIRBuilderConfig C)455 void setConfig(OpenMPIRBuilderConfig C) { Config = C; } 456 457 /// Finalize the underlying module, e.g., by outlining regions. 458 /// \param Fn The function to be finalized. If not used, 459 /// all functions are finalized. 460 void finalize(Function *Fn = nullptr); 461 462 /// Add attributes known for \p FnID to \p Fn. 463 void addAttributes(omp::RuntimeFunction FnID, Function &Fn); 464 465 /// Type used throughout for insertion points. 466 using InsertPointTy = IRBuilder<>::InsertPoint; 467 468 /// Get the create a name using the platform specific separators. 469 /// \param Parts parts of the final name that needs separation 470 /// The created name has a first separator between the first and second part 471 /// and a second separator between all other parts. 472 /// E.g. with FirstSeparator "$" and Separator "." 
and 473 /// parts: "p1", "p2", "p3", "p4" 474 /// The resulting name is "p1$p2.p3.p4" 475 /// The separators are retrieved from the OpenMPIRBuilderConfig. 476 std::string createPlatformSpecificName(ArrayRef<StringRef> Parts) const; 477 478 /// Callback type for variable finalization (think destructors). 479 /// 480 /// \param CodeGenIP is the insertion point at which the finalization code 481 /// should be placed. 482 /// 483 /// A finalize callback knows about all objects that need finalization, e.g. 484 /// destruction, when the scope of the currently generated construct is left 485 /// at the time, and location, the callback is invoked. 486 using FinalizeCallbackTy = std::function<void(InsertPointTy CodeGenIP)>; 487 488 struct FinalizationInfo { 489 /// The finalization callback provided by the last in-flight invocation of 490 /// createXXXX for the directive of kind DK. 491 FinalizeCallbackTy FiniCB; 492 493 /// The directive kind of the innermost directive that has an associated 494 /// region which might require finalization when it is left. 495 omp::Directive DK; 496 497 /// Flag to indicate if the directive is cancellable. 498 bool IsCancellable; 499 }; 500 501 /// Push a finalization callback on the finalization stack. 502 /// 503 /// NOTE: Temporary solution until Clang CG is gone. pushFinalizationCB(const FinalizationInfo & FI)504 void pushFinalizationCB(const FinalizationInfo &FI) { 505 FinalizationStack.push_back(FI); 506 } 507 508 /// Pop the last finalization callback from the finalization stack. 509 /// 510 /// NOTE: Temporary solution until Clang CG is gone. popFinalizationCB()511 void popFinalizationCB() { FinalizationStack.pop_back(); } 512 513 /// Callback type for body (=inner region) code generation 514 /// 515 /// The callback takes code locations as arguments, each describing a 516 /// location where additional instructions can be inserted. 517 /// 518 /// The CodeGenIP may be in the middle of a basic block or point to the end of 519 /// it. 
The basic block may have a terminator or be degenerate. The callback 520 /// function may just insert instructions at that position, but also split the 521 /// block (without the Before argument of BasicBlock::splitBasicBlock such 522 /// that the identity of the split predecessor block is preserved) and insert 523 /// additional control flow, including branches that do not lead back to what 524 /// follows the CodeGenIP. Note that since the callback is allowed to split 525 /// the block, callers must assume that InsertPoints to positions in the 526 /// BasicBlock after CodeGenIP including CodeGenIP itself are invalidated. If 527 /// such InsertPoints need to be preserved, the caller can split the block itself 528 /// before calling the callback. 529 /// 530 /// AllocaIP and CodeGenIP must not point to the same position. 531 /// 532 /// \param AllocaIP is the insertion point at which new alloca instructions 533 /// should be placed. The BasicBlock it is pointing to must 534 /// not be split. 535 /// \param CodeGenIP is the insertion point at which the body code should be 536 /// placed. 537 using BodyGenCallbackTy = 538 function_ref<void(InsertPointTy AllocaIP, InsertPointTy CodeGenIP)>; 539 540 // This is created primarily for sections construct as llvm::function_ref 541 // (BodyGenCallbackTy) is not storable (as described in the comments of 542 // function_ref class - function_ref contains non-ownable reference 543 // to the callable). 544 using StorableBodyGenCallbackTy = 545 std::function<void(InsertPointTy AllocaIP, InsertPointTy CodeGenIP)>; 546 547 /// Callback type for loop body code generation. 548 /// 549 /// \param CodeGenIP is the insertion point where the loop's body code must be 550 /// placed. This will be a dedicated BasicBlock with a 551 /// conditional branch from the loop condition check and 552 /// terminated with an unconditional branch to the loop 553 /// latch. 554 /// \param IndVar is the induction variable usable at the insertion point. 
555 using LoopBodyGenCallbackTy = 556 function_ref<void(InsertPointTy CodeGenIP, Value *IndVar)>; 557 558 /// Callback type for variable privatization (think copy & default 559 /// constructor). 560 /// 561 /// \param AllocaIP is the insertion point at which new alloca instructions 562 /// should be placed. 563 /// \param CodeGenIP is the insertion point at which the privatization code 564 /// should be placed. 565 /// \param Original The value being copied/created, should not be used in the 566 /// generated IR. 567 /// \param Inner The equivalent of \p Original that should be used in the 568 /// generated IR; this is equal to \p Original if the value is 569 /// a pointer and can thus be passed directly, otherwise it is 570 /// an equivalent but different value. 571 /// \param ReplVal The replacement value, thus a copy or new created version 572 /// of \p Inner. 573 /// 574 /// \returns The new insertion point where code generation continues and 575 /// \p ReplVal the replacement value. 576 using PrivatizeCallbackTy = function_ref<InsertPointTy( 577 InsertPointTy AllocaIP, InsertPointTy CodeGenIP, Value &Original, 578 Value &Inner, Value *&ReplVal)>; 579 580 /// Description of a LLVM-IR insertion point (IP) and a debug/source location 581 /// (filename, line, column, ...). 582 struct LocationDescription { LocationDescriptionLocationDescription583 LocationDescription(const IRBuilderBase &IRB) 584 : IP(IRB.saveIP()), DL(IRB.getCurrentDebugLocation()) {} LocationDescriptionLocationDescription585 LocationDescription(const InsertPointTy &IP) : IP(IP) {} LocationDescriptionLocationDescription586 LocationDescription(const InsertPointTy &IP, const DebugLoc &DL) 587 : IP(IP), DL(DL) {} 588 InsertPointTy IP; 589 DebugLoc DL; 590 }; 591 592 /// Emitter methods for OpenMP directives. 593 /// 594 ///{ 595 596 /// Generator for '#omp barrier' 597 /// 598 /// \param Loc The location where the barrier directive was encountered. 
599 /// \param DK The kind of directive that caused the barrier. 600 /// \param ForceSimpleCall Flag to force a simple (=non-cancellation) barrier. 601 /// \param CheckCancelFlag Flag to indicate a cancel barrier return value 602 /// should be checked and acted upon. 603 /// 604 /// \returns The insertion point after the barrier. 605 InsertPointTy createBarrier(const LocationDescription &Loc, omp::Directive DK, 606 bool ForceSimpleCall = false, 607 bool CheckCancelFlag = true); 608 609 /// Generator for '#omp cancel' 610 /// 611 /// \param Loc The location where the directive was encountered. 612 /// \param IfCondition The evaluated 'if' clause expression, if any. 613 /// \param CanceledDirective The kind of directive that is canceled. 614 /// 615 /// \returns The insertion point after the barrier. 616 InsertPointTy createCancel(const LocationDescription &Loc, Value *IfCondition, 617 omp::Directive CanceledDirective); 618 619 /// Generator for '#omp parallel' 620 /// 621 /// \param Loc The insert and source location description. 622 /// \param AllocaIP The insertion points to be used for alloca instructions. 623 /// \param BodyGenCB Callback that will generate the region code. 624 /// \param PrivCB Callback to copy a given variable (think copy constructor). 625 /// \param FiniCB Callback to finalize variable copies. 626 /// \param IfCondition The evaluated 'if' clause expression, if any. 627 /// \param NumThreads The evaluated 'num_threads' clause expression, if any. 628 /// \param ProcBind The value of the 'proc_bind' clause (see ProcBindKind). 629 /// \param IsCancellable Flag to indicate a cancellable parallel region. 630 /// 631 /// \returns The insertion position *after* the parallel. 
632 IRBuilder<>::InsertPoint 633 createParallel(const LocationDescription &Loc, InsertPointTy AllocaIP, 634 BodyGenCallbackTy BodyGenCB, PrivatizeCallbackTy PrivCB, 635 FinalizeCallbackTy FiniCB, Value *IfCondition, 636 Value *NumThreads, omp::ProcBindKind ProcBind, 637 bool IsCancellable); 638 639 /// Generator for the control flow structure of an OpenMP canonical loop. 640 /// 641 /// This generator operates on the logical iteration space of the loop, i.e. 642 /// the caller only has to provide a loop trip count of the loop as defined by 643 /// base language semantics. The trip count is interpreted as an unsigned 644 /// integer. The induction variable passed to \p BodyGenCB will be of the same 645 /// type and run from 0 to \p TripCount - 1. It is up to the callback to 646 /// convert the logical iteration variable to the loop counter variable in the 647 /// loop body. 648 /// 649 /// \param Loc The insert and source location description. The insert 650 /// location can be between two instructions or the end of a 651 /// degenerate block (e.g. a BB under construction). 652 /// \param BodyGenCB Callback that will generate the loop body code. 653 /// \param TripCount Number of iterations the loop body is executed. 654 /// \param Name Base name used to derive BB and instruction names. 655 /// 656 /// \returns An object representing the created control flow structure which 657 /// can be used for loop-associated directives. 658 CanonicalLoopInfo *createCanonicalLoop(const LocationDescription &Loc, 659 LoopBodyGenCallbackTy BodyGenCB, 660 Value *TripCount, 661 const Twine &Name = "loop"); 662 663 /// Generator for the control flow structure of an OpenMP canonical loop. 664 /// 665 /// Instead of a logical iteration space, this allows specifying user-defined 666 /// loop counter values using increment, upper- and lower bounds. 
To 667 /// disambiguate the terminology when counting downwards, instead of lower 668 /// bounds we use \p Start for the loop counter value in the first body 669 /// iteration. 670 /// 671 /// Consider the following limitations: 672 /// 673 /// * A loop counter space over all integer values of its bit-width cannot be 674 /// represented (e.g. using uint8_t, its loop trip count of 256 cannot be 675 /// stored into an 8 bit integer): 676 /// 677 /// DO I = 0, 255, 1 678 /// 679 /// * Unsigned wrapping is only supported when wrapping only "once"; E.g. 680 /// effectively counting downwards: 681 /// 682 /// for (uint8_t i = 100u; i > 0; i += 127u) 683 /// 684 /// 685 /// TODO: May need to add additional parameters to represent: 686 /// 687 /// * Allow representing downcounting with unsigned integers. 688 /// 689 /// * Sign of the step and the comparison operator might disagree: 690 /// 691 /// for (int i = 0; i < 42; i -= 1u) 692 /// 693 /// 694 /// \param Loc The insert and source location description. 695 /// \param BodyGenCB Callback that will generate the loop body code. 696 /// \param Start Value of the loop counter for the first iteration. 697 /// \param Stop Loop counter values past this will stop the loop. 698 /// \param Step Loop counter increment after each iteration; negative 699 /// means counting down. 700 /// \param IsSigned Whether Start, Stop and Step are signed integers. 701 /// \param InclusiveStop Whether \p Stop itself is a valid value for the loop 702 /// counter. 703 /// \param ComputeIP Insertion point for instructions computing the trip 704 /// count. Can be used to ensure the trip count is available 705 /// at the outermost loop of a loop nest. If not set, 706 /// defaults to the preheader of the generated loop. 707 /// \param Name Base name used to derive BB and instruction names. 708 /// 709 /// \returns An object representing the created control flow structure which 710 /// can be used for loop-associated directives. 
711 CanonicalLoopInfo *createCanonicalLoop(const LocationDescription &Loc, 712 LoopBodyGenCallbackTy BodyGenCB, 713 Value *Start, Value *Stop, Value *Step, 714 bool IsSigned, bool InclusiveStop, 715 InsertPointTy ComputeIP = {}, 716 const Twine &Name = "loop"); 717 718 /// Collapse a loop nest into a single loop. 719 /// 720 /// Merges loops of a loop nest into a single CanonicalLoopNest representation 721 /// that has the same number of innermost loop iterations as the origin loop 722 /// nest. The induction variables of the input loops are derived from the 723 /// collapsed loop's induction variable. This is intended to be used to 724 /// implement OpenMP's collapse clause. Before applying a directive, 725 /// collapseLoops normalizes a loop nest to contain only a single loop and the 726 /// directive's implementation does not need to handle multiple loops itself. 727 /// This does not remove the need to handle all loop nest handling by 728 /// directives, such as the ordered(<n>) clause or the simd schedule-clause 729 /// modifier of the worksharing-loop directive. 730 /// 731 /// Example: 732 /// \code 733 /// for (int i = 0; i < 7; ++i) // Canonical loop "i" 734 /// for (int j = 0; j < 9; ++j) // Canonical loop "j" 735 /// body(i, j); 736 /// \endcode 737 /// 738 /// After collapsing with Loops={i,j}, the loop is changed to 739 /// \code 740 /// for (int ij = 0; ij < 63; ++ij) { 741 /// int i = ij / 9; 742 /// int j = ij % 9; 743 /// body(i, j); 744 /// } 745 /// \endcode 746 /// 747 /// In the current implementation, the following limitations apply: 748 /// 749 /// * All input loops have an induction variable of the same type. 750 /// 751 /// * The collapsed loop will have the same trip count integer type as the 752 /// input loops. Therefore it is possible that the collapsed loop cannot 753 /// represent all iterations of the input loops. 
For instance, assuming a 754 /// 32 bit integer type, and two input loops both iterating 2^16 times, the 755 /// theoretical trip count of the collapsed loop would be 2^32 iteration, 756 /// which cannot be represented in an 32-bit integer. Behavior is undefined 757 /// in this case. 758 /// 759 /// * The trip counts of every input loop must be available at \p ComputeIP. 760 /// Non-rectangular loops are not yet supported. 761 /// 762 /// * At each nest level, code between a surrounding loop and its nested loop 763 /// is hoisted into the loop body, and such code will be executed more 764 /// often than before collapsing (or not at all if any inner loop iteration 765 /// has a trip count of 0). This is permitted by the OpenMP specification. 766 /// 767 /// \param DL Debug location for instructions added for collapsing, 768 /// such as instructions to compute/derive the input loop's 769 /// induction variables. 770 /// \param Loops Loops in the loop nest to collapse. Loops are specified 771 /// from outermost-to-innermost and every control flow of a 772 /// loop's body must pass through its directly nested loop. 773 /// \param ComputeIP Where additional instruction that compute the collapsed 774 /// trip count. If not set, defaults to before the generated 775 /// loop. 776 /// 777 /// \returns The CanonicalLoopInfo object representing the collapsed loop. 778 CanonicalLoopInfo *collapseLoops(DebugLoc DL, 779 ArrayRef<CanonicalLoopInfo *> Loops, 780 InsertPointTy ComputeIP); 781 782 /// Get the default alignment value for given target 783 /// 784 /// \param TargetTriple Target triple 785 /// \param Features StringMap which describes extra CPU features 786 static unsigned getOpenMPDefaultSimdAlign(const Triple &TargetTriple, 787 const StringMap<bool> &Features); 788 789 /// Retrieve (or create if non-existent) the address of a declare 790 /// target variable, used in conjunction with registerTargetGlobalVariable 791 /// to create declare target global variables. 
///
/// \param CaptureClause - enumerator corresponding to the OpenMP capture
/// clause used in conjunction with the variable being registered (link,
/// to, enter).
/// \param DeviceClause - enumerator corresponding to the OpenMP device_type
/// clause used in conjunction with the variable being registered (nohost,
/// host, any)
/// \param IsDeclaration - boolean stating if the variable being registered
/// is a declaration-only and not a definition
/// \param IsExternallyVisible - boolean stating if the variable is externally
/// visible
/// \param EntryInfo - Unique entry information for the value generated
/// using getTargetEntryUniqueInfo, used to name generated pointer references
/// to the declare target variable
/// \param MangledName - the mangled name of the variable being registered
/// \param GeneratedRefs - references generated by invocations of
/// registerTargetGlobalVariable invoked from getAddrOfDeclareTargetVar,
/// these are required by Clang for book keeping.
/// \param OpenMPSIMD - if OpenMP SIMD mode is currently enabled
/// \param TargetTriple - The OpenMP device target triple we are compiling
/// for
/// \param LlvmPtrTy - The type of the variable we are generating or
/// retrieving an address for
/// \param GlobalInitializer - a lambda function which creates a constant
/// used for initializing a pointer reference to the variable in certain
/// cases. If a nullptr is passed, it will default to utilising the original
/// variable to initialize the pointer reference.
/// \param VariableLinkage - a lambda function which returns the variable's
/// linkage type, if unspecified and a nullptr is given, it will instead
/// utilise the linkage stored on the existing global variable in the
/// LLVMModule.
823 Constant *getAddrOfDeclareTargetVar( 824 OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind CaptureClause, 825 OffloadEntriesInfoManager::OMPTargetDeviceClauseKind DeviceClause, 826 bool IsDeclaration, bool IsExternallyVisible, 827 TargetRegionEntryInfo EntryInfo, StringRef MangledName, 828 std::vector<GlobalVariable *> &GeneratedRefs, bool OpenMPSIMD, 829 std::vector<Triple> TargetTriple, Type *LlvmPtrTy, 830 std::function<Constant *()> GlobalInitializer, 831 std::function<GlobalValue::LinkageTypes()> VariableLinkage); 832 833 /// Registers a target variable for device or host. 834 /// 835 /// \param CaptureClause - enumerator corresponding to the OpenMP capture 836 /// clause used in conjunction with the variable being registered (link, 837 /// to, enter). 838 /// \param DeviceClause - enumerator corresponding to the OpenMP capture 839 /// clause used in conjunction with the variable being registered (nohost, 840 /// host, any) 841 /// \param IsDeclaration - boolean stating if the variable being registered 842 /// is a declaration-only and not a definition 843 /// \param IsExternallyVisible - boolean stating if the variable is externally 844 /// visible 845 /// \param EntryInfo - Unique entry information for the value generated 846 /// using getTargetEntryUniqueInfo, used to name generated pointer references 847 /// to the declare target variable 848 /// \param MangledName - the mangled name of the variable being registered 849 /// \param GeneratedRefs - references generated by invocations of 850 /// registerTargetGlobalVariable these are required by Clang for book 851 /// keeping. 852 /// \param OpenMPSIMD - if OpenMP SIMD mode is currently enabled 853 /// \param TargetTriple - The OpenMP device target triple we are compiling 854 /// for 855 /// \param GlobalInitializer - a lambda function which creates a constant 856 /// used for initializing a pointer reference to the variable in certain 857 /// cases. 
If a nullptr is passed, it will default to utilising the original 858 /// variable to initialize the pointer reference. 859 /// \param VariableLinkage - a lambda function which returns the variables 860 /// linkage type, if unspecified and a nullptr is given, it will instead 861 /// utilise the linkage stored on the existing global variable in the 862 /// LLVMModule. 863 /// \param LlvmPtrTy - The type of the variable we are generating or 864 /// retrieving an address for 865 /// \param Addr - the original llvm value (addr) of the variable to be 866 /// registered 867 void registerTargetGlobalVariable( 868 OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind CaptureClause, 869 OffloadEntriesInfoManager::OMPTargetDeviceClauseKind DeviceClause, 870 bool IsDeclaration, bool IsExternallyVisible, 871 TargetRegionEntryInfo EntryInfo, StringRef MangledName, 872 std::vector<GlobalVariable *> &GeneratedRefs, bool OpenMPSIMD, 873 std::vector<Triple> TargetTriple, 874 std::function<Constant *()> GlobalInitializer, 875 std::function<GlobalValue::LinkageTypes()> VariableLinkage, 876 Type *LlvmPtrTy, Constant *Addr); 877 878 /// Get the offset of the OMP_MAP_MEMBER_OF field. 879 unsigned getFlagMemberOffset(); 880 881 /// Get OMP_MAP_MEMBER_OF flag with extra bits reserved based on 882 /// the position given. 883 /// \param Position - A value indicating the position of the parent 884 /// of the member in the kernel argument structure, often retrieved 885 /// by the parents position in the combined information vectors used 886 /// to generate the structure itself. Multiple children (member's of) 887 /// with the same parent will use the same returned member flag. 888 omp::OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position); 889 890 /// Given an initial flag set, this function modifies it to contain 891 /// the passed in MemberOfFlag generated from the getMemberOfFlag 892 /// function. 
The results are dependent on the existing flag bits 893 /// set in the original flag set. 894 /// \param Flags - The original set of flags to be modified with the 895 /// passed in MemberOfFlag. 896 /// \param MemberOfFlag - A modified OMP_MAP_MEMBER_OF flag, adjusted 897 /// slightly based on the getMemberOfFlag which adjusts the flag bits 898 /// based on the members position in its parent. 899 void setCorrectMemberOfFlag(omp::OpenMPOffloadMappingFlags &Flags, 900 omp::OpenMPOffloadMappingFlags MemberOfFlag); 901 902 private: 903 /// Modifies the canonical loop to be a statically-scheduled workshare loop 904 /// which is executed on the device 905 /// 906 /// This takes a \p CLI representing a canonical loop, such as the one 907 /// created by \see createCanonicalLoop and emits additional instructions to 908 /// turn it into a workshare loop. In particular, it calls to an OpenMP 909 /// runtime function in the preheader to call OpenMP device rtl function 910 /// which handles worksharing of loop body interations. 911 /// 912 /// \param DL Debug location for instructions added for the 913 /// workshare-loop construct itself. 914 /// \param CLI A descriptor of the canonical loop to workshare. 915 /// \param AllocaIP An insertion point for Alloca instructions usable in the 916 /// preheader of the loop. 917 /// \param LoopType Information about type of loop worksharing. 918 /// It corresponds to type of loop workshare OpenMP pragma. 919 /// 920 /// \returns Point where to insert code after the workshare construct. 921 InsertPointTy applyWorkshareLoopTarget(DebugLoc DL, CanonicalLoopInfo *CLI, 922 InsertPointTy AllocaIP, 923 omp::WorksharingLoopType LoopType); 924 925 /// Modifies the canonical loop to be a statically-scheduled workshare loop. 926 /// 927 /// This takes a \p LoopInfo representing a canonical loop, such as the one 928 /// created by \p createCanonicalLoop and emits additional instructions to 929 /// turn it into a workshare loop. 
In particular, it calls to an OpenMP 930 /// runtime function in the preheader to obtain the loop bounds to be used in 931 /// the current thread, updates the relevant instructions in the canonical 932 /// loop and calls to an OpenMP runtime finalization function after the loop. 933 /// 934 /// \param DL Debug location for instructions added for the 935 /// workshare-loop construct itself. 936 /// \param CLI A descriptor of the canonical loop to workshare. 937 /// \param AllocaIP An insertion point for Alloca instructions usable in the 938 /// preheader of the loop. 939 /// \param NeedsBarrier Indicates whether a barrier must be inserted after 940 /// the loop. 941 /// 942 /// \returns Point where to insert code after the workshare construct. 943 InsertPointTy applyStaticWorkshareLoop(DebugLoc DL, CanonicalLoopInfo *CLI, 944 InsertPointTy AllocaIP, 945 bool NeedsBarrier); 946 947 /// Modifies the canonical loop a statically-scheduled workshare loop with a 948 /// user-specified chunk size. 949 /// 950 /// \param DL Debug location for instructions added for the 951 /// workshare-loop construct itself. 952 /// \param CLI A descriptor of the canonical loop to workshare. 953 /// \param AllocaIP An insertion point for Alloca instructions usable in 954 /// the preheader of the loop. 955 /// \param NeedsBarrier Indicates whether a barrier must be inserted after the 956 /// loop. 957 /// \param ChunkSize The user-specified chunk size. 958 /// 959 /// \returns Point where to insert code after the workshare construct. 960 InsertPointTy applyStaticChunkedWorkshareLoop(DebugLoc DL, 961 CanonicalLoopInfo *CLI, 962 InsertPointTy AllocaIP, 963 bool NeedsBarrier, 964 Value *ChunkSize); 965 966 /// Modifies the canonical loop to be a dynamically-scheduled workshare loop. 967 /// 968 /// This takes a \p LoopInfo representing a canonical loop, such as the one 969 /// created by \p createCanonicalLoop and emits additional instructions to 970 /// turn it into a workshare loop. 
In particular, it calls to an OpenMP 971 /// runtime function in the preheader to obtain, and then in each iteration 972 /// to update the loop counter. 973 /// 974 /// \param DL Debug location for instructions added for the 975 /// workshare-loop construct itself. 976 /// \param CLI A descriptor of the canonical loop to workshare. 977 /// \param AllocaIP An insertion point for Alloca instructions usable in the 978 /// preheader of the loop. 979 /// \param SchedType Type of scheduling to be passed to the init function. 980 /// \param NeedsBarrier Indicates whether a barrier must be insterted after 981 /// the loop. 982 /// \param Chunk The size of loop chunk considered as a unit when 983 /// scheduling. If \p nullptr, defaults to 1. 984 /// 985 /// \returns Point where to insert code after the workshare construct. 986 InsertPointTy applyDynamicWorkshareLoop(DebugLoc DL, CanonicalLoopInfo *CLI, 987 InsertPointTy AllocaIP, 988 omp::OMPScheduleType SchedType, 989 bool NeedsBarrier, 990 Value *Chunk = nullptr); 991 992 /// Create alternative version of the loop to support if clause 993 /// 994 /// OpenMP if clause can require to generate second loop. This loop 995 /// will be executed when if clause condition is not met. createIfVersion 996 /// adds branch instruction to the copied loop if \p ifCond is not met. 997 /// 998 /// \param Loop Original loop which should be versioned. 999 /// \param IfCond Value which corresponds to if clause condition 1000 /// \param VMap Value to value map to define relation between 1001 /// original and copied loop values and loop blocks. 1002 /// \param NamePrefix Optional name prefix for if.then if.else blocks. 1003 void createIfVersion(CanonicalLoopInfo *Loop, Value *IfCond, 1004 ValueToValueMapTy &VMap, const Twine &NamePrefix = ""); 1005 1006 public: 1007 /// Modifies the canonical loop to be a workshare loop. 
///
/// This takes a \p CLI representing a canonical loop, such as the one
/// created by \p createCanonicalLoop and emits additional instructions to
/// turn it into a workshare loop. In particular, it calls to an OpenMP
/// runtime function in the preheader to obtain the loop bounds to be used in
/// the current thread, updates the relevant instructions in the canonical
/// loop and calls to an OpenMP runtime finalization function after the loop.
///
/// The concrete transformation is done by applyStaticWorkshareLoop,
/// applyStaticChunkedWorkshareLoop, or applyDynamicWorkshareLoop, depending
/// on the value of \p SchedKind and \p ChunkSize.
///
/// \param DL       Debug location for instructions added for the
///                 workshare-loop construct itself.
/// \param CLI      A descriptor of the canonical loop to workshare.
/// \param AllocaIP An insertion point for Alloca instructions usable in the
///                 preheader of the loop.
/// \param NeedsBarrier Indicates whether a barrier must be inserted after
///                     the loop.
/// \param SchedKind Scheduling algorithm to use.
/// \param ChunkSize The chunk size for the inner loop.
/// \param HasSimdModifier Whether the simd modifier is present in the
///                        schedule clause.
/// \param HasMonotonicModifier Whether the monotonic modifier is present in
///                             the schedule clause.
/// \param HasNonmonotonicModifier Whether the nonmonotonic modifier is
///                                present in the schedule clause.
/// \param HasOrderedClause Whether the (parameterless) ordered clause is
///                         present.
/// \param LoopType Information about type of loop worksharing.
///                 It corresponds to type of loop workshare OpenMP pragma.
///
/// \returns Point where to insert code after the workshare construct.
1041 InsertPointTy applyWorkshareLoop( 1042 DebugLoc DL, CanonicalLoopInfo *CLI, InsertPointTy AllocaIP, 1043 bool NeedsBarrier, 1044 llvm::omp::ScheduleKind SchedKind = llvm::omp::OMP_SCHEDULE_Default, 1045 Value *ChunkSize = nullptr, bool HasSimdModifier = false, 1046 bool HasMonotonicModifier = false, bool HasNonmonotonicModifier = false, 1047 bool HasOrderedClause = false, 1048 omp::WorksharingLoopType LoopType = 1049 omp::WorksharingLoopType::ForStaticLoop); 1050 1051 /// Tile a loop nest. 1052 /// 1053 /// Tiles the loops of \p Loops by the tile sizes in \p TileSizes. Loops in 1054 /// \p/ Loops must be perfectly nested, from outermost to innermost loop 1055 /// (i.e. Loops.front() is the outermost loop). The trip count llvm::Value 1056 /// of every loop and every tile sizes must be usable in the outermost 1057 /// loop's preheader. This implies that the loop nest is rectangular. 1058 /// 1059 /// Example: 1060 /// \code 1061 /// for (int i = 0; i < 15; ++i) // Canonical loop "i" 1062 /// for (int j = 0; j < 14; ++j) // Canonical loop "j" 1063 /// body(i, j); 1064 /// \endcode 1065 /// 1066 /// After tiling with Loops={i,j} and TileSizes={5,7}, the loop is changed to 1067 /// \code 1068 /// for (int i1 = 0; i1 < 3; ++i1) 1069 /// for (int j1 = 0; j1 < 2; ++j1) 1070 /// for (int i2 = 0; i2 < 5; ++i2) 1071 /// for (int j2 = 0; j2 < 7; ++j2) 1072 /// body(i1*3+i2, j1*3+j2); 1073 /// \endcode 1074 /// 1075 /// The returned vector are the loops {i1,j1,i2,j2}. The loops i1 and j1 are 1076 /// referred to the floor, and the loops i2 and j2 are the tiles. Tiling also 1077 /// handles non-constant trip counts, non-constant tile sizes and trip counts 1078 /// that are not multiples of the tile size. In the latter case the tile loop 1079 /// of the last floor-loop iteration will have fewer iterations than specified 1080 /// as its tile size. 
1081 /// 1082 /// 1083 /// @param DL Debug location for instructions added by tiling, for 1084 /// instance the floor- and tile trip count computation. 1085 /// @param Loops Loops to tile. The CanonicalLoopInfo objects are 1086 /// invalidated by this method, i.e. should not used after 1087 /// tiling. 1088 /// @param TileSizes For each loop in \p Loops, the tile size for that 1089 /// dimensions. 1090 /// 1091 /// \returns A list of generated loops. Contains twice as many loops as the 1092 /// input loop nest; the first half are the floor loops and the 1093 /// second half are the tile loops. 1094 std::vector<CanonicalLoopInfo *> 1095 tileLoops(DebugLoc DL, ArrayRef<CanonicalLoopInfo *> Loops, 1096 ArrayRef<Value *> TileSizes); 1097 1098 /// Fully unroll a loop. 1099 /// 1100 /// Instead of unrolling the loop immediately (and duplicating its body 1101 /// instructions), it is deferred to LLVM's LoopUnrollPass by adding loop 1102 /// metadata. 1103 /// 1104 /// \param DL Debug location for instructions added by unrolling. 1105 /// \param Loop The loop to unroll. The loop will be invalidated. 1106 void unrollLoopFull(DebugLoc DL, CanonicalLoopInfo *Loop); 1107 1108 /// Fully or partially unroll a loop. How the loop is unrolled is determined 1109 /// using LLVM's LoopUnrollPass. 1110 /// 1111 /// \param DL Debug location for instructions added by unrolling. 1112 /// \param Loop The loop to unroll. The loop will be invalidated. 1113 void unrollLoopHeuristic(DebugLoc DL, CanonicalLoopInfo *Loop); 1114 1115 /// Partially unroll a loop. 1116 /// 1117 /// The CanonicalLoopInfo of the unrolled loop for use with chained 1118 /// loop-associated directive can be requested using \p UnrolledCLI. Not 1119 /// needing the CanonicalLoopInfo allows more efficient code generation by 1120 /// deferring the actual unrolling to the LoopUnrollPass using loop metadata. 
1121 /// A loop-associated directive applied to the unrolled loop needs to know the 1122 /// new trip count which means that if using a heuristically determined unroll 1123 /// factor (\p Factor == 0), that factor must be computed immediately. We are 1124 /// using the same logic as the LoopUnrollPass to derived the unroll factor, 1125 /// but which assumes that some canonicalization has taken place (e.g. 1126 /// Mem2Reg, LICM, GVN, Inlining, etc.). That is, the heuristic will perform 1127 /// better when the unrolled loop's CanonicalLoopInfo is not needed. 1128 /// 1129 /// \param DL Debug location for instructions added by unrolling. 1130 /// \param Loop The loop to unroll. The loop will be invalidated. 1131 /// \param Factor The factor to unroll the loop by. A factor of 0 1132 /// indicates that a heuristic should be used to determine 1133 /// the unroll-factor. 1134 /// \param UnrolledCLI If non-null, receives the CanonicalLoopInfo of the 1135 /// partially unrolled loop. Otherwise, uses loop metadata 1136 /// to defer unrolling to the LoopUnrollPass. 1137 void unrollLoopPartial(DebugLoc DL, CanonicalLoopInfo *Loop, int32_t Factor, 1138 CanonicalLoopInfo **UnrolledCLI); 1139 1140 /// Add metadata to simd-ize a loop. If IfCond is not nullptr, the loop 1141 /// is cloned. The metadata which prevents vectorization is added to 1142 /// to the cloned loop. The cloned loop is executed when ifCond is evaluated 1143 /// to false. 1144 /// 1145 /// \param Loop The loop to simd-ize. 1146 /// \param AlignedVars The map which containts pairs of the pointer 1147 /// and its corresponding alignment. 1148 /// \param IfCond The value which corresponds to the if clause 1149 /// condition. 1150 /// \param Order The enum to map order clause. 1151 /// \param Simdlen The Simdlen length to apply to the simd loop. 1152 /// \param Safelen The Safelen length to apply to the simd loop. 
1153 void applySimd(CanonicalLoopInfo *Loop, 1154 MapVector<Value *, Value *> AlignedVars, Value *IfCond, 1155 omp::OrderKind Order, ConstantInt *Simdlen, 1156 ConstantInt *Safelen); 1157 1158 /// Generator for '#omp flush' 1159 /// 1160 /// \param Loc The location where the flush directive was encountered 1161 void createFlush(const LocationDescription &Loc); 1162 1163 /// Generator for '#omp taskwait' 1164 /// 1165 /// \param Loc The location where the taskwait directive was encountered. 1166 void createTaskwait(const LocationDescription &Loc); 1167 1168 /// Generator for '#omp taskyield' 1169 /// 1170 /// \param Loc The location where the taskyield directive was encountered. 1171 void createTaskyield(const LocationDescription &Loc); 1172 1173 /// A struct to pack the relevant information for an OpenMP depend clause. 1174 struct DependData { 1175 omp::RTLDependenceKindTy DepKind = omp::RTLDependenceKindTy::DepUnknown; 1176 Type *DepValueType; 1177 Value *DepVal; 1178 explicit DependData() = default; DependDataDependData1179 DependData(omp::RTLDependenceKindTy DepKind, Type *DepValueType, 1180 Value *DepVal) 1181 : DepKind(DepKind), DepValueType(DepValueType), DepVal(DepVal) {} 1182 }; 1183 1184 /// Generator for `#omp task` 1185 /// 1186 /// \param Loc The location where the task construct was encountered. 1187 /// \param AllocaIP The insertion point to be used for alloca instructions. 1188 /// \param BodyGenCB Callback that will generate the region code. 1189 /// \param Tied True if the task is tied, false if the task is untied. 1190 /// \param Final i1 value which is `true` if the task is final, `false` if the 1191 /// task is not final. 1192 /// \param IfCondition i1 value. 
If it evaluates to `false`, an undeferred 1193 /// task is generated, and the encountering thread must 1194 /// suspend the current task region, for which execution 1195 /// cannot be resumed until execution of the structured 1196 /// block that is associated with the generated task is 1197 /// completed. 1198 InsertPointTy createTask(const LocationDescription &Loc, 1199 InsertPointTy AllocaIP, BodyGenCallbackTy BodyGenCB, 1200 bool Tied = true, Value *Final = nullptr, 1201 Value *IfCondition = nullptr, 1202 SmallVector<DependData> Dependencies = {}); 1203 1204 /// Generator for the taskgroup construct 1205 /// 1206 /// \param Loc The location where the taskgroup construct was encountered. 1207 /// \param AllocaIP The insertion point to be used for alloca instructions. 1208 /// \param BodyGenCB Callback that will generate the region code. 1209 InsertPointTy createTaskgroup(const LocationDescription &Loc, 1210 InsertPointTy AllocaIP, 1211 BodyGenCallbackTy BodyGenCB); 1212 1213 using FileIdentifierInfoCallbackTy = 1214 std::function<std::tuple<std::string, uint64_t>()>; 1215 1216 /// Creates a unique info for a target entry when provided a filename and 1217 /// line number from. 1218 /// 1219 /// \param CallBack A callback function which should return filename the entry 1220 /// resides in as well as the line number for the target entry 1221 /// \param ParentName The name of the parent the target entry resides in, if 1222 /// any. 1223 static TargetRegionEntryInfo 1224 getTargetEntryUniqueInfo(FileIdentifierInfoCallbackTy CallBack, 1225 StringRef ParentName = ""); 1226 1227 /// Functions used to generate reductions. Such functions take two Values 1228 /// representing LHS and RHS of the reduction, respectively, and a reference 1229 /// to the value that is updated to refer to the reduction result. 1230 using ReductionGenTy = 1231 function_ref<InsertPointTy(InsertPointTy, Value *, Value *, Value *&)>; 1232 1233 /// Functions used to generate atomic reductions. 
Such functions take two 1234 /// Values representing pointers to LHS and RHS of the reduction, as well as 1235 /// the element type of these pointers. They are expected to atomically 1236 /// update the LHS to the reduced value. 1237 using AtomicReductionGenTy = 1238 function_ref<InsertPointTy(InsertPointTy, Type *, Value *, Value *)>; 1239 1240 /// Information about an OpenMP reduction. 1241 struct ReductionInfo { ReductionInfoReductionInfo1242 ReductionInfo(Type *ElementType, Value *Variable, Value *PrivateVariable, 1243 ReductionGenTy ReductionGen, 1244 AtomicReductionGenTy AtomicReductionGen) 1245 : ElementType(ElementType), Variable(Variable), 1246 PrivateVariable(PrivateVariable), ReductionGen(ReductionGen), 1247 AtomicReductionGen(AtomicReductionGen) {} 1248 1249 /// Reduction element type, must match pointee type of variable. 1250 Type *ElementType; 1251 1252 /// Reduction variable of pointer type. 1253 Value *Variable; 1254 1255 /// Thread-private partial reduction variable. 1256 Value *PrivateVariable; 1257 1258 /// Callback for generating the reduction body. The IR produced by this will 1259 /// be used to combine two values in a thread-safe context, e.g., under 1260 /// lock or within the same thread, and therefore need not be atomic. 1261 ReductionGenTy ReductionGen; 1262 1263 /// Callback for generating the atomic reduction body, may be null. The IR 1264 /// produced by this will be used to atomically combine two values during 1265 /// reduction. If null, the implementation will use the non-atomic version 1266 /// along with the appropriate synchronization mechanisms. 1267 AtomicReductionGenTy AtomicReductionGen; 1268 }; 1269 1270 // TODO: provide atomic and non-atomic reduction generators for reduction 1271 // operators defined by the OpenMP specification. 1272 1273 /// Generator for '#omp reduction'. 1274 /// 1275 /// Emits the IR instructing the runtime to perform the specific kind of 1276 /// reductions. 
/// Expects reduction variables to have been privatized and
/// initialized to reduction-neutral values separately. Emits the calls to
/// runtime functions as well as the reduction function and the basic blocks
/// performing the reduction atomically and non-atomically.
///
/// The code emitted for the following:
///
/// \code
///   type var_1;
///   type var_2;
///   #pragma omp <directive> reduction(reduction-op:var_1,var_2)
///   /* body */;
/// \endcode
///
/// corresponds to the following sketch.
///
/// \code
/// void _outlined_par() {
///   // N is the number of different reductions.
///   void *red_array[] = {privatized_var_1, privatized_var_2, ...};
///   switch(__kmpc_reduce(..., N, /*size of data in red array*/, red_array,
///                        _omp_reduction_func,
///                        _gomp_critical_user.reduction.var)) {
///   case 1: {
///     var_1 = var_1 <reduction-op> privatized_var_1;
///     var_2 = var_2 <reduction-op> privatized_var_2;
///     // ...
///    __kmpc_end_reduce(...);
///     break;
///   }
///   case 2: {
///     _Atomic<ReductionOp>(var_1, privatized_var_1);
///     _Atomic<ReductionOp>(var_2, privatized_var_2);
///     // ...
///     break;
///   }
///   default: break;
///   }
/// }
///
/// void _omp_reduction_func(void **lhs, void **rhs) {
///   *(type *)lhs[0] = *(type *)lhs[0] <reduction-op> *(type *)rhs[0];
///   *(type *)lhs[1] = *(type *)lhs[1] <reduction-op> *(type *)rhs[1];
///   // ...
/// }
/// \endcode
///
/// \param Loc                The location where the reduction was
///                           encountered. Must be within the associated
///                           directive and after the last local access to the
///                           reduction variables.
/// \param AllocaIP           An insertion point suitable for allocas usable
///                           in reductions.
/// \param ReductionInfos     A list of info on each reduction variable.
1330 /// \param IsNoWait A flag set if the reduction is marked as nowait. 1331 InsertPointTy createReductions(const LocationDescription &Loc, 1332 InsertPointTy AllocaIP, 1333 ArrayRef<ReductionInfo> ReductionInfos, 1334 bool IsNoWait = false); 1335 1336 ///} 1337 1338 /// Return the insertion point used by the underlying IRBuilder. getInsertionPoint()1339 InsertPointTy getInsertionPoint() { return Builder.saveIP(); } 1340 1341 /// Update the internal location to \p Loc. updateToLocation(const LocationDescription & Loc)1342 bool updateToLocation(const LocationDescription &Loc) { 1343 Builder.restoreIP(Loc.IP); 1344 Builder.SetCurrentDebugLocation(Loc.DL); 1345 return Loc.IP.getBlock() != nullptr; 1346 } 1347 1348 /// Return the function declaration for the runtime function with \p FnID. 1349 FunctionCallee getOrCreateRuntimeFunction(Module &M, 1350 omp::RuntimeFunction FnID); 1351 1352 Function *getOrCreateRuntimeFunctionPtr(omp::RuntimeFunction FnID); 1353 1354 /// Return the (LLVM-IR) string describing the source location \p LocStr. 1355 Constant *getOrCreateSrcLocStr(StringRef LocStr, uint32_t &SrcLocStrSize); 1356 1357 /// Return the (LLVM-IR) string describing the default source location. 1358 Constant *getOrCreateDefaultSrcLocStr(uint32_t &SrcLocStrSize); 1359 1360 /// Return the (LLVM-IR) string describing the source location identified by 1361 /// the arguments. 1362 Constant *getOrCreateSrcLocStr(StringRef FunctionName, StringRef FileName, 1363 unsigned Line, unsigned Column, 1364 uint32_t &SrcLocStrSize); 1365 1366 /// Return the (LLVM-IR) string describing the DebugLoc \p DL. Use \p F as 1367 /// fallback if \p DL does not specify the function name. 1368 Constant *getOrCreateSrcLocStr(DebugLoc DL, uint32_t &SrcLocStrSize, 1369 Function *F = nullptr); 1370 1371 /// Return the (LLVM-IR) string describing the source location \p Loc. 
1372 Constant *getOrCreateSrcLocStr(const LocationDescription &Loc, 1373 uint32_t &SrcLocStrSize); 1374 1375 /// Return an ident_t* encoding the source location \p SrcLocStr and \p Flags. 1376 /// TODO: Create a enum class for the Reserve2Flags 1377 Constant *getOrCreateIdent(Constant *SrcLocStr, uint32_t SrcLocStrSize, 1378 omp::IdentFlag Flags = omp::IdentFlag(0), 1379 unsigned Reserve2Flags = 0); 1380 1381 /// Create a hidden global flag \p Name in the module with initial value \p 1382 /// Value. 1383 GlobalValue *createGlobalFlag(unsigned Value, StringRef Name); 1384 1385 /// Generate control flow and cleanup for cancellation. 1386 /// 1387 /// \param CancelFlag Flag indicating if the cancellation is performed. 1388 /// \param CanceledDirective The kind of directive that is cancled. 1389 /// \param ExitCB Extra code to be generated in the exit block. 1390 void emitCancelationCheckImpl(Value *CancelFlag, 1391 omp::Directive CanceledDirective, 1392 FinalizeCallbackTy ExitCB = {}); 1393 1394 /// Generate a target region entry call. 1395 /// 1396 /// \param Loc The location at which the request originated and is fulfilled. 1397 /// \param AllocaIP The insertion point to be used for alloca instructions. 1398 /// \param Return Return value of the created function returned by reference. 1399 /// \param DeviceID Identifier for the device via the 'device' clause. 1400 /// \param NumTeams Numer of teams for the region via the 'num_teams' clause 1401 /// or 0 if unspecified and -1 if there is no 'teams' clause. 1402 /// \param NumThreads Number of threads via the 'thread_limit' clause. 1403 /// \param HostPtr Pointer to the host-side pointer of the target kernel. 1404 /// \param KernelArgs Array of arguments to the kernel. 
1405 InsertPointTy emitTargetKernel(const LocationDescription &Loc, 1406 InsertPointTy AllocaIP, Value *&Return, 1407 Value *Ident, Value *DeviceID, Value *NumTeams, 1408 Value *NumThreads, Value *HostPtr, 1409 ArrayRef<Value *> KernelArgs); 1410 1411 /// Generate a barrier runtime call. 1412 /// 1413 /// \param Loc The location at which the request originated and is fulfilled. 1414 /// \param DK The directive which caused the barrier 1415 /// \param ForceSimpleCall Flag to force a simple (=non-cancellation) barrier. 1416 /// \param CheckCancelFlag Flag to indicate a cancel barrier return value 1417 /// should be checked and acted upon. 1418 /// 1419 /// \returns The insertion point after the barrier. 1420 InsertPointTy emitBarrierImpl(const LocationDescription &Loc, 1421 omp::Directive DK, bool ForceSimpleCall, 1422 bool CheckCancelFlag); 1423 1424 /// Generate a flush runtime call. 1425 /// 1426 /// \param Loc The location at which the request originated and is fulfilled. 1427 void emitFlush(const LocationDescription &Loc); 1428 1429 /// The finalization stack made up of finalize callbacks currently in-flight, 1430 /// wrapped into FinalizationInfo objects that reference also the finalization 1431 /// target block and the kind of cancellable directive. 1432 SmallVector<FinalizationInfo, 8> FinalizationStack; 1433 1434 /// Return true if the last entry in the finalization stack is of kind \p DK 1435 /// and cancellable. isLastFinalizationInfoCancellable(omp::Directive DK)1436 bool isLastFinalizationInfoCancellable(omp::Directive DK) { 1437 return !FinalizationStack.empty() && 1438 FinalizationStack.back().IsCancellable && 1439 FinalizationStack.back().DK == DK; 1440 } 1441 1442 /// Generate a taskwait runtime call. 1443 /// 1444 /// \param Loc The location at which the request originated and is fulfilled. 1445 void emitTaskwaitImpl(const LocationDescription &Loc); 1446 1447 /// Generate a taskyield runtime call. 
///
/// \param Loc The location at which the request originated and is fulfilled.
void emitTaskyieldImpl(const LocationDescription &Loc);

/// Return the current thread ID.
///
/// \param Ident The ident (ident_t*) describing the query origin.
Value *getOrCreateThreadID(Value *Ident);

/// The OpenMPIRBuilder Configuration
OpenMPIRBuilderConfig Config;

/// The underlying LLVM-IR module
Module &M;

/// The LLVM-IR Builder used to create IR.
IRBuilder<> Builder;

/// Map to remember source location strings
StringMap<Constant *> SrcLocStrMap;

/// Map to remember existing ident_t*. Keyed by a (Constant *, uint64_t) pair.
DenseMap<std::pair<Constant *, uint64_t>, Constant *> IdentMap;

/// Info manager to keep track of target regions.
OffloadEntriesInfoManager OffloadInfoManager;

/// The target triple of the underlying module.
const Triple T;

/// Helper that contains information about regions we need to outline
/// during finalization.
struct OutlineInfo {
  /// Callback type that receives a Function by reference.
  using PostOutlineCBTy = std::function<void(Function &)>;
  // NOTE(review): presumably invoked with the outlined function once
  // outlining has happened -- confirm against the finalization code.
  PostOutlineCBTy PostOutlineCB;
  /// EntryBB and ExitBB delimit the region (see collectBlocks); EntryBB also
  /// identifies the containing function (see getFunction).
  // NOTE(review): OuterAllocaBB is presumably the block holding allocas that
  // live outside the region -- confirm.
  BasicBlock *EntryBB, *ExitBB, *OuterAllocaBB;
  // NOTE(review): judging by the name, values that must not be packed into
  // the aggregate argument of the outlined function -- confirm.
  SmallVector<Value *, 2> ExcludeArgsFromAggregate;

  /// Collect all blocks in between EntryBB and ExitBB in both the given
  /// vector and set.
  void collectBlocks(SmallPtrSetImpl<BasicBlock *> &BlockSet,
                     SmallVectorImpl<BasicBlock *> &BlockVector);

  /// Return the function that contains the region to be outlined, i.e. the
  /// parent of EntryBB.
  Function *getFunction() const { return EntryBB->getParent(); }
};

/// Collection of regions that need to be outlined during finalization.
SmallVector<OutlineInfo, 16> OutlineInfos;

/// Collection of owned canonical loop objects that eventually need to be
/// free'd.
1500 std::forward_list<CanonicalLoopInfo> LoopInfos; 1501 1502 /// Add a new region that will be outlined later. addOutlineInfo(OutlineInfo && OI)1503 void addOutlineInfo(OutlineInfo &&OI) { OutlineInfos.emplace_back(OI); } 1504 1505 /// An ordered map of auto-generated variables to their unique names. 1506 /// It stores variables with the following names: 1) ".gomp_critical_user_" + 1507 /// <critical_section_name> + ".var" for "omp critical" directives; 2) 1508 /// <mangled_name_for_global_var> + ".cache." for cache for threadprivate 1509 /// variables. 1510 StringMap<GlobalVariable *, BumpPtrAllocator> InternalVars; 1511 1512 /// Computes the size of type in bytes. 1513 Value *getSizeInBytes(Value *BasePtr); 1514 1515 // Emit a branch from the current block to the Target block only if 1516 // the current block has a terminator. 1517 void emitBranch(BasicBlock *Target); 1518 1519 // If BB has no use then delete it and return. Else place BB after the current 1520 // block, if possible, or else at the end of the function. Also add a branch 1521 // from current block to BB if current block does not have a terminator. 1522 void emitBlock(BasicBlock *BB, Function *CurFn, bool IsFinished = false); 1523 1524 /// Emits code for OpenMP 'if' clause using specified \a BodyGenCallbackTy 1525 /// Here is the logic: 1526 /// if (Cond) { 1527 /// ThenGen(); 1528 /// } else { 1529 /// ElseGen(); 1530 /// } 1531 void emitIfClause(Value *Cond, BodyGenCallbackTy ThenGen, 1532 BodyGenCallbackTy ElseGen, InsertPointTy AllocaIP = {}); 1533 1534 /// Create the global variable holding the offload mappings information. 1535 GlobalVariable *createOffloadMaptypes(SmallVectorImpl<uint64_t> &Mappings, 1536 std::string VarName); 1537 1538 /// Create the global variable holding the offload names information. 
1539 GlobalVariable * 1540 createOffloadMapnames(SmallVectorImpl<llvm::Constant *> &Names, 1541 std::string VarName); 1542 1543 struct MapperAllocas { 1544 AllocaInst *ArgsBase = nullptr; 1545 AllocaInst *Args = nullptr; 1546 AllocaInst *ArgSizes = nullptr; 1547 }; 1548 1549 /// Create the allocas instruction used in call to mapper functions. 1550 void createMapperAllocas(const LocationDescription &Loc, 1551 InsertPointTy AllocaIP, unsigned NumOperands, 1552 struct MapperAllocas &MapperAllocas); 1553 1554 /// Create the call for the target mapper function. 1555 /// \param Loc The source location description. 1556 /// \param MapperFunc Function to be called. 1557 /// \param SrcLocInfo Source location information global. 1558 /// \param MaptypesArg The argument types. 1559 /// \param MapnamesArg The argument names. 1560 /// \param MapperAllocas The AllocaInst used for the call. 1561 /// \param DeviceID Device ID for the call. 1562 /// \param NumOperands Number of operands in the call. 1563 void emitMapperCall(const LocationDescription &Loc, Function *MapperFunc, 1564 Value *SrcLocInfo, Value *MaptypesArg, Value *MapnamesArg, 1565 struct MapperAllocas &MapperAllocas, int64_t DeviceID, 1566 unsigned NumOperands); 1567 1568 /// Container for the arguments used to pass data to the runtime library. 1569 struct TargetDataRTArgs { 1570 /// The array of base pointer passed to the runtime library. 1571 Value *BasePointersArray = nullptr; 1572 /// The array of section pointers passed to the runtime library. 1573 Value *PointersArray = nullptr; 1574 /// The array of sizes passed to the runtime library. 1575 Value *SizesArray = nullptr; 1576 /// The array of map types passed to the runtime library for the beginning 1577 /// of the region or for the entire region if there are no separate map 1578 /// types for the region end. 
1579 Value *MapTypesArray = nullptr; 1580 /// The array of map types passed to the runtime library for the end of the 1581 /// region, or nullptr if there are no separate map types for the region 1582 /// end. 1583 Value *MapTypesArrayEnd = nullptr; 1584 /// The array of user-defined mappers passed to the runtime library. 1585 Value *MappersArray = nullptr; 1586 /// The array of original declaration names of mapped pointers sent to the 1587 /// runtime library for debugging 1588 Value *MapNamesArray = nullptr; 1589 TargetDataRTArgsTargetDataRTArgs1590 explicit TargetDataRTArgs() {} TargetDataRTArgsTargetDataRTArgs1591 explicit TargetDataRTArgs(Value *BasePointersArray, Value *PointersArray, 1592 Value *SizesArray, Value *MapTypesArray, 1593 Value *MapTypesArrayEnd, Value *MappersArray, 1594 Value *MapNamesArray) 1595 : BasePointersArray(BasePointersArray), PointersArray(PointersArray), 1596 SizesArray(SizesArray), MapTypesArray(MapTypesArray), 1597 MapTypesArrayEnd(MapTypesArrayEnd), MappersArray(MappersArray), 1598 MapNamesArray(MapNamesArray) {} 1599 }; 1600 1601 /// Data structure that contains the needed information to construct the 1602 /// kernel args vector. 1603 struct TargetKernelArgs { 1604 /// Number of arguments passed to the runtime library. 1605 unsigned NumTargetItems; 1606 /// Arguments passed to the runtime library 1607 TargetDataRTArgs RTArgs; 1608 /// The number of iterations 1609 Value *NumIterations; 1610 /// The number of teams. 1611 Value *NumTeams; 1612 /// The number of threads. 1613 Value *NumThreads; 1614 /// The size of the dynamic shared memory. 1615 Value *DynCGGroupMem; 1616 /// True if the kernel has 'no wait' clause. 
1617 bool HasNoWait; 1618 1619 /// Constructor for TargetKernelArgs TargetKernelArgsTargetKernelArgs1620 TargetKernelArgs(unsigned NumTargetItems, TargetDataRTArgs RTArgs, 1621 Value *NumIterations, Value *NumTeams, Value *NumThreads, 1622 Value *DynCGGroupMem, bool HasNoWait) 1623 : NumTargetItems(NumTargetItems), RTArgs(RTArgs), 1624 NumIterations(NumIterations), NumTeams(NumTeams), 1625 NumThreads(NumThreads), DynCGGroupMem(DynCGGroupMem), 1626 HasNoWait(HasNoWait) {} 1627 }; 1628 1629 /// Create the kernel args vector used by emitTargetKernel. This function 1630 /// creates various constant values that are used in the resulting args 1631 /// vector. 1632 static void getKernelArgsVector(TargetKernelArgs &KernelArgs, 1633 IRBuilderBase &Builder, 1634 SmallVector<Value *> &ArgsVector); 1635 1636 /// Struct that keeps the information that should be kept throughout 1637 /// a 'target data' region. 1638 class TargetDataInfo { 1639 /// Set to true if device pointer information have to be obtained. 1640 bool RequiresDevicePointerInfo = false; 1641 /// Set to true if Clang emits separate runtime calls for the beginning and 1642 /// end of the region. These calls might have separate map type arrays. 1643 bool SeparateBeginEndCalls = false; 1644 1645 public: 1646 TargetDataRTArgs RTArgs; 1647 1648 SmallMapVector<const Value *, std::pair<Value *, Value *>, 4> 1649 DevicePtrInfoMap; 1650 1651 /// Indicate whether any user-defined mapper exists. 1652 bool HasMapper = false; 1653 /// The total number of pointers passed to the runtime library. 1654 unsigned NumberOfPtrs = 0u; 1655 TargetDataInfo()1656 explicit TargetDataInfo() {} TargetDataInfo(bool RequiresDevicePointerInfo,bool SeparateBeginEndCalls)1657 explicit TargetDataInfo(bool RequiresDevicePointerInfo, 1658 bool SeparateBeginEndCalls) 1659 : RequiresDevicePointerInfo(RequiresDevicePointerInfo), 1660 SeparateBeginEndCalls(SeparateBeginEndCalls) {} 1661 /// Clear information about the data arrays. 
clearArrayInfo()1662 void clearArrayInfo() { 1663 RTArgs = TargetDataRTArgs(); 1664 HasMapper = false; 1665 NumberOfPtrs = 0u; 1666 } 1667 /// Return true if the current target data information has valid arrays. isValid()1668 bool isValid() { 1669 return RTArgs.BasePointersArray && RTArgs.PointersArray && 1670 RTArgs.SizesArray && RTArgs.MapTypesArray && 1671 (!HasMapper || RTArgs.MappersArray) && NumberOfPtrs; 1672 } requiresDevicePointerInfo()1673 bool requiresDevicePointerInfo() { return RequiresDevicePointerInfo; } separateBeginEndCalls()1674 bool separateBeginEndCalls() { return SeparateBeginEndCalls; } 1675 }; 1676 1677 enum class DeviceInfoTy { None, Pointer, Address }; 1678 using MapValuesArrayTy = SmallVector<Value *, 4>; 1679 using MapDeviceInfoArrayTy = SmallVector<DeviceInfoTy, 4>; 1680 using MapFlagsArrayTy = SmallVector<omp::OpenMPOffloadMappingFlags, 4>; 1681 using MapNamesArrayTy = SmallVector<Constant *, 4>; 1682 using MapDimArrayTy = SmallVector<uint64_t, 4>; 1683 using MapNonContiguousArrayTy = SmallVector<MapValuesArrayTy, 4>; 1684 1685 /// This structure contains combined information generated for mappable 1686 /// clauses, including base pointers, pointers, sizes, map types, user-defined 1687 /// mappers, and non-contiguous information. 1688 struct MapInfosTy { 1689 struct StructNonContiguousInfo { 1690 bool IsNonContiguous = false; 1691 MapDimArrayTy Dims; 1692 MapNonContiguousArrayTy Offsets; 1693 MapNonContiguousArrayTy Counts; 1694 MapNonContiguousArrayTy Strides; 1695 }; 1696 MapValuesArrayTy BasePointers; 1697 MapValuesArrayTy Pointers; 1698 MapDeviceInfoArrayTy DevicePointers; 1699 MapValuesArrayTy Sizes; 1700 MapFlagsArrayTy Types; 1701 MapNamesArrayTy Names; 1702 StructNonContiguousInfo NonContigInfo; 1703 1704 /// Append arrays in \a CurInfo. 
appendMapInfosTy1705 void append(MapInfosTy &CurInfo) { 1706 BasePointers.append(CurInfo.BasePointers.begin(), 1707 CurInfo.BasePointers.end()); 1708 Pointers.append(CurInfo.Pointers.begin(), CurInfo.Pointers.end()); 1709 DevicePointers.append(CurInfo.DevicePointers.begin(), 1710 CurInfo.DevicePointers.end()); 1711 Sizes.append(CurInfo.Sizes.begin(), CurInfo.Sizes.end()); 1712 Types.append(CurInfo.Types.begin(), CurInfo.Types.end()); 1713 Names.append(CurInfo.Names.begin(), CurInfo.Names.end()); 1714 NonContigInfo.Dims.append(CurInfo.NonContigInfo.Dims.begin(), 1715 CurInfo.NonContigInfo.Dims.end()); 1716 NonContigInfo.Offsets.append(CurInfo.NonContigInfo.Offsets.begin(), 1717 CurInfo.NonContigInfo.Offsets.end()); 1718 NonContigInfo.Counts.append(CurInfo.NonContigInfo.Counts.begin(), 1719 CurInfo.NonContigInfo.Counts.end()); 1720 NonContigInfo.Strides.append(CurInfo.NonContigInfo.Strides.begin(), 1721 CurInfo.NonContigInfo.Strides.end()); 1722 } 1723 }; 1724 1725 /// Callback function type for functions emitting the host fallback code that 1726 /// is executed when the kernel launch fails. It takes an insertion point as 1727 /// parameter where the code should be emitted. It returns an insertion point 1728 /// that points right after after the emitted code. 1729 using EmitFallbackCallbackTy = function_ref<InsertPointTy(InsertPointTy)>; 1730 1731 /// Generate a target region entry call and host fallback call. 1732 /// 1733 /// \param Loc The location at which the request originated and is fulfilled. 1734 /// \param OutlinedFn The outlined kernel function. 1735 /// \param OutlinedFnID The ooulined function ID. 1736 /// \param EmitTargetCallFallbackCB Call back function to generate host 1737 /// fallback code. 1738 /// \param Args Data structure holding information about the kernel arguments. 1739 /// \param DeviceID Identifier for the device via the 'device' clause. 
/// \param RTLoc Source location identifier
/// \param AllocaIP The insertion point to be used for alloca instructions.
InsertPointTy emitKernelLaunch(
    const LocationDescription &Loc, Function *OutlinedFn, Value *OutlinedFnID,
    EmitFallbackCallbackTy EmitTargetCallFallbackCB, TargetKernelArgs &Args,
    Value *DeviceID, Value *RTLoc, InsertPointTy AllocaIP);

/// Emit the arguments to be passed to the runtime library based on the
/// arrays of base pointers, pointers, sizes, map types, and mappers. If
/// ForEndCall, emit map types to be passed for the end of the region instead
/// of the beginning.
void emitOffloadingArraysArgument(IRBuilderBase &Builder,
                                  OpenMPIRBuilder::TargetDataRTArgs &RTArgs,
                                  OpenMPIRBuilder::TargetDataInfo &Info,
                                  bool EmitDebug = false,
                                  bool ForEndCall = false);

/// Emit an array of struct descriptors to be assigned to the offload args.
void emitNonContiguousDescriptor(InsertPointTy AllocaIP,
                                 InsertPointTy CodeGenIP,
                                 MapInfosTy &CombinedInfo,
                                 TargetDataInfo &Info);

/// Emit the arrays used to pass the captures and map information to the
/// offloading runtime library. If there is no map or capture information,
/// return nullptr by reference.
// NOTE(review): the function itself returns void; "nullptr by reference"
// presumably refers to the array pointers recorded in \p Info -- confirm.
void emitOffloadingArrays(
    InsertPointTy AllocaIP, InsertPointTy CodeGenIP, MapInfosTy &CombinedInfo,
    TargetDataInfo &Info, bool IsNonContiguous = false,
    function_ref<void(unsigned int, Value *)> DeviceAddrCB = nullptr,
    function_ref<Value *(unsigned int)> CustomMapperCB = nullptr);

/// Creates offloading entry for the provided entry ID \a ID, address \a
/// Addr, size \a Size, and flags \a Flags.
void createOffloadEntry(Constant *ID, Constant *Addr, uint64_t Size,
                        int32_t Flags, GlobalValue::LinkageTypes,
                        StringRef Name = "");

/// The kind of errors that can occur when emitting the offload entries and
/// metadata.
enum EmitMetadataErrorKind {
  EMIT_MD_TARGET_REGION_ERROR,
  EMIT_MD_DECLARE_TARGET_ERROR,
  EMIT_MD_GLOBAL_VAR_LINK_ERROR
};

/// Callback function type used to report an error; it receives the error
/// kind and a TargetRegionEntryInfo.
using EmitMetadataErrorReportFunctionTy =
    std::function<void(EmitMetadataErrorKind, TargetRegionEntryInfo)>;

// Emit the offloading entries and metadata so that the device codegen side
// can easily figure out what to emit. The produced metadata looks like
// this:
//
// !omp_offload.info = !{!1, ...}
//
// We only generate metadata for functions that contain target regions.
void createOffloadEntriesAndInfoMetadata(
    EmitMetadataErrorReportFunctionTy &ErrorReportFunction);

public:
/// Generator for __kmpc_copyprivate
///
/// \param Loc The source location description.
/// \param BufSize Number of elements in the buffer.
/// \param CpyBuf List of pointers to data to be copied.
/// \param CpyFn function to call for copying data.
/// \param DidIt flag variable; 1 for 'single' thread, 0 otherwise.
///
/// \return The insertion position *after* the CopyPrivate call.

InsertPointTy createCopyPrivate(const LocationDescription &Loc,
                                llvm::Value *BufSize, llvm::Value *CpyBuf,
                                llvm::Value *CpyFn, llvm::Value *DidIt);

/// Generator for '#omp single'
///
/// \param Loc The source location description.
/// \param BodyGenCB Callback that will generate the region code.
/// \param FiniCB Callback to finalize variable copies.
/// \param IsNowait If false, a barrier is emitted.
/// \param DidIt Local variable used as a flag to indicate 'single' thread
///
/// \returns The insertion position *after* the single call.
InsertPointTy createSingle(const LocationDescription &Loc,
                           BodyGenCallbackTy BodyGenCB,
                           FinalizeCallbackTy FiniCB, bool IsNowait,
                           llvm::Value *DidIt);

/// Generator for '#omp master'
///
/// \param Loc The insert and source location description.
/// \param BodyGenCB Callback that will generate the region code.
/// \param FiniCB Callback to finalize variable copies.
///
/// \returns The insertion position *after* the master.
InsertPointTy createMaster(const LocationDescription &Loc,
                           BodyGenCallbackTy BodyGenCB,
                           FinalizeCallbackTy FiniCB);

/// Generator for '#omp masked'
///
/// \param Loc The insert and source location description.
/// \param BodyGenCB Callback that will generate the region code.
/// \param FiniCB Callback to finalize variable copies.
/// \param Filter Value of the filter (presumably the 'filter' clause operand
///               selecting the executing thread -- TODO confirm).
///
/// \returns The insertion position *after* the masked.
InsertPointTy createMasked(const LocationDescription &Loc,
                           BodyGenCallbackTy BodyGenCB,
                           FinalizeCallbackTy FiniCB, Value *Filter);

/// Generator for '#omp critical'
///
/// \param Loc The insert and source location description.
/// \param BodyGenCB Callback that will generate the region body code.
/// \param FiniCB Callback to finalize variable copies.
/// \param CriticalName name of the lock used by the critical directive
/// \param HintInst Hint Instruction for hint clause associated with critical
///
/// \returns The insertion position *after* the critical.
InsertPointTy createCritical(const LocationDescription &Loc,
                             BodyGenCallbackTy BodyGenCB,
                             FinalizeCallbackTy FiniCB,
                             StringRef CriticalName, Value *HintInst);

/// Generator for '#omp ordered depend (source | sink)'
///
/// \param Loc The insert and source location description.
/// \param AllocaIP The insertion point to be used for alloca instructions.
/// \param NumLoops The number of loops in depend clause.
/// \param StoreValues The values that will be stored at the vector address.
/// \param Name The name of alloca instruction.
/// \param IsDependSource If true, depend source; otherwise, depend sink.
///
/// \return The insertion position *after* the ordered.
InsertPointTy createOrderedDepend(const LocationDescription &Loc,
                                  InsertPointTy AllocaIP, unsigned NumLoops,
                                  ArrayRef<llvm::Value *> StoreValues,
                                  const Twine &Name, bool IsDependSource);

/// Generator for '#omp ordered [threads | simd]'
///
/// \param Loc The insert and source location description.
/// \param BodyGenCB Callback that will generate the region code.
/// \param FiniCB Callback to finalize variable copies.
/// \param IsThreads If true, with threads clause or without clause;
///                  otherwise, with simd clause.
///
/// \returns The insertion position *after* the ordered.
InsertPointTy createOrderedThreadsSimd(const LocationDescription &Loc,
                                       BodyGenCallbackTy BodyGenCB,
                                       FinalizeCallbackTy FiniCB,
                                       bool IsThreads);

/// Generator for '#omp sections'
///
/// \param Loc The insert and source location description.
/// \param AllocaIP The insertion points to be used for alloca instructions.
/// \param SectionCBs Callbacks that will generate body of each section.
/// \param PrivCB Callback to copy a given variable (think copy constructor).
/// \param FiniCB Callback to finalize variable copies.
/// \param IsCancellable Flag to indicate a cancellable parallel region.
/// \param IsNowait If true, the barrier that ensures all sections are
///                 executed before moving forward will not be generated.
/// \returns The insertion position *after* the sections.
InsertPointTy createSections(const LocationDescription &Loc,
                             InsertPointTy AllocaIP,
                             ArrayRef<StorableBodyGenCallbackTy> SectionCBs,
                             PrivatizeCallbackTy PrivCB,
                             FinalizeCallbackTy FiniCB, bool IsCancellable,
                             bool IsNowait);

/// Generator for '#omp section'
///
/// \param Loc The insert and source location description.
/// \param BodyGenCB Callback that will generate the region body code.
/// \param FiniCB Callback to finalize variable copies.
/// \returns The insertion position *after* the section.
InsertPointTy createSection(const LocationDescription &Loc,
                            BodyGenCallbackTy BodyGenCB,
                            FinalizeCallbackTy FiniCB);

/// Generator for `#omp teams`
///
/// \param Loc The location where the teams construct was encountered.
/// \param BodyGenCB Callback that will generate the region code.
/// \param NumTeamsLower Lower bound on number of teams. If this is nullptr,
///        it is as if lower bound is specified as equal to upperbound. If
///        this is non-null, then upperbound must also be non-null.
/// \param NumTeamsUpper Upper bound on the number of teams.
/// \param ThreadLimit Limit on the number of threads that may participate in
///        a contention group created by each team.
/// \param IfExpr The integer argument value of the if condition on the
///        teams clause.
InsertPointTy
createTeams(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB,
            Value *NumTeamsLower = nullptr, Value *NumTeamsUpper = nullptr,
            Value *ThreadLimit = nullptr, Value *IfExpr = nullptr);

/// Generate conditional branch and relevant BasicBlocks through which private
/// threads copy the 'copyin' variables from Master copy to threadprivate
/// copies.
///
/// \param IP insertion block for copyin conditional
/// \param MasterAddr a pointer to the master variable
/// \param PrivateAddr a pointer to the threadprivate variable
/// \param IntPtrTy Pointer size type
/// \param BranchtoEnd Create a branch between the copyin.not.master blocks
///                    and copy.in.end block
///
/// \returns The insertion point where copying operation to be emitted.
InsertPointTy createCopyinClauseBlocks(InsertPointTy IP, Value *MasterAddr,
                                       Value *PrivateAddr,
                                       llvm::IntegerType *IntPtrTy,
                                       bool BranchtoEnd = true);

/// Create a runtime call for kmpc_Alloc
///
/// \param Loc The insert and source location description.
/// \param Size Size of allocated memory space
/// \param Allocator Allocator information instruction
/// \param Name Name of call Instruction for OMP_alloc
///
/// \returns CallInst to the OMP_Alloc call
CallInst *createOMPAlloc(const LocationDescription &Loc, Value *Size,
                         Value *Allocator, std::string Name = "");

/// Create a runtime call for kmpc_free
///
/// \param Loc The insert and source location description.
/// \param Addr Address of memory space to be freed
/// \param Allocator Allocator information instruction
/// \param Name Name of call Instruction for OMP_Free
///
/// \returns CallInst to the OMP_Free call
CallInst *createOMPFree(const LocationDescription &Loc, Value *Addr,
                        Value *Allocator, std::string Name = "");

/// Create a runtime call for kmpc_threadprivate_cached
///
/// \param Loc The insert and source location description.
/// \param Pointer pointer to data to be cached
/// \param Size size of data to be cached
/// \param Name Name of call Instruction for callinst
///
/// \returns CallInst to the thread private cache call.
CallInst *createCachedThreadPrivate(const LocationDescription &Loc,
                                    llvm::Value *Pointer,
                                    llvm::ConstantInt *Size,
                                    const llvm::Twine &Name = Twine(""));

/// Create a runtime call for __tgt_interop_init
///
/// \param Loc The insert and source location description.
/// \param InteropVar variable to be allocated
/// \param InteropType type of interop operation
/// \param Device device to which offloading will occur
/// \param NumDependences number of dependence variables
/// \param DependenceAddress pointer to dependence variables
/// \param HaveNowaitClause does nowait clause exist
///
/// \returns CallInst to the __tgt_interop_init call
CallInst *createOMPInteropInit(const LocationDescription &Loc,
                               Value *InteropVar,
                               omp::OMPInteropType InteropType, Value *Device,
                               Value *NumDependences,
                               Value *DependenceAddress,
                               bool HaveNowaitClause);

/// Create a runtime call for __tgt_interop_destroy
///
/// \param Loc The insert and source location description.
/// \param InteropVar variable to be allocated
/// \param Device device to which offloading will occur
/// \param NumDependences number of dependence variables
/// \param DependenceAddress pointer to dependence variables
/// \param HaveNowaitClause does nowait clause exist
///
/// \returns CallInst to the __tgt_interop_destroy call
CallInst *createOMPInteropDestroy(const LocationDescription &Loc,
                                  Value *InteropVar, Value *Device,
                                  Value *NumDependences,
                                  Value *DependenceAddress,
                                  bool HaveNowaitClause);

/// Create a runtime call for __tgt_interop_use
///
/// \param Loc The insert and source location description.
/// \param InteropVar variable to be allocated
/// \param Device device to which offloading will occur
/// \param NumDependences number of dependence variables
/// \param DependenceAddress pointer to dependence variables
/// \param HaveNowaitClause does nowait clause exist
///
/// \returns CallInst to the __tgt_interop_use call
CallInst *createOMPInteropUse(const LocationDescription &Loc,
                              Value *InteropVar, Value *Device,
                              Value *NumDependences, Value *DependenceAddress,
                              bool HaveNowaitClause);

/// The `omp target` interface
///
/// For more information about the usage of this interface,
/// \see openmp/libomptarget/deviceRTLs/common/include/target.h
///
///{

/// Create a runtime call for kmpc_target_init
///
/// \param Loc The insert and source location description.
/// \param IsSPMD Flag to indicate if the kernel is an SPMD kernel or not.
/// \param MinThreadsVal Minimal number of threads, or 0.
/// \param MaxThreadsVal Maximal number of threads, or 0.
/// \param MinTeamsVal Minimal number of teams, or 0.
/// \param MaxTeamsVal Maximal number of teams, or 0.
InsertPointTy createTargetInit(const LocationDescription &Loc, bool IsSPMD,
                               int32_t MinThreadsVal = 0,
                               int32_t MaxThreadsVal = 0,
                               int32_t MinTeamsVal = 0,
                               int32_t MaxTeamsVal = 0);

/// Create a runtime call for kmpc_target_deinit
///
/// \param Loc The insert and source location description.
/// \param TeamsReductionDataSize The maximal size of all the reduction data
///        for teams reduction.
/// \param TeamsReductionBufferLength The number of elements (each of up to
///        \p TeamsReductionDataSize size), in the teams reduction buffer.
void createTargetDeinit(const LocationDescription &Loc,
                        int32_t TeamsReductionDataSize = 0,
                        int32_t TeamsReductionBufferLength = 1024);

///}

/// Helpers to read/write kernel annotations from the IR.
///
///{

/// Read/write the bounds on threads for \p Kernel. Read will return 0 if none
/// is set. The pair returned by the reader presumably holds (lower, upper),
/// mirroring the \p LB / \p UB arguments of the writer -- TODO confirm.
static std::pair<int32_t, int32_t>
readThreadBoundsForKernel(const Triple &T, Function &Kernel);
static void writeThreadBoundsForKernel(const Triple &T, Function &Kernel,
                                       int32_t LB, int32_t UB);

/// Read/write the bounds on teams for \p Kernel. Read will return 0 if none
/// is set.
static std::pair<int32_t, int32_t> readTeamBoundsForKernel(const Triple &T,
                                                           Function &Kernel);
static void writeTeamsForKernel(const Triple &T, Function &Kernel, int32_t LB,
                                int32_t UB);
///}

private:
// Sets the function attributes expected for the outlined function
void setOutlinedTargetRegionFunctionAttributes(Function *OutlinedFn);

// Creates the function ID/Address for the given outlined function.
// In the case of an embedded device function the address of the function is
// used, in the case of a non-offload function a constant is created.
Constant *createOutlinedFunctionID(Function *OutlinedFn,
                                   StringRef EntryFnIDName);

// Creates the region entry address for the outlined function
Constant *createTargetRegionEntryAddr(Function *OutlinedFunction,
                                      StringRef EntryFnName);

public:
/// Functions used to generate a function with the given name.
using FunctionGenCallback = std::function<Function *(StringRef FunctionName)>;

/// Create a unique name for the entry function using the source location
/// information of the current target region. The name will be something like:
///
/// __omp_offloading_DD_FFFF_PP_lBB[_CC]
///
/// where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
/// mangled name of the function that encloses the target region and BB is the
/// line number of the target region. CC is a count added when more than one
/// region is located at the same location.
///
/// If this target outline function is not an offload entry, we don't need to
/// register it. This may happen if it is guarded by an if clause that is
/// false at compile time, or no target archs have been specified.
///
/// The created target region ID is used by the runtime library to identify
/// the current target region, so it only has to be unique and not
/// necessarily point to anything. It could be the pointer to the outlined
/// function that implements the target region, but we aren't using that so
/// that the compiler doesn't need to keep that, and could therefore inline
/// the host function if proven worthwhile during optimization. On the other
/// hand, if emitting code for the device, the ID has to be the function
/// address so that it can be retrieved from the offloading entry and launched
/// by the runtime library.
We also mark the outlined function to have 2134 /// external linkage in case we are emitting code for the device, because 2135 /// these functions will be entry points to the device. 2136 /// 2137 /// \param InfoManager The info manager keeping track of the offload entries 2138 /// \param EntryInfo The entry information about the function 2139 /// \param GenerateFunctionCallback The callback function to generate the code 2140 /// \param OutlinedFunction Pointer to the outlined function 2141 /// \param EntryFnIDName Name of the ID o be created 2142 void emitTargetRegionFunction(TargetRegionEntryInfo &EntryInfo, 2143 FunctionGenCallback &GenerateFunctionCallback, 2144 bool IsOffloadEntry, Function *&OutlinedFn, 2145 Constant *&OutlinedFnID); 2146 2147 /// Registers the given function and sets up the attribtues of the function 2148 /// Returns the FunctionID. 2149 /// 2150 /// \param InfoManager The info manager keeping track of the offload entries 2151 /// \param EntryInfo The entry information about the function 2152 /// \param OutlinedFunction Pointer to the outlined function 2153 /// \param EntryFnName Name of the outlined function 2154 /// \param EntryFnIDName Name of the ID o be created 2155 Constant *registerTargetRegionFunction(TargetRegionEntryInfo &EntryInfo, 2156 Function *OutlinedFunction, 2157 StringRef EntryFnName, 2158 StringRef EntryFnIDName); 2159 2160 /// Type of BodyGen to use for region codegen 2161 /// 2162 /// Priv: If device pointer privatization is required, emit the body of the 2163 /// region here. It will have to be duplicated: with and without 2164 /// privatization. 2165 /// DupNoPriv: If we need device pointer privatization, we need 2166 /// to emit the body of the region with no privatization in the 'else' branch 2167 /// of the conditional. 2168 /// NoPriv: If we don't require privatization of device 2169 /// pointers, we emit the body in between the runtime calls. This avoids 2170 /// duplicating the body code. 
2171 enum BodyGenTy { Priv, DupNoPriv, NoPriv }; 2172 2173 /// Callback type for creating the map infos for the kernel parameters. 2174 /// \param CodeGenIP is the insertion point where code should be generated, 2175 /// if any. 2176 using GenMapInfoCallbackTy = 2177 function_ref<MapInfosTy &(InsertPointTy CodeGenIP)>; 2178 2179 /// Generator for '#omp target data' 2180 /// 2181 /// \param Loc The location where the target data construct was encountered. 2182 /// \param AllocaIP The insertion points to be used for alloca instructions. 2183 /// \param CodeGenIP The insertion point at which the target directive code 2184 /// should be placed. 2185 /// \param IsBegin If true then emits begin mapper call otherwise emits 2186 /// end mapper call. 2187 /// \param DeviceID Stores the DeviceID from the device clause. 2188 /// \param IfCond Value which corresponds to the if clause condition. 2189 /// \param Info Stores all information realted to the Target Data directive. 2190 /// \param GenMapInfoCB Callback that populates the MapInfos and returns. 2191 /// \param BodyGenCB Optional Callback to generate the region code. 2192 /// \param DeviceAddrCB Optional callback to generate code related to 2193 /// use_device_ptr and use_device_addr. 2194 /// \param CustomMapperCB Optional callback to generate code related to 2195 /// custom mappers. 
2196 OpenMPIRBuilder::InsertPointTy createTargetData( 2197 const LocationDescription &Loc, InsertPointTy AllocaIP, 2198 InsertPointTy CodeGenIP, Value *DeviceID, Value *IfCond, 2199 TargetDataInfo &Info, GenMapInfoCallbackTy GenMapInfoCB, 2200 omp::RuntimeFunction *MapperFunc = nullptr, 2201 function_ref<InsertPointTy(InsertPointTy CodeGenIP, 2202 BodyGenTy BodyGenType)> 2203 BodyGenCB = nullptr, 2204 function_ref<void(unsigned int, Value *)> DeviceAddrCB = nullptr, 2205 function_ref<Value *(unsigned int)> CustomMapperCB = nullptr, 2206 Value *SrcLocInfo = nullptr); 2207 2208 using TargetBodyGenCallbackTy = function_ref<InsertPointTy( 2209 InsertPointTy AllocaIP, InsertPointTy CodeGenIP)>; 2210 2211 using TargetGenArgAccessorsCallbackTy = function_ref<InsertPointTy( 2212 Argument &Arg, Value *Input, Value *&RetVal, InsertPointTy AllocaIP, 2213 InsertPointTy CodeGenIP)>; 2214 2215 /// Generator for '#omp target' 2216 /// 2217 /// \param Loc where the target data construct was encountered. 2218 /// \param CodeGenIP The insertion point where the call to the outlined 2219 /// function should be emitted. 2220 /// \param EntryInfo The entry information about the function. 2221 /// \param NumTeams Number of teams specified in the num_teams clause. 2222 /// \param NumThreads Number of teams specified in the thread_limit clause. 2223 /// \param Inputs The input values to the region that will be passed. 2224 /// as arguments to the outlined function. 2225 /// \param BodyGenCB Callback that will generate the region code. 
2226 /// \param ArgAccessorFuncCB Callback that will generate accessors 2227 /// instructions for passed in target arguments where neccessary 2228 InsertPointTy createTarget(const LocationDescription &Loc, 2229 OpenMPIRBuilder::InsertPointTy AllocaIP, 2230 OpenMPIRBuilder::InsertPointTy CodeGenIP, 2231 TargetRegionEntryInfo &EntryInfo, int32_t NumTeams, 2232 int32_t NumThreads, 2233 SmallVectorImpl<Value *> &Inputs, 2234 GenMapInfoCallbackTy GenMapInfoCB, 2235 TargetBodyGenCallbackTy BodyGenCB, 2236 TargetGenArgAccessorsCallbackTy ArgAccessorFuncCB); 2237 2238 /// Returns __kmpc_for_static_init_* runtime function for the specified 2239 /// size \a IVSize and sign \a IVSigned. Will create a distribute call 2240 /// __kmpc_distribute_static_init* if \a IsGPUDistribute is set. 2241 FunctionCallee createForStaticInitFunction(unsigned IVSize, bool IVSigned, 2242 bool IsGPUDistribute); 2243 2244 /// Returns __kmpc_dispatch_init_* runtime function for the specified 2245 /// size \a IVSize and sign \a IVSigned. 2246 FunctionCallee createDispatchInitFunction(unsigned IVSize, bool IVSigned); 2247 2248 /// Returns __kmpc_dispatch_next_* runtime function for the specified 2249 /// size \a IVSize and sign \a IVSigned. 2250 FunctionCallee createDispatchNextFunction(unsigned IVSize, bool IVSigned); 2251 2252 /// Returns __kmpc_dispatch_fini_* runtime function for the specified 2253 /// size \a IVSize and sign \a IVSigned. 2254 FunctionCallee createDispatchFiniFunction(unsigned IVSize, bool IVSigned); 2255 2256 /// Declarations for LLVM-IR types (simple, array, function and structure) are 2257 /// generated below. Their names are defined and used in OpenMPKinds.def. Here 2258 /// we provide the declarations, the initializeTypes function will provide the 2259 /// values. 
2260 /// 2261 ///{ 2262 #define OMP_TYPE(VarName, InitValue) Type *VarName = nullptr; 2263 #define OMP_ARRAY_TYPE(VarName, ElemTy, ArraySize) \ 2264 ArrayType *VarName##Ty = nullptr; \ 2265 PointerType *VarName##PtrTy = nullptr; 2266 #define OMP_FUNCTION_TYPE(VarName, IsVarArg, ReturnType, ...) \ 2267 FunctionType *VarName = nullptr; \ 2268 PointerType *VarName##Ptr = nullptr; 2269 #define OMP_STRUCT_TYPE(VarName, StrName, ...) \ 2270 StructType *VarName = nullptr; \ 2271 PointerType *VarName##Ptr = nullptr; 2272 #include "llvm/Frontend/OpenMP/OMPKinds.def" 2273 2274 ///} 2275 2276 private: 2277 /// Create all simple and struct types exposed by the runtime and remember 2278 /// the llvm::PointerTypes of them for easy access later. 2279 void initializeTypes(Module &M); 2280 2281 /// Common interface for generating entry calls for OMP Directives. 2282 /// if the directive has a region/body, It will set the insertion 2283 /// point to the body 2284 /// 2285 /// \param OMPD Directive to generate entry blocks for 2286 /// \param EntryCall Call to the entry OMP Runtime Function 2287 /// \param ExitBB block where the region ends. 2288 /// \param Conditional indicate if the entry call result will be used 2289 /// to evaluate a conditional of whether a thread will execute 2290 /// body code or not. 2291 /// 2292 /// \return The insertion position in exit block 2293 InsertPointTy emitCommonDirectiveEntry(omp::Directive OMPD, Value *EntryCall, 2294 BasicBlock *ExitBB, 2295 bool Conditional = false); 2296 2297 /// Common interface to finalize the region 2298 /// 2299 /// \param OMPD Directive to generate exiting code for 2300 /// \param FinIP Insertion point for emitting Finalization code and exit call 2301 /// \param ExitCall Call to the ending OMP Runtime Function 2302 /// \param HasFinalize indicate if the directive will require finalization 2303 /// and has a finalization callback in the stack that 2304 /// should be called. 
2305 /// 2306 /// \return The insertion position in exit block 2307 InsertPointTy emitCommonDirectiveExit(omp::Directive OMPD, 2308 InsertPointTy FinIP, 2309 Instruction *ExitCall, 2310 bool HasFinalize = true); 2311 2312 /// Common Interface to generate OMP inlined regions 2313 /// 2314 /// \param OMPD Directive to generate inlined region for 2315 /// \param EntryCall Call to the entry OMP Runtime Function 2316 /// \param ExitCall Call to the ending OMP Runtime Function 2317 /// \param BodyGenCB Body code generation callback. 2318 /// \param FiniCB Finalization Callback. Will be called when finalizing region 2319 /// \param Conditional indicate if the entry call result will be used 2320 /// to evaluate a conditional of whether a thread will execute 2321 /// body code or not. 2322 /// \param HasFinalize indicate if the directive will require finalization 2323 /// and has a finalization callback in the stack that 2324 /// should be called. 2325 /// \param IsCancellable if HasFinalize is set to true, indicate if the 2326 /// the directive should be cancellable. 2327 /// \return The insertion point after the region 2328 2329 InsertPointTy 2330 EmitOMPInlinedRegion(omp::Directive OMPD, Instruction *EntryCall, 2331 Instruction *ExitCall, BodyGenCallbackTy BodyGenCB, 2332 FinalizeCallbackTy FiniCB, bool Conditional = false, 2333 bool HasFinalize = true, bool IsCancellable = false); 2334 2335 /// Get the platform-specific name separator. 2336 /// \param Parts different parts of the final name that needs separation 2337 /// \param FirstSeparator First separator used between the initial two 2338 /// parts of the name. 2339 /// \param Separator separator used between all of the rest consecutive 2340 /// parts of the name 2341 static std::string getNameWithSeparators(ArrayRef<StringRef> Parts, 2342 StringRef FirstSeparator, 2343 StringRef Separator); 2344 2345 /// Returns corresponding lock object for the specified critical region 2346 /// name. 
If the lock object does not exist it is created, otherwise the 2347 /// reference to the existing copy is returned. 2348 /// \param CriticalName Name of the critical region. 2349 /// 2350 Value *getOMPCriticalRegionLock(StringRef CriticalName); 2351 2352 /// Callback type for Atomic Expression update 2353 /// ex: 2354 /// \code{.cpp} 2355 /// unsigned x = 0; 2356 /// #pragma omp atomic update 2357 /// x = Expr(x_old); //Expr() is any legal operation 2358 /// \endcode 2359 /// 2360 /// \param XOld the value of the atomic memory address to use for update 2361 /// \param IRB reference to the IRBuilder to use 2362 /// 2363 /// \returns Value to update X to. 2364 using AtomicUpdateCallbackTy = 2365 const function_ref<Value *(Value *XOld, IRBuilder<> &IRB)>; 2366 2367 private: 2368 enum AtomicKind { Read, Write, Update, Capture, Compare }; 2369 2370 /// Determine whether to emit flush or not 2371 /// 2372 /// \param Loc The insert and source location description. 2373 /// \param AO The required atomic ordering 2374 /// \param AK The OpenMP atomic operation kind used. 2375 /// 2376 /// \returns wether a flush was emitted or not 2377 bool checkAndEmitFlushAfterAtomic(const LocationDescription &Loc, 2378 AtomicOrdering AO, AtomicKind AK); 2379 2380 /// Emit atomic update for constructs: X = X BinOp Expr ,or X = Expr BinOp X 2381 /// For complex Operations: X = UpdateOp(X) => CmpExch X, old_X, UpdateOp(X) 2382 /// Only Scalar data types. 2383 /// 2384 /// \param AllocaIP The insertion point to be used for alloca 2385 /// instructions. 2386 /// \param X The target atomic pointer to be updated 2387 /// \param XElemTy The element type of the atomic pointer. 2388 /// \param Expr The value to update X with. 2389 /// \param AO Atomic ordering of the generated atomic 2390 /// instructions. 2391 /// \param RMWOp The binary operation used for update. If 2392 /// operation is not supported by atomicRMW, 2393 /// or belong to {FADD, FSUB, BAD_BINOP}. 
2394 /// Then a `cmpExch` based atomic will be generated. 2395 /// \param UpdateOp Code generator for complex expressions that cannot be 2396 /// expressed through atomicrmw instruction. 2397 /// \param VolatileX true if \a X volatile? 2398 /// \param IsXBinopExpr true if \a X is Left H.S. in Right H.S. part of the 2399 /// update expression, false otherwise. 2400 /// (e.g. true for X = X BinOp Expr) 2401 /// 2402 /// \returns A pair of the old value of X before the update, and the value 2403 /// used for the update. 2404 std::pair<Value *, Value *> 2405 emitAtomicUpdate(InsertPointTy AllocaIP, Value *X, Type *XElemTy, Value *Expr, 2406 AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp, 2407 AtomicUpdateCallbackTy &UpdateOp, bool VolatileX, 2408 bool IsXBinopExpr); 2409 2410 /// Emit the binary op. described by \p RMWOp, using \p Src1 and \p Src2 . 2411 /// 2412 /// \Return The instruction 2413 Value *emitRMWOpAsInstruction(Value *Src1, Value *Src2, 2414 AtomicRMWInst::BinOp RMWOp); 2415 2416 public: 2417 /// a struct to pack relevant information while generating atomic Ops 2418 struct AtomicOpValue { 2419 Value *Var = nullptr; 2420 Type *ElemTy = nullptr; 2421 bool IsSigned = false; 2422 bool IsVolatile = false; 2423 }; 2424 2425 /// Emit atomic Read for : V = X --- Only Scalar data types. 2426 /// 2427 /// \param Loc The insert and source location description. 2428 /// \param X The target pointer to be atomically read 2429 /// \param V Memory address where to store atomically read 2430 /// value 2431 /// \param AO Atomic ordering of the generated atomic 2432 /// instructions. 2433 /// 2434 /// \return Insertion point after generated atomic read IR. 2435 InsertPointTy createAtomicRead(const LocationDescription &Loc, 2436 AtomicOpValue &X, AtomicOpValue &V, 2437 AtomicOrdering AO); 2438 2439 /// Emit atomic write for : X = Expr --- Only Scalar data types. 2440 /// 2441 /// \param Loc The insert and source location description. 
2442 /// \param X The target pointer to be atomically written to 2443 /// \param Expr The value to store. 2444 /// \param AO Atomic ordering of the generated atomic 2445 /// instructions. 2446 /// 2447 /// \return Insertion point after generated atomic Write IR. 2448 InsertPointTy createAtomicWrite(const LocationDescription &Loc, 2449 AtomicOpValue &X, Value *Expr, 2450 AtomicOrdering AO); 2451 2452 /// Emit atomic update for constructs: X = X BinOp Expr ,or X = Expr BinOp X 2453 /// For complex Operations: X = UpdateOp(X) => CmpExch X, old_X, UpdateOp(X) 2454 /// Only Scalar data types. 2455 /// 2456 /// \param Loc The insert and source location description. 2457 /// \param AllocaIP The insertion point to be used for alloca instructions. 2458 /// \param X The target atomic pointer to be updated 2459 /// \param Expr The value to update X with. 2460 /// \param AO Atomic ordering of the generated atomic instructions. 2461 /// \param RMWOp The binary operation used for update. If operation 2462 /// is not supported by atomicRMW, or belong to 2463 /// {FADD, FSUB, BAD_BINOP}. Then a `cmpExch` based 2464 /// atomic will be generated. 2465 /// \param UpdateOp Code generator for complex expressions that cannot be 2466 /// expressed through atomicrmw instruction. 2467 /// \param IsXBinopExpr true if \a X is Left H.S. in Right H.S. part of the 2468 /// update expression, false otherwise. 2469 /// (e.g. true for X = X BinOp Expr) 2470 /// 2471 /// \return Insertion point after generated atomic update IR. 
2472 InsertPointTy createAtomicUpdate(const LocationDescription &Loc, 2473 InsertPointTy AllocaIP, AtomicOpValue &X, 2474 Value *Expr, AtomicOrdering AO, 2475 AtomicRMWInst::BinOp RMWOp, 2476 AtomicUpdateCallbackTy &UpdateOp, 2477 bool IsXBinopExpr); 2478 2479 /// Emit atomic update for constructs: --- Only Scalar data types 2480 /// V = X; X = X BinOp Expr , 2481 /// X = X BinOp Expr; V = X, 2482 /// V = X; X = Expr BinOp X, 2483 /// X = Expr BinOp X; V = X, 2484 /// V = X; X = UpdateOp(X), 2485 /// X = UpdateOp(X); V = X, 2486 /// 2487 /// \param Loc The insert and source location description. 2488 /// \param AllocaIP The insertion point to be used for alloca instructions. 2489 /// \param X The target atomic pointer to be updated 2490 /// \param V Memory address where to store captured value 2491 /// \param Expr The value to update X with. 2492 /// \param AO Atomic ordering of the generated atomic instructions 2493 /// \param RMWOp The binary operation used for update. If 2494 /// operation is not supported by atomicRMW, or belong to 2495 /// {FADD, FSUB, BAD_BINOP}. Then a cmpExch based 2496 /// atomic will be generated. 2497 /// \param UpdateOp Code generator for complex expressions that cannot be 2498 /// expressed through atomicrmw instruction. 2499 /// \param UpdateExpr true if X is an in place update of the form 2500 /// X = X BinOp Expr or X = Expr BinOp X 2501 /// \param IsXBinopExpr true if X is Left H.S. in Right H.S. part of the 2502 /// update expression, false otherwise. 2503 /// (e.g. true for X = X BinOp Expr) 2504 /// \param IsPostfixUpdate true if original value of 'x' must be stored in 2505 /// 'v', not an updated one. 2506 /// 2507 /// \return Insertion point after generated atomic capture IR. 
2508 InsertPointTy 2509 createAtomicCapture(const LocationDescription &Loc, InsertPointTy AllocaIP, 2510 AtomicOpValue &X, AtomicOpValue &V, Value *Expr, 2511 AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp, 2512 AtomicUpdateCallbackTy &UpdateOp, bool UpdateExpr, 2513 bool IsPostfixUpdate, bool IsXBinopExpr); 2514 2515 /// Emit atomic compare for constructs: --- Only scalar data types 2516 /// cond-expr-stmt: 2517 /// x = x ordop expr ? expr : x; 2518 /// x = expr ordop x ? expr : x; 2519 /// x = x == e ? d : x; 2520 /// x = e == x ? d : x; (this one is not in the spec) 2521 /// cond-update-stmt: 2522 /// if (x ordop expr) { x = expr; } 2523 /// if (expr ordop x) { x = expr; } 2524 /// if (x == e) { x = d; } 2525 /// if (e == x) { x = d; } (this one is not in the spec) 2526 /// conditional-update-capture-atomic: 2527 /// v = x; cond-update-stmt; (IsPostfixUpdate=true, IsFailOnly=false) 2528 /// cond-update-stmt; v = x; (IsPostfixUpdate=false, IsFailOnly=false) 2529 /// if (x == e) { x = d; } else { v = x; } (IsPostfixUpdate=false, 2530 /// IsFailOnly=true) 2531 /// r = x == e; if (r) { x = d; } (IsPostfixUpdate=false, IsFailOnly=false) 2532 /// r = x == e; if (r) { x = d; } else { v = x; } (IsPostfixUpdate=false, 2533 /// IsFailOnly=true) 2534 /// 2535 /// \param Loc The insert and source location description. 2536 /// \param X The target atomic pointer to be updated. 2537 /// \param V Memory address where to store captured value (for 2538 /// compare capture only). 2539 /// \param R Memory address where to store comparison result 2540 /// (for compare capture with '==' only). 2541 /// \param E The expected value ('e') for forms that use an 2542 /// equality comparison or an expression ('expr') for 2543 /// forms that use 'ordop' (logically an atomic maximum or 2544 /// minimum). 2545 /// \param D The desired value for forms that use an equality 2546 /// comparison. If forms that use 'ordop', it should be 2547 /// \p nullptr. 
2548 /// \param AO Atomic ordering of the generated atomic instructions. 2549 /// \param Op Atomic compare operation. It can only be ==, <, or >. 2550 /// \param IsXBinopExpr True if the conditional statement is in the form where 2551 /// x is on LHS. It only matters for < or >. 2552 /// \param IsPostfixUpdate True if original value of 'x' must be stored in 2553 /// 'v', not an updated one (for compare capture 2554 /// only). 2555 /// \param IsFailOnly True if the original value of 'x' is stored to 'v' 2556 /// only when the comparison fails. This is only valid for 2557 /// the case the comparison is '=='. 2558 /// 2559 /// \return Insertion point after generated atomic capture IR. 2560 InsertPointTy 2561 createAtomicCompare(const LocationDescription &Loc, AtomicOpValue &X, 2562 AtomicOpValue &V, AtomicOpValue &R, Value *E, Value *D, 2563 AtomicOrdering AO, omp::OMPAtomicCompareOp Op, 2564 bool IsXBinopExpr, bool IsPostfixUpdate, bool IsFailOnly); 2565 InsertPointTy createAtomicCompare(const LocationDescription &Loc, 2566 AtomicOpValue &X, AtomicOpValue &V, 2567 AtomicOpValue &R, Value *E, Value *D, 2568 AtomicOrdering AO, 2569 omp::OMPAtomicCompareOp Op, 2570 bool IsXBinopExpr, bool IsPostfixUpdate, 2571 bool IsFailOnly, AtomicOrdering Failure); 2572 2573 /// Create the control flow structure of a canonical OpenMP loop. 2574 /// 2575 /// The emitted loop will be disconnected, i.e. no edge to the loop's 2576 /// preheader and no terminator in the AfterBB. The OpenMPIRBuilder's 2577 /// IRBuilder location is not preserved. 2578 /// 2579 /// \param DL DebugLoc used for the instructions in the skeleton. 2580 /// \param TripCount Value to be used for the trip count. 2581 /// \param F Function in which to insert the BasicBlocks. 2582 /// \param PreInsertBefore Where to insert BBs that execute before the body, 2583 /// typically the body itself. 2584 /// \param PostInsertBefore Where to insert BBs that execute after the body. 
2585 /// \param Name Base name used to derive BB 2586 /// and instruction names. 2587 /// 2588 /// \returns The CanonicalLoopInfo that represents the emitted loop. 2589 CanonicalLoopInfo *createLoopSkeleton(DebugLoc DL, Value *TripCount, 2590 Function *F, 2591 BasicBlock *PreInsertBefore, 2592 BasicBlock *PostInsertBefore, 2593 const Twine &Name = {}); 2594 /// OMP Offload Info Metadata name string 2595 const std::string ompOffloadInfoName = "omp_offload.info"; 2596 2597 /// Loads all the offload entries information from the host IR 2598 /// metadata. This function is only meant to be used with device code 2599 /// generation. 2600 /// 2601 /// \param M Module to load Metadata info from. Module passed maybe 2602 /// loaded from bitcode file, i.e, different from OpenMPIRBuilder::M module. 2603 void loadOffloadInfoMetadata(Module &M); 2604 2605 /// Loads all the offload entries information from the host IR 2606 /// metadata read from the file passed in as the HostFilePath argument. This 2607 /// function is only meant to be used with device code generation. 2608 /// 2609 /// \param HostFilePath The path to the host IR file, 2610 /// used to load in offload metadata for the device, allowing host and device 2611 /// to maintain the same metadata mapping. 2612 void loadOffloadInfoMetadata(StringRef HostFilePath); 2613 2614 /// Gets (if variable with the given name already exist) or creates 2615 /// internal global variable with the specified Name. The created variable has 2616 /// linkage CommonLinkage by default and is initialized by null value. 2617 /// \param Ty Type of the global variable. If it is exist already the type 2618 /// must be the same. 2619 /// \param Name Name of the variable. 2620 GlobalVariable *getOrCreateInternalVariable(Type *Ty, const StringRef &Name, 2621 unsigned AddressSpace = 0); 2622 2623 /// Create a global function to register OpenMP requires flags into the 2624 /// runtime, according to the `Config`. 
2625 /// 2626 /// This function should be added to the list of constructors of the 2627 /// compilation unit in order to be called before other OpenMP runtime 2628 /// functions. 2629 /// 2630 /// \param Name Name of the created function. 2631 Function *createRegisterRequires(StringRef Name); 2632 }; 2633 2634 /// Class to represented the control flow structure of an OpenMP canonical loop. 2635 /// 2636 /// The control-flow structure is standardized for easy consumption by 2637 /// directives associated with loops. For instance, the worksharing-loop 2638 /// construct may change this control flow such that each loop iteration is 2639 /// executed on only one thread. The constraints of a canonical loop in brief 2640 /// are: 2641 /// 2642 /// * The number of loop iterations must have been computed before entering the 2643 /// loop. 2644 /// 2645 /// * Has an (unsigned) logical induction variable that starts at zero and 2646 /// increments by one. 2647 /// 2648 /// * The loop's CFG itself has no side-effects. The OpenMP specification 2649 /// itself allows side-effects, but the order in which they happen, including 2650 /// how often or whether at all, is unspecified. We expect that the frontend 2651 /// will emit those side-effect instructions somewhere (e.g. before the loop) 2652 /// such that the CanonicalLoopInfo itself can be side-effect free. 2653 /// 2654 /// Keep in mind that CanonicalLoopInfo is meant to only describe a repeated 2655 /// execution of a loop body that satifies these constraints. It does NOT 2656 /// represent arbitrary SESE regions that happen to contain a loop. Do not use 2657 /// CanonicalLoopInfo for such purposes. 
///
/// The control flow can be described as follows:
///
///     Preheader
///        |
///  /-> Header
///  |     |
///  |    Cond---\
///  |     |     |
///  |    Body   |
///  |    | |    |
///  |   <...>   |
///  |    | |    |
///   \--Latch   |
///              |
///             Exit
///              |
///            After
///
/// The loop is thought to start at PreheaderIP (at the Preheader's terminator,
/// including) and end at AfterIP (at the After's first instruction, excluding).
/// That is, instructions in the Preheader and After blocks (except the
/// Preheader's terminator) are out of CanonicalLoopInfo's control and may have
/// side-effects. Typically, the Preheader is used to compute the loop's trip
/// count. The instructions from BodyIP (at the Body block's first instruction,
/// excluding) until the Latch are also considered outside CanonicalLoopInfo's
/// control and thus can have side-effects. The body block is the single entry
/// point into the loop body, which may contain arbitrary control flow as long
/// as all control paths eventually branch to the Latch block.
///
/// TODO: Consider adding another standardized BasicBlock between Body CFG and
/// Latch to guarantee that there is only a single edge to the latch. It would
/// make loop transformations easier by not needing to consider multiple
/// predecessors of the latch (See redirectAllPredecessorsTo) and would give us
/// an equivalent to PreheaderIP, AfterIP and BodyIP for inserting code that
/// executes after each body iteration.
///
/// There must be no loop-carried dependencies through llvm::Values. This is
/// equivalent to that the Latch has no PHINode and the Header's only PHINode is
/// for the induction variable.
///
/// All code in Header, Cond, Latch and Exit (plus the terminator of the
/// Preheader) are CanonicalLoopInfo's responsibility and their build-up checked
/// by assertOK(). They are expected to not be modified unless explicitly
/// modifying the CanonicalLoopInfo through a method that applies an OpenMP
/// loop-associated construct such as applyWorkshareLoop, tileLoops, unrollLoop,
/// etc. These methods usually invalidate the CanonicalLoopInfo and re-use its
/// basic blocks. After invalidation, the CanonicalLoopInfo must not be used
/// anymore as its underlying control flow may not exist anymore.
/// Loop-transformation methods such as tileLoops, collapseLoops and unrollLoop
/// may also return a new CanonicalLoopInfo that can be passed to other
/// loop-associated construct implementing methods. These loop-transforming
/// methods may either create a new CanonicalLoopInfo usually using
/// createLoopSkeleton and invalidate the input CanonicalLoopInfo, or reuse and
/// modify one of the input CanonicalLoopInfo and return it as representing the
/// modified loop. What is done is an implementation detail of the
/// transformation-implementing method and callers should always assume that the
/// CanonicalLoopInfo passed to it is invalidated and a new object is returned.
/// Returned CanonicalLoopInfo have the same structure and guarantees as the one
/// created by createCanonicalLoop, such that transforming methods do not have
/// to special case where the CanonicalLoopInfo originated from.
///
/// Generally, methods consuming CanonicalLoopInfo do not need an
/// OpenMPIRBuilder::InsertPointTy as argument, but use the locations of the
/// CanonicalLoopInfo to insert new or modify existing instructions.
Unless 2723 /// documented otherwise, methods consuming CanonicalLoopInfo do not invalidate 2724 /// any InsertPoint that is outside CanonicalLoopInfo's control. Specifically, 2725 /// any InsertPoint in the Preheader, After or Block can still be used after 2726 /// calling such a method. 2727 /// 2728 /// TODO: Provide mechanisms for exception handling and cancellation points. 2729 /// 2730 /// Defined outside OpenMPIRBuilder because nested classes cannot be 2731 /// forward-declared, e.g. to avoid having to include the entire OMPIRBuilder.h. 2732 class CanonicalLoopInfo { 2733 friend class OpenMPIRBuilder; 2734 2735 private: 2736 BasicBlock *Header = nullptr; 2737 BasicBlock *Cond = nullptr; 2738 BasicBlock *Latch = nullptr; 2739 BasicBlock *Exit = nullptr; 2740 2741 /// Add the control blocks of this loop to \p BBs. 2742 /// 2743 /// This does not include any block from the body, including the one returned 2744 /// by getBody(). 2745 /// 2746 /// FIXME: This currently includes the Preheader and After blocks even though 2747 /// their content is (mostly) not under CanonicalLoopInfo's control. 2748 /// Re-evaluated whether this makes sense. 2749 void collectControlBlocks(SmallVectorImpl<BasicBlock *> &BBs); 2750 2751 /// Sets the number of loop iterations to the given value. This value must be 2752 /// valid in the condition block (i.e., defined in the preheader) and is 2753 /// interpreted as an unsigned integer. 2754 void setTripCount(Value *TripCount); 2755 2756 /// Replace all uses of the canonical induction variable in the loop body with 2757 /// a new one. 2758 /// 2759 /// The intended use case is to update the induction variable for an updated 2760 /// iteration space such that it can stay normalized in the 0...tripcount-1 2761 /// range. 
2762 /// 2763 /// The \p Updater is called with the (presumable updated) current normalized 2764 /// induction variable and is expected to return the value that uses of the 2765 /// pre-updated induction values should use instead, typically dependent on 2766 /// the new induction variable. This is a lambda (instead of e.g. just passing 2767 /// the new value) to be able to distinguish the uses of the pre-updated 2768 /// induction variable and uses of the induction varible to compute the 2769 /// updated induction variable value. 2770 void mapIndVar(llvm::function_ref<Value *(Instruction *)> Updater); 2771 2772 public: 2773 /// Returns whether this object currently represents the IR of a loop. If 2774 /// returning false, it may have been consumed by a loop transformation or not 2775 /// been intialized. Do not use in this case; isValid()2776 bool isValid() const { return Header; } 2777 2778 /// The preheader ensures that there is only a single edge entering the loop. 2779 /// Code that must be execute before any loop iteration can be emitted here, 2780 /// such as computing the loop trip count and begin lifetime markers. Code in 2781 /// the preheader is not considered part of the canonical loop. 2782 BasicBlock *getPreheader() const; 2783 2784 /// The header is the entry for each iteration. In the canonical control flow, 2785 /// it only contains the PHINode for the induction variable. getHeader()2786 BasicBlock *getHeader() const { 2787 assert(isValid() && "Requires a valid canonical loop"); 2788 return Header; 2789 } 2790 2791 /// The condition block computes whether there is another loop iteration. If 2792 /// yes, branches to the body; otherwise to the exit block. getCond()2793 BasicBlock *getCond() const { 2794 assert(isValid() && "Requires a valid canonical loop"); 2795 return Cond; 2796 } 2797 2798 /// The body block is the single entry for a loop iteration and not controlled 2799 /// by CanonicalLoopInfo. 
It can contain arbitrary control flow but must 2800 /// eventually branch to the \p Latch block. getBody()2801 BasicBlock *getBody() const { 2802 assert(isValid() && "Requires a valid canonical loop"); 2803 return cast<BranchInst>(Cond->getTerminator())->getSuccessor(0); 2804 } 2805 2806 /// Reaching the latch indicates the end of the loop body code. In the 2807 /// canonical control flow, it only contains the increment of the induction 2808 /// variable. getLatch()2809 BasicBlock *getLatch() const { 2810 assert(isValid() && "Requires a valid canonical loop"); 2811 return Latch; 2812 } 2813 2814 /// Reaching the exit indicates no more iterations are being executed. getExit()2815 BasicBlock *getExit() const { 2816 assert(isValid() && "Requires a valid canonical loop"); 2817 return Exit; 2818 } 2819 2820 /// The after block is intended for clean-up code such as lifetime end 2821 /// markers. It is separate from the exit block to ensure, analogous to the 2822 /// preheader, it having just a single entry edge and being free from PHI 2823 /// nodes should there be multiple loop exits (such as from break 2824 /// statements/cancellations). getAfter()2825 BasicBlock *getAfter() const { 2826 assert(isValid() && "Requires a valid canonical loop"); 2827 return Exit->getSingleSuccessor(); 2828 } 2829 2830 /// Returns the llvm::Value containing the number of loop iterations. It must 2831 /// be valid in the preheader and always interpreted as an unsigned integer of 2832 /// any bit-width. getTripCount()2833 Value *getTripCount() const { 2834 assert(isValid() && "Requires a valid canonical loop"); 2835 Instruction *CmpI = &Cond->front(); 2836 assert(isa<CmpInst>(CmpI) && "First inst must compare IV with TripCount"); 2837 return CmpI->getOperand(1); 2838 } 2839 2840 /// Returns the instruction representing the current logical induction 2841 /// variable. Always unsigned, always starting at 0 with an increment of one. 
getIndVar()2842 Instruction *getIndVar() const { 2843 assert(isValid() && "Requires a valid canonical loop"); 2844 Instruction *IndVarPHI = &Header->front(); 2845 assert(isa<PHINode>(IndVarPHI) && "First inst must be the IV PHI"); 2846 return IndVarPHI; 2847 } 2848 2849 /// Return the type of the induction variable (and the trip count). getIndVarType()2850 Type *getIndVarType() const { 2851 assert(isValid() && "Requires a valid canonical loop"); 2852 return getIndVar()->getType(); 2853 } 2854 2855 /// Return the insertion point for user code before the loop. getPreheaderIP()2856 OpenMPIRBuilder::InsertPointTy getPreheaderIP() const { 2857 assert(isValid() && "Requires a valid canonical loop"); 2858 BasicBlock *Preheader = getPreheader(); 2859 return {Preheader, std::prev(Preheader->end())}; 2860 }; 2861 2862 /// Return the insertion point for user code in the body. getBodyIP()2863 OpenMPIRBuilder::InsertPointTy getBodyIP() const { 2864 assert(isValid() && "Requires a valid canonical loop"); 2865 BasicBlock *Body = getBody(); 2866 return {Body, Body->begin()}; 2867 }; 2868 2869 /// Return the insertion point for user code after the loop. getAfterIP()2870 OpenMPIRBuilder::InsertPointTy getAfterIP() const { 2871 assert(isValid() && "Requires a valid canonical loop"); 2872 BasicBlock *After = getAfter(); 2873 return {After, After->begin()}; 2874 }; 2875 getFunction()2876 Function *getFunction() const { 2877 assert(isValid() && "Requires a valid canonical loop"); 2878 return Header->getParent(); 2879 } 2880 2881 /// Consistency self-check. 2882 void assertOK() const; 2883 2884 /// Invalidate this loop. That is, the underlying IR does not fulfill the 2885 /// requirements of an OpenMP canonical loop anymore. 2886 void invalidate(); 2887 }; 2888 2889 } // end namespace llvm 2890 2891 #endif // LLVM_FRONTEND_OPENMP_OMPIRBUILDER_H 2892