//===- llvm/Analysis/VectorUtils.h - Vector utilities -----------*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines some vectorizer utilities.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_ANALYSIS_VECTORUTILS_H
#define LLVM_ANALYSIS_VECTORUTILS_H

#include "llvm/ADT/MapVector.h"
#include "llvm/Analysis/LoopAccessAnalysis.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/IRBuilder.h"

namespace llvm {

template <typename T> class ArrayRef;
class DemandedBits;
class GetElementPtrInst;
template <typename InstTy> class InterleaveGroup;
class Loop;
class ScalarEvolution;
class TargetTransformInfo;
class Type;
class Value;

namespace Intrinsic {
enum ID : unsigned;
}

/// Identify if the intrinsic is trivially vectorizable.
/// This method returns true if the intrinsic's argument types are all
/// scalars for the scalar form of the intrinsic and all vectors for
/// the vector form of the intrinsic.
bool isTriviallyVectorizable(Intrinsic::ID ID);

/// Identifies if the intrinsic has a scalar operand. It checks for the
/// ctlz, cttz and powi special intrinsics whose argument is scalar.
bool hasVectorInstrinsicScalarOpd(Intrinsic::ID ID, unsigned ScalarOpdIdx);

/// Returns the intrinsic ID for the call.
/// For the input call instruction it finds the mapping intrinsic and returns
/// its intrinsic ID; if it does not find one, it returns
/// Intrinsic::not_intrinsic.
Intrinsic::ID getVectorIntrinsicIDForCall(const CallInst *CI,
                                          const TargetLibraryInfo *TLI);

/// Find the operand of the GEP that should be checked for consecutive
/// stores. This ignores trailing indices that have no effect on the final
/// pointer.
unsigned getGEPInductionOperand(const GetElementPtrInst *Gep);

/// If the argument is a GEP, then returns the operand identified by
/// getGEPInductionOperand. However, if there is some other non-loop-invariant
/// operand, it returns that instead.
Value *stripGetElementPtr(Value *Ptr, ScalarEvolution *SE, Loop *Lp);

/// If a value has only one user that is a CastInst, return it.
Value *getUniqueCastUse(Value *Ptr, Loop *Lp, Type *Ty);

/// Get the stride of a pointer access in a loop. Looks for symbolic
/// strides "a[i*stride]". Returns the symbolic stride, or null otherwise.
Value *getStrideFromPointer(Value *Ptr, ScalarEvolution *SE, Loop *Lp);

/// Given a vector and an element number, see if the scalar value is
/// already around as a register, for example if it was inserted and then
/// extracted from the vector.
Value *findScalarElement(Value *V, unsigned EltNo);

/// Get the splat value if the input is a splat vector, or return nullptr.
/// The value may be extracted from a splat constant vector or from
/// a sequence of instructions that broadcast a single value into a vector.
const Value *getSplatValue(const Value *V);
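
// An illustrative sketch (not part of the API): getSplatValue recognizes the
// canonical insertelement/shufflevector broadcast idiom, so for IR such as
// the following it would return %v:
//
//   %ins   = insertelement <4 x i32> undef, i32 %v, i32 0
//   %splat = shufflevector <4 x i32> %ins, <4 x i32> undef,
//                          <4 x i32> zeroinitializer
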
/// Compute a map of integer instructions to their minimum legal type
/// size.
///
/// C semantics force sub-int-sized values (e.g. i8, i16) to be promoted to int
/// type (e.g. i32) whenever arithmetic is performed on them.
///
/// For targets with native i8 or i16 operations, usually InstCombine can shrink
/// the arithmetic type down again. However InstCombine refuses to create
/// illegal types, so for targets without i8 or i16 registers, the lengthening
/// and shrinking remains.
///
/// Most SIMD ISAs (e.g. NEON) however support vectors of i8 or i16 even when
/// their scalar equivalents do not, so during vectorization it is important to
/// remove these extensions and truncations when deciding the profitability of
/// vectorization.
///
/// This function analyzes the given range of instructions and determines the
/// minimum type size each can be converted to. It attempts to remove or
/// minimize type size changes across each def-use chain, so for example in the
/// following code:
///
///   %1 = load i8, i8*
///   %2 = add i8 %1, 2
///   %3 = load i16, i16*
///   %4 = zext i8 %2 to i32
///   %5 = zext i16 %3 to i32
///   %6 = add i32 %4, %5
///   %7 = trunc i32 %6 to i16
///
/// Instruction %6 must be done at least in i16, so computeMinimumValueSizes
/// will return: {%1: 16, %2: 16, %3: 16, %4: 16, %5: 16, %6: 16, %7: 16}.
///
/// If the optional TargetTransformInfo is provided, this function tries harder
/// to do less work by only looking at illegal types.
MapVector<Instruction*, uint64_t>
computeMinimumValueSizes(ArrayRef<BasicBlock*> Blocks,
                         DemandedBits &DB,
                         const TargetTransformInfo *TTI=nullptr);

/// Compute the union of two access-group lists.
///
/// If the list contains just one access group, it is returned directly. If the
/// list is empty, returns nullptr.
MDNode *uniteAccessGroups(MDNode *AccGroups1, MDNode *AccGroups2);

/// Compute the access-group list of access groups that \p Inst1 and \p Inst2
/// are both in. If either instruction does not access memory at all, it is
/// considered to be in every list.
///
/// If the list contains just one access group, it is returned directly. If the
/// list is empty, returns nullptr.
MDNode *intersectAccessGroups(const Instruction *Inst1,
                              const Instruction *Inst2);

/// Compute the metadata that \p I can inherit from the instructions in \p VL
/// and attach it to \p I.
///
/// Specifically, let Kinds = [MD_tbaa, MD_alias_scope, MD_noalias, MD_fpmath,
/// MD_nontemporal, MD_access_group].
/// For K in Kinds, we get the MDNode for K from each of the
/// elements of VL, compute their "intersection" (i.e., the most generic
/// metadata value that covers all of the individual values), and set I's
/// metadata for K equal to the intersection value.
///
/// This function always sets a (possibly null) value for each K in Kinds.
Instruction *propagateMetadata(Instruction *I, ArrayRef<Value *> VL);

/// Create a mask that filters the members of an interleave group where there
/// are gaps.
///
/// For example, the mask for \p Group with interleave-factor 3
/// and \p VF 4 that has only its first member present is:
///
///   <1,0,0,1,0,0,1,0,0,1,0,0>
///
/// Note: The result is a mask of 0's and 1's, as opposed to the other
/// create[*]Mask() utilities which create a shuffle mask (mask that
/// consists of indices).
Constant *createBitMaskForGaps(IRBuilder<> &Builder, unsigned VF,
                               const InterleaveGroup<Instruction> &Group);
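
// A minimal usage sketch for propagateMetadata (the variable names are
// hypothetical, not part of this file): after widening a group of scalar
// loads into one vector load, a vectorizer would typically transfer the
// metadata common to the originals onto the new instruction:
//
//   SmallVector<Value *, 4> ScalarLoads = {Load0, Load1, Load2, Load3};
//   propagateMetadata(WideLoad, ScalarLoads);
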
/// Create a mask with replicated elements.
///
/// This function creates a shuffle mask for replicating each of the \p VF
/// elements in a vector \p ReplicationFactor times. It can be used to
/// transform a mask of \p VF elements into a mask of
/// \p VF * \p ReplicationFactor elements used by a predicated
/// interleaved-group of loads/stores whose Interleaved-factor ==
/// \p ReplicationFactor.
///
/// For example, the mask for \p ReplicationFactor=3 and \p VF=4 is:
///
///   <0,0,0,1,1,1,2,2,2,3,3,3>
Constant *createReplicatedMask(IRBuilder<> &Builder, unsigned ReplicationFactor,
                               unsigned VF);

/// Create an interleave shuffle mask.
///
/// This function creates a shuffle mask for interleaving \p NumVecs vectors of
/// vectorization factor \p VF into a single wide vector. The mask is of the
/// form:
///
///   <0, VF, VF * 2, ..., VF * (NumVecs - 1), 1, VF + 1, VF * 2 + 1, ...>
///
/// For example, the mask for VF = 4 and NumVecs = 2 is:
///
///   <0, 4, 1, 5, 2, 6, 3, 7>.
Constant *createInterleaveMask(IRBuilder<> &Builder, unsigned VF,
                               unsigned NumVecs);

/// Create a stride shuffle mask.
///
/// This function creates a shuffle mask whose elements begin at \p Start and
/// are incremented by \p Stride. The mask can be used to deinterleave an
/// interleaved vector into separate vectors of vectorization factor \p VF. The
/// mask is of the form:
///
///   <Start, Start + Stride, ..., Start + Stride * (VF - 1)>
///
/// For example, the mask for Start = 0, Stride = 2, and VF = 4 is:
///
///   <0, 2, 4, 6>
Constant *createStrideMask(IRBuilder<> &Builder, unsigned Start,
                           unsigned Stride, unsigned VF);

/// Create a sequential shuffle mask.
///
/// This function creates a shuffle mask whose elements are sequential and
/// begin at \p Start. The mask contains \p NumInts integers and is padded
/// with \p NumUndefs undef values. The mask is of the form:
///
///   <Start, Start + 1, ... Start + NumInts - 1, undef_1, ... undef_NumUndefs>
///
/// For example, the mask for Start = 0, NumInts = 4, and NumUndefs = 4 is:
///
///   <0, 1, 2, 3, undef, undef, undef, undef>
Constant *createSequentialMask(IRBuilder<> &Builder, unsigned Start,
                               unsigned NumInts, unsigned NumUndefs);

/// Concatenate a list of vectors.
///
/// This function generates code that concatenates the vectors in \p Vecs into
/// a single large vector. The number of vectors should be greater than one,
/// and their element types should be the same. The number of elements in the
/// vectors should also be the same; however, if the last vector has fewer
/// elements, it will be padded with undefs.
Value *concatenateVectors(IRBuilder<> &Builder, ArrayRef<Value *> Vecs);
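
// Illustrative sketch (hypothetical values): for two <4 x i32> inputs %A and
// %B of equal size, concatenateVectors emits IR equivalent to a single wide
// shuffle built with a sequential mask:
//
//   %concat = shufflevector <4 x i32> %A, <4 x i32> %B,
//                           <8 x i32> <i32 0, i32 1, i32 2, i32 3,
//                                      i32 4, i32 5, i32 6, i32 7>
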
/// The group of interleaved loads/stores sharing the same stride and
/// close to each other.
///
/// Each member in this group has an index starting from 0, and the largest
/// index should be less than the interleave factor, which is equal to the
/// absolute value of the access's stride.
///
/// E.g. An interleaved load group of factor 4:
///        for (unsigned i = 0; i < 1024; i+=4) {
///          a = A[i];                           // Member of index 0
///          b = A[i+1];                         // Member of index 1
///          d = A[i+3];                         // Member of index 3
///          ...
///        }
///
///      An interleaved store group of factor 4:
///        for (unsigned i = 0; i < 1024; i+=4) {
///          ...
///          A[i]   = a;                         // Member of index 0
///          A[i+1] = b;                         // Member of index 1
///          A[i+2] = c;                         // Member of index 2
///          A[i+3] = d;                         // Member of index 3
///        }
///
/// Note: the interleaved load group could have gaps (missing members), but
/// the interleaved store group doesn't allow gaps.
template <typename InstTy> class InterleaveGroup {
public:
  InterleaveGroup(unsigned Factor, bool Reverse, unsigned Align)
      : Factor(Factor), Reverse(Reverse), Align(Align), InsertPos(nullptr) {}

  InterleaveGroup(InstTy *Instr, int Stride, unsigned Align)
      : Align(Align), InsertPos(Instr) {
    assert(Align && "The alignment should be non-zero");

    Factor = std::abs(Stride);
    assert(Factor > 1 && "Invalid interleave factor");

    Reverse = Stride < 0;
    Members[0] = Instr;
  }

  bool isReverse() const { return Reverse; }
  unsigned getFactor() const { return Factor; }
  unsigned getAlignment() const { return Align; }
  unsigned getNumMembers() const { return Members.size(); }

  /// Try to insert a new member \p Instr with index \p Index and
  /// alignment \p NewAlign. The index is relative to the leader and may be
  /// negative if the new member becomes the new leader.
  ///
  /// \returns false if the instruction doesn't belong to the group.
  bool insertMember(InstTy *Instr, int Index, unsigned NewAlign) {
    assert(NewAlign && "The new member's alignment should be non-zero");

    int Key = Index + SmallestKey;

    // Skip if there is already a member with the same index.
    if (Members.find(Key) != Members.end())
      return false;

    if (Key > LargestKey) {
      // The largest index is always less than the interleave factor.
      if (Index >= static_cast<int>(Factor))
        return false;

      LargestKey = Key;
    } else if (Key < SmallestKey) {
      // The largest index is always less than the interleave factor.
      if (LargestKey - Key >= static_cast<int>(Factor))
        return false;

      SmallestKey = Key;
    }

    // It's always safe to select the minimum alignment.
    Align = std::min(Align, NewAlign);
    Members[Key] = Instr;
    return true;
  }

  /// Get the member with the given index \p Index.
  ///
  /// \returns nullptr if the group contains no such member.
  InstTy *getMember(unsigned Index) const {
    int Key = SmallestKey + Index;
    auto Member = Members.find(Key);
    if (Member == Members.end())
      return nullptr;

    return Member->second;
  }

  /// Get the index for the given member. Unlike the key in the member
  /// map, the index starts from 0.
  unsigned getIndex(const InstTy *Instr) const {
    for (auto I : Members) {
      if (I.second == Instr)
        return I.first - SmallestKey;
    }

    llvm_unreachable("InterleaveGroup contains no such member");
  }

  InstTy *getInsertPos() const { return InsertPos; }
  void setInsertPos(InstTy *Inst) { InsertPos = Inst; }
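
  // Illustrative sketch: given a pointer Group to an InterleaveGroup, its
  // members can be visited in index order with getFactor()/getMember(),
  // skipping gaps (the same pattern is used by
  // InterleavedAccessInfo::releaseGroup below):
  //
  //   for (unsigned I = 0; I < Group->getFactor(); ++I)
  //     if (Instruction *Member = Group->getMember(I))
  //       ...;  // process Member
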
  /// Add metadata (e.g. alias info) from the instructions in this group to \p
  /// NewInst.
  ///
  /// FIXME: this function currently does not add noalias metadata a la
  /// addNewMetadata. To do that we need to compute the intersection of the
  /// noalias info from all members.
  void addMetadata(InstTy *NewInst) const;

  /// Returns true if this Group requires a scalar iteration to handle gaps.
  bool requiresScalarEpilogue() const {
    // If the last member of the Group exists, then a scalar epilogue is not
    // needed for this group.
    if (getMember(getFactor() - 1))
      return false;

    // We have a group with gaps. It therefore cannot be a group of stores,
    // and it can't be a reversed access, because such groups get invalidated.
    assert(!getMember(0)->mayWriteToMemory() &&
           "Group should have been invalidated");
    assert(!isReverse() && "Group should have been invalidated");

    // This is a group of loads, with gaps, and without a last member.
    return true;
  }

private:
  unsigned Factor; // Interleave Factor.
  bool Reverse;
  unsigned Align;
  DenseMap<int, InstTy *> Members;
  int SmallestKey = 0;
  int LargestKey = 0;

  // To avoid breaking dependences, vectorized instructions of an interleave
  // group should be inserted at either the first load or the last store in
  // program order.
  //
  // E.g. %even = load i32             // Insert Position
  //      %add = add i32 %even         // Use of %even
  //      %odd = load i32
  //
  //      store i32 %even
  //      %odd = add i32               // Def of %odd
  //      store i32 %odd               // Insert Position
  InstTy *InsertPos;
};

/// Drive the analysis of interleaved memory accesses in the loop.
///
/// Use this class to analyze interleaved accesses only when we can vectorize
/// a loop. Otherwise it's meaningless to do the analysis, as vectorizing
/// interleaved accesses would be unsafe.
///
/// The analysis collects interleave groups and records the relationships
/// between the members and their groups in a map.
class InterleavedAccessInfo {
public:
  InterleavedAccessInfo(PredicatedScalarEvolution &PSE, Loop *L,
                        DominatorTree *DT, LoopInfo *LI,
                        const LoopAccessInfo *LAI)
      : PSE(PSE), TheLoop(L), DT(DT), LI(LI), LAI(LAI) {}

  ~InterleavedAccessInfo() { reset(); }

  /// Analyze the interleaved accesses and collect them in interleave
  /// groups. Symbolic strides are substituted during the analysis.
  /// Predicated loads/stores are also considered in the analysis if
  /// \p EnableMaskedInterleavedGroup is true.
  void analyzeInterleaving(bool EnableMaskedInterleavedGroup);

  /// Invalidate groups, e.g., in case all blocks in the loop will be
  /// predicated, contrary to the original assumption. Although we currently
  /// prevent group formation for predicated accesses, we may be able to relax
  /// this limitation in the future once we handle more complicated blocks.
  void reset() {
    SmallPtrSet<InterleaveGroup<Instruction> *, 4> DelSet;
    // Avoid releasing a pointer twice.
    for (auto &I : InterleaveGroupMap)
      DelSet.insert(I.second);
    for (auto *Ptr : DelSet)
      delete Ptr;
    InterleaveGroupMap.clear();
    RequiresScalarEpilogue = false;
  }
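
  // A minimal usage sketch (hypothetical variables, assuming valid PSE, DT,
  // LI and LAI analyses for the loop L):
  //
  //   InterleavedAccessInfo IAI(PSE, L, DT, LI, LAI);
  //   IAI.analyzeInterleaving(/*EnableMaskedInterleavedGroup=*/false);
  //   if (auto *Group = IAI.getInterleaveGroup(SomeLoadOrStore))
  //     ...;  // SomeLoadOrStore is a member of Group
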
  /// Check if \p Instr belongs to any interleave group.
  bool isInterleaved(Instruction *Instr) const {
    return InterleaveGroupMap.find(Instr) != InterleaveGroupMap.end();
  }

  /// Get the interleave group that \p Instr belongs to.
  ///
  /// \returns nullptr if \p Instr does not belong to any group.
  InterleaveGroup<Instruction> *
  getInterleaveGroup(const Instruction *Instr) const {
    if (InterleaveGroupMap.count(Instr))
      return InterleaveGroupMap.find(Instr)->second;
    return nullptr;
  }

  iterator_range<SmallPtrSetIterator<llvm::InterleaveGroup<Instruction> *>>
  getInterleaveGroups() {
    return make_range(InterleaveGroups.begin(), InterleaveGroups.end());
  }

  /// Returns true if an interleaved group that may access memory
  /// out-of-bounds requires a scalar epilogue iteration for correctness.
  bool requiresScalarEpilogue() const { return RequiresScalarEpilogue; }

  /// Invalidate groups that require a scalar epilogue (due to gaps). This can
  /// happen when optimizing for size forbids a scalar epilogue, and the gap
  /// cannot be filtered by masking the load/store.
  void invalidateGroupsRequiringScalarEpilogue();

private:
  /// A wrapper around ScalarEvolution, used to add runtime SCEV checks.
  /// Simplifies SCEV expressions in the context of existing SCEV assumptions.
  /// The interleaved access analysis can also add new predicates (for example
  /// by versioning strides of pointers).
  PredicatedScalarEvolution &PSE;

  Loop *TheLoop;
  DominatorTree *DT;
  LoopInfo *LI;
  const LoopAccessInfo *LAI;

  /// True if the loop may contain non-reversed interleaved groups with
  /// out-of-bounds accesses. We ensure we don't speculatively access memory
  /// out-of-bounds by executing at least one scalar epilogue iteration.
  bool RequiresScalarEpilogue = false;

  /// Holds the relationships between the members and the interleave group.
  DenseMap<Instruction *, InterleaveGroup<Instruction> *> InterleaveGroupMap;

  SmallPtrSet<InterleaveGroup<Instruction> *, 4> InterleaveGroups;

  /// Holds dependences among the memory accesses in the loop. It maps a
  /// source access to a set of dependent sink accesses.
  DenseMap<Instruction *, SmallPtrSet<Instruction *, 2>> Dependences;

  /// The descriptor for a strided memory access.
  struct StrideDescriptor {
    StrideDescriptor() = default;
    StrideDescriptor(int64_t Stride, const SCEV *Scev, uint64_t Size,
                     unsigned Align)
        : Stride(Stride), Scev(Scev), Size(Size), Align(Align) {}

    // The access's stride. It is negative for a reverse access.
    int64_t Stride = 0;

    // The scalar expression of this access.
    const SCEV *Scev = nullptr;

    // The size of the memory object.
    uint64_t Size = 0;

    // The alignment of this access.
    unsigned Align = 0;
  };

  /// A type for holding instructions and their stride descriptors.
  using StrideEntry = std::pair<Instruction *, StrideDescriptor>;

  /// Create a new interleave group with the given instruction \p Instr,
  /// stride \p Stride and alignment \p Align.
  ///
  /// \returns the newly created interleave group.
  InterleaveGroup<Instruction> *
  createInterleaveGroup(Instruction *Instr, int Stride, unsigned Align) {
    assert(!InterleaveGroupMap.count(Instr) &&
           "Already in an interleaved access group");
    InterleaveGroupMap[Instr] =
        new InterleaveGroup<Instruction>(Instr, Stride, Align);
    InterleaveGroups.insert(InterleaveGroupMap[Instr]);
    return InterleaveGroupMap[Instr];
  }

  /// Release the group and remove all the relationships.
  void releaseGroup(InterleaveGroup<Instruction> *Group) {
    for (unsigned i = 0; i < Group->getFactor(); i++)
      if (Instruction *Member = Group->getMember(i))
        InterleaveGroupMap.erase(Member);

    InterleaveGroups.erase(Group);
    delete Group;
  }

  /// Collect all the accesses with a constant stride in program order.
  void collectConstStrideAccesses(
      MapVector<Instruction *, StrideDescriptor> &AccessStrideInfo,
      const ValueToValueMap &Strides);

  /// Returns true if \p Stride is allowed in an interleaved group.
  static bool isStrided(int Stride);

  /// Returns true if \p BB is a predicated block.
  bool isPredicated(BasicBlock *BB) const {
    return LoopAccessInfo::blockNeedsPredication(BB, TheLoop, DT);
  }

  /// Returns true if LoopAccessInfo can be used for dependence queries.
  bool areDependencesValid() const {
    return LAI && LAI->getDepChecker().getDependences();
  }

  /// Returns true if memory accesses \p A and \p B can be reordered, if
  /// necessary, when constructing interleaved groups.
  ///
  /// \p A must precede \p B in program order. We return true if reordering is
  /// unnecessary or known to be safe, and false if \p A and \p B may be
  /// dependent.
  bool canReorderMemAccessesForInterleavedGroups(StrideEntry *A,
                                                 StrideEntry *B) const {
    // Code motion for interleaved accesses can potentially hoist strided loads
    // and sink strided stores. The code below checks the legality of the
    // following two conditions:
    //
    // 1. Potentially moving a strided load (B) before any store (A) that
    //    precedes B, or
    //
    // 2. Potentially moving a strided store (A) after any load or store (B)
    //    that A precedes.
    //
    // It's legal to reorder A and B if we know there isn't a dependence from A
    // to B. Note that this determination is conservative since some
    // dependences could potentially be reordered safely.

    // A is potentially the source of a dependence.
    auto *Src = A->first;
    auto SrcDes = A->second;

    // B is potentially the sink of a dependence.
    auto *Sink = B->first;
    auto SinkDes = B->second;

    // Code motion for interleaved accesses can't violate WAR dependences.
    // Thus, reordering is legal if the source isn't a write.
    if (!Src->mayWriteToMemory())
      return true;

    // At least one of the accesses must be strided.
    if (!isStrided(SrcDes.Stride) && !isStrided(SinkDes.Stride))
      return true;

    // If dependence information is not available from LoopAccessInfo,
    // conservatively assume the instructions can't be reordered.
    if (!areDependencesValid())
      return false;

    // If we know there is a dependence from source to sink, assume the
    // instructions can't be reordered. Otherwise, reordering is legal.
    return Dependences.find(Src) == Dependences.end() ||
           !Dependences.lookup(Src).count(Sink);
  }

  /// Collect the dependences from LoopAccessInfo.
  ///
  /// We process the dependences once during the interleaved access analysis to
  /// enable constant-time dependence queries.
  void collectDependences() {
    if (!areDependencesValid())
      return;
    auto *Deps = LAI->getDepChecker().getDependences();
    for (auto Dep : *Deps)
      Dependences[Dep.getSource(*LAI)].insert(Dep.getDestination(*LAI));
  }
};

} // llvm namespace

#endif