//===-- AtomicExpandPass.cpp - Expand atomic instructions ----------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains a pass (at IR level) to replace atomic instructions with
// either (intrinsic-based) load-linked/store-conditional loops or
// AtomicCmpXchg.
//
//===----------------------------------------------------------------------===//

#include "llvm/CodeGen/Passes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetSubtargetInfo.h"

using namespace llvm;

#define DEBUG_TYPE "atomic-expand"

namespace {
class AtomicExpand : public FunctionPass {
  const TargetMachine *TM;

public:
  static char ID; // Pass identification, replacement for typeid
  explicit AtomicExpand(const TargetMachine *TM = nullptr)
      : FunctionPass(ID), TM(TM) {
    initializeAtomicExpandPass(*PassRegistry::getPassRegistry());
  }

  bool runOnFunction(Function &F) override;

private:
  bool bracketInstWithFences(Instruction *I, AtomicOrdering Order,
                             bool IsStore, bool IsLoad);
  bool expandAtomicLoad(LoadInst *LI);
  bool expandAtomicLoadToLL(LoadInst *LI);
  bool expandAtomicLoadToCmpXchg(LoadInst *LI);
  bool expandAtomicStore(StoreInst *SI);
  bool expandAtomicRMW(AtomicRMWInst *AI);
  bool expandAtomicRMWToLLSC(AtomicRMWInst *AI);
  bool expandAtomicRMWToCmpXchg(AtomicRMWInst *AI);
  bool expandAtomicCmpXchg(AtomicCmpXchgInst *CI);
  bool isIdempotentRMW(AtomicRMWInst *AI);
  bool simplifyIdempotentRMW(AtomicRMWInst *AI);
};
}

char AtomicExpand::ID = 0;
char &llvm::AtomicExpandID = AtomicExpand::ID;
INITIALIZE_TM_PASS(AtomicExpand, "atomic-expand",
                   "Expand Atomic calls in terms of either load-linked & "
                   "store-conditional or cmpxchg",
                   false, false)

FunctionPass *llvm::createAtomicExpandPass(const TargetMachine *TM) {
  return new AtomicExpand(TM);
}

bool AtomicExpand::runOnFunction(Function &F) {
  if (!TM || !TM->getSubtargetImpl()->enableAtomicExpand())
    return false;
  auto TargetLowering = TM->getSubtargetImpl()->getTargetLowering();

  SmallVector<Instruction *, 1> AtomicInsts;

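  // Note that Instruction::isAtomic() also matches fence instructions, so
  // fences are collected here too; they pass through the processing loop
  // below unchanged (only the assert acknowledges them).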
  // Changing control-flow while iterating through the function is a bad idea,
  // so gather a list of all atomic instructions before we start.
  for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I) {
    if (I->isAtomic())
      AtomicInsts.push_back(&*I);
  }

  bool MadeChange = false;
  for (auto I : AtomicInsts) {
    auto LI = dyn_cast<LoadInst>(I);
    auto SI = dyn_cast<StoreInst>(I);
    auto RMWI = dyn_cast<AtomicRMWInst>(I);
    auto CASI = dyn_cast<AtomicCmpXchgInst>(I);
    assert((LI || SI || RMWI || CASI || isa<FenceInst>(I)) &&
           "Unknown atomic instruction");

    auto FenceOrdering = Monotonic;
    bool IsStore, IsLoad;
    if (TargetLowering->getInsertFencesForAtomic()) {
      if (LI && isAtLeastAcquire(LI->getOrdering())) {
        FenceOrdering = LI->getOrdering();
        LI->setOrdering(Monotonic);
        IsStore = false;
        IsLoad = true;
      } else if (SI && isAtLeastRelease(SI->getOrdering())) {
        FenceOrdering = SI->getOrdering();
        SI->setOrdering(Monotonic);
        IsStore = true;
        IsLoad = false;
      } else if (RMWI && (isAtLeastRelease(RMWI->getOrdering()) ||
                          isAtLeastAcquire(RMWI->getOrdering()))) {
        FenceOrdering = RMWI->getOrdering();
        RMWI->setOrdering(Monotonic);
        IsStore = IsLoad = true;
      } else if (CASI && !TargetLowering->hasLoadLinkedStoreConditional() &&
                 (isAtLeastRelease(CASI->getSuccessOrdering()) ||
                  isAtLeastAcquire(CASI->getSuccessOrdering()))) {
        // If a compare and swap is lowered to LL/SC, we can do smarter fence
        // insertion, with a stronger one on the success path than on the
        // failure path. As a result, fence insertion is directly done by
        // expandAtomicCmpXchg in that case.
        FenceOrdering = CASI->getSuccessOrdering();
        CASI->setSuccessOrdering(Monotonic);
        CASI->setFailureOrdering(Monotonic);
        IsStore = IsLoad = true;
      }

      if (FenceOrdering != Monotonic) {
        MadeChange |= bracketInstWithFences(I, FenceOrdering, IsStore, IsLoad);
      }
    }

    if (LI && TargetLowering->shouldExpandAtomicLoadInIR(LI)) {
      MadeChange |= expandAtomicLoad(LI);
    } else if (SI && TargetLowering->shouldExpandAtomicStoreInIR(SI)) {
      MadeChange |= expandAtomicStore(SI);
    } else if (RMWI) {
      // There are two different ways of expanding RMW instructions:
      //   - into a load if it is idempotent
      //   - into a Cmpxchg/LL-SC loop otherwise
      // We try them in that order.
      MadeChange |= (isIdempotentRMW(RMWI) && simplifyIdempotentRMW(RMWI)) ||
                    (TargetLowering->shouldExpandAtomicRMWInIR(RMWI) &&
                     expandAtomicRMW(RMWI));
    } else if (CASI && TargetLowering->hasLoadLinkedStoreConditional()) {
      MadeChange |= expandAtomicCmpXchg(CASI);
    }
  }
  return MadeChange;
}

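// For targets that return true from getInsertFencesForAtomic(), runOnFunction
// has already downgraded the access to monotonic; the bracketing below then
// surrounds it with target-chosen fences. As a rough, illustrative sketch
// (the exact fences emitted are target-dependent):
//   %val = load atomic i32* %ptr acquire, align 4
// becomes something like:
//   %val = load atomic i32* %ptr monotonic, align 4
//   fence acquire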
bool AtomicExpand::bracketInstWithFences(Instruction *I, AtomicOrdering Order,
                                         bool IsStore, bool IsLoad) {
  IRBuilder<> Builder(I);

  auto LeadingFence =
      TM->getSubtargetImpl()->getTargetLowering()->emitLeadingFence(
          Builder, Order, IsStore, IsLoad);

  auto TrailingFence =
      TM->getSubtargetImpl()->getTargetLowering()->emitTrailingFence(
          Builder, Order, IsStore, IsLoad);
  // The trailing fence is emitted before the instruction instead of after,
  // because there is no easy way of setting the Builder's insertion point
  // after an instruction. So we must erase it from the BB, and insert it back
  // in the right place.
  // We have a guard here because not every atomic operation generates a
  // trailing fence.
  if (TrailingFence) {
    TrailingFence->removeFromParent();
    TrailingFence->insertAfter(I);
  }

  return (LeadingFence || TrailingFence);
}

bool AtomicExpand::expandAtomicLoad(LoadInst *LI) {
  if (TM->getSubtargetImpl()
          ->getTargetLowering()
          ->hasLoadLinkedStoreConditional())
    return expandAtomicLoadToLL(LI);
  else
    return expandAtomicLoadToCmpXchg(LI);
}

bool AtomicExpand::expandAtomicLoadToLL(LoadInst *LI) {
  auto TLI = TM->getSubtargetImpl()->getTargetLowering();
  IRBuilder<> Builder(LI);

  // On some architectures, load-linked instructions are atomic for larger
  // sizes than normal loads. For example, the only 64-bit load guaranteed
  // to be single-copy atomic by ARM is an ldrexd (A3.5.3).
  Value *Val =
      TLI->emitLoadLinked(Builder, LI->getPointerOperand(), LI->getOrdering());

  LI->replaceAllUsesWith(Val);
  LI->eraseFromParent();

  return true;
}

bool AtomicExpand::expandAtomicLoadToCmpXchg(LoadInst *LI) {
  IRBuilder<> Builder(LI);
  AtomicOrdering Order = LI->getOrdering();
  Value *Addr = LI->getPointerOperand();
  Type *Ty = cast<PointerType>(Addr->getType())->getElementType();
  Constant *DummyVal = Constant::getNullValue(Ty);

  // A cmpxchg whose expected and new values are both null leaves memory
  // unchanged on every path, but still returns the value that was loaded.
  Value *Pair = Builder.CreateAtomicCmpXchg(
      Addr, DummyVal, DummyVal, Order,
      AtomicCmpXchgInst::getStrongestFailureOrdering(Order));
  Value *Loaded = Builder.CreateExtractValue(Pair, 0, "loaded");

  LI->replaceAllUsesWith(Loaded);
  LI->eraseFromParent();

  return true;
}

bool AtomicExpand::expandAtomicStore(StoreInst *SI) {
  // This function is only called on atomic stores that are too large to be
  // atomic if implemented as a native store. So we replace them with an
  // atomic swap, which can be implemented for example as a ldrex/strex on ARM
  // or lock cmpxchg8/16b on X86, as these are atomic for larger sizes.
  // It is the responsibility of the target to only return true in
  // shouldExpandAtomicStoreInIR in cases where this is required and possible.
  IRBuilder<> Builder(SI);
  AtomicRMWInst *AI =
      Builder.CreateAtomicRMW(AtomicRMWInst::Xchg, SI->getPointerOperand(),
                              SI->getValueOperand(), SI->getOrdering());
  SI->eraseFromParent();

  // Now we have an appropriate swap instruction, lower it as usual.
  return expandAtomicRMW(AI);
}

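// Illustrative sketch of the store expansion above: on a target that opts in,
//   store atomic i64 %val, i64* %ptr seq_cst, align 8
// becomes an equivalent swap whose loaded result is simply discarded:
//   atomicrmw xchg i64* %ptr, i64 %val seq_cst
// which is then lowered like any other atomicrmw.
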
bool AtomicExpand::expandAtomicRMW(AtomicRMWInst *AI) {
  if (TM->getSubtargetImpl()
          ->getTargetLowering()
          ->hasLoadLinkedStoreConditional())
    return expandAtomicRMWToLLSC(AI);
  else
    return expandAtomicRMWToCmpXchg(AI);
}

/// Emit IR to implement the given atomicrmw operation on values in registers,
/// returning the new value.
static Value *performAtomicOp(AtomicRMWInst::BinOp Op, IRBuilder<> &Builder,
                              Value *Loaded, Value *Inc) {
  Value *NewVal;
  switch (Op) {
  case AtomicRMWInst::Xchg:
    return Inc;
  case AtomicRMWInst::Add:
    return Builder.CreateAdd(Loaded, Inc, "new");
  case AtomicRMWInst::Sub:
    return Builder.CreateSub(Loaded, Inc, "new");
  case AtomicRMWInst::And:
    return Builder.CreateAnd(Loaded, Inc, "new");
  case AtomicRMWInst::Nand:
    return Builder.CreateNot(Builder.CreateAnd(Loaded, Inc), "new");
  case AtomicRMWInst::Or:
    return Builder.CreateOr(Loaded, Inc, "new");
  case AtomicRMWInst::Xor:
    return Builder.CreateXor(Loaded, Inc, "new");
  case AtomicRMWInst::Max:
    NewVal = Builder.CreateICmpSGT(Loaded, Inc);
    return Builder.CreateSelect(NewVal, Loaded, Inc, "new");
  case AtomicRMWInst::Min:
    NewVal = Builder.CreateICmpSLE(Loaded, Inc);
    return Builder.CreateSelect(NewVal, Loaded, Inc, "new");
  case AtomicRMWInst::UMax:
    NewVal = Builder.CreateICmpUGT(Loaded, Inc);
    return Builder.CreateSelect(NewVal, Loaded, Inc, "new");
  case AtomicRMWInst::UMin:
    NewVal = Builder.CreateICmpULE(Loaded, Inc);
    return Builder.CreateSelect(NewVal, Loaded, Inc, "new");
  default:
    llvm_unreachable("Unknown atomic op");
  }
}

bool AtomicExpand::expandAtomicRMWToLLSC(AtomicRMWInst *AI) {
  auto TLI = TM->getSubtargetImpl()->getTargetLowering();
  AtomicOrdering MemOpOrder = AI->getOrdering();
  Value *Addr = AI->getPointerOperand();
  BasicBlock *BB = AI->getParent();
  Function *F = BB->getParent();
  LLVMContext &Ctx = F->getContext();

  // Given: atomicrmw some_op iN* %addr, iN %incr ordering
  //
  // The standard expansion we produce is:
  //     [...]
  //     fence?
  // atomicrmw.start:
  //     %loaded = @load.linked(%addr)
  //     %new = some_op iN %loaded, %incr
  //     %stored = @store_conditional(%new, %addr)
  //     %try_again = icmp i32 ne %stored, 0
  //     br i1 %try_again, label %atomicrmw.start, label %atomicrmw.end
  // atomicrmw.end:
  //     fence?
  //     [...]
  BasicBlock *ExitBB = BB->splitBasicBlock(AI, "atomicrmw.end");
  BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB);

  // This grabs the DebugLoc from AI.
  IRBuilder<> Builder(AI);

  // The split call above "helpfully" added a branch at the end of BB (to the
  // wrong place), but we might want a fence too. It's easiest to just remove
  // the branch entirely.
  std::prev(BB->end())->eraseFromParent();
  Builder.SetInsertPoint(BB);
  Builder.CreateBr(LoopBB);

  // Start the main loop block now that we've taken care of the preliminaries.
  Builder.SetInsertPoint(LoopBB);
  Value *Loaded = TLI->emitLoadLinked(Builder, Addr, MemOpOrder);

  Value *NewVal =
      performAtomicOp(AI->getOperation(), Builder, Loaded, AI->getValOperand());

  Value *StoreSuccess =
      TLI->emitStoreConditional(Builder, NewVal, Addr, MemOpOrder);
  Value *TryAgain = Builder.CreateICmpNE(
      StoreSuccess, ConstantInt::get(IntegerType::get(Ctx, 32), 0), "tryagain");
  Builder.CreateCondBr(TryAgain, LoopBB, ExitBB);

  Builder.SetInsertPoint(ExitBB, ExitBB->begin());

  AI->replaceAllUsesWith(Loaded);
  AI->eraseFromParent();

  return true;
}

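// Note on the ordering adjustment below: a cmpxchg instruction may not be
// marked unordered, so an unordered atomicrmw ordering (handled defensively
// here) is promoted to monotonic before being reused for the loop's cmpxchg.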
bool AtomicExpand::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI) {
  AtomicOrdering MemOpOrder =
      AI->getOrdering() == Unordered ? Monotonic : AI->getOrdering();
  Value *Addr = AI->getPointerOperand();
  BasicBlock *BB = AI->getParent();
  Function *F = BB->getParent();
  LLVMContext &Ctx = F->getContext();

  // Given: atomicrmw some_op iN* %addr, iN %incr ordering
  //
  // The standard expansion we produce is:
  //     [...]
  //     %init_loaded = load atomic iN* %addr
  //     br label %loop
  // loop:
  //     %loaded = phi iN [ %init_loaded, %entry ], [ %new_loaded, %loop ]
  //     %new = some_op iN %loaded, %incr
  //     %pair = cmpxchg iN* %addr, iN %loaded, iN %new
  //     %new_loaded = extractvalue { iN, i1 } %pair, 0
  //     %success = extractvalue { iN, i1 } %pair, 1
  //     br i1 %success, label %atomicrmw.end, label %loop
  // atomicrmw.end:
  //     [...]
  BasicBlock *ExitBB = BB->splitBasicBlock(AI, "atomicrmw.end");
  BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB);

  // This grabs the DebugLoc from AI.
  IRBuilder<> Builder(AI);

  // The split call above "helpfully" added a branch at the end of BB (to the
  // wrong place), but we want a load. It's easiest to just remove the branch
  // entirely.
  std::prev(BB->end())->eraseFromParent();
  Builder.SetInsertPoint(BB);
  LoadInst *InitLoaded = Builder.CreateLoad(Addr);
  // Atomics require at least natural alignment (note that setAlignment takes
  // bytes, not bits).
  InitLoaded->setAlignment(AI->getType()->getPrimitiveSizeInBits() / 8);
  Builder.CreateBr(LoopBB);

  // Start the main loop block now that we've taken care of the preliminaries.
  Builder.SetInsertPoint(LoopBB);
  PHINode *Loaded = Builder.CreatePHI(AI->getType(), 2, "loaded");
  Loaded->addIncoming(InitLoaded, BB);

  Value *NewVal =
      performAtomicOp(AI->getOperation(), Builder, Loaded, AI->getValOperand());

  Value *Pair = Builder.CreateAtomicCmpXchg(
      Addr, Loaded, NewVal, MemOpOrder,
      AtomicCmpXchgInst::getStrongestFailureOrdering(MemOpOrder));
  Value *NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded");
  Loaded->addIncoming(NewLoaded, LoopBB);

  Value *Success = Builder.CreateExtractValue(Pair, 1, "success");
  Builder.CreateCondBr(Success, ExitBB, LoopBB);

  Builder.SetInsertPoint(ExitBB, ExitBB->begin());

  AI->replaceAllUsesWith(NewLoaded);
  AI->eraseFromParent();

  return true;
}

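// Expand a cmpxchg into an explicit load-linked/store-conditional loop. This
// is only reached when the target reports hasLoadLinkedStoreConditional()
// (see the gating in runOnFunction), so the emitLoadLinked and
// emitStoreConditional hooks used below are meaningful.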
bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
  auto TLI = TM->getSubtargetImpl()->getTargetLowering();
  AtomicOrdering SuccessOrder = CI->getSuccessOrdering();
  AtomicOrdering FailureOrder = CI->getFailureOrdering();
  Value *Addr = CI->getPointerOperand();
  BasicBlock *BB = CI->getParent();
  Function *F = BB->getParent();
  LLVMContext &Ctx = F->getContext();
  // If getInsertFencesForAtomic() returns true, then the target does not want
  // to deal with memory orders, and emitLeading/TrailingFence should take care
  // of everything. Otherwise, emitLeading/TrailingFence are no-ops and we
  // should preserve the ordering.
  AtomicOrdering MemOpOrder =
      TLI->getInsertFencesForAtomic() ? Monotonic : SuccessOrder;

  // Given: cmpxchg iN* %addr, iN %desired, iN %new success_ord fail_ord
  //
  // The full expansion we produce is:
  //     [...]
  //     fence?
  // cmpxchg.start:
  //     %loaded = @load.linked(%addr)
  //     %should_store = icmp eq %loaded, %desired
  //     br i1 %should_store, label %cmpxchg.trystore,
  //                          label %cmpxchg.failure
  // cmpxchg.trystore:
  //     %stored = @store_conditional(%new, %addr)
  //     %success = icmp eq i32 %stored, 0
  //     br i1 %success, label %cmpxchg.success,
  //                     label %cmpxchg.start/%cmpxchg.failure
  // cmpxchg.success:
  //     fence?
  //     br label %cmpxchg.end
  // cmpxchg.failure:
  //     fence?
  //     br label %cmpxchg.end
  // cmpxchg.end:
  //     %success = phi i1 [true, %cmpxchg.success], [false, %cmpxchg.failure]
  //     %restmp = insertvalue { iN, i1 } undef, iN %loaded, 0
  //     %res = insertvalue { iN, i1 } %restmp, i1 %success, 1
  //     [...]
  BasicBlock *ExitBB = BB->splitBasicBlock(CI, "cmpxchg.end");
  auto FailureBB = BasicBlock::Create(Ctx, "cmpxchg.failure", F, ExitBB);
  auto SuccessBB = BasicBlock::Create(Ctx, "cmpxchg.success", F, FailureBB);
  auto TryStoreBB = BasicBlock::Create(Ctx, "cmpxchg.trystore", F, SuccessBB);
  auto LoopBB = BasicBlock::Create(Ctx, "cmpxchg.start", F, TryStoreBB);

  // This grabs the DebugLoc from CI.
  IRBuilder<> Builder(CI);

  // The split call above "helpfully" added a branch at the end of BB (to the
  // wrong place), but we might want a fence too. It's easiest to just remove
  // the branch entirely.
  std::prev(BB->end())->eraseFromParent();
  Builder.SetInsertPoint(BB);
  TLI->emitLeadingFence(Builder, SuccessOrder, /*IsStore=*/true,
                        /*IsLoad=*/true);
  Builder.CreateBr(LoopBB);

  // Start the main loop block now that we've taken care of the preliminaries.
  Builder.SetInsertPoint(LoopBB);
  Value *Loaded = TLI->emitLoadLinked(Builder, Addr, MemOpOrder);
  Value *ShouldStore =
      Builder.CreateICmpEQ(Loaded, CI->getCompareOperand(), "should_store");

  // If the cmpxchg doesn't actually need any ordering when it fails, we can
  // jump straight past that fence instruction (if it exists).
  Builder.CreateCondBr(ShouldStore, TryStoreBB, FailureBB);

  Builder.SetInsertPoint(TryStoreBB);
  Value *StoreSuccess = TLI->emitStoreConditional(
      Builder, CI->getNewValOperand(), Addr, MemOpOrder);
  StoreSuccess = Builder.CreateICmpEQ(
      StoreSuccess, ConstantInt::get(Type::getInt32Ty(Ctx), 0), "success");
  // A weak cmpxchg is allowed to fail spuriously, so a failed
  // store-conditional can fail the whole operation instead of retrying.
  Builder.CreateCondBr(StoreSuccess, SuccessBB,
                       CI->isWeak() ? FailureBB : LoopBB);

  // Make sure later instructions don't get reordered with a fence if
  // necessary.
  Builder.SetInsertPoint(SuccessBB);
  TLI->emitTrailingFence(Builder, SuccessOrder, /*IsStore=*/true,
                         /*IsLoad=*/true);
  Builder.CreateBr(ExitBB);

  Builder.SetInsertPoint(FailureBB);
  TLI->emitTrailingFence(Builder, FailureOrder, /*IsStore=*/true,
                         /*IsLoad=*/true);
  Builder.CreateBr(ExitBB);

  // Finally, we have control-flow based knowledge of whether the cmpxchg
  // succeeded or not. We expose this to later passes by converting any
  // subsequent "icmp eq/ne %loaded, %oldval" into a use of an appropriate PHI.

  // Setup the builder so we can create any PHIs we need.
  Builder.SetInsertPoint(ExitBB, ExitBB->begin());
  PHINode *Success = Builder.CreatePHI(Type::getInt1Ty(Ctx), 2);
  Success->addIncoming(ConstantInt::getTrue(Ctx), SuccessBB);
  Success->addIncoming(ConstantInt::getFalse(Ctx), FailureBB);

  // Look for any users of the cmpxchg that are just comparing the loaded value
  // against the desired one, and replace them with the CFG-derived version.
  SmallVector<ExtractValueInst *, 2> PrunedInsts;
  for (auto User : CI->users()) {
    ExtractValueInst *EV = dyn_cast<ExtractValueInst>(User);
    if (!EV)
      continue;

    assert(EV->getNumIndices() == 1 && EV->getIndices()[0] <= 1 &&
           "weird extraction from { iN, i1 }");

    if (EV->getIndices()[0] == 0)
      EV->replaceAllUsesWith(Loaded);
    else
      EV->replaceAllUsesWith(Success);

    PrunedInsts.push_back(EV);
  }

  // We can remove the instructions now that we're no longer iterating through
  // them.
  for (auto EV : PrunedInsts)
    EV->eraseFromParent();

  if (!CI->use_empty()) {
    // Some use of the full struct return that we don't understand has
    // happened, so we've got to reconstruct it properly.
    Value *Res;
    Res = Builder.CreateInsertValue(UndefValue::get(CI->getType()), Loaded, 0);
    Res = Builder.CreateInsertValue(Res, Success, 1);

    CI->replaceAllUsesWith(Res);
  }

  CI->eraseFromParent();
  return true;
}

bool AtomicExpand::isIdempotentRMW(AtomicRMWInst *RMWI) {
  auto C = dyn_cast<ConstantInt>(RMWI->getValOperand());
  if (!C)
    return false;

  AtomicRMWInst::BinOp Op = RMWI->getOperation();
  switch (Op) {
  case AtomicRMWInst::Add:
  case AtomicRMWInst::Sub:
  case AtomicRMWInst::Or:
  case AtomicRMWInst::Xor:
    return C->isZero();
  case AtomicRMWInst::And:
    return C->isMinusOne();
  // FIXME: we could also treat Min/Max/UMin/UMax by the INT_MIN/INT_MAX/...
  default:
    return false;
  }
}

bool AtomicExpand::simplifyIdempotentRMW(AtomicRMWInst *RMWI) {
  auto TLI = TM->getSubtargetImpl()->getTargetLowering();

  if (auto ResultingLoad = TLI->lowerIdempotentRMWIntoFencedLoad(RMWI)) {
    if (TLI->shouldExpandAtomicLoadInIR(ResultingLoad))
      expandAtomicLoad(ResultingLoad);
    return true;
  }

  return false;
}