//===- CoroSplit.cpp - Converts a coroutine into a state machine ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// This pass builds the coroutine frame and outlines resume and destroy parts
// of the coroutine into separate functions.
//
// We present a coroutine to LLVM as an ordinary function with suspension
// points marked up with intrinsics. We let the optimizer party on the coroutine
// as a single function for as long as possible. Shortly before the coroutine is
// eligible to be inlined into its callers, we split up the coroutine into parts
// corresponding to the initial, resume and destroy invocations of the
// coroutine, add them to the current SCC and restart the IPO pipeline to
// optimize the coroutine subfunctions we extracted before proceeding to the
// caller of the coroutine.
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Coroutines/CoroSplit.h"
#include "CoroInstr.h"
#include "CoroInternal.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/PriorityWorklist.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/LazyCallGraph.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/Verifier.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/PrettyStackTrace.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/CallGraphUpdater.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <initializer_list>
#include <iterator>

using namespace llvm;

#define DEBUG_TYPE "coro-split"

namespace {

/// A little helper class for building the resume, destroy, and cleanup
/// functions that are cloned from the original coroutine.
class CoroCloner {
public:
  enum class Kind {
    /// The shared resume function for a switch lowering.
    SwitchResume,

    /// The shared unwind function for a switch lowering.
    SwitchUnwind,

    /// The shared cleanup function for a switch lowering.
    SwitchCleanup,

    /// An individual continuation function.
    Continuation,

    /// An async resume function.
    Async,
  };

private:
  Function &OrigF;
  Function *NewF;
  const Twine &Suffix;
  coro::Shape &Shape;
  Kind FKind;
  ValueToValueMapTy VMap;
  IRBuilder<> Builder;
  Value *NewFramePtr = nullptr;

  /// The active suspend instruction; meaningful only for continuation and
  /// async ABIs.
  AnyCoroSuspendInst *ActiveSuspend = nullptr;

public:
  /// Create a cloner for a switch lowering.
  CoroCloner(Function &OrigF, const Twine &Suffix, coro::Shape &Shape,
             Kind FKind)
      : OrigF(OrigF), NewF(nullptr), Suffix(Suffix), Shape(Shape),
        FKind(FKind), Builder(OrigF.getContext()) {
    assert(Shape.ABI == coro::ABI::Switch);
  }

  /// Create a cloner for a continuation lowering.
  CoroCloner(Function &OrigF, const Twine &Suffix, coro::Shape &Shape,
             Function *NewF, AnyCoroSuspendInst *ActiveSuspend)
      : OrigF(OrigF), NewF(NewF), Suffix(Suffix), Shape(Shape),
        FKind(Shape.ABI == coro::ABI::Async ? Kind::Async : Kind::Continuation),
        Builder(OrigF.getContext()), ActiveSuspend(ActiveSuspend) {
    assert(Shape.ABI == coro::ABI::Retcon ||
           Shape.ABI == coro::ABI::RetconOnce || Shape.ABI == coro::ABI::Async);
    assert(NewF && "need existing function for continuation");
    assert(ActiveSuspend && "need active suspend point for continuation");
  }

  Function *getFunction() const {
    assert(NewF != nullptr && "declaration not yet set");
    return NewF;
  }

  void create();

private:
  bool isSwitchDestroyFunction() {
    switch (FKind) {
    case Kind::Async:
    case Kind::Continuation:
    case Kind::SwitchResume:
      return false;
    case Kind::SwitchUnwind:
    case Kind::SwitchCleanup:
      return true;
    }
    llvm_unreachable("Unknown CoroCloner::Kind enum");
  }

  void replaceEntryBlock();
  Value *deriveNewFramePointer();
  void replaceRetconOrAsyncSuspendUses();
  void replaceCoroSuspends();
  void replaceCoroEnds();
  void replaceSwiftErrorOps();
  void salvageDebugInfo();
  void handleFinalSuspend();
};

} // end anonymous namespace

static void maybeFreeRetconStorage(IRBuilder<> &Builder,
                                   const coro::Shape &Shape, Value *FramePtr,
                                   CallGraph *CG) {
  assert(Shape.ABI == coro::ABI::Retcon ||
         Shape.ABI == coro::ABI::RetconOnce);
  if (Shape.RetconLowering.IsFrameInlineInStorage)
    return;

  Shape.emitDealloc(Builder, FramePtr, CG);
}

/// Replace an llvm.coro.end.async.
/// Will inline the must tail call function call if there is one.
/// \returns true if cleanup of the coro.end block is needed, false otherwise.
static bool replaceCoroEndAsync(AnyCoroEndInst *End) {
  IRBuilder<> Builder(End);

  auto *EndAsync = dyn_cast<CoroAsyncEndInst>(End);
  if (!EndAsync) {
    Builder.CreateRetVoid();
    return true /*needs cleanup of coro.end block*/;
  }

  auto *MustTailCallFunc = EndAsync->getMustTailCallFunction();
  if (!MustTailCallFunc) {
    Builder.CreateRetVoid();
    return true /*needs cleanup of coro.end block*/;
  }

  // Move the must tail call from the predecessor block into the end block.
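  // For illustration (block and callee names are hypothetical), the input
  // looks roughly like:
  //
  //   musttail.block:
  //     musttail call void @some.callee(...)
  //     br label %coro.end.block
  //   coro.end.block:
  //     call i1 @llvm.coro.end.async(...)
  //
  // and the musttail call gets spliced in front of the coro.end before the
  // return is emitted.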
  auto *CoroEndBlock = End->getParent();
  auto *MustTailCallFuncBlock = CoroEndBlock->getSinglePredecessor();
  assert(MustTailCallFuncBlock && "Must have a single predecessor block");
  auto It = MustTailCallFuncBlock->getTerminator()->getIterator();
  auto *MustTailCall = cast<CallInst>(&*std::prev(It));
  CoroEndBlock->splice(End->getIterator(), MustTailCallFuncBlock,
                       MustTailCall->getIterator());

  // Insert the return instruction.
  Builder.SetInsertPoint(End);
  Builder.CreateRetVoid();
  InlineFunctionInfo FnInfo;

  // Remove the rest of the block, by splitting it into an unreachable block.
  auto *BB = End->getParent();
  BB->splitBasicBlock(End);
  BB->getTerminator()->eraseFromParent();

  auto InlineRes = InlineFunction(*MustTailCall, FnInfo);
  assert(InlineRes.isSuccess() && "Expected inlining to succeed");
  (void)InlineRes;

  // We have cleaned up the coro.end block above.
  return false;
}

/// Replace a non-unwind call to llvm.coro.end.
static void replaceFallthroughCoroEnd(AnyCoroEndInst *End,
                                      const coro::Shape &Shape, Value *FramePtr,
                                      bool InResume, CallGraph *CG) {
  // Start inserting right before the coro.end.
  IRBuilder<> Builder(End);

  // Create the return instruction.
  switch (Shape.ABI) {
  // The cloned functions in switch-lowering always return void.
  case coro::ABI::Switch:
    // coro.end doesn't immediately end the coroutine in the main function
    // in this lowering, because we need to deallocate the coroutine.
    if (!InResume)
      return;
    Builder.CreateRetVoid();
    break;

  // In async lowering this returns.
  case coro::ABI::Async: {
    bool CoroEndBlockNeedsCleanup = replaceCoroEndAsync(End);
    if (!CoroEndBlockNeedsCleanup)
      return;
    break;
  }

  // In unique continuation lowering, the continuations always return void.
  // But we may have implicitly allocated storage.
  case coro::ABI::RetconOnce:
    maybeFreeRetconStorage(Builder, Shape, FramePtr, CG);
    Builder.CreateRetVoid();
    break;

  // In non-unique continuation lowering, we signal completion by returning
  // a null continuation.
  case coro::ABI::Retcon: {
    maybeFreeRetconStorage(Builder, Shape, FramePtr, CG);
    auto RetTy = Shape.getResumeFunctionType()->getReturnType();
    auto RetStructTy = dyn_cast<StructType>(RetTy);
    PointerType *ContinuationTy =
        cast<PointerType>(RetStructTy ? RetStructTy->getElementType(0) : RetTy);

    Value *ReturnValue = ConstantPointerNull::get(ContinuationTy);
    if (RetStructTy) {
      ReturnValue = Builder.CreateInsertValue(UndefValue::get(RetStructTy),
                                              ReturnValue, 0);
    }
    Builder.CreateRet(ReturnValue);
    break;
  }
  }

  // Remove the rest of the block, by splitting it into an unreachable block.
  auto *BB = End->getParent();
  BB->splitBasicBlock(End);
  BB->getTerminator()->eraseFromParent();
}

// Mark a coroutine as done, which implies that the coroutine is finished and
// never gets resumed.
//
// In the resume-switched ABI, the done state is represented by storing zero in
// ResumeFnAddr.
//
// NOTE: We cannot omit the argument `FramePtr`. It is necessary because the
// pointer to the frame in the split function is not stored in `Shape`.
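//
// A minimal sketch of the emitted IR (frame type and field order are
// illustrative; the real layout is computed when the frame is built):
//
//   %ResumeFn.addr = getelementptr %f.Frame, ptr %FramePtr, i32 0, i32 0
//   store ptr null, ptr %ResumeFn.addr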
static void markCoroutineAsDone(IRBuilder<> &Builder, const coro::Shape &Shape,
                                Value *FramePtr) {
  assert(
      Shape.ABI == coro::ABI::Switch &&
      "markCoroutineAsDone is only supported for Switch-Resumed ABI for now.");
  auto *GepIndex = Builder.CreateStructGEP(
      Shape.FrameTy, FramePtr, coro::Shape::SwitchFieldIndex::Resume,
      "ResumeFn.addr");
  auto *NullPtr = ConstantPointerNull::get(cast<PointerType>(
      Shape.FrameTy->getTypeAtIndex(coro::Shape::SwitchFieldIndex::Resume)));
  Builder.CreateStore(NullPtr, GepIndex);

  // If the coroutine doesn't have an unwind coro.end, we can omit the store to
  // the index for the final suspend point, since we can infer that the
  // coroutine is suspended at the final suspend point from the nullness of
  // ResumeFnAddr alone. However, we can't skip it if the coroutine has an
  // unwind coro.end: a coroutine that reaches an unwind coro.end is considered
  // suspended at the final suspend point (its ResumeFnAddr is null) even
  // though it hasn't actually completed yet. We need the IndexVal for the
  // final suspend point to keep the states distinct.
  if (Shape.SwitchLowering.HasUnwindCoroEnd &&
      Shape.SwitchLowering.HasFinalSuspend) {
    assert(cast<CoroSuspendInst>(Shape.CoroSuspends.back())->isFinal() &&
           "The final suspend should only live in the last position of "
           "CoroSuspends.");
    ConstantInt *IndexVal = Shape.getIndex(Shape.CoroSuspends.size() - 1);
    auto *FinalIndex = Builder.CreateStructGEP(
        Shape.FrameTy, FramePtr, Shape.getSwitchIndexField(), "index.addr");

    Builder.CreateStore(IndexVal, FinalIndex);
  }
}

/// Replace an unwind call to llvm.coro.end.
static void replaceUnwindCoroEnd(AnyCoroEndInst *End, const coro::Shape &Shape,
                                 Value *FramePtr, bool InResume,
                                 CallGraph *CG) {
  IRBuilder<> Builder(End);

  switch (Shape.ABI) {
  // In switch-lowering, this does nothing in the main function.
  case coro::ABI::Switch: {
    // In C++'s specification, the coroutine should be marked as done
    // if promise.unhandled_exception() throws. The frontend will
    // call coro.end(true) along this path.
    //
    // FIXME: We should refactor this once there is another language
    // that uses the Switch-Resumed style other than C++.
    markCoroutineAsDone(Builder, Shape, FramePtr);
    if (!InResume)
      return;
    break;
  }
  // In async lowering this does nothing.
  case coro::ABI::Async:
    break;
  // In continuation-lowering, this frees the continuation storage.
  case coro::ABI::Retcon:
  case coro::ABI::RetconOnce:
    maybeFreeRetconStorage(Builder, Shape, FramePtr, CG);
    break;
  }

  // If coro.end has an associated bundle, add a cleanupret instruction.
  if (auto Bundle = End->getOperandBundle(LLVMContext::OB_funclet)) {
    auto *FromPad = cast<CleanupPadInst>(Bundle->Inputs[0]);
    auto *CleanupRet = Builder.CreateCleanupRet(FromPad, nullptr);
    End->getParent()->splitBasicBlock(End);
    CleanupRet->getParent()->getTerminator()->eraseFromParent();
  }
}

static void replaceCoroEnd(AnyCoroEndInst *End, const coro::Shape &Shape,
                           Value *FramePtr, bool InResume, CallGraph *CG) {
  if (End->isUnwind())
    replaceUnwindCoroEnd(End, Shape, FramePtr, InResume, CG);
  else
    replaceFallthroughCoroEnd(End, Shape, FramePtr, InResume, CG);

  auto &Context = End->getContext();
  End->replaceAllUsesWith(InResume
                              ? ConstantInt::getTrue(Context)
                              : ConstantInt::getFalse(Context));
  End->eraseFromParent();
}

// Create an entry block for a resume function with a switch that will jump to
// suspend points.
static void createResumeEntryBlock(Function &F, coro::Shape &Shape) {
  assert(Shape.ABI == coro::ABI::Switch);
  LLVMContext &C = F.getContext();

  // resume.entry:
  //   %index.addr = getelementptr inbounds %f.Frame, %f.Frame* %FramePtr,
  //                 i32 0, i32 2
  //   %index = load i32, i32* %index.addr
  //   switch i32 %index, label %unreachable [
  //     i32 0, label %resume.0
  //     i32 1, label %resume.1
  //     ...
  //   ]

  auto *NewEntry = BasicBlock::Create(C, "resume.entry", &F);
  auto *UnreachBB = BasicBlock::Create(C, "unreachable", &F);

  IRBuilder<> Builder(NewEntry);
  auto *FramePtr = Shape.FramePtr;
  auto *FrameTy = Shape.FrameTy;
  auto *GepIndex = Builder.CreateStructGEP(
      FrameTy, FramePtr, Shape.getSwitchIndexField(), "index.addr");
  auto *Index = Builder.CreateLoad(Shape.getIndexType(), GepIndex, "index");
  auto *Switch =
      Builder.CreateSwitch(Index, UnreachBB, Shape.CoroSuspends.size());
  Shape.SwitchLowering.ResumeSwitch = Switch;

  size_t SuspendIndex = 0;
  for (auto *AnyS : Shape.CoroSuspends) {
    auto *S = cast<CoroSuspendInst>(AnyS);
    ConstantInt *IndexVal = Shape.getIndex(SuspendIndex);

    // Replace CoroSave with a store to Index:
    //   %index.addr = getelementptr %f.frame... (index field number)
    //   store i32 %IndexVal, i32* %index.addr1
    auto *Save = S->getCoroSave();
    Builder.SetInsertPoint(Save);
    if (S->isFinal()) {
      // The coroutine should be marked done if it reaches the final suspend
      // point.
      markCoroutineAsDone(Builder, Shape, FramePtr);
    } else {
      auto *GepIndex = Builder.CreateStructGEP(
          FrameTy, FramePtr, Shape.getSwitchIndexField(), "index.addr");
      Builder.CreateStore(IndexVal, GepIndex);
    }

    Save->replaceAllUsesWith(ConstantTokenNone::get(C));
    Save->eraseFromParent();

    // Split block before and after coro.suspend and add a jump from an entry
    // switch:
    //
    //  whateverBB:
    //    whatever
    //    %0 = call i8 @llvm.coro.suspend(token none, i1 false)
    //    switch i8 %0, label %suspend [i8 0, label %resume
    //                                  i8 1, label %cleanup]
    // becomes:
    //
    //  whateverBB:
    //    whatever
    //    br label %resume.0.landing
    //
    //  resume.0: ; <--- jump from the switch in the resume.entry
    //    %0 = tail call i8 @llvm.coro.suspend(token none, i1 false)
    //    br label %resume.0.landing
    //
    //  resume.0.landing:
    //    %1 = phi i8 [-1, %whateverBB], [%0, %resume.0]
    //    switch i8 %1, label %suspend [i8 0, label %resume
    //                                  i8 1, label %cleanup]

    auto *SuspendBB = S->getParent();
    auto *ResumeBB = SuspendBB->splitBasicBlock(S, "resume."
                                                       + Twine(SuspendIndex));
    auto *LandingBB = ResumeBB->splitBasicBlock(
        S->getNextNode(), ResumeBB->getName() + Twine(".landing"));
    Switch->addCase(IndexVal, ResumeBB);

    cast<BranchInst>(SuspendBB->getTerminator())->setSuccessor(0, LandingBB);
    auto *PN = PHINode::Create(Builder.getInt8Ty(), 2, "", &LandingBB->front());
    S->replaceAllUsesWith(PN);
    PN->addIncoming(Builder.getInt8(-1), SuspendBB);
    PN->addIncoming(S, ResumeBB);

    ++SuspendIndex;
  }

  Builder.SetInsertPoint(UnreachBB);
  Builder.CreateUnreachable();

  Shape.SwitchLowering.ResumeEntryBlock = NewEntry;
}

// In the resume function, we remove the last case (when coro::Shape is built,
// the final suspend point (if present) is always the last element of the
// CoroSuspends array) since it is undefined behavior to resume a coroutine
// suspended at the final suspend point.
// In the destroy function, we can also remove the last case as long as it is
// impossible for ResumeFnAddr to be NULL while the coroutine is not actually
// suspended at the final suspend point (this situation is possible because the
// coroutine is considered suspended at the final suspend point if
// promise.unhandled_exception() exits via an exception).
void CoroCloner::handleFinalSuspend() {
  assert(Shape.ABI == coro::ABI::Switch &&
         Shape.SwitchLowering.HasFinalSuspend);

  if (isSwitchDestroyFunction() && Shape.SwitchLowering.HasUnwindCoroEnd)
    return;

  auto *Switch = cast<SwitchInst>(VMap[Shape.SwitchLowering.ResumeSwitch]);
  auto FinalCaseIt = std::prev(Switch->case_end());
  BasicBlock *ResumeBB = FinalCaseIt->getCaseSuccessor();
  Switch->removeCase(FinalCaseIt);
  if (isSwitchDestroyFunction()) {
    BasicBlock *OldSwitchBB = Switch->getParent();
    auto *NewSwitchBB = OldSwitchBB->splitBasicBlock(Switch, "Switch");
    Builder.SetInsertPoint(OldSwitchBB->getTerminator());
    auto *GepIndex = Builder.CreateStructGEP(Shape.FrameTy, NewFramePtr,
                                             coro::Shape::SwitchFieldIndex::Resume,
                                             "ResumeFn.addr");
    auto *Load = Builder.CreateLoad(Shape.getSwitchResumePointerType(),
                                    GepIndex);
    auto *Cond = Builder.CreateIsNull(Load);
    Builder.CreateCondBr(Cond, ResumeBB, NewSwitchBB);
    OldSwitchBB->getTerminator()->eraseFromParent();
  }
}

static FunctionType *
getFunctionTypeFromAsyncSuspend(AnyCoroSuspendInst *Suspend) {
  auto *AsyncSuspend = cast<CoroSuspendAsyncInst>(Suspend);
  auto *StructTy = cast<StructType>(AsyncSuspend->getType());
  auto &Context = Suspend->getParent()->getParent()->getContext();
  auto *VoidTy = Type::getVoidTy(Context);
  return FunctionType::get(VoidTy, StructTy->elements(), false);
}

static Function *createCloneDeclaration(Function &OrigF, coro::Shape &Shape,
                                        const Twine &Suffix,
                                        Module::iterator InsertBefore,
                                        AnyCoroSuspendInst *ActiveSuspend) {
  Module *M = OrigF.getParent();
  auto *FnTy = (Shape.ABI != coro::ABI::Async)
                   ? Shape.getResumeFunctionType()
                   : getFunctionTypeFromAsyncSuspend(ActiveSuspend);

  Function *NewF =
      Function::Create(FnTy, GlobalValue::LinkageTypes::InternalLinkage,
                       OrigF.getName() + Suffix);

  M->getFunctionList().insert(InsertBefore, NewF);

  return NewF;
}

/// Replace uses of the active llvm.coro.suspend.retcon/async call with the
/// arguments to the continuation function.
///
/// This assumes that the builder has a meaningful insertion point.
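///
/// A sketch of the retcon case (types and names are illustrative): if the
/// original coroutine contains
///
///   %vals = call {i32, i32} @llvm.coro.suspend.retcon(...)
///
/// then in the continuation clone, extracts of %vals at indices 0 and 1 are
/// rewritten to the continuation's first and second arguments after the
/// buffer pointer, and any remaining aggregate uses get a rebuilt aggregate.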
void CoroCloner::replaceRetconOrAsyncSuspendUses() {
  assert(Shape.ABI == coro::ABI::Retcon || Shape.ABI == coro::ABI::RetconOnce ||
         Shape.ABI == coro::ABI::Async);

  auto NewS = VMap[ActiveSuspend];
  if (NewS->use_empty()) return;

  // Copy out all the continuation arguments after the buffer pointer into
  // an easily-indexed data structure for convenience.
  SmallVector<Value*, 8> Args;
  // The async ABI includes all arguments -- including the first argument.
  bool IsAsyncABI = Shape.ABI == coro::ABI::Async;
  for (auto I = IsAsyncABI ? NewF->arg_begin() : std::next(NewF->arg_begin()),
            E = NewF->arg_end();
       I != E; ++I)
    Args.push_back(&*I);

  // If the suspend returns a single scalar value, we can just do a simple
  // replacement.
  if (!isa<StructType>(NewS->getType())) {
    assert(Args.size() == 1);
    NewS->replaceAllUsesWith(Args.front());
    return;
  }

  // Try to peephole extracts of an aggregate return.
  for (Use &U : llvm::make_early_inc_range(NewS->uses())) {
    auto *EVI = dyn_cast<ExtractValueInst>(U.getUser());
    if (!EVI || EVI->getNumIndices() != 1)
      continue;

    EVI->replaceAllUsesWith(Args[EVI->getIndices().front()]);
    EVI->eraseFromParent();
  }

  // If we have no remaining uses, we're done.
  if (NewS->use_empty()) return;

  // Otherwise, we need to create an aggregate.
  Value *Agg = PoisonValue::get(NewS->getType());
  for (size_t I = 0, E = Args.size(); I != E; ++I)
    Agg = Builder.CreateInsertValue(Agg, Args[I], I);

  NewS->replaceAllUsesWith(Agg);
}

void CoroCloner::replaceCoroSuspends() {
  Value *SuspendResult;

  switch (Shape.ABI) {
  // In switch lowering, replace coro.suspend with the appropriate value
  // for the type of function we're extracting.
  // Replacing coro.suspend with (0) will result in control flow proceeding to
  // a resume label associated with a suspend point; replacing it with (1) will
  // result in control flow proceeding to a cleanup label associated with this
  // suspend point.
  case coro::ABI::Switch:
    SuspendResult = Builder.getInt8(isSwitchDestroyFunction() ? 1 : 0);
    break;

  // In async lowering there are no uses of the result.
  case coro::ABI::Async:
    return;

  // In returned-continuation lowering, the arguments from earlier
  // continuations are theoretically arbitrary, and they should have been
  // spilled.
  case coro::ABI::RetconOnce:
  case coro::ABI::Retcon:
    return;
  }

  for (AnyCoroSuspendInst *CS : Shape.CoroSuspends) {
    // The active suspend was handled earlier.
    if (CS == ActiveSuspend) continue;

    auto *MappedCS = cast<AnyCoroSuspendInst>(VMap[CS]);
    MappedCS->replaceAllUsesWith(SuspendResult);
    MappedCS->eraseFromParent();
  }
}

void CoroCloner::replaceCoroEnds() {
  for (AnyCoroEndInst *CE : Shape.CoroEnds) {
    // We use a null call graph because there's no call graph node for
    // the cloned function yet. We'll just be rebuilding that later.
    auto *NewCE = cast<AnyCoroEndInst>(VMap[CE]);
    replaceCoroEnd(NewCE, Shape, NewFramePtr, /*in resume*/ true, nullptr);
  }
}

static void replaceSwiftErrorOps(Function &F, coro::Shape &Shape,
                                 ValueToValueMapTy *VMap) {
  if (Shape.ABI == coro::ABI::Async && Shape.CoroSuspends.empty())
    return;
  Value *CachedSlot = nullptr;
  auto getSwiftErrorSlot = [&](Type *ValueTy) -> Value * {
    if (CachedSlot)
      return CachedSlot;

    // Check if the function has a swifterror argument.
    for (auto &Arg : F.args()) {
      if (Arg.isSwiftError()) {
        CachedSlot = &Arg;
        return &Arg;
      }
    }

    // Create a swifterror alloca.
    IRBuilder<> Builder(F.getEntryBlock().getFirstNonPHIOrDbg());
    auto Alloca = Builder.CreateAlloca(ValueTy);
    Alloca->setSwiftError(true);

    CachedSlot = Alloca;
    return Alloca;
  };

  for (CallInst *Op : Shape.SwiftErrorOps) {
    auto MappedOp = VMap ? cast<CallInst>((*VMap)[Op]) : Op;
    IRBuilder<> Builder(MappedOp);

    // If there are no arguments, this is a 'get' operation.
    Value *MappedResult;
    if (Op->arg_empty()) {
      auto ValueTy = Op->getType();
      auto Slot = getSwiftErrorSlot(ValueTy);
      MappedResult = Builder.CreateLoad(ValueTy, Slot);
    } else {
      assert(Op->arg_size() == 1);
      auto Value = MappedOp->getArgOperand(0);
      auto ValueTy = Value->getType();
      auto Slot = getSwiftErrorSlot(ValueTy);
      Builder.CreateStore(Value, Slot);
      MappedResult = Slot;
    }

    MappedOp->replaceAllUsesWith(MappedResult);
    MappedOp->eraseFromParent();
  }

  // If we're updating the original function, we've invalidated SwiftErrorOps.
  if (VMap == nullptr) {
    Shape.SwiftErrorOps.clear();
  }
}

/// Returns all DbgVariableIntrinsics in F.
static SmallVector<DbgVariableIntrinsic *, 8>
collectDbgVariableIntrinsics(Function &F) {
  SmallVector<DbgVariableIntrinsic *, 8> Intrinsics;
  for (auto &I : instructions(F))
    if (auto *DVI = dyn_cast<DbgVariableIntrinsic>(&I))
      Intrinsics.push_back(DVI);
  return Intrinsics;
}

void CoroCloner::replaceSwiftErrorOps() {
  ::replaceSwiftErrorOps(*NewF, Shape, &VMap);
}

void CoroCloner::salvageDebugInfo() {
  SmallVector<DbgVariableIntrinsic *, 8> Worklist =
      collectDbgVariableIntrinsics(*NewF);
  SmallDenseMap<Argument *, AllocaInst *, 4> ArgToAllocaMap;
  for (DbgVariableIntrinsic *DVI : Worklist)
    coro::salvageDebugInfo(ArgToAllocaMap, DVI, Shape.OptimizeFrame);

  // Remove all salvaged dbg.declare intrinsics that became
  // either unreachable or stale due to the CoroSplit transformation.
  DominatorTree DomTree(*NewF);
  auto IsUnreachableBlock = [&](BasicBlock *BB) {
    return !isPotentiallyReachable(&NewF->getEntryBlock(), BB, nullptr,
                                   &DomTree);
  };
  for (DbgVariableIntrinsic *DVI : Worklist) {
    if (IsUnreachableBlock(DVI->getParent()))
      DVI->eraseFromParent();
    else if (isa_and_nonnull<AllocaInst>(DVI->getVariableLocationOp(0))) {
      // Count all non-debuginfo uses in reachable blocks.
      unsigned Uses = 0;
      for (auto *User : DVI->getVariableLocationOp(0)->users())
        if (auto *I = dyn_cast<Instruction>(User))
          if (!isa<AllocaInst>(I) && !IsUnreachableBlock(I->getParent()))
            ++Uses;
      if (!Uses)
        DVI->eraseFromParent();
    }
  }
}

void CoroCloner::replaceEntryBlock() {
  // In the original function, the AllocaSpillBlock is a block immediately
  // following the allocation of the frame object which defines GEPs for
  // all the allocas that have been moved into the frame, and it ends by
  // branching to the original beginning of the coroutine. Make this
  // the entry block of the cloned function.
  auto *Entry = cast<BasicBlock>(VMap[Shape.AllocaSpillBlock]);
  auto *OldEntry = &NewF->getEntryBlock();
  Entry->setName("entry" + Suffix);
  Entry->moveBefore(OldEntry);
  Entry->getTerminator()->eraseFromParent();

  // Clear all predecessors of the new entry block. There should be
  // exactly one predecessor, which we created when splitting out
  // AllocaSpillBlock to begin with.
  assert(Entry->hasOneUse());
  auto BranchToEntry = cast<BranchInst>(Entry->user_back());
  assert(BranchToEntry->isUnconditional());
  Builder.SetInsertPoint(BranchToEntry);
  Builder.CreateUnreachable();
  BranchToEntry->eraseFromParent();

  // Branch from the entry to the appropriate place.
  Builder.SetInsertPoint(Entry);
  switch (Shape.ABI) {
  case coro::ABI::Switch: {
    // In switch-lowering, we built a resume-entry block in the original
    // function. Make the entry block branch to this.
    auto *SwitchBB =
        cast<BasicBlock>(VMap[Shape.SwitchLowering.ResumeEntryBlock]);
    Builder.CreateBr(SwitchBB);
    break;
  }
  case coro::ABI::Async:
  case coro::ABI::Retcon:
  case coro::ABI::RetconOnce: {
    // In continuation ABIs, we want to branch to immediately after the
    // active suspend point. Earlier phases will have put the suspend in its
    // own basic block, so just thread our jump directly to its successor.
    assert((Shape.ABI == coro::ABI::Async &&
            isa<CoroSuspendAsyncInst>(ActiveSuspend)) ||
           ((Shape.ABI == coro::ABI::Retcon ||
             Shape.ABI == coro::ABI::RetconOnce) &&
            isa<CoroSuspendRetconInst>(ActiveSuspend)));
    auto *MappedCS = cast<AnyCoroSuspendInst>(VMap[ActiveSuspend]);
    auto Branch = cast<BranchInst>(MappedCS->getNextNode());
    assert(Branch->isUnconditional());
    Builder.CreateBr(Branch->getSuccessor(0));
    break;
  }
  }

  // Any static alloca that's still being used but not reachable from the new
  // entry needs to be moved to the new entry.
  Function *F = OldEntry->getParent();
  DominatorTree DT{*F};
  for (Instruction &I : llvm::make_early_inc_range(instructions(F))) {
    auto *Alloca = dyn_cast<AllocaInst>(&I);
    if (!Alloca || I.use_empty())
      continue;
    if (DT.isReachableFromEntry(I.getParent()) ||
        !isa<ConstantInt>(Alloca->getArraySize()))
      continue;
    I.moveBefore(*Entry, Entry->getFirstInsertionPt());
  }
}

/// Derive the value of the new frame pointer.
Value *CoroCloner::deriveNewFramePointer() {
  // Builder should be inserting to the front of the new entry block.

  switch (Shape.ABI) {
  // In switch-lowering, the argument is the frame pointer.
  case coro::ABI::Switch:
    return &*NewF->arg_begin();
  // In async-lowering, one of the arguments is an async context as determined
  // by the `llvm.coro.id.async` intrinsic.
  // We can retrieve the async context of the resume function from the async
  // context projection function associated with the active suspend. The frame
  // is located as a tail to the async context header.
  case coro::ABI::Async: {
    auto *ActiveAsyncSuspend = cast<CoroSuspendAsyncInst>(ActiveSuspend);
    auto ContextIdx = ActiveAsyncSuspend->getStorageArgumentIndex() & 0xff;
    auto *CalleeContext = NewF->getArg(ContextIdx);
    auto *FramePtrTy = Shape.FrameTy->getPointerTo();
    auto *ProjectionFunc =
        ActiveAsyncSuspend->getAsyncContextProjectionFunction();
    auto DbgLoc =
        cast<CoroSuspendAsyncInst>(VMap[ActiveSuspend])->getDebugLoc();
    // Calling i8* (i8*)
    auto *CallerContext = Builder.CreateCall(ProjectionFunc->getFunctionType(),
                                             ProjectionFunc, CalleeContext);
    CallerContext->setCallingConv(ProjectionFunc->getCallingConv());
    CallerContext->setDebugLoc(DbgLoc);
    // The frame is located after the async_context header.
    auto &Context = Builder.getContext();
    auto *FramePtrAddr = Builder.CreateConstInBoundsGEP1_32(
        Type::getInt8Ty(Context), CallerContext,
        Shape.AsyncLowering.FrameOffset, "async.ctx.frameptr");
    // Inline the projection function.
    InlineFunctionInfo InlineInfo;
    auto InlineRes = InlineFunction(*CallerContext, InlineInfo);
    assert(InlineRes.isSuccess());
    (void)InlineRes;
    return Builder.CreateBitCast(FramePtrAddr, FramePtrTy);
  }
  // In continuation-lowering, the argument is the opaque storage.
  case coro::ABI::Retcon:
  case coro::ABI::RetconOnce: {
    Argument *NewStorage = &*NewF->arg_begin();
    auto FramePtrTy = Shape.FrameTy->getPointerTo();

    // If the storage is inline, just bitcast the storage to the frame type.
    if (Shape.RetconLowering.IsFrameInlineInStorage)
      return Builder.CreateBitCast(NewStorage, FramePtrTy);

    // Otherwise, load the real frame from the opaque storage.
    auto FramePtrPtr =
        Builder.CreateBitCast(NewStorage, FramePtrTy->getPointerTo());
    return Builder.CreateLoad(FramePtrTy, FramePtrPtr);
  }
  }
  llvm_unreachable("bad ABI");
}

static void addFramePointerAttrs(AttributeList &Attrs, LLVMContext &Context,
                                 unsigned ParamIndex, uint64_t Size,
                                 Align Alignment, bool NoAlias) {
  AttrBuilder ParamAttrs(Context);
  ParamAttrs.addAttribute(Attribute::NonNull);
  ParamAttrs.addAttribute(Attribute::NoUndef);

  if (NoAlias)
    ParamAttrs.addAttribute(Attribute::NoAlias);

  ParamAttrs.addAlignmentAttr(Alignment);
  ParamAttrs.addDereferenceableAttr(Size);
  Attrs = Attrs.addParamAttributes(Context, ParamIndex, ParamAttrs);
}

static void addAsyncContextAttrs(AttributeList &Attrs, LLVMContext &Context,
                                 unsigned ParamIndex) {
  AttrBuilder ParamAttrs(Context);
  ParamAttrs.addAttribute(Attribute::SwiftAsync);
  Attrs = Attrs.addParamAttributes(Context, ParamIndex, ParamAttrs);
}

static void addSwiftSelfAttrs(AttributeList &Attrs, LLVMContext &Context,
                              unsigned ParamIndex) {
  AttrBuilder ParamAttrs(Context);
  ParamAttrs.addAttribute(Attribute::SwiftSelf);
  Attrs = Attrs.addParamAttributes(Context, ParamIndex, ParamAttrs);
}

/// Clone the body of the original function into a resume function of
/// some sort.
void CoroCloner::create() {
  // Create the new function if we don't already have one.
  if (!NewF) {
    NewF = createCloneDeclaration(OrigF, Shape, Suffix,
                                  OrigF.getParent()->end(), ActiveSuspend);
  }

  // Replace all args with dummy instructions. If an argument is the old frame
  // pointer, the dummy will be replaced by the new frame pointer once it is
  // computed below. Uses of all other arguments should have already been
  // rewritten by buildCoroutineFrame() to use loads/stores on the coroutine
  // frame.
  SmallVector<Instruction *> DummyArgs;
  for (Argument &A : OrigF.args()) {
    DummyArgs.push_back(new FreezeInst(PoisonValue::get(A.getType())));
    VMap[&A] = DummyArgs.back();
  }

  SmallVector<ReturnInst *, 4> Returns;

  // Ignore attempts to change certain attributes of the function.
  // TODO: maybe there should be a way to suppress this during cloning?
  auto savedVisibility = NewF->getVisibility();
  auto savedUnnamedAddr = NewF->getUnnamedAddr();
  auto savedDLLStorageClass = NewF->getDLLStorageClass();

  // NewF's linkage (which CloneFunctionInto does *not* change) might not
  // be compatible with the visibility of OrigF (which it *does* change),
  // so protect against that.
  auto savedLinkage = NewF->getLinkage();
  NewF->setLinkage(llvm::GlobalValue::ExternalLinkage);

  CloneFunctionInto(NewF, &OrigF, VMap,
                    CloneFunctionChangeType::LocalChangesOnly, Returns);

  auto &Context = NewF->getContext();

  // For async functions / continuations, adjust the scope line of the
  // clone to the line number of the suspend point. However, only
  // adjust the scope line when the files are the same. This ensures
  // line number and file name belong together. The scope line is
  // associated with all pre-prologue instructions. This avoids a jump
  // in the linetable from the function declaration to the suspend point.
  if (DISubprogram *SP = NewF->getSubprogram()) {
    assert(SP != OrigF.getSubprogram() && SP->isDistinct());
    if (ActiveSuspend)
      if (auto DL = ActiveSuspend->getDebugLoc())
        if (SP->getFile() == DL->getFile())
          SP->setScopeLine(DL->getLine());
    // Update the linkage name to reflect the modified symbol name. It
    // is necessary to update the linkage name in Swift, since the
    // mangling changes for resume functions. It might also be the
    // right thing to do in C++, but due to a limitation in LLVM's
    // AsmPrinter we can only do this if the function doesn't have an
    // abstract specification, since the DWARF backend expects the
    // abstract specification to contain the linkage name and asserts
    // that they are identical.
    if (!SP->getDeclaration() && SP->getUnit() &&
        SP->getUnit()->getSourceLanguage() == dwarf::DW_LANG_Swift)
      SP->replaceLinkageName(MDString::get(Context, NewF->getName()));
  }

  NewF->setLinkage(savedLinkage);
  NewF->setVisibility(savedVisibility);
  NewF->setUnnamedAddr(savedUnnamedAddr);
  NewF->setDLLStorageClass(savedDLLStorageClass);
  // The function sanitizer metadata needs to match the signature of the
  // function it is being attached to. However, this does not hold for split
  // functions here. Thus, remove the metadata for split functions.
  if (Shape.ABI == coro::ABI::Switch &&
      NewF->hasMetadata(LLVMContext::MD_func_sanitize))
    NewF->eraseMetadata(LLVMContext::MD_func_sanitize);

  // Replace the attributes of the new function:
  auto OrigAttrs = NewF->getAttributes();
  auto NewAttrs = AttributeList();

  switch (Shape.ABI) {
  case coro::ABI::Switch:
    // Bootstrap attributes by copying function attributes from the
    // original function. This should include optimization settings and so on.
    NewAttrs = NewAttrs.addFnAttributes(
        Context, AttrBuilder(Context, OrigAttrs.getFnAttrs()));

    addFramePointerAttrs(NewAttrs, Context, 0, Shape.FrameSize,
                         Shape.FrameAlign, /*NoAlias=*/false);
    break;
  case coro::ABI::Async: {
    auto *ActiveAsyncSuspend = cast<CoroSuspendAsyncInst>(ActiveSuspend);
    if (OrigF.hasParamAttribute(Shape.AsyncLowering.ContextArgNo,
                                Attribute::SwiftAsync)) {
      uint32_t ArgAttributeIndices =
          ActiveAsyncSuspend->getStorageArgumentIndex();
      auto ContextArgIndex = ArgAttributeIndices & 0xff;
      addAsyncContextAttrs(NewAttrs, Context, ContextArgIndex);

      // `swiftasync` must precede `swiftself`, so 0 is not a valid index for
      // `swiftself`.
      auto SwiftSelfIndex = ArgAttributeIndices >> 8;
      if (SwiftSelfIndex)
        addSwiftSelfAttrs(NewAttrs, Context, SwiftSelfIndex);
    }

    // Transfer the original function's attributes.
    auto FnAttrs = OrigF.getAttributes().getFnAttrs();
    NewAttrs = NewAttrs.addFnAttributes(Context, AttrBuilder(Context, FnAttrs));
    break;
  }
  case coro::ABI::Retcon:
  case coro::ABI::RetconOnce:
    // If we have a continuation prototype, just use its attributes,
    // full-stop.
    NewAttrs = Shape.RetconLowering.ResumePrototype->getAttributes();

    /// FIXME: Is it really good to add the NoAlias attribute?
    addFramePointerAttrs(NewAttrs, Context, 0,
                         Shape.getRetconCoroId()->getStorageSize(),
                         Shape.getRetconCoroId()->getStorageAlignment(),
                         /*NoAlias=*/true);

    break;
  }

  switch (Shape.ABI) {
  // In these ABIs, the cloned functions always return 'void', and the
  // existing return sites are meaningless. Note that for unique
  // continuations, this includes the returns associated with suspends;
  // this is fine because we can't suspend twice.
  case coro::ABI::Switch:
  case coro::ABI::RetconOnce:
    // Remove old returns.
    for (ReturnInst *Return : Returns)
      changeToUnreachable(Return);
    break;

  // With multi-suspend continuations, we'll already have eliminated the
  // original returns and inserted returns before all the suspend points,
  // so we want to leave any returns in place.
  case coro::ABI::Retcon:
    break;
  // Async lowering will insert musttail calls at all suspend points, each
  // followed by a return.
  // Don't change returns to unreachable because that will trip up the
  // verifier. These returns should be unreachable from the clone.
  case coro::ABI::Async:
    break;
  }

  NewF->setAttributes(NewAttrs);
  NewF->setCallingConv(Shape.getResumeFunctionCC());

  // Set up the new entry block.
  replaceEntryBlock();

  Builder.SetInsertPoint(&NewF->getEntryBlock().front());
  NewFramePtr = deriveNewFramePointer();

  // Remap frame pointer.
  Value *OldFramePtr = VMap[Shape.FramePtr];
  NewFramePtr->takeName(OldFramePtr);
  OldFramePtr->replaceAllUsesWith(NewFramePtr);

  // Remap vFrame pointer.
  auto *NewVFrame = Builder.CreateBitCast(
      NewFramePtr, Type::getInt8PtrTy(Builder.getContext()), "vFrame");
  Value *OldVFrame = cast<Value>(VMap[Shape.CoroBegin]);
  if (OldVFrame != NewVFrame)
    OldVFrame->replaceAllUsesWith(NewVFrame);

  // All uses of the arguments should have been resolved by this point,
  // so we can safely remove the dummy values.
  for (Instruction *DummyArg : DummyArgs) {
    DummyArg->replaceAllUsesWith(PoisonValue::get(DummyArg->getType()));
    DummyArg->deleteValue();
  }

  switch (Shape.ABI) {
  case coro::ABI::Switch:
    // Rewrite final suspend handling as it is not done via switch (this allows
    // us to remove the final case from the switch, since it is undefined
    // behavior to resume a coroutine suspended at the final suspend point).
    if (Shape.SwitchLowering.HasFinalSuspend)
      handleFinalSuspend();
    break;
  case coro::ABI::Async:
  case coro::ABI::Retcon:
  case coro::ABI::RetconOnce:
    // Replace uses of the active suspend with the corresponding
    // continuation-function arguments.
    assert(ActiveSuspend != nullptr &&
           "no active suspend when lowering a continuation-style coroutine");
    replaceRetconOrAsyncSuspendUses();
    break;
  }

  // Handle suspends.
  replaceCoroSuspends();

  // Handle swifterror.
  replaceSwiftErrorOps();

  // Remove coro.end intrinsics.
  replaceCoroEnds();

  // Salvage debug info that points into the coroutine frame.
  salvageDebugInfo();

  // Eliminate coro.free from the clones, replacing it with 'null' in cleanup,
  // to suppress deallocation code.
  if (Shape.ABI == coro::ABI::Switch)
    coro::replaceCoroFree(cast<CoroIdInst>(VMap[Shape.CoroBegin->getId()]),
                          /*Elide=*/FKind == CoroCloner::Kind::SwitchCleanup);
}

// Create a resume clone by cloning the body of the original function, setting
// a new entry block, and replacing coro.suspend with an appropriate value to
// force resume or cleanup at every suspend point.
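//
// For a switch-lowered coroutine `f`, this is invoked three times (see
// splitSwitchCoroutine below) to produce `f.resume`, `f.destroy`, and
// `f.cleanup`, each a clone of `f`'s body that takes the frame pointer as its
// only argument.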
static Function *createClone(Function &F, const Twine &Suffix,
                             coro::Shape &Shape, CoroCloner::Kind FKind) {
  CoroCloner Cloner(F, Suffix, Shape, FKind);
  Cloner.create();
  return Cloner.getFunction();
}

static void updateAsyncFuncPointerContextSize(coro::Shape &Shape) {
  assert(Shape.ABI == coro::ABI::Async);

  auto *FuncPtrStruct = cast<ConstantStruct>(
      Shape.AsyncLowering.AsyncFuncPointer->getInitializer());
  auto *OrigRelativeFunOffset = FuncPtrStruct->getOperand(0);
  auto *OrigContextSize = FuncPtrStruct->getOperand(1);
  auto *NewContextSize = ConstantInt::get(OrigContextSize->getType(),
                                          Shape.AsyncLowering.ContextSize);
  auto *NewFuncPtrStruct = ConstantStruct::get(
      FuncPtrStruct->getType(), OrigRelativeFunOffset, NewContextSize);

  Shape.AsyncLowering.AsyncFuncPointer->setInitializer(NewFuncPtrStruct);
}

static void replaceFrameSizeAndAlignment(coro::Shape &Shape) {
  if (Shape.ABI == coro::ABI::Async)
    updateAsyncFuncPointerContextSize(Shape);

  for (CoroAlignInst *CA : Shape.CoroAligns) {
    CA->replaceAllUsesWith(
        ConstantInt::get(CA->getType(), Shape.FrameAlign.value()));
    CA->eraseFromParent();
  }

  if (Shape.CoroSizes.empty())
    return;

  // In the same function all coro.sizes should have the same result type.
  auto *SizeIntrin = Shape.CoroSizes.back();
  Module *M = SizeIntrin->getModule();
  const DataLayout &DL = M->getDataLayout();
  auto Size = DL.getTypeAllocSize(Shape.FrameTy);
  auto *SizeConstant = ConstantInt::get(SizeIntrin->getType(), Size);

  for (CoroSizeInst *CS : Shape.CoroSizes) {
    CS->replaceAllUsesWith(SizeConstant);
    CS->eraseFromParent();
  }
}

// Create a global constant array containing pointers to the provided functions
// and set the Info parameter of CoroBegin to point at this constant. Example:
//
//   @f.resumers = internal constant [2 x void(%f.frame*)*]
//                 [void(%f.frame*)* @f.resume, void(%f.frame*)* @f.destroy]
//   define void @f() {
//     ...
//     call i8* @llvm.coro.begin(i8* null, i32 0, i8* null,
//                    i8* bitcast([2 x void(%f.frame*)*] * @f.resumers to i8*))
//
// Assumes that all the functions have the same signature.
static void setCoroInfo(Function &F, coro::Shape &Shape,
                        ArrayRef<Function *> Fns) {
  // This only works under the switch-lowering ABI because coro elision
  // only works on the switch-lowering ABI.
  assert(Shape.ABI == coro::ABI::Switch);

  SmallVector<Constant *, 4> Args(Fns.begin(), Fns.end());
  assert(!Args.empty());
  Function *Part = *Fns.begin();
  Module *M = Part->getParent();
  auto *ArrTy = ArrayType::get(Part->getType(), Args.size());

  auto *ConstVal = ConstantArray::get(ArrTy, Args);
  auto *GV = new GlobalVariable(*M, ConstVal->getType(), /*isConstant=*/true,
                                GlobalVariable::PrivateLinkage, ConstVal,
                                F.getName() + Twine(".resumers"));

  // Update the coro.begin instruction to refer to this constant.
  LLVMContext &C = F.getContext();
  auto *BC = ConstantExpr::getPointerCast(GV, Type::getInt8PtrTy(C));
  Shape.getSwitchCoroId()->setInfo(BC);
}

// Store addresses of Resume/Destroy/Cleanup functions in the coroutine frame.
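//
// A sketch of the emitted IR (field indices follow SwitchFieldIndex; the
// select appears only when the allocation may be elided):
//
//   %resume.addr = getelementptr %f.Frame, ptr %FramePtr, i32 0, i32 0
//   store ptr @f.resume, ptr %resume.addr
//   %sel = select i1 %alloc, ptr @f.destroy, ptr @f.cleanup
//   %destroy.addr = getelementptr %f.Frame, ptr %FramePtr, i32 0, i32 1
//   store ptr %sel, ptr %destroy.addr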
static void updateCoroFrame(coro::Shape &Shape, Function *ResumeFn,
                            Function *DestroyFn, Function *CleanupFn) {
  assert(Shape.ABI == coro::ABI::Switch);

  IRBuilder<> Builder(Shape.getInsertPtAfterFramePtr());

  auto *ResumeAddr = Builder.CreateStructGEP(
      Shape.FrameTy, Shape.FramePtr, coro::Shape::SwitchFieldIndex::Resume,
      "resume.addr");
  Builder.CreateStore(ResumeFn, ResumeAddr);

  Value *DestroyOrCleanupFn = DestroyFn;

  CoroIdInst *CoroId = Shape.getSwitchCoroId();
  if (CoroAllocInst *CA = CoroId->getCoroAlloc()) {
    // If there is a CoroAlloc and it returns false (meaning we elided the
    // allocation), use CleanupFn instead of DestroyFn.
    DestroyOrCleanupFn = Builder.CreateSelect(CA, DestroyFn, CleanupFn);
  }

  auto *DestroyAddr = Builder.CreateStructGEP(
      Shape.FrameTy, Shape.FramePtr, coro::Shape::SwitchFieldIndex::Destroy,
      "destroy.addr");
  Builder.CreateStore(DestroyOrCleanupFn, DestroyAddr);
}

static void postSplitCleanup(Function &F) {
  removeUnreachableBlocks(F);

#ifndef NDEBUG
  // For now, we do a mandatory verification step because we don't
  // entirely trust this pass. Note that we don't want to add a verifier
  // pass to FPM below because it will also verify all the global data.
  if (verifyFunction(F, &errs()))
    report_fatal_error("Broken function");
#endif
}

// Assuming we arrived at the block NewBlock from the Prev instruction, store
// the PHIs' incoming values in the ResolvedValues map.
static void
scanPHIsAndUpdateValueMap(Instruction *Prev, BasicBlock *NewBlock,
                          DenseMap<Value *, Value *> &ResolvedValues) {
  auto *PrevBB = Prev->getParent();
  for (PHINode &PN : NewBlock->phis()) {
    auto V = PN.getIncomingValueForBlock(PrevBB);
    // See if we already resolved it.
    auto VI = ResolvedValues.find(V);
    if (VI != ResolvedValues.end())
      V = VI->second;
    // Remember the value.
    ResolvedValues[&PN] = V;
  }
}

// Replace a sequence of branches leading to a ret with a clone of that ret
// instruction. For suspend instructions represented by a switch, track the PHI
// values and select the correct case successor when possible.
static bool simplifyTerminatorLeadingToRet(Instruction *InitialInst) {
  // There is nothing to simplify.
  if (isa<ReturnInst>(InitialInst))
    return false;

  DenseMap<Value *, Value *> ResolvedValues;
  assert(InitialInst->getModule());
  const DataLayout &DL = InitialInst->getModule()->getDataLayout();

  auto GetFirstValidInstruction = [](Instruction *I) {
    while (I) {
      // A BitCastInst doesn't generate any actual code, so we can skip it.
      if (isa<BitCastInst>(I) || I->isDebugOrPseudoInst() ||
          I->isLifetimeStartOrEnd())
        I = I->getNextNode();
      else if (isInstructionTriviallyDead(I))
        // Since we are in the middle of the transformation, we need to erase
        // the dead instruction manually.
        I = &*I->eraseFromParent();
      else
        break;
    }
    return I;
  };

  auto TryResolveConstant = [&ResolvedValues](Value *V) {
    auto It = ResolvedValues.find(V);
    if (It != ResolvedValues.end())
      V = It->second;
    return dyn_cast<ConstantInt>(V);
  };

  Instruction *I = InitialInst;
  while (I->isTerminator() || isa<CmpInst>(I)) {
    if (isa<ReturnInst>(I)) {
      ReplaceInstWithInst(InitialInst, I->clone());
      return true;
    }

    if (auto *BR = dyn_cast<BranchInst>(I)) {
      unsigned SuccIndex = 0;
      if (BR->isConditional()) {
        // Handle the case where the condition of the conditional branch is a
        // constant, e.g.,
        //
        //   br i1 false, label %cleanup, label %CoroEnd
        //
        // This is possible during the transformation, and we can continue
        // simplifying in this case.
        ConstantInt *Cond = TryResolveConstant(BR->getCondition());
        if (!Cond)
          return false;

        SuccIndex = Cond->isOne() ? 0 : 1;
      }

      BasicBlock *Succ = BR->getSuccessor(SuccIndex);
      scanPHIsAndUpdateValueMap(I, Succ, ResolvedValues);
      I = GetFirstValidInstruction(Succ->getFirstNonPHIOrDbgOrLifetime());

      continue;
    }

    if (auto *CondCmp = dyn_cast<CmpInst>(I)) {
      // If the number of cases of a suspended switch instruction is reduced
      // to 1, it is simplified to a CmpInst by llvm::ConstantFoldTerminator.
      auto *BR = dyn_cast<BranchInst>(
          GetFirstValidInstruction(CondCmp->getNextNode()));
      if (!BR || !BR->isConditional() || CondCmp != BR->getCondition())
        return false;

      // The comparison looks like: %cond = icmp eq i8 %V, constant.
      // We try to resolve a constant for the first operand only, since the
      // second operand should be a literal constant by design.
      ConstantInt *Cond0 = TryResolveConstant(CondCmp->getOperand(0));
      auto *Cond1 = dyn_cast<ConstantInt>(CondCmp->getOperand(1));
      if (!Cond0 || !Cond1)
        return false;

      // Both operands of the CmpInst are constant, so we can evaluate it
      // immediately to get the destination.
      auto *ConstResult =
          dyn_cast_or_null<ConstantInt>(ConstantFoldCompareInstOperands(
              CondCmp->getPredicate(), Cond0, Cond1, DL));
      if (!ConstResult)
        return false;

      ResolvedValues[BR->getCondition()] = ConstResult;

      // Handle this branch in the next iteration.
      I = BR;
      continue;
    }

    if (auto *SI = dyn_cast<SwitchInst>(I)) {
      ConstantInt *Cond = TryResolveConstant(SI->getCondition());
      if (!Cond)
        return false;

      BasicBlock *BB = SI->findCaseValue(Cond)->getCaseSuccessor();
      scanPHIsAndUpdateValueMap(I, BB, ResolvedValues);
      I = GetFirstValidInstruction(BB->getFirstNonPHIOrDbgOrLifetime());
      continue;
    }

    return false;
  }

  return false;
}

// Check whether CI obeys the rules of the musttail attribute.
static bool shouldBeMustTail(const CallInst &CI, const Function &F) {
  if (CI.isInlineAsm())
    return false;

  // Match the prototype and calling convention of the resume function.
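  // That is, the callee must look like the resume entry point (illustrative):
  //
  //   declare void @f.resume(ptr %frame)
  //
  // i.e. a void return and a single pointer argument in address space 0,
  // with the same calling convention as F.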
  FunctionType *CalleeTy = CI.getFunctionType();
  if (!CalleeTy->getReturnType()->isVoidTy() || (CalleeTy->getNumParams() != 1))
    return false;

  Type *CalleeParmTy = CalleeTy->getParamType(0);
  if (!CalleeParmTy->isPointerTy() ||
      (CalleeParmTy->getPointerAddressSpace() != 0))
    return false;

  if (CI.getCallingConv() != F.getCallingConv())
    return false;

  // CI should not have any ABI-impacting function attributes.
  static const Attribute::AttrKind ABIAttrs[] = {
      Attribute::StructRet,    Attribute::ByVal,     Attribute::InAlloca,
      Attribute::Preallocated, Attribute::InReg,     Attribute::Returned,
      Attribute::SwiftSelf,    Attribute::SwiftError};
  AttributeList Attrs = CI.getAttributes();
  for (auto AK : ABIAttrs)
    if (Attrs.hasParamAttr(0, AK))
      return false;

  return true;
}

// Add musttail to any resume instruction that is immediately followed by a
// suspend (i.e. ret). We do this even in -O0 to support guaranteed tail calls
// for symmetric coroutine control transfer (C++ Coroutines TS extension).
// This transformation is done only in the resume part of the coroutine, which
// has a signature and calling convention identical to the coro.resume call.
static void addMustTailToCoroResumes(Function &F, TargetTransformInfo &TTI) {
  bool changed = false;

  // Collect potential resume instructions.
  SmallVector<CallInst *, 4> Resumes;
  for (auto &I : instructions(F))
    if (auto *Call = dyn_cast<CallInst>(&I))
      if (shouldBeMustTail(*Call, F))
        Resumes.push_back(Call);

  // Set musttail on those that are followed by a ret instruction.
  for (CallInst *Call : Resumes)
    // Skip targets which don't support tail calls in this specific case.
    if (TTI.supportsTailCallFor(Call) &&
        simplifyTerminatorLeadingToRet(Call->getNextNode())) {
      Call->setTailCallKind(CallInst::TCK_MustTail);
      changed = true;
    }

  if (changed)
    removeUnreachableBlocks(F);
}

// The coroutine has no suspend points. Remove the heap allocation for the
// coroutine frame if possible.
static void handleNoSuspendCoroutine(coro::Shape &Shape) {
  auto *CoroBegin = Shape.CoroBegin;
  auto *CoroId = CoroBegin->getId();
  auto *AllocInst = CoroId->getCoroAlloc();
  switch (Shape.ABI) {
  case coro::ABI::Switch: {
    auto SwitchId = cast<CoroIdInst>(CoroId);
    coro::replaceCoroFree(SwitchId, /*Elide=*/AllocInst != nullptr);
    if (AllocInst) {
      IRBuilder<> Builder(AllocInst);
      auto *Frame = Builder.CreateAlloca(Shape.FrameTy);
      Frame->setAlignment(Shape.FrameAlign);
      auto *VFrame = Builder.CreateBitCast(Frame, Builder.getInt8PtrTy());
      AllocInst->replaceAllUsesWith(Builder.getFalse());
      AllocInst->eraseFromParent();
      CoroBegin->replaceAllUsesWith(VFrame);
    } else {
      CoroBegin->replaceAllUsesWith(CoroBegin->getMem());
    }

    break;
  }
  case coro::ABI::Async:
  case coro::ABI::Retcon:
  case coro::ABI::RetconOnce:
    CoroBegin->replaceAllUsesWith(UndefValue::get(CoroBegin->getType()));
    break;
  }

  CoroBegin->eraseFromParent();
}

// SimplifySuspendPoint needs to check that there are no calls between
// coro.save and coro.suspend, since any of those calls may potentially resume
// the coroutine, and if that is the case we cannot eliminate the suspend point.
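//
// A sketch of the unsafe pattern (the callee name is hypothetical):
//
//   %save = call token @llvm.coro.save(ptr %hdl)
//   call void @may_resume(ptr %hdl) ; blocks the simplification
//   %susp = call i8 @llvm.coro.suspend(token %save, i1 false)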
static bool hasCallsInBlockBetween(Instruction *From, Instruction *To) {
  for (Instruction *I = From; I != To; I = I->getNextNode()) {
    // Assume that no intrinsic can resume the coroutine.
    if (isa<IntrinsicInst>(I))
      continue;

    if (isa<CallBase>(I))
      return true;
  }
  return false;
}

static bool hasCallsInBlocksBetween(BasicBlock *SaveBB, BasicBlock *ResDesBB) {
  SmallPtrSet<BasicBlock *, 8> Set;
  SmallVector<BasicBlock *, 8> Worklist;

  Set.insert(SaveBB);
  Worklist.push_back(ResDesBB);

  // Accumulate all blocks between SaveBB and ResDesBB. Because CoroSaveIntr
  // returns a token consumed by suspend instruction, all blocks in between
  // will have to eventually hit SaveBB when going backwards from ResDesBB.
  while (!Worklist.empty()) {
    auto *BB = Worklist.pop_back_val();
    Set.insert(BB);
    for (auto *Pred : predecessors(BB))
      if (!Set.contains(Pred))
        Worklist.push_back(Pred);
  }

  // SaveBB and ResDesBB are checked separately in hasCallsBetween.
  Set.erase(SaveBB);
  Set.erase(ResDesBB);

  for (auto *BB : Set)
    if (hasCallsInBlockBetween(BB->getFirstNonPHI(), nullptr))
      return true;

  return false;
}

static bool hasCallsBetween(Instruction *Save, Instruction *ResumeOrDestroy) {
  auto *SaveBB = Save->getParent();
  auto *ResumeOrDestroyBB = ResumeOrDestroy->getParent();

  if (SaveBB == ResumeOrDestroyBB)
    return hasCallsInBlockBetween(Save->getNextNode(), ResumeOrDestroy);

  // Any calls from Save to the end of the block?
  if (hasCallsInBlockBetween(Save->getNextNode(), nullptr))
    return true;

  // Any calls from the beginning of the block up to ResumeOrDestroy?
  if (hasCallsInBlockBetween(ResumeOrDestroyBB->getFirstNonPHI(),
                             ResumeOrDestroy))
    return true;

  // Any calls in all of the blocks between SaveBB and ResumeOrDestroyBB?
  if (hasCallsInBlocksBetween(SaveBB, ResumeOrDestroyBB))
    return true;

  return false;
}

// If a SuspendIntrin is preceded by Resume or Destroy, we can eliminate the
// suspend point and replace it with normal control flow.
static bool simplifySuspendPoint(CoroSuspendInst *Suspend,
                                 CoroBeginInst *CoroBegin) {
  Instruction *Prev = Suspend->getPrevNode();
  if (!Prev) {
    auto *Pred = Suspend->getParent()->getSinglePredecessor();
    if (!Pred)
      return false;
    Prev = Pred->getTerminator();
  }

  CallBase *CB = dyn_cast<CallBase>(Prev);
  if (!CB)
    return false;

  auto *Callee = CB->getCalledOperand()->stripPointerCasts();

  // See if the callsite is for resumption or destruction of the coroutine.
  auto *SubFn = dyn_cast<CoroSubFnInst>(Callee);
  if (!SubFn)
    return false;

  // If it does not refer to the current coroutine, we cannot do anything
  // with it.
  if (SubFn->getFrame() != CoroBegin)
    return false;

  // See if the transformation is safe. Specifically, see if there are any
  // calls in between Save and CallInstr. They can potentially resume the
  // coroutine, rendering this optimization unsafe.
  auto *Save = Suspend->getCoroSave();
  if (hasCallsBetween(Save, CB))
    return false;

  // Replace llvm.coro.suspend with the value that results in resumption over
  // the resume or cleanup path.
  Suspend->replaceAllUsesWith(SubFn->getRawIndex());
  Suspend->eraseFromParent();
  Save->eraseFromParent();

  // No longer need a call to coro.resume or coro.destroy.
  if (auto *Invoke = dyn_cast<InvokeInst>(CB)) {
    BranchInst::Create(Invoke->getNormalDest(), Invoke);
  }

  // Grab the called value from CB before erasing the call instruction.
  auto *CalledValue = CB->getCalledOperand();
  CB->eraseFromParent();

  // If no more users remain, remove it. Usually it is a bitcast of SubFn.
  if (CalledValue != SubFn && CalledValue->user_empty())
    if (auto *I = dyn_cast<Instruction>(CalledValue))
      I->eraseFromParent();

  // Now we are good to remove SubFn.
  if (SubFn->user_empty())
    SubFn->eraseFromParent();

  return true;
}

// Remove suspend points that are simplified.
static void simplifySuspendPoints(coro::Shape &Shape) {
  // Currently, the only simplification we do is switch-lowering-specific.
  if (Shape.ABI != coro::ABI::Switch)
    return;

  auto &S = Shape.CoroSuspends;
  size_t I = 0, N = S.size();
  if (N == 0)
    return;

  size_t ChangedFinalIndex = std::numeric_limits<size_t>::max();
  while (true) {
    auto SI = cast<CoroSuspendInst>(S[I]);
    // Leave final.suspend to handleFinalSuspend since it is undefined behavior
    // to resume a coroutine suspended at the final suspend point.
    if (!SI->isFinal() && simplifySuspendPoint(SI, Shape.CoroBegin)) {
      if (--N == I)
        break;

      std::swap(S[I], S[N]);

      if (cast<CoroSuspendInst>(S[I])->isFinal()) {
        assert(Shape.SwitchLowering.HasFinalSuspend);
        ChangedFinalIndex = I;
      }

      continue;
    }
    if (++I == N)
      break;
  }
  S.resize(N);

  // Maintain final.suspend in case the final suspend was swapped, since we
  // require the final suspend to be the last element of CoroSuspends.
  if (ChangedFinalIndex < N) {
    assert(cast<CoroSuspendInst>(S[ChangedFinalIndex])->isFinal());
    std::swap(S[ChangedFinalIndex], S.back());
  }
}

static void splitSwitchCoroutine(Function &F, coro::Shape &Shape,
                                 SmallVectorImpl<Function *> &Clones,
                                 TargetTransformInfo &TTI) {
  assert(Shape.ABI == coro::ABI::Switch);

  createResumeEntryBlock(F, Shape);
  auto ResumeClone = createClone(F, ".resume", Shape,
                                 CoroCloner::Kind::SwitchResume);
  auto DestroyClone = createClone(F, ".destroy", Shape,
                                  CoroCloner::Kind::SwitchUnwind);
  auto CleanupClone = createClone(F, ".cleanup", Shape,
                                  CoroCloner::Kind::SwitchCleanup);

  postSplitCleanup(*ResumeClone);
  postSplitCleanup(*DestroyClone);
  postSplitCleanup(*CleanupClone);

  // Add musttail calls to support symmetric transfer.
  // Skip targets which don't support tail calls.
  //
  // FIXME: Could we support symmetric transfer effectively without musttail
  // call?
  if (TTI.supportsTailCalls())
    addMustTailToCoroResumes(*ResumeClone, TTI);

  // Store the addresses of the resume/destroy/cleanup functions in the
  // coroutine frame.
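  // (Conceptually, the switch-lowered frame begins with these function
  //  pointers, e.g. something like:
  //    %f.Frame = type { ptr, ptr, ... }  ; resume fn, destroy fn, spills...
  //  so llvm.coro.resume/destroy become indirect calls through those slots;
  //  the exact layout is computed earlier by buildCoroutineFrame.)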
  updateCoroFrame(Shape, ResumeClone, DestroyClone, CleanupClone);

  assert(Clones.empty());
  Clones.push_back(ResumeClone);
  Clones.push_back(DestroyClone);
  Clones.push_back(CleanupClone);

  // Create a constant array referring to the resume/destroy/cleanup functions
  // pointed to by the last argument of @llvm.coro.info, so that the CoroElide
  // pass can determine the correct function to call.
  setCoroInfo(F, Shape, Clones);
}

static void replaceAsyncResumeFunction(CoroSuspendAsyncInst *Suspend,
                                       Value *Continuation) {
  auto *ResumeIntrinsic = Suspend->getResumeFunction();
  auto &Context = Suspend->getParent()->getParent()->getContext();
  auto *Int8PtrTy = Type::getInt8PtrTy(Context);

  IRBuilder<> Builder(ResumeIntrinsic);
  auto *Val = Builder.CreateBitOrPointerCast(Continuation, Int8PtrTy);
  ResumeIntrinsic->replaceAllUsesWith(Val);
  ResumeIntrinsic->eraseFromParent();
  Suspend->setOperand(CoroSuspendAsyncInst::ResumeFunctionArg,
                      UndefValue::get(Int8PtrTy));
}

/// Coerce the arguments in \p FnArgs according to \p FnTy in \p CallArgs.
static void coerceArguments(IRBuilder<> &Builder, FunctionType *FnTy,
                            ArrayRef<Value *> FnArgs,
                            SmallVectorImpl<Value *> &CallArgs) {
  size_t ArgIdx = 0;
  for (auto *ParamTy : FnTy->params()) {
    assert(ArgIdx < FnArgs.size());
    if (ParamTy != FnArgs[ArgIdx]->getType())
      CallArgs.push_back(
          Builder.CreateBitOrPointerCast(FnArgs[ArgIdx], ParamTy));
    else
      CallArgs.push_back(FnArgs[ArgIdx]);
    ++ArgIdx;
  }
}

CallInst *coro::createMustTailCall(DebugLoc Loc, Function *MustTailCallFn,
                                   ArrayRef<Value *> Arguments,
                                   IRBuilder<> &Builder) {
  auto *FnTy = MustTailCallFn->getFunctionType();
  // Coerce the arguments; LLVM optimizations seem to ignore the types in
  // vararg functions and throw away casts in optimized mode.
  SmallVector<Value *, 8> CallArgs;
  coerceArguments(Builder, FnTy, Arguments, CallArgs);

  auto *TailCall = Builder.CreateCall(FnTy, MustTailCallFn, CallArgs);
  TailCall->setTailCallKind(CallInst::TCK_MustTail);
  TailCall->setDebugLoc(Loc);
  TailCall->setCallingConv(MustTailCallFn->getCallingConv());
  return TailCall;
}

static void splitAsyncCoroutine(Function &F, coro::Shape &Shape,
                                SmallVectorImpl<Function *> &Clones) {
  assert(Shape.ABI == coro::ABI::Async);
  assert(Clones.empty());
  // Reset various things that the optimizer might have decided it
  // "knows" about the coroutine function due to not seeing a return.
  F.removeFnAttr(Attribute::NoReturn);
  F.removeRetAttr(Attribute::NoAlias);
  F.removeRetAttr(Attribute::NonNull);

  auto &Context = F.getContext();
  auto *Int8PtrTy = Type::getInt8PtrTy(Context);

  auto *Id = cast<CoroIdAsyncInst>(Shape.CoroBegin->getId());
  IRBuilder<> Builder(Id);

  auto *FramePtr = Id->getStorage();
  FramePtr = Builder.CreateBitOrPointerCast(FramePtr, Int8PtrTy);
  FramePtr = Builder.CreateConstInBoundsGEP1_32(
      Type::getInt8Ty(Context), FramePtr, Shape.AsyncLowering.FrameOffset,
      "async.ctx.frameptr");

  // Map all uses of llvm.coro.begin to the allocated frame pointer.
  {
    // Make sure we don't invalidate Shape.FramePtr.
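    // (replaceAllUsesWith below may rewrite Shape.FramePtr itself; the
    //  TrackingVH follows any such replacement so we can read the updated
    //  value back afterwards.)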
    TrackingVH<Value> Handle(Shape.FramePtr);
    Shape.CoroBegin->replaceAllUsesWith(FramePtr);
    Shape.FramePtr = Handle.getValPtr();
  }

  // Create all the functions in order after the main function.
  auto NextF = std::next(F.getIterator());

  // Create a continuation function for each of the suspend points.
  Clones.reserve(Shape.CoroSuspends.size());
  for (size_t Idx = 0, End = Shape.CoroSuspends.size(); Idx != End; ++Idx) {
    auto *Suspend = cast<CoroSuspendAsyncInst>(Shape.CoroSuspends[Idx]);

    // Create the clone declaration.
    auto ResumeNameSuffix = ".resume.";
    auto ProjectionFunctionName =
        Suspend->getAsyncContextProjectionFunction()->getName();
    bool UseSwiftMangling = false;
    if (ProjectionFunctionName.equals("__swift_async_resume_project_context")) {
      ResumeNameSuffix = "TQ";
      UseSwiftMangling = true;
    } else if (ProjectionFunctionName.equals(
                   "__swift_async_resume_get_context")) {
      ResumeNameSuffix = "TY";
      UseSwiftMangling = true;
    }
    auto *Continuation = createCloneDeclaration(
        F, Shape,
        UseSwiftMangling ? ResumeNameSuffix + Twine(Idx) + "_"
                         : ResumeNameSuffix + Twine(Idx),
        NextF, Suspend);
    Clones.push_back(Continuation);

    // Insert a branch to a new return block immediately before the suspend
    // point.
    auto *SuspendBB = Suspend->getParent();
    auto *NewSuspendBB = SuspendBB->splitBasicBlock(Suspend);
    auto *Branch = cast<BranchInst>(SuspendBB->getTerminator());

    // Place it before the first suspend.
    auto *ReturnBB =
        BasicBlock::Create(F.getContext(), "coro.return", &F, NewSuspendBB);
    Branch->setSuccessor(0, ReturnBB);

    IRBuilder<> Builder(ReturnBB);

    // Insert the call to the must tail call function and inline it.
    auto *Fn = Suspend->getMustTailCallFunction();
    SmallVector<Value *, 8> Args(Suspend->args());
    auto FnArgs = ArrayRef<Value *>(Args).drop_front(
        CoroSuspendAsyncInst::MustTailCallFuncArg + 1);
    auto *TailCall =
        coro::createMustTailCall(Suspend->getDebugLoc(), Fn, FnArgs, Builder);
    Builder.CreateRetVoid();
    InlineFunctionInfo FnInfo;
    auto InlineRes = InlineFunction(*TailCall, FnInfo);
    assert(InlineRes.isSuccess() && "Expected inlining to succeed");
    (void)InlineRes;

    // Replace the llvm.coro.async.resume intrinsic call.
    replaceAsyncResumeFunction(Suspend, Continuation);
  }

  assert(Clones.size() == Shape.CoroSuspends.size());
  for (size_t Idx = 0, End = Shape.CoroSuspends.size(); Idx != End; ++Idx) {
    auto *Suspend = Shape.CoroSuspends[Idx];
    auto *Clone = Clones[Idx];

    CoroCloner(F, "resume." + Twine(Idx), Shape, Clone, Suspend).create();
  }
}

static void splitRetconCoroutine(Function &F, coro::Shape &Shape,
                                 SmallVectorImpl<Function *> &Clones) {
  assert(Shape.ABI == coro::ABI::Retcon ||
         Shape.ABI == coro::ABI::RetconOnce);
  assert(Clones.empty());

  // Reset various things that the optimizer might have decided it
  // "knows" about the coroutine function due to not seeing a return.
  F.removeFnAttr(Attribute::NoReturn);
  F.removeRetAttr(Attribute::NoAlias);
  F.removeRetAttr(Attribute::NonNull);

  // Allocate the frame.
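  // (If the frame fits in the caller-provided buffer, we use that storage
  //  directly; otherwise we call the coroutine's allocator and stash the
  //  returned pointer in the buffer, which is what the two branches below
  //  implement.)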
  auto *Id = cast<AnyCoroIdRetconInst>(Shape.CoroBegin->getId());
  Value *RawFramePtr;
  if (Shape.RetconLowering.IsFrameInlineInStorage) {
    RawFramePtr = Id->getStorage();
  } else {
    IRBuilder<> Builder(Id);

    // Determine the size of the frame.
    const DataLayout &DL = F.getParent()->getDataLayout();
    auto Size = DL.getTypeAllocSize(Shape.FrameTy);

    // Allocate. We don't need to update the call graph node because we're
    // going to recompute it from scratch after splitting.
    // FIXME: pass the required alignment
    RawFramePtr = Shape.emitAlloc(Builder, Builder.getInt64(Size), nullptr);
    RawFramePtr =
        Builder.CreateBitCast(RawFramePtr, Shape.CoroBegin->getType());

    // Stash the allocated frame pointer in the continuation storage.
    auto Dest = Builder.CreateBitCast(Id->getStorage(),
                                      RawFramePtr->getType()->getPointerTo());
    Builder.CreateStore(RawFramePtr, Dest);
  }

  // Map all uses of llvm.coro.begin to the allocated frame pointer.
  {
    // Make sure we don't invalidate Shape.FramePtr.
    TrackingVH<Value> Handle(Shape.FramePtr);
    Shape.CoroBegin->replaceAllUsesWith(RawFramePtr);
    Shape.FramePtr = Handle.getValPtr();
  }

  // Create a unique return block.
  BasicBlock *ReturnBB = nullptr;
  SmallVector<PHINode *, 4> ReturnPHIs;

  // Create all the functions in order after the main function.
  auto NextF = std::next(F.getIterator());

  // Create a continuation function for each of the suspend points.
  Clones.reserve(Shape.CoroSuspends.size());
  for (size_t i = 0, e = Shape.CoroSuspends.size(); i != e; ++i) {
    auto Suspend = cast<CoroSuspendRetconInst>(Shape.CoroSuspends[i]);

    // Create the clone declaration.
    auto Continuation =
        createCloneDeclaration(F, Shape, ".resume." + Twine(i), NextF, nullptr);
    Clones.push_back(Continuation);

    // Insert a branch to the unified return block immediately before
    // the suspend point.
    auto SuspendBB = Suspend->getParent();
    auto NewSuspendBB = SuspendBB->splitBasicBlock(Suspend);
    auto Branch = cast<BranchInst>(SuspendBB->getTerminator());

    // Create the unified return block.
    if (!ReturnBB) {
      // Place it before the first suspend.
      ReturnBB = BasicBlock::Create(F.getContext(), "coro.return", &F,
                                    NewSuspendBB);
      Shape.RetconLowering.ReturnBlock = ReturnBB;

      IRBuilder<> Builder(ReturnBB);

      // Create PHIs for all the return values.
      assert(ReturnPHIs.empty());

      // First, the continuation.
      ReturnPHIs.push_back(Builder.CreatePHI(Continuation->getType(),
                                             Shape.CoroSuspends.size()));

      // Next, all the directly-yielded values.
      for (auto *ResultTy : Shape.getRetconResultTypes())
        ReturnPHIs.push_back(Builder.CreatePHI(ResultTy,
                                               Shape.CoroSuspends.size()));

      // Build the return value.
      auto RetTy = F.getReturnType();

      // Cast the continuation value if necessary.
      // We can't rely on the types matching up because that type would
      // have to be infinite.
      auto CastedContinuationTy =
          (ReturnPHIs.size() == 1 ? RetTy : RetTy->getStructElementType(0));
      auto *CastedContinuation =
          Builder.CreateBitCast(ReturnPHIs[0], CastedContinuationTy);

      Value *RetV;
      if (ReturnPHIs.size() == 1) {
        RetV = CastedContinuation;
      } else {
        RetV = PoisonValue::get(RetTy);
        RetV = Builder.CreateInsertValue(RetV, CastedContinuation, 0);
        for (size_t I = 1, E = ReturnPHIs.size(); I != E; ++I)
          RetV = Builder.CreateInsertValue(RetV, ReturnPHIs[I], I);
      }

      Builder.CreateRet(RetV);
    }

    // Branch to the return block.
    Branch->setSuccessor(0, ReturnBB);
    ReturnPHIs[0]->addIncoming(Continuation, SuspendBB);
    size_t NextPHIIndex = 1;
    for (auto &VUse : Suspend->value_operands())
      ReturnPHIs[NextPHIIndex++]->addIncoming(&*VUse, SuspendBB);
    assert(NextPHIIndex == ReturnPHIs.size());
  }

  assert(Clones.size() == Shape.CoroSuspends.size());
  for (size_t i = 0, e = Shape.CoroSuspends.size(); i != e; ++i) {
    auto Suspend = Shape.CoroSuspends[i];
    auto Clone = Clones[i];

    CoroCloner(F, "resume." + Twine(i), Shape, Clone, Suspend).create();
  }
}

namespace {
class PrettyStackTraceFunction : public PrettyStackTraceEntry {
  Function &F;

public:
  PrettyStackTraceFunction(Function &F) : F(F) {}
  void print(raw_ostream &OS) const override {
    OS << "While splitting coroutine ";
    F.printAsOperand(OS, /*print type*/ false, F.getParent());
    OS << "\n";
  }
};
} // end anonymous namespace

static coro::Shape
splitCoroutine(Function &F, SmallVectorImpl<Function *> &Clones,
               TargetTransformInfo &TTI, bool OptimizeFrame,
               std::function<bool(Instruction &)> MaterializableCallback) {
  PrettyStackTraceFunction prettyStackTrace(F);

  // The suspend-crossing algorithm in buildCoroutineFrame gets tripped
  // up by uses in unreachable blocks, so remove them as a first pass.
  removeUnreachableBlocks(F);

  coro::Shape Shape(F, OptimizeFrame);
  if (!Shape.CoroBegin)
    return Shape;

  simplifySuspendPoints(Shape);
  buildCoroutineFrame(F, Shape, MaterializableCallback);
  replaceFrameSizeAndAlignment(Shape);

  // If there are no suspend points, no split is required; just remove
  // the allocation and deallocation blocks, as they are not needed.
  if (Shape.CoroSuspends.empty()) {
    handleNoSuspendCoroutine(Shape);
  } else {
    switch (Shape.ABI) {
    case coro::ABI::Switch:
      splitSwitchCoroutine(F, Shape, Clones, TTI);
      break;
    case coro::ABI::Async:
      splitAsyncCoroutine(F, Shape, Clones);
      break;
    case coro::ABI::Retcon:
    case coro::ABI::RetconOnce:
      splitRetconCoroutine(F, Shape, Clones);
      break;
    }
  }

  // Replace all the swifterror operations in the original function.
  // This invalidates SwiftErrorOps in the Shape.
  replaceSwiftErrorOps(F, Shape, nullptr);

  // Salvage debug intrinsics that point into the coroutine frame in the
  // original function. The Cloner has already salvaged debug info in the new
  // coroutine funclets.
  SmallDenseMap<Argument *, AllocaInst *, 4> ArgToAllocaMap;
  for (auto *DDI : collectDbgVariableIntrinsics(F))
    coro::salvageDebugInfo(ArgToAllocaMap, DDI, Shape.OptimizeFrame);

  return Shape;
}

/// Remove calls to llvm.coro.end in the original function.
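/// For the non-switch ABIs a coro.end in the ramp function marks the point
/// where control returns (or unwinds) to the caller, so each such call is
/// lowered to the appropriate return sequence rather than a resumption.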
static void removeCoroEnds(const coro::Shape &Shape) {
  for (auto *End : Shape.CoroEnds) {
    replaceCoroEnd(End, Shape, Shape.FramePtr, /*in resume*/ false, nullptr);
  }
}

static void updateCallGraphAfterCoroutineSplit(
    LazyCallGraph::Node &N, const coro::Shape &Shape,
    const SmallVectorImpl<Function *> &Clones, LazyCallGraph::SCC &C,
    LazyCallGraph &CG, CGSCCAnalysisManager &AM, CGSCCUpdateResult &UR,
    FunctionAnalysisManager &FAM) {
  if (!Shape.CoroBegin)
    return;

  if (Shape.ABI != coro::ABI::Switch)
    removeCoroEnds(Shape);
  else {
    for (llvm::AnyCoroEndInst *End : Shape.CoroEnds) {
      auto &Context = End->getContext();
      End->replaceAllUsesWith(ConstantInt::getFalse(Context));
      End->eraseFromParent();
    }
  }

  if (!Clones.empty()) {
    switch (Shape.ABI) {
    case coro::ABI::Switch:
      // Each clone in the Switch lowering is independent of the other clones.
      // Let the LazyCallGraph know about each one separately.
      for (Function *Clone : Clones)
        CG.addSplitFunction(N.getFunction(), *Clone);
      break;
    case coro::ABI::Async:
    case coro::ABI::Retcon:
    case coro::ABI::RetconOnce:
      // Each clone in the Async/Retcon lowering references the other clones.
      // Let the LazyCallGraph know about all of them at once.
      if (!Clones.empty())
        CG.addSplitRefRecursiveFunctions(N.getFunction(), Clones);
      break;
    }

    // Let the CGSCC infra handle the changes to the original function.
    updateCGAndAnalysisManagerForCGSCCPass(CG, C, N, AM, UR, FAM);
  }

  // Do some cleanup and let the CGSCC infra see if we've cleaned up any edges
  // to the split functions.
  postSplitCleanup(N.getFunction());
  updateCGAndAnalysisManagerForFunctionPass(CG, C, N, AM, UR, FAM);
}

/// Replace a call to llvm.coro.prepare.retcon.
static void replacePrepare(CallInst *Prepare, LazyCallGraph &CG,
                           LazyCallGraph::SCC &C) {
  auto CastFn = Prepare->getArgOperand(0); // as an i8*
  auto Fn = CastFn->stripPointerCasts();   // as its original type

  // Attempt to peephole this pattern:
  //    %0 = bitcast [[TYPE]] @some_function to i8*
  //    %1 = call @llvm.coro.prepare.retcon(i8* %0)
  //    %2 = bitcast %1 to [[TYPE]]
  // ==>
  //    %2 = @some_function
  for (Use &U : llvm::make_early_inc_range(Prepare->uses())) {
    // Look for bitcasts back to the original function type.
    auto *Cast = dyn_cast<BitCastInst>(U.getUser());
    if (!Cast || Cast->getType() != Fn->getType())
      continue;

    // Replace and remove the cast.
    Cast->replaceAllUsesWith(Fn);
    Cast->eraseFromParent();
  }

  // Replace any remaining uses with the function as an i8*.
  // This can never directly be a callee, so we don't need to update CG.
  Prepare->replaceAllUsesWith(CastFn);
  Prepare->eraseFromParent();

  // Kill dead bitcasts.
  while (auto *Cast = dyn_cast<BitCastInst>(CastFn)) {
    if (!Cast->use_empty())
      break;
    CastFn = Cast->getOperand(0);
    Cast->eraseFromParent();
  }
}

static bool replaceAllPrepares(Function *PrepareFn, LazyCallGraph &CG,
                               LazyCallGraph::SCC &C) {
  bool Changed = false;
  for (Use &P : llvm::make_early_inc_range(PrepareFn->uses())) {
    // Intrinsics can only be used in calls.
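    // (The verifier rejects address-taken intrinsics, so every use of the
    //  prepare intrinsic is necessarily the callee operand of a call,
    //  which makes the cast below safe.)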
    auto *Prepare = cast<CallInst>(P.getUser());
    replacePrepare(Prepare, CG, C);
    Changed = true;
  }

  return Changed;
}

static void addPrepareFunction(const Module &M,
                               SmallVectorImpl<Function *> &Fns,
                               StringRef Name) {
  auto *PrepareFn = M.getFunction(Name);
  if (PrepareFn && !PrepareFn->use_empty())
    Fns.push_back(PrepareFn);
}

CoroSplitPass::CoroSplitPass(bool OptimizeFrame)
    : MaterializableCallback(coro::defaultMaterializable),
      OptimizeFrame(OptimizeFrame) {}

PreservedAnalyses CoroSplitPass::run(LazyCallGraph::SCC &C,
                                     CGSCCAnalysisManager &AM,
                                     LazyCallGraph &CG, CGSCCUpdateResult &UR) {
  // NB: One invariant of a valid LazyCallGraph::SCC is that it must contain a
  // non-zero number of nodes, so we assume that here and grab the first
  // node's function's module.
  Module &M = *C.begin()->getFunction().getParent();
  auto &FAM =
      AM.getResult<FunctionAnalysisManagerCGSCCProxy>(C, CG).getManager();

  // Check for uses of llvm.coro.prepare.retcon/async.
  SmallVector<Function *, 2> PrepareFns;
  addPrepareFunction(M, PrepareFns, "llvm.coro.prepare.retcon");
  addPrepareFunction(M, PrepareFns, "llvm.coro.prepare.async");

  // Find coroutines for processing.
  SmallVector<LazyCallGraph::Node *> Coroutines;
  for (LazyCallGraph::Node &N : C)
    if (N.getFunction().isPresplitCoroutine())
      Coroutines.push_back(&N);

  if (Coroutines.empty() && PrepareFns.empty())
    return PreservedAnalyses::all();

  if (Coroutines.empty()) {
    for (auto *PrepareFn : PrepareFns) {
      replaceAllPrepares(PrepareFn, CG, C);
    }
  }

  // Split all the coroutines.
  for (LazyCallGraph::Node *N : Coroutines) {
    Function &F = N->getFunction();
    LLVM_DEBUG(dbgs() << "CoroSplit: Processing coroutine '" << F.getName()
                      << "'\n");
    F.setSplittedCoroutine();

    SmallVector<Function *, 4> Clones;
    auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
    const coro::Shape Shape =
        splitCoroutine(F, Clones, FAM.getResult<TargetIRAnalysis>(F),
                       OptimizeFrame, MaterializableCallback);
    updateCallGraphAfterCoroutineSplit(*N, Shape, Clones, C, CG, AM, UR, FAM);

    ORE.emit([&]() {
      return OptimizationRemark(DEBUG_TYPE, "CoroSplit", &F)
             << "Split '" << ore::NV("function", F.getName())
             << "' (frame_size=" << ore::NV("frame_size", Shape.FrameSize)
             << ", align=" << ore::NV("align", Shape.FrameAlign.value()) << ")";
    });

    if (!Shape.CoroSuspends.empty()) {
      // Run the CGSCC pipeline on the original and newly split functions.
      UR.CWorklist.insert(&C);
      for (Function *Clone : Clones)
        UR.CWorklist.insert(CG.lookupSCC(CG.get(*Clone)));
    }
  }

  if (!PrepareFns.empty()) {
    for (auto *PrepareFn : PrepareFns) {
      replaceAllPrepares(PrepareFn, CG, C);
    }
  }

  return PreservedAnalyses::none();
}
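// For testing, the split can be exercised in isolation on a pre-split module
// with an invocation along the lines of (illustrative, not part of this
// file's API):
//   opt -passes='cgscc(coro-split)' input.ll -S
// In the standard pipelines CoroSplitPass is scheduled between CoroEarly and
// CoroCleanup as part of the usual coroutine lowering sequence.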