1 //===- CoroSplit.cpp - Converts a coroutine into a state machine ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 // This pass builds the coroutine frame and outlines resume and destroy parts
9 // of the coroutine into separate functions.
10 //
// We present a coroutine to LLVM as an ordinary function with suspension
// points marked up with intrinsics. We let the optimizer party on the coroutine
// as a single function for as long as possible. Shortly before the coroutine is
// eligible to be inlined into its callers, we split up the coroutine into parts
// corresponding to the initial, resume and destroy invocations of the
// coroutine, add them to the current SCC and restart the IPO pipeline to
// optimize the coroutine subfunctions we extracted before proceeding to the
// caller of the coroutine.
19 //===----------------------------------------------------------------------===//
20 
21 #include "CoroInstr.h"
22 #include "CoroInternal.h"
23 #include "llvm/ADT/DenseMap.h"
24 #include "llvm/ADT/SmallPtrSet.h"
25 #include "llvm/ADT/SmallVector.h"
26 #include "llvm/ADT/StringRef.h"
27 #include "llvm/ADT/Twine.h"
28 #include "llvm/Analysis/CallGraph.h"
29 #include "llvm/Analysis/CallGraphSCCPass.h"
30 #include "llvm/IR/Argument.h"
31 #include "llvm/IR/Attributes.h"
32 #include "llvm/IR/BasicBlock.h"
33 #include "llvm/IR/CFG.h"
34 #include "llvm/IR/CallSite.h"
35 #include "llvm/IR/CallingConv.h"
36 #include "llvm/IR/Constants.h"
37 #include "llvm/IR/DataLayout.h"
38 #include "llvm/IR/DerivedTypes.h"
39 #include "llvm/IR/Function.h"
40 #include "llvm/IR/GlobalValue.h"
41 #include "llvm/IR/GlobalVariable.h"
42 #include "llvm/IR/IRBuilder.h"
43 #include "llvm/IR/InstIterator.h"
44 #include "llvm/IR/InstrTypes.h"
45 #include "llvm/IR/Instruction.h"
46 #include "llvm/IR/Instructions.h"
47 #include "llvm/IR/IntrinsicInst.h"
48 #include "llvm/IR/LLVMContext.h"
49 #include "llvm/IR/LegacyPassManager.h"
50 #include "llvm/IR/Module.h"
51 #include "llvm/IR/Type.h"
52 #include "llvm/IR/Value.h"
53 #include "llvm/IR/Verifier.h"
54 #include "llvm/InitializePasses.h"
55 #include "llvm/Pass.h"
56 #include "llvm/Support/Casting.h"
57 #include "llvm/Support/Debug.h"
58 #include "llvm/Support/PrettyStackTrace.h"
59 #include "llvm/Support/raw_ostream.h"
60 #include "llvm/Transforms/Scalar.h"
61 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
62 #include "llvm/Transforms/Utils/Cloning.h"
63 #include "llvm/Transforms/Utils/Local.h"
64 #include "llvm/Transforms/Utils/ValueMapper.h"
65 #include <cassert>
66 #include <cstddef>
67 #include <cstdint>
68 #include <initializer_list>
69 #include <iterator>
70 
71 using namespace llvm;
72 
73 #define DEBUG_TYPE "coro-split"
74 
75 namespace {
76 
77 /// A little helper class for building
78 class CoroCloner {
79 public:
80   enum class Kind {
81     /// The shared resume function for a switch lowering.
82     SwitchResume,
83 
84     /// The shared unwind function for a switch lowering.
85     SwitchUnwind,
86 
87     /// The shared cleanup function for a switch lowering.
88     SwitchCleanup,
89 
90     /// An individual continuation function.
91     Continuation,
92   };
93 private:
94   Function &OrigF;
95   Function *NewF;
96   const Twine &Suffix;
97   coro::Shape &Shape;
98   Kind FKind;
99   ValueToValueMapTy VMap;
100   IRBuilder<> Builder;
101   Value *NewFramePtr = nullptr;
102   Value *SwiftErrorSlot = nullptr;
103 
104   /// The active suspend instruction; meaningful only for continuation ABIs.
105   AnyCoroSuspendInst *ActiveSuspend = nullptr;
106 
107 public:
108   /// Create a cloner for a switch lowering.
109   CoroCloner(Function &OrigF, const Twine &Suffix, coro::Shape &Shape,
110              Kind FKind)
111     : OrigF(OrigF), NewF(nullptr), Suffix(Suffix), Shape(Shape),
112       FKind(FKind), Builder(OrigF.getContext()) {
113     assert(Shape.ABI == coro::ABI::Switch);
114   }
115 
116   /// Create a cloner for a continuation lowering.
117   CoroCloner(Function &OrigF, const Twine &Suffix, coro::Shape &Shape,
118              Function *NewF, AnyCoroSuspendInst *ActiveSuspend)
119     : OrigF(OrigF), NewF(NewF), Suffix(Suffix), Shape(Shape),
120       FKind(Kind::Continuation), Builder(OrigF.getContext()),
121       ActiveSuspend(ActiveSuspend) {
122     assert(Shape.ABI == coro::ABI::Retcon ||
123            Shape.ABI == coro::ABI::RetconOnce);
124     assert(NewF && "need existing function for continuation");
125     assert(ActiveSuspend && "need active suspend point for continuation");
126   }
127 
128   Function *getFunction() const {
129     assert(NewF != nullptr && "declaration not yet set");
130     return NewF;
131   }
132 
133   void create();
134 
135 private:
136   bool isSwitchDestroyFunction() {
137     switch (FKind) {
138     case Kind::Continuation:
139     case Kind::SwitchResume:
140       return false;
141     case Kind::SwitchUnwind:
142     case Kind::SwitchCleanup:
143       return true;
144     }
145     llvm_unreachable("Unknown CoroCloner::Kind enum");
146   }
147 
148   void createDeclaration();
149   void replaceEntryBlock();
150   Value *deriveNewFramePointer();
151   void replaceRetconSuspendUses();
152   void replaceCoroSuspends();
153   void replaceCoroEnds();
154   void replaceSwiftErrorOps();
155   void handleFinalSuspend();
156   void maybeFreeContinuationStorage();
157 };
158 
159 } // end anonymous namespace
160 
161 static void maybeFreeRetconStorage(IRBuilder<> &Builder,
162                                    const coro::Shape &Shape, Value *FramePtr,
163                                    CallGraph *CG) {
164   assert(Shape.ABI == coro::ABI::Retcon ||
165          Shape.ABI == coro::ABI::RetconOnce);
166   if (Shape.RetconLowering.IsFrameInlineInStorage)
167     return;
168 
169   Shape.emitDealloc(Builder, FramePtr, CG);
170 }
171 
172 /// Replace a non-unwind call to llvm.coro.end.
173 static void replaceFallthroughCoroEnd(CoroEndInst *End,
174                                       const coro::Shape &Shape, Value *FramePtr,
175                                       bool InResume, CallGraph *CG) {
176   // Start inserting right before the coro.end.
177   IRBuilder<> Builder(End);
178 
179   // Create the return instruction.
180   switch (Shape.ABI) {
181   // The cloned functions in switch-lowering always return void.
182   case coro::ABI::Switch:
183     // coro.end doesn't immediately end the coroutine in the main function
184     // in this lowering, because we need to deallocate the coroutine.
185     if (!InResume)
186       return;
187     Builder.CreateRetVoid();
188     break;
189 
190   // In unique continuation lowering, the continuations always return void.
191   // But we may have implicitly allocated storage.
192   case coro::ABI::RetconOnce:
193     maybeFreeRetconStorage(Builder, Shape, FramePtr, CG);
194     Builder.CreateRetVoid();
195     break;
196 
197   // In non-unique continuation lowering, we signal completion by returning
198   // a null continuation.
199   case coro::ABI::Retcon: {
200     maybeFreeRetconStorage(Builder, Shape, FramePtr, CG);
201     auto RetTy = Shape.getResumeFunctionType()->getReturnType();
202     auto RetStructTy = dyn_cast<StructType>(RetTy);
203     PointerType *ContinuationTy =
204       cast<PointerType>(RetStructTy ? RetStructTy->getElementType(0) : RetTy);
205 
206     Value *ReturnValue = ConstantPointerNull::get(ContinuationTy);
207     if (RetStructTy) {
208       ReturnValue = Builder.CreateInsertValue(UndefValue::get(RetStructTy),
209                                               ReturnValue, 0);
210     }
211     Builder.CreateRet(ReturnValue);
212     break;
213   }
214   }
215 
216   // Remove the rest of the block, by splitting it into an unreachable block.
217   auto *BB = End->getParent();
218   BB->splitBasicBlock(End);
219   BB->getTerminator()->eraseFromParent();
220 }
221 
222 /// Replace an unwind call to llvm.coro.end.
223 static void replaceUnwindCoroEnd(CoroEndInst *End, const coro::Shape &Shape,
224                                  Value *FramePtr, bool InResume, CallGraph *CG){
225   IRBuilder<> Builder(End);
226 
227   switch (Shape.ABI) {
228   // In switch-lowering, this does nothing in the main function.
229   case coro::ABI::Switch:
230     if (!InResume)
231       return;
232     break;
233 
234   // In continuation-lowering, this frees the continuation storage.
235   case coro::ABI::Retcon:
236   case coro::ABI::RetconOnce:
237     maybeFreeRetconStorage(Builder, Shape, FramePtr, CG);
238     break;
239   }
240 
241   // If coro.end has an associated bundle, add cleanupret instruction.
242   if (auto Bundle = End->getOperandBundle(LLVMContext::OB_funclet)) {
243     auto *FromPad = cast<CleanupPadInst>(Bundle->Inputs[0]);
244     auto *CleanupRet = Builder.CreateCleanupRet(FromPad, nullptr);
245     End->getParent()->splitBasicBlock(End);
246     CleanupRet->getParent()->getTerminator()->eraseFromParent();
247   }
248 }
249 
250 static void replaceCoroEnd(CoroEndInst *End, const coro::Shape &Shape,
251                            Value *FramePtr, bool InResume, CallGraph *CG) {
252   if (End->isUnwind())
253     replaceUnwindCoroEnd(End, Shape, FramePtr, InResume, CG);
254   else
255     replaceFallthroughCoroEnd(End, Shape, FramePtr, InResume, CG);
256 
257   auto &Context = End->getContext();
258   End->replaceAllUsesWith(InResume ? ConstantInt::getTrue(Context)
259                                    : ConstantInt::getFalse(Context));
260   End->eraseFromParent();
261 }
262 
263 // Create an entry block for a resume function with a switch that will jump to
264 // suspend points.
265 static void createResumeEntryBlock(Function &F, coro::Shape &Shape) {
266   assert(Shape.ABI == coro::ABI::Switch);
267   LLVMContext &C = F.getContext();
268 
269   // resume.entry:
270   //  %index.addr = getelementptr inbounds %f.Frame, %f.Frame* %FramePtr, i32 0,
271   //  i32 2
272   //  % index = load i32, i32* %index.addr
273   //  switch i32 %index, label %unreachable [
274   //    i32 0, label %resume.0
275   //    i32 1, label %resume.1
276   //    ...
277   //  ]
278 
279   auto *NewEntry = BasicBlock::Create(C, "resume.entry", &F);
280   auto *UnreachBB = BasicBlock::Create(C, "unreachable", &F);
281 
282   IRBuilder<> Builder(NewEntry);
283   auto *FramePtr = Shape.FramePtr;
284   auto *FrameTy = Shape.FrameTy;
285   auto *GepIndex = Builder.CreateStructGEP(
286       FrameTy, FramePtr, coro::Shape::SwitchFieldIndex::Index, "index.addr");
287   auto *Index = Builder.CreateLoad(Shape.getIndexType(), GepIndex, "index");
288   auto *Switch =
289       Builder.CreateSwitch(Index, UnreachBB, Shape.CoroSuspends.size());
290   Shape.SwitchLowering.ResumeSwitch = Switch;
291 
292   size_t SuspendIndex = 0;
293   for (auto *AnyS : Shape.CoroSuspends) {
294     auto *S = cast<CoroSuspendInst>(AnyS);
295     ConstantInt *IndexVal = Shape.getIndex(SuspendIndex);
296 
297     // Replace CoroSave with a store to Index:
298     //    %index.addr = getelementptr %f.frame... (index field number)
299     //    store i32 0, i32* %index.addr1
300     auto *Save = S->getCoroSave();
301     Builder.SetInsertPoint(Save);
302     if (S->isFinal()) {
303       // Final suspend point is represented by storing zero in ResumeFnAddr.
304       auto *GepIndex = Builder.CreateStructGEP(FrameTy, FramePtr,
305                                  coro::Shape::SwitchFieldIndex::Resume,
306                                   "ResumeFn.addr");
307       auto *NullPtr = ConstantPointerNull::get(cast<PointerType>(
308           cast<PointerType>(GepIndex->getType())->getElementType()));
309       Builder.CreateStore(NullPtr, GepIndex);
310     } else {
311       auto *GepIndex = Builder.CreateStructGEP(
312           FrameTy, FramePtr, coro::Shape::SwitchFieldIndex::Index, "index.addr");
313       Builder.CreateStore(IndexVal, GepIndex);
314     }
315     Save->replaceAllUsesWith(ConstantTokenNone::get(C));
316     Save->eraseFromParent();
317 
318     // Split block before and after coro.suspend and add a jump from an entry
319     // switch:
320     //
321     //  whateverBB:
322     //    whatever
323     //    %0 = call i8 @llvm.coro.suspend(token none, i1 false)
324     //    switch i8 %0, label %suspend[i8 0, label %resume
325     //                                 i8 1, label %cleanup]
326     // becomes:
327     //
328     //  whateverBB:
329     //     whatever
330     //     br label %resume.0.landing
331     //
332     //  resume.0: ; <--- jump from the switch in the resume.entry
333     //     %0 = tail call i8 @llvm.coro.suspend(token none, i1 false)
334     //     br label %resume.0.landing
335     //
336     //  resume.0.landing:
337     //     %1 = phi i8[-1, %whateverBB], [%0, %resume.0]
338     //     switch i8 % 1, label %suspend [i8 0, label %resume
339     //                                    i8 1, label %cleanup]
340 
341     auto *SuspendBB = S->getParent();
342     auto *ResumeBB =
343         SuspendBB->splitBasicBlock(S, "resume." + Twine(SuspendIndex));
344     auto *LandingBB = ResumeBB->splitBasicBlock(
345         S->getNextNode(), ResumeBB->getName() + Twine(".landing"));
346     Switch->addCase(IndexVal, ResumeBB);
347 
348     cast<BranchInst>(SuspendBB->getTerminator())->setSuccessor(0, LandingBB);
349     auto *PN = PHINode::Create(Builder.getInt8Ty(), 2, "", &LandingBB->front());
350     S->replaceAllUsesWith(PN);
351     PN->addIncoming(Builder.getInt8(-1), SuspendBB);
352     PN->addIncoming(S, ResumeBB);
353 
354     ++SuspendIndex;
355   }
356 
357   Builder.SetInsertPoint(UnreachBB);
358   Builder.CreateUnreachable();
359 
360   Shape.SwitchLowering.ResumeEntryBlock = NewEntry;
361 }
362 
363 
364 // Rewrite final suspend point handling. We do not use suspend index to
365 // represent the final suspend point. Instead we zero-out ResumeFnAddr in the
366 // coroutine frame, since it is undefined behavior to resume a coroutine
367 // suspended at the final suspend point. Thus, in the resume function, we can
368 // simply remove the last case (when coro::Shape is built, the final suspend
369 // point (if present) is always the last element of CoroSuspends array).
370 // In the destroy function, we add a code sequence to check if ResumeFnAddress
371 // is Null, and if so, jump to the appropriate label to handle cleanup from the
372 // final suspend point.
373 void CoroCloner::handleFinalSuspend() {
374   assert(Shape.ABI == coro::ABI::Switch &&
375          Shape.SwitchLowering.HasFinalSuspend);
376   auto *Switch = cast<SwitchInst>(VMap[Shape.SwitchLowering.ResumeSwitch]);
377   auto FinalCaseIt = std::prev(Switch->case_end());
378   BasicBlock *ResumeBB = FinalCaseIt->getCaseSuccessor();
379   Switch->removeCase(FinalCaseIt);
380   if (isSwitchDestroyFunction()) {
381     BasicBlock *OldSwitchBB = Switch->getParent();
382     auto *NewSwitchBB = OldSwitchBB->splitBasicBlock(Switch, "Switch");
383     Builder.SetInsertPoint(OldSwitchBB->getTerminator());
384     auto *GepIndex = Builder.CreateStructGEP(Shape.FrameTy, NewFramePtr,
385                                        coro::Shape::SwitchFieldIndex::Resume,
386                                              "ResumeFn.addr");
387     auto *Load = Builder.CreateLoad(Shape.getSwitchResumePointerType(),
388                                     GepIndex);
389     auto *Cond = Builder.CreateIsNull(Load);
390     Builder.CreateCondBr(Cond, ResumeBB, NewSwitchBB);
391     OldSwitchBB->getTerminator()->eraseFromParent();
392   }
393 }
394 
395 static Function *createCloneDeclaration(Function &OrigF, coro::Shape &Shape,
396                                         const Twine &Suffix,
397                                         Module::iterator InsertBefore) {
398   Module *M = OrigF.getParent();
399   auto *FnTy = Shape.getResumeFunctionType();
400 
401   Function *NewF =
402       Function::Create(FnTy, GlobalValue::LinkageTypes::InternalLinkage,
403                        OrigF.getName() + Suffix);
404   NewF->addParamAttr(0, Attribute::NonNull);
405   NewF->addParamAttr(0, Attribute::NoAlias);
406 
407   M->getFunctionList().insert(InsertBefore, NewF);
408 
409   return NewF;
410 }
411 
412 /// Replace uses of the active llvm.coro.suspend.retcon call with the
413 /// arguments to the continuation function.
414 ///
415 /// This assumes that the builder has a meaningful insertion point.
416 void CoroCloner::replaceRetconSuspendUses() {
417   assert(Shape.ABI == coro::ABI::Retcon ||
418          Shape.ABI == coro::ABI::RetconOnce);
419 
420   auto NewS = VMap[ActiveSuspend];
421   if (NewS->use_empty()) return;
422 
423   // Copy out all the continuation arguments after the buffer pointer into
424   // an easily-indexed data structure for convenience.
425   SmallVector<Value*, 8> Args;
426   for (auto I = std::next(NewF->arg_begin()), E = NewF->arg_end(); I != E; ++I)
427     Args.push_back(&*I);
428 
429   // If the suspend returns a single scalar value, we can just do a simple
430   // replacement.
431   if (!isa<StructType>(NewS->getType())) {
432     assert(Args.size() == 1);
433     NewS->replaceAllUsesWith(Args.front());
434     return;
435   }
436 
437   // Try to peephole extracts of an aggregate return.
438   for (auto UI = NewS->use_begin(), UE = NewS->use_end(); UI != UE; ) {
439     auto EVI = dyn_cast<ExtractValueInst>((UI++)->getUser());
440     if (!EVI || EVI->getNumIndices() != 1)
441       continue;
442 
443     EVI->replaceAllUsesWith(Args[EVI->getIndices().front()]);
444     EVI->eraseFromParent();
445   }
446 
447   // If we have no remaining uses, we're done.
448   if (NewS->use_empty()) return;
449 
450   // Otherwise, we need to create an aggregate.
451   Value *Agg = UndefValue::get(NewS->getType());
452   for (size_t I = 0, E = Args.size(); I != E; ++I)
453     Agg = Builder.CreateInsertValue(Agg, Args[I], I);
454 
455   NewS->replaceAllUsesWith(Agg);
456 }
457 
458 void CoroCloner::replaceCoroSuspends() {
459   Value *SuspendResult;
460 
461   switch (Shape.ABI) {
462   // In switch lowering, replace coro.suspend with the appropriate value
463   // for the type of function we're extracting.
464   // Replacing coro.suspend with (0) will result in control flow proceeding to
465   // a resume label associated with a suspend point, replacing it with (1) will
466   // result in control flow proceeding to a cleanup label associated with this
467   // suspend point.
468   case coro::ABI::Switch:
469     SuspendResult = Builder.getInt8(isSwitchDestroyFunction() ? 1 : 0);
470     break;
471 
472   // In returned-continuation lowering, the arguments from earlier
473   // continuations are theoretically arbitrary, and they should have been
474   // spilled.
475   case coro::ABI::RetconOnce:
476   case coro::ABI::Retcon:
477     return;
478   }
479 
480   for (AnyCoroSuspendInst *CS : Shape.CoroSuspends) {
481     // The active suspend was handled earlier.
482     if (CS == ActiveSuspend) continue;
483 
484     auto *MappedCS = cast<AnyCoroSuspendInst>(VMap[CS]);
485     MappedCS->replaceAllUsesWith(SuspendResult);
486     MappedCS->eraseFromParent();
487   }
488 }
489 
490 void CoroCloner::replaceCoroEnds() {
491   for (CoroEndInst *CE : Shape.CoroEnds) {
492     // We use a null call graph because there's no call graph node for
493     // the cloned function yet.  We'll just be rebuilding that later.
494     auto NewCE = cast<CoroEndInst>(VMap[CE]);
495     replaceCoroEnd(NewCE, Shape, NewFramePtr, /*in resume*/ true, nullptr);
496   }
497 }
498 
499 static void replaceSwiftErrorOps(Function &F, coro::Shape &Shape,
500                                  ValueToValueMapTy *VMap) {
501   Value *CachedSlot = nullptr;
502   auto getSwiftErrorSlot = [&](Type *ValueTy) -> Value * {
503     if (CachedSlot) {
504       assert(CachedSlot->getType()->getPointerElementType() == ValueTy &&
505              "multiple swifterror slots in function with different types");
506       return CachedSlot;
507     }
508 
509     // Check if the function has a swifterror argument.
510     for (auto &Arg : F.args()) {
511       if (Arg.isSwiftError()) {
512         CachedSlot = &Arg;
513         assert(Arg.getType()->getPointerElementType() == ValueTy &&
514                "swifterror argument does not have expected type");
515         return &Arg;
516       }
517     }
518 
519     // Create a swifterror alloca.
520     IRBuilder<> Builder(F.getEntryBlock().getFirstNonPHIOrDbg());
521     auto Alloca = Builder.CreateAlloca(ValueTy);
522     Alloca->setSwiftError(true);
523 
524     CachedSlot = Alloca;
525     return Alloca;
526   };
527 
528   for (CallInst *Op : Shape.SwiftErrorOps) {
529     auto MappedOp = VMap ? cast<CallInst>((*VMap)[Op]) : Op;
530     IRBuilder<> Builder(MappedOp);
531 
532     // If there are no arguments, this is a 'get' operation.
533     Value *MappedResult;
534     if (Op->getNumArgOperands() == 0) {
535       auto ValueTy = Op->getType();
536       auto Slot = getSwiftErrorSlot(ValueTy);
537       MappedResult = Builder.CreateLoad(ValueTy, Slot);
538     } else {
539       assert(Op->getNumArgOperands() == 1);
540       auto Value = MappedOp->getArgOperand(0);
541       auto ValueTy = Value->getType();
542       auto Slot = getSwiftErrorSlot(ValueTy);
543       Builder.CreateStore(Value, Slot);
544       MappedResult = Slot;
545     }
546 
547     MappedOp->replaceAllUsesWith(MappedResult);
548     MappedOp->eraseFromParent();
549   }
550 
551   // If we're updating the original function, we've invalidated SwiftErrorOps.
552   if (VMap == nullptr) {
553     Shape.SwiftErrorOps.clear();
554   }
555 }
556 
557 void CoroCloner::replaceSwiftErrorOps() {
558   ::replaceSwiftErrorOps(*NewF, Shape, &VMap);
559 }
560 
561 void CoroCloner::replaceEntryBlock() {
562   // In the original function, the AllocaSpillBlock is a block immediately
563   // following the allocation of the frame object which defines GEPs for
564   // all the allocas that have been moved into the frame, and it ends by
565   // branching to the original beginning of the coroutine.  Make this
566   // the entry block of the cloned function.
567   auto *Entry = cast<BasicBlock>(VMap[Shape.AllocaSpillBlock]);
568   Entry->setName("entry" + Suffix);
569   Entry->moveBefore(&NewF->getEntryBlock());
570   Entry->getTerminator()->eraseFromParent();
571 
572   // Clear all predecessors of the new entry block.  There should be
573   // exactly one predecessor, which we created when splitting out
574   // AllocaSpillBlock to begin with.
575   assert(Entry->hasOneUse());
576   auto BranchToEntry = cast<BranchInst>(Entry->user_back());
577   assert(BranchToEntry->isUnconditional());
578   Builder.SetInsertPoint(BranchToEntry);
579   Builder.CreateUnreachable();
580   BranchToEntry->eraseFromParent();
581 
582   // TODO: move any allocas into Entry that weren't moved into the frame.
583   // (Currently we move all allocas into the frame.)
584 
585   // Branch from the entry to the appropriate place.
586   Builder.SetInsertPoint(Entry);
587   switch (Shape.ABI) {
588   case coro::ABI::Switch: {
589     // In switch-lowering, we built a resume-entry block in the original
590     // function.  Make the entry block branch to this.
591     auto *SwitchBB =
592       cast<BasicBlock>(VMap[Shape.SwitchLowering.ResumeEntryBlock]);
593     Builder.CreateBr(SwitchBB);
594     break;
595   }
596 
597   case coro::ABI::Retcon:
598   case coro::ABI::RetconOnce: {
599     // In continuation ABIs, we want to branch to immediately after the
600     // active suspend point.  Earlier phases will have put the suspend in its
601     // own basic block, so just thread our jump directly to its successor.
602     auto MappedCS = cast<CoroSuspendRetconInst>(VMap[ActiveSuspend]);
603     auto Branch = cast<BranchInst>(MappedCS->getNextNode());
604     assert(Branch->isUnconditional());
605     Builder.CreateBr(Branch->getSuccessor(0));
606     break;
607   }
608   }
609 }
610 
611 /// Derive the value of the new frame pointer.
612 Value *CoroCloner::deriveNewFramePointer() {
613   // Builder should be inserting to the front of the new entry block.
614 
615   switch (Shape.ABI) {
616   // In switch-lowering, the argument is the frame pointer.
617   case coro::ABI::Switch:
618     return &*NewF->arg_begin();
619 
620   // In continuation-lowering, the argument is the opaque storage.
621   case coro::ABI::Retcon:
622   case coro::ABI::RetconOnce: {
623     Argument *NewStorage = &*NewF->arg_begin();
624     auto FramePtrTy = Shape.FrameTy->getPointerTo();
625 
626     // If the storage is inline, just bitcast to the storage to the frame type.
627     if (Shape.RetconLowering.IsFrameInlineInStorage)
628       return Builder.CreateBitCast(NewStorage, FramePtrTy);
629 
630     // Otherwise, load the real frame from the opaque storage.
631     auto FramePtrPtr =
632       Builder.CreateBitCast(NewStorage, FramePtrTy->getPointerTo());
633     return Builder.CreateLoad(FramePtrPtr);
634   }
635   }
636   llvm_unreachable("bad ABI");
637 }
638 
639 /// Clone the body of the original function into a resume function of
640 /// some sort.
641 void CoroCloner::create() {
642   // Create the new function if we don't already have one.
643   if (!NewF) {
644     NewF = createCloneDeclaration(OrigF, Shape, Suffix,
645                                   OrigF.getParent()->end());
646   }
647 
648   // Replace all args with undefs. The buildCoroutineFrame algorithm already
649   // rewritten access to the args that occurs after suspend points with loads
650   // and stores to/from the coroutine frame.
651   for (Argument &A : OrigF.args())
652     VMap[&A] = UndefValue::get(A.getType());
653 
654   SmallVector<ReturnInst *, 4> Returns;
655 
656   // Ignore attempts to change certain attributes of the function.
657   // TODO: maybe there should be a way to suppress this during cloning?
658   auto savedVisibility = NewF->getVisibility();
659   auto savedUnnamedAddr = NewF->getUnnamedAddr();
660   auto savedDLLStorageClass = NewF->getDLLStorageClass();
661 
662   // NewF's linkage (which CloneFunctionInto does *not* change) might not
663   // be compatible with the visibility of OrigF (which it *does* change),
664   // so protect against that.
665   auto savedLinkage = NewF->getLinkage();
666   NewF->setLinkage(llvm::GlobalValue::ExternalLinkage);
667 
668   CloneFunctionInto(NewF, &OrigF, VMap, /*ModuleLevelChanges=*/true, Returns);
669 
670   NewF->setLinkage(savedLinkage);
671   NewF->setVisibility(savedVisibility);
672   NewF->setUnnamedAddr(savedUnnamedAddr);
673   NewF->setDLLStorageClass(savedDLLStorageClass);
674 
675   auto &Context = NewF->getContext();
676 
677   // Replace the attributes of the new function:
678   auto OrigAttrs = NewF->getAttributes();
679   auto NewAttrs = AttributeList();
680 
681   switch (Shape.ABI) {
682   case coro::ABI::Switch:
683     // Bootstrap attributes by copying function attributes from the
684     // original function.  This should include optimization settings and so on.
685     NewAttrs = NewAttrs.addAttributes(Context, AttributeList::FunctionIndex,
686                                       OrigAttrs.getFnAttributes());
687     break;
688 
689   case coro::ABI::Retcon:
690   case coro::ABI::RetconOnce:
691     // If we have a continuation prototype, just use its attributes,
692     // full-stop.
693     NewAttrs = Shape.RetconLowering.ResumePrototype->getAttributes();
694     break;
695   }
696 
697   // Make the frame parameter nonnull and noalias.
698   NewAttrs = NewAttrs.addParamAttribute(Context, 0, Attribute::NonNull);
699   NewAttrs = NewAttrs.addParamAttribute(Context, 0, Attribute::NoAlias);
700 
701   switch (Shape.ABI) {
702   // In these ABIs, the cloned functions always return 'void', and the
703   // existing return sites are meaningless.  Note that for unique
704   // continuations, this includes the returns associated with suspends;
705   // this is fine because we can't suspend twice.
706   case coro::ABI::Switch:
707   case coro::ABI::RetconOnce:
708     // Remove old returns.
709     for (ReturnInst *Return : Returns)
710       changeToUnreachable(Return, /*UseLLVMTrap=*/false);
711     break;
712 
713   // With multi-suspend continuations, we'll already have eliminated the
714   // original returns and inserted returns before all the suspend points,
715   // so we want to leave any returns in place.
716   case coro::ABI::Retcon:
717     break;
718   }
719 
720   NewF->setAttributes(NewAttrs);
721   NewF->setCallingConv(Shape.getResumeFunctionCC());
722 
723   // Set up the new entry block.
724   replaceEntryBlock();
725 
726   Builder.SetInsertPoint(&NewF->getEntryBlock().front());
727   NewFramePtr = deriveNewFramePointer();
728 
729   // Remap frame pointer.
730   Value *OldFramePtr = VMap[Shape.FramePtr];
731   NewFramePtr->takeName(OldFramePtr);
732   OldFramePtr->replaceAllUsesWith(NewFramePtr);
733 
734   // Remap vFrame pointer.
735   auto *NewVFrame = Builder.CreateBitCast(
736       NewFramePtr, Type::getInt8PtrTy(Builder.getContext()), "vFrame");
737   Value *OldVFrame = cast<Value>(VMap[Shape.CoroBegin]);
738   OldVFrame->replaceAllUsesWith(NewVFrame);
739 
740   switch (Shape.ABI) {
741   case coro::ABI::Switch:
742     // Rewrite final suspend handling as it is not done via switch (allows to
743     // remove final case from the switch, since it is undefined behavior to
744     // resume the coroutine suspended at the final suspend point.
745     if (Shape.SwitchLowering.HasFinalSuspend)
746       handleFinalSuspend();
747     break;
748 
749   case coro::ABI::Retcon:
750   case coro::ABI::RetconOnce:
751     // Replace uses of the active suspend with the corresponding
752     // continuation-function arguments.
753     assert(ActiveSuspend != nullptr &&
754            "no active suspend when lowering a continuation-style coroutine");
755     replaceRetconSuspendUses();
756     break;
757   }
758 
759   // Handle suspends.
760   replaceCoroSuspends();
761 
762   // Handle swifterror.
763   replaceSwiftErrorOps();
764 
765   // Remove coro.end intrinsics.
766   replaceCoroEnds();
767 
768   // Eliminate coro.free from the clones, replacing it with 'null' in cleanup,
769   // to suppress deallocation code.
770   if (Shape.ABI == coro::ABI::Switch)
771     coro::replaceCoroFree(cast<CoroIdInst>(VMap[Shape.CoroBegin->getId()]),
772                           /*Elide=*/ FKind == CoroCloner::Kind::SwitchCleanup);
773 }
774 
775 // Create a resume clone by cloning the body of the original function, setting
776 // new entry block and replacing coro.suspend an appropriate value to force
777 // resume or cleanup pass for every suspend point.
778 static Function *createClone(Function &F, const Twine &Suffix,
779                              coro::Shape &Shape, CoroCloner::Kind FKind) {
780   CoroCloner Cloner(F, Suffix, Shape, FKind);
781   Cloner.create();
782   return Cloner.getFunction();
783 }
784 
785 /// Remove calls to llvm.coro.end in the original function.
786 static void removeCoroEnds(const coro::Shape &Shape, CallGraph *CG) {
787   for (auto End : Shape.CoroEnds) {
788     replaceCoroEnd(End, Shape, Shape.FramePtr, /*in resume*/ false, CG);
789   }
790 }
791 
792 static void replaceFrameSize(coro::Shape &Shape) {
793   if (Shape.CoroSizes.empty())
794     return;
795 
796   // In the same function all coro.sizes should have the same result type.
797   auto *SizeIntrin = Shape.CoroSizes.back();
798   Module *M = SizeIntrin->getModule();
799   const DataLayout &DL = M->getDataLayout();
800   auto Size = DL.getTypeAllocSize(Shape.FrameTy);
801   auto *SizeConstant = ConstantInt::get(SizeIntrin->getType(), Size);
802 
803   for (CoroSizeInst *CS : Shape.CoroSizes) {
804     CS->replaceAllUsesWith(SizeConstant);
805     CS->eraseFromParent();
806   }
807 }
808 
809 // Create a global constant array containing pointers to functions provided and
810 // set Info parameter of CoroBegin to point at this constant. Example:
811 //
812 //   @f.resumers = internal constant [2 x void(%f.frame*)*]
813 //                    [void(%f.frame*)* @f.resume, void(%f.frame*)* @f.destroy]
814 //   define void @f() {
815 //     ...
816 //     call i8* @llvm.coro.begin(i8* null, i32 0, i8* null,
817 //                    i8* bitcast([2 x void(%f.frame*)*] * @f.resumers to i8*))
818 //
819 // Assumes that all the functions have the same signature.
820 static void setCoroInfo(Function &F, coro::Shape &Shape,
821                         ArrayRef<Function *> Fns) {
822   // This only works under the switch-lowering ABI because coro elision
823   // only works on the switch-lowering ABI.
824   assert(Shape.ABI == coro::ABI::Switch);
825 
826   SmallVector<Constant *, 4> Args(Fns.begin(), Fns.end());
827   assert(!Args.empty());
828   Function *Part = *Fns.begin();
829   Module *M = Part->getParent();
830   auto *ArrTy = ArrayType::get(Part->getType(), Args.size());
831 
832   auto *ConstVal = ConstantArray::get(ArrTy, Args);
833   auto *GV = new GlobalVariable(*M, ConstVal->getType(), /*isConstant=*/true,
834                                 GlobalVariable::PrivateLinkage, ConstVal,
835                                 F.getName() + Twine(".resumers"));
836 
837   // Update coro.begin instruction to refer to this constant.
838   LLVMContext &C = F.getContext();
839   auto *BC = ConstantExpr::getPointerCast(GV, Type::getInt8PtrTy(C));
840   Shape.getSwitchCoroId()->setInfo(BC);
841 }
842 
843 // Store addresses of Resume/Destroy/Cleanup functions in the coroutine frame.
844 static void updateCoroFrame(coro::Shape &Shape, Function *ResumeFn,
845                             Function *DestroyFn, Function *CleanupFn) {
846   assert(Shape.ABI == coro::ABI::Switch);
847 
848   IRBuilder<> Builder(Shape.FramePtr->getNextNode());
849   auto *ResumeAddr = Builder.CreateStructGEP(
850       Shape.FrameTy, Shape.FramePtr, coro::Shape::SwitchFieldIndex::Resume,
851       "resume.addr");
852   Builder.CreateStore(ResumeFn, ResumeAddr);
853 
854   Value *DestroyOrCleanupFn = DestroyFn;
855 
856   CoroIdInst *CoroId = Shape.getSwitchCoroId();
857   if (CoroAllocInst *CA = CoroId->getCoroAlloc()) {
858     // If there is a CoroAlloc and it returns false (meaning we elide the
859     // allocation, use CleanupFn instead of DestroyFn).
860     DestroyOrCleanupFn = Builder.CreateSelect(CA, DestroyFn, CleanupFn);
861   }
862 
863   auto *DestroyAddr = Builder.CreateStructGEP(
864       Shape.FrameTy, Shape.FramePtr, coro::Shape::SwitchFieldIndex::Destroy,
865       "destroy.addr");
866   Builder.CreateStore(DestroyOrCleanupFn, DestroyAddr);
867 }
868 
869 static void postSplitCleanup(Function &F) {
870   removeUnreachableBlocks(F);
871 
872   // For now, we do a mandatory verification step because we don't
873   // entirely trust this pass.  Note that we don't want to add a verifier
874   // pass to FPM below because it will also verify all the global data.
875   verifyFunction(F);
876 
877   legacy::FunctionPassManager FPM(F.getParent());
878 
879   FPM.add(createSCCPPass());
880   FPM.add(createCFGSimplificationPass());
881   FPM.add(createEarlyCSEPass());
882   FPM.add(createCFGSimplificationPass());
883 
884   FPM.doInitialization();
885   FPM.run(F);
886   FPM.doFinalization();
887 }
888 
889 // Assuming we arrived at the block NewBlock from Prev instruction, store
890 // PHI's incoming values in the ResolvedValues map.
891 static void
892 scanPHIsAndUpdateValueMap(Instruction *Prev, BasicBlock *NewBlock,
893                           DenseMap<Value *, Value *> &ResolvedValues) {
894   auto *PrevBB = Prev->getParent();
895   for (PHINode &PN : NewBlock->phis()) {
896     auto V = PN.getIncomingValueForBlock(PrevBB);
897     // See if we already resolved it.
898     auto VI = ResolvedValues.find(V);
899     if (VI != ResolvedValues.end())
900       V = VI->second;
901     // Remember the value.
902     ResolvedValues[&PN] = V;
903   }
904 }
905 
// Replace a sequence of branches leading to a ret with a clone of that ret
// instruction. A suspend instruction is represented by a switch; track the PHI
// values and select the correct case successor when possible.
//
// Starting at InitialInst, the walk follows unconditional branches and
// switches whose condition resolves (directly or through the PHI values
// recorded along the way) to a ConstantInt. Returns true if a ReturnInst is
// reached; in that case InitialInst is replaced with a clone of the return
// (unless InitialInst is itself that return). Returns false, without
// modifying the IR, when the walk hits anything else.
static bool simplifyTerminatorLeadingToRet(Instruction *InitialInst) {
  // Values PHI nodes are known to take along the path walked so far.
  DenseMap<Value *, Value *> ResolvedValues;
  // Successor of InitialInst when InitialInst is an unconditional branch.
  BasicBlock *UnconditionalSucc = nullptr;

  Instruction *I = InitialInst;
  while (I->isTerminator()) {
    if (isa<ReturnInst>(I)) {
      if (I != InitialInst) {
        // If InitialInst is an unconditional branch,
        // remove PHI values that come from basic block of InitialInst
        if (UnconditionalSucc)
          for (PHINode &PN : UnconditionalSucc->phis()) {
            int idx = PN.getBasicBlockIndex(InitialInst->getParent());
            if (idx != -1)
              PN.removeIncomingValue(idx);
          }
        ReplaceInstWithInst(InitialInst, I->clone());
      }
      return true;
    }
    if (auto *BR = dyn_cast<BranchInst>(I)) {
      // Only unconditional branches are followed; a conditional branch falls
      // through to the `return false` below.
      if (BR->isUnconditional()) {
        BasicBlock *BB = BR->getSuccessor(0);
        if (I == InitialInst)
          UnconditionalSucc = BB;
        scanPHIsAndUpdateValueMap(I, BB, ResolvedValues);
        I = BB->getFirstNonPHIOrDbgOrLifetime();
        continue;
      }
    } else if (auto *SI = dyn_cast<SwitchInst>(I)) {
      // Substitute the condition with its resolved value, if one is known.
      Value *V = SI->getCondition();
      auto it = ResolvedValues.find(V);
      if (it != ResolvedValues.end())
        V = it->second;
      // A constant condition selects exactly one successor to continue with.
      if (ConstantInt *Cond = dyn_cast<ConstantInt>(V)) {
        BasicBlock *BB = SI->findCaseValue(Cond)->getCaseSuccessor();
        scanPHIsAndUpdateValueMap(I, BB, ResolvedValues);
        I = BB->getFirstNonPHIOrDbgOrLifetime();
        continue;
      }
    }
    return false;
  }
  // Reached a non-terminator instruction: the path does real work before any
  // return.
  return false;
}
954 
955 // Add musttail to any resume instructions that is immediately followed by a
956 // suspend (i.e. ret). We do this even in -O0 to support guaranteed tail call
957 // for symmetrical coroutine control transfer (C++ Coroutines TS extension).
958 // This transformation is done only in the resume part of the coroutine that has
959 // identical signature and calling convention as the coro.resume call.
960 static void addMustTailToCoroResumes(Function &F) {
961   bool changed = false;
962 
963   // Collect potential resume instructions.
964   SmallVector<CallInst *, 4> Resumes;
965   for (auto &I : instructions(F))
966     if (auto *Call = dyn_cast<CallInst>(&I))
967       if (auto *CalledValue = Call->getCalledValue())
968         // CoroEarly pass replaced coro resumes with indirect calls to an
969         // address return by CoroSubFnInst intrinsic. See if it is one of those.
970         if (isa<CoroSubFnInst>(CalledValue->stripPointerCasts()))
971           Resumes.push_back(Call);
972 
973   // Set musttail on those that are followed by a ret instruction.
974   for (CallInst *Call : Resumes)
975     if (simplifyTerminatorLeadingToRet(Call->getNextNode())) {
976       Call->setTailCallKind(CallInst::TCK_MustTail);
977       changed = true;
978     }
979 
980   if (changed)
981     removeUnreachableBlocks(F);
982 }
983 
984 // Coroutine has no suspend points. Remove heap allocation for the coroutine
985 // frame if possible.
986 static void handleNoSuspendCoroutine(coro::Shape &Shape) {
987   auto *CoroBegin = Shape.CoroBegin;
988   auto *CoroId = CoroBegin->getId();
989   auto *AllocInst = CoroId->getCoroAlloc();
990   switch (Shape.ABI) {
991   case coro::ABI::Switch: {
992     auto SwitchId = cast<CoroIdInst>(CoroId);
993     coro::replaceCoroFree(SwitchId, /*Elide=*/AllocInst != nullptr);
994     if (AllocInst) {
995       IRBuilder<> Builder(AllocInst);
996       // FIXME: Need to handle overaligned members.
997       auto *Frame = Builder.CreateAlloca(Shape.FrameTy);
998       auto *VFrame = Builder.CreateBitCast(Frame, Builder.getInt8PtrTy());
999       AllocInst->replaceAllUsesWith(Builder.getFalse());
1000       AllocInst->eraseFromParent();
1001       CoroBegin->replaceAllUsesWith(VFrame);
1002     } else {
1003       CoroBegin->replaceAllUsesWith(CoroBegin->getMem());
1004     }
1005     break;
1006   }
1007 
1008   case coro::ABI::Retcon:
1009   case coro::ABI::RetconOnce:
1010     CoroBegin->replaceAllUsesWith(UndefValue::get(CoroBegin->getType()));
1011     break;
1012   }
1013 
1014   CoroBegin->eraseFromParent();
1015 }
1016 
1017 // SimplifySuspendPoint needs to check that there is no calls between
1018 // coro_save and coro_suspend, since any of the calls may potentially resume
1019 // the coroutine and if that is the case we cannot eliminate the suspend point.
1020 static bool hasCallsInBlockBetween(Instruction *From, Instruction *To) {
1021   for (Instruction *I = From; I != To; I = I->getNextNode()) {
1022     // Assume that no intrinsic can resume the coroutine.
1023     if (isa<IntrinsicInst>(I))
1024       continue;
1025 
1026     if (CallSite(I))
1027       return true;
1028   }
1029   return false;
1030 }
1031 
1032 static bool hasCallsInBlocksBetween(BasicBlock *SaveBB, BasicBlock *ResDesBB) {
1033   SmallPtrSet<BasicBlock *, 8> Set;
1034   SmallVector<BasicBlock *, 8> Worklist;
1035 
1036   Set.insert(SaveBB);
1037   Worklist.push_back(ResDesBB);
1038 
1039   // Accumulate all blocks between SaveBB and ResDesBB. Because CoroSaveIntr
1040   // returns a token consumed by suspend instruction, all blocks in between
1041   // will have to eventually hit SaveBB when going backwards from ResDesBB.
1042   while (!Worklist.empty()) {
1043     auto *BB = Worklist.pop_back_val();
1044     Set.insert(BB);
1045     for (auto *Pred : predecessors(BB))
1046       if (Set.count(Pred) == 0)
1047         Worklist.push_back(Pred);
1048   }
1049 
1050   // SaveBB and ResDesBB are checked separately in hasCallsBetween.
1051   Set.erase(SaveBB);
1052   Set.erase(ResDesBB);
1053 
1054   for (auto *BB : Set)
1055     if (hasCallsInBlockBetween(BB->getFirstNonPHI(), nullptr))
1056       return true;
1057 
1058   return false;
1059 }
1060 
1061 static bool hasCallsBetween(Instruction *Save, Instruction *ResumeOrDestroy) {
1062   auto *SaveBB = Save->getParent();
1063   auto *ResumeOrDestroyBB = ResumeOrDestroy->getParent();
1064 
1065   if (SaveBB == ResumeOrDestroyBB)
1066     return hasCallsInBlockBetween(Save->getNextNode(), ResumeOrDestroy);
1067 
1068   // Any calls from Save to the end of the block?
1069   if (hasCallsInBlockBetween(Save->getNextNode(), nullptr))
1070     return true;
1071 
1072   // Any calls from begging of the block up to ResumeOrDestroy?
1073   if (hasCallsInBlockBetween(ResumeOrDestroyBB->getFirstNonPHI(),
1074                              ResumeOrDestroy))
1075     return true;
1076 
1077   // Any calls in all of the blocks between SaveBB and ResumeOrDestroyBB?
1078   if (hasCallsInBlocksBetween(SaveBB, ResumeOrDestroyBB))
1079     return true;
1080 
1081   return false;
1082 }
1083 
// If a SuspendIntrin is preceded by Resume or Destroy, we can eliminate the
// suspend point and replace it with normal control flow.
//
// Returns true if the suspend point was removed. In that case Suspend, its
// coro.save and the preceding resume/destroy call have been erased, so the
// caller must not use Suspend afterwards.
static bool simplifySuspendPoint(CoroSuspendInst *Suspend,
                                 CoroBeginInst *CoroBegin) {
  // Find the instruction executed right before the suspend. If the suspend is
  // first in its block, use the terminator of the single predecessor, if any.
  Instruction *Prev = Suspend->getPrevNode();
  if (!Prev) {
    auto *Pred = Suspend->getParent()->getSinglePredecessor();
    if (!Pred)
      return false;
    Prev = Pred->getTerminator();
  }

  // It has to be a call or an invoke.
  CallSite CS{Prev};
  if (!CS)
    return false;

  auto *CallInstr = CS.getInstruction();

  auto *Callee = CS.getCalledValue()->stripPointerCasts();

  // See if the callsite is for resumption or destruction of the coroutine.
  auto *SubFn = dyn_cast<CoroSubFnInst>(Callee);
  if (!SubFn)
    return false;

  // Does not refer to the current coroutine, we cannot do anything with it.
  if (SubFn->getFrame() != CoroBegin)
    return false;

  // See if the transformation is safe. Specifically, see if there are any
  // calls in between Save and CallInstr. They can potentially resume the
  // coroutine rendering this optimization unsafe.
  auto *Save = Suspend->getCoroSave();
  if (hasCallsBetween(Save, CallInstr))
    return false;

  // Replace llvm.coro.suspend with the value that results in resumption over
  // the resume or cleanup path.
  Suspend->replaceAllUsesWith(SubFn->getRawIndex());
  Suspend->eraseFromParent();
  Save->eraseFromParent();

  // No longer need a call to coro.resume or coro.destroy.
  // An invoke is also the block terminator, so branch to its normal
  // destination before it is erased below.
  if (auto *Invoke = dyn_cast<InvokeInst>(CallInstr)) {
    BranchInst::Create(Invoke->getNormalDest(), Invoke);
  }

  // Grab the CalledValue from CS before erasing the CallInstr.
  auto *CalledValue = CS.getCalledValue();
  CallInstr->eraseFromParent();

  // If no more users remove it. Usually it is a bitcast of SubFn.
  if (CalledValue != SubFn && CalledValue->user_empty())
    if (auto *I = dyn_cast<Instruction>(CalledValue))
      I->eraseFromParent();

  // Now we are good to remove SubFn.
  if (SubFn->user_empty())
    SubFn->eraseFromParent();

  return true;
}
1146 
1147 // Remove suspend points that are simplified.
1148 static void simplifySuspendPoints(coro::Shape &Shape) {
1149   // Currently, the only simplification we do is switch-lowering-specific.
1150   if (Shape.ABI != coro::ABI::Switch)
1151     return;
1152 
1153   auto &S = Shape.CoroSuspends;
1154   size_t I = 0, N = S.size();
1155   if (N == 0)
1156     return;
1157   while (true) {
1158     auto SI = cast<CoroSuspendInst>(S[I]);
1159     // Leave final.suspend to handleFinalSuspend since it is undefined behavior
1160     // to resume a coroutine suspended at the final suspend point.
1161     if (!SI->isFinal() && simplifySuspendPoint(SI, Shape.CoroBegin)) {
1162       if (--N == I)
1163         break;
1164       std::swap(S[I], S[N]);
1165       continue;
1166     }
1167     if (++I == N)
1168       break;
1169   }
1170   S.resize(N);
1171 }
1172 
1173 static void splitSwitchCoroutine(Function &F, coro::Shape &Shape,
1174                                  SmallVectorImpl<Function *> &Clones) {
1175   assert(Shape.ABI == coro::ABI::Switch);
1176 
1177   createResumeEntryBlock(F, Shape);
1178   auto ResumeClone = createClone(F, ".resume", Shape,
1179                                  CoroCloner::Kind::SwitchResume);
1180   auto DestroyClone = createClone(F, ".destroy", Shape,
1181                                   CoroCloner::Kind::SwitchUnwind);
1182   auto CleanupClone = createClone(F, ".cleanup", Shape,
1183                                   CoroCloner::Kind::SwitchCleanup);
1184 
1185   postSplitCleanup(*ResumeClone);
1186   postSplitCleanup(*DestroyClone);
1187   postSplitCleanup(*CleanupClone);
1188 
1189   addMustTailToCoroResumes(*ResumeClone);
1190 
1191   // Store addresses resume/destroy/cleanup functions in the coroutine frame.
1192   updateCoroFrame(Shape, ResumeClone, DestroyClone, CleanupClone);
1193 
1194   assert(Clones.empty());
1195   Clones.push_back(ResumeClone);
1196   Clones.push_back(DestroyClone);
1197   Clones.push_back(CleanupClone);
1198 
1199   // Create a constant array referring to resume/destroy/clone functions pointed
1200   // by the last argument of @llvm.coro.info, so that CoroElide pass can
1201   // determined correct function to call.
1202   setCoroInfo(F, Shape, Clones);
1203 }
1204 
// Split a coroutine that uses one of the returned-continuation ABIs (Retcon
// or RetconOnce).
//
// The function sets up the frame pointer (either the caller-provided storage
// or a fresh allocation stashed in that storage), creates one continuation
// function declaration per suspend point, routes every suspend point through
// a single "coro.return" block that returns the continuation plus the
// yielded values, and finally fills in each continuation with a CoroCloner.
// The continuations are returned through Clones, which must be empty on
// entry; Clones[i] corresponds to Shape.CoroSuspends[i].
static void splitRetconCoroutine(Function &F, coro::Shape &Shape,
                                 SmallVectorImpl<Function *> &Clones) {
  assert(Shape.ABI == coro::ABI::Retcon ||
         Shape.ABI == coro::ABI::RetconOnce);
  assert(Clones.empty());

  // Reset various things that the optimizer might have decided it
  // "knows" about the coroutine function due to not seeing a return.
  F.removeFnAttr(Attribute::NoReturn);
  F.removeAttribute(AttributeList::ReturnIndex, Attribute::NoAlias);
  F.removeAttribute(AttributeList::ReturnIndex, Attribute::NonNull);

  // Allocate the frame.
  auto *Id = cast<AnyCoroIdRetconInst>(Shape.CoroBegin->getId());
  Value *RawFramePtr;
  if (Shape.RetconLowering.IsFrameInlineInStorage) {
    // The frame lives directly in the storage passed to the coro.id.
    RawFramePtr = Id->getStorage();
  } else {
    IRBuilder<> Builder(Id);

    // Determine the size of the frame.
    const DataLayout &DL = F.getParent()->getDataLayout();
    auto Size = DL.getTypeAllocSize(Shape.FrameTy);

    // Allocate.  We don't need to update the call graph node because we're
    // going to recompute it from scratch after splitting.
    RawFramePtr = Shape.emitAlloc(Builder, Builder.getInt64(Size), nullptr);
    RawFramePtr =
      Builder.CreateBitCast(RawFramePtr, Shape.CoroBegin->getType());

    // Stash the allocated frame pointer in the continuation storage.
    auto Dest = Builder.CreateBitCast(Id->getStorage(),
                                      RawFramePtr->getType()->getPointerTo());
    Builder.CreateStore(RawFramePtr, Dest);
  }

  // Map all uses of llvm.coro.begin to the allocated frame pointer.
  {
    // Make sure we don't invalidate Shape.FramePtr.
    TrackingVH<Instruction> Handle(Shape.FramePtr);
    Shape.CoroBegin->replaceAllUsesWith(RawFramePtr);
    Shape.FramePtr = Handle.getValPtr();
  }

  // Create a unique return block.
  // It is created lazily, when the first suspend point is processed below.
  BasicBlock *ReturnBB = nullptr;
  // ReturnPHIs[0] carries the continuation; the remaining PHIs carry the
  // directly-yielded values, in order.
  SmallVector<PHINode *, 4> ReturnPHIs;

  // Create all the functions in order after the main function.
  auto NextF = std::next(F.getIterator());

  // Create a continuation function for each of the suspend points.
  Clones.reserve(Shape.CoroSuspends.size());
  for (size_t i = 0, e = Shape.CoroSuspends.size(); i != e; ++i) {
    auto Suspend = cast<CoroSuspendRetconInst>(Shape.CoroSuspends[i]);

    // Create the clone declaration.
    auto Continuation =
      createCloneDeclaration(F, Shape, ".resume." + Twine(i), NextF);
    Clones.push_back(Continuation);

    // Insert a branch to the unified return block immediately before
    // the suspend point.
    // The split leaves an unconditional branch at the end of SuspendBB; it is
    // retargeted to the return block further down.
    auto SuspendBB = Suspend->getParent();
    auto NewSuspendBB = SuspendBB->splitBasicBlock(Suspend);
    auto Branch = cast<BranchInst>(SuspendBB->getTerminator());

    // Create the unified return block.
    if (!ReturnBB) {
      // Place it before the first suspend.
      ReturnBB = BasicBlock::Create(F.getContext(), "coro.return", &F,
                                    NewSuspendBB);
      Shape.RetconLowering.ReturnBlock = ReturnBB;

      IRBuilder<> Builder(ReturnBB);

      // Create PHIs for all the return values.
      assert(ReturnPHIs.empty());

      // First, the continuation.
      ReturnPHIs.push_back(Builder.CreatePHI(Continuation->getType(),
                                             Shape.CoroSuspends.size()));

      // Next, all the directly-yielded values.
      for (auto ResultTy : Shape.getRetconResultTypes())
        ReturnPHIs.push_back(Builder.CreatePHI(ResultTy,
                                               Shape.CoroSuspends.size()));

      // Build the return value.
      auto RetTy = F.getReturnType();

      // Cast the continuation value if necessary.
      // We can't rely on the types matching up because that type would
      // have to be infinite.
      auto CastedContinuationTy =
        (ReturnPHIs.size() == 1 ? RetTy : RetTy->getStructElementType(0));
      auto *CastedContinuation =
        Builder.CreateBitCast(ReturnPHIs[0], CastedContinuationTy);

      // With no yielded values the continuation is returned directly;
      // otherwise it becomes element 0 of the returned aggregate.
      Value *RetV;
      if (ReturnPHIs.size() == 1) {
        RetV = CastedContinuation;
      } else {
        RetV = UndefValue::get(RetTy);
        RetV = Builder.CreateInsertValue(RetV, CastedContinuation, 0);
        for (size_t I = 1, E = ReturnPHIs.size(); I != E; ++I)
          RetV = Builder.CreateInsertValue(RetV, ReturnPHIs[I], I);
      }

      Builder.CreateRet(RetV);
    }

    // Branch to the return block.
    Branch->setSuccessor(0, ReturnBB);
    // Feed this suspend point's continuation and yielded values into the
    // return PHIs.
    ReturnPHIs[0]->addIncoming(Continuation, SuspendBB);
    size_t NextPHIIndex = 1;
    for (auto &VUse : Suspend->value_operands())
      ReturnPHIs[NextPHIIndex++]->addIncoming(&*VUse, SuspendBB);
    assert(NextPHIIndex == ReturnPHIs.size());
  }

  // Fill in the body of each continuation. This is a second loop, so every
  // suspend point has already been rerouted when the cloner runs.
  assert(Clones.size() == Shape.CoroSuspends.size());
  for (size_t i = 0, e = Shape.CoroSuspends.size(); i != e; ++i) {
    auto Suspend = Shape.CoroSuspends[i];
    auto Clone = Clones[i];

    CoroCloner(F, "resume." + Twine(i), Shape, Clone, Suspend).create();
  }
}
1334 
1335 namespace {
1336   class PrettyStackTraceFunction : public PrettyStackTraceEntry {
1337     Function &F;
1338   public:
1339     PrettyStackTraceFunction(Function &F) : F(F) {}
1340     void print(raw_ostream &OS) const override {
1341       OS << "While splitting coroutine ";
1342       F.printAsOperand(OS, /*print type*/ false, F.getParent());
1343       OS << "\n";
1344     }
1345   };
1346 }
1347 
1348 static void splitCoroutine(Function &F, coro::Shape &Shape,
1349                            SmallVectorImpl<Function *> &Clones) {
1350   switch (Shape.ABI) {
1351   case coro::ABI::Switch:
1352     return splitSwitchCoroutine(F, Shape, Clones);
1353   case coro::ABI::Retcon:
1354   case coro::ABI::RetconOnce:
1355     return splitRetconCoroutine(F, Shape, Clones);
1356   }
1357   llvm_unreachable("bad ABI kind");
1358 }
1359 
1360 static void splitCoroutine(Function &F, CallGraph &CG, CallGraphSCC &SCC) {
1361   PrettyStackTraceFunction prettyStackTrace(F);
1362 
1363   // The suspend-crossing algorithm in buildCoroutineFrame get tripped
1364   // up by uses in unreachable blocks, so remove them as a first pass.
1365   removeUnreachableBlocks(F);
1366 
1367   coro::Shape Shape(F);
1368   if (!Shape.CoroBegin)
1369     return;
1370 
1371   simplifySuspendPoints(Shape);
1372   buildCoroutineFrame(F, Shape);
1373   replaceFrameSize(Shape);
1374 
1375   SmallVector<Function*, 4> Clones;
1376 
1377   // If there are no suspend points, no split required, just remove
1378   // the allocation and deallocation blocks, they are not needed.
1379   if (Shape.CoroSuspends.empty()) {
1380     handleNoSuspendCoroutine(Shape);
1381   } else {
1382     splitCoroutine(F, Shape, Clones);
1383   }
1384 
1385   // Replace all the swifterror operations in the original function.
1386   // This invalidates SwiftErrorOps in the Shape.
1387   replaceSwiftErrorOps(F, Shape, nullptr);
1388 
1389   removeCoroEnds(Shape, &CG);
1390   postSplitCleanup(F);
1391 
1392   // Update call graph and add the functions we created to the SCC.
1393   coro::updateCallGraph(F, Clones, CG, SCC);
1394 }
1395 
1396 // When we see the coroutine the first time, we insert an indirect call to a
1397 // devirt trigger function and mark the coroutine that it is now ready for
1398 // split.
1399 static void prepareForSplit(Function &F, CallGraph &CG) {
1400   Module &M = *F.getParent();
1401   LLVMContext &Context = F.getContext();
1402 #ifndef NDEBUG
1403   Function *DevirtFn = M.getFunction(CORO_DEVIRT_TRIGGER_FN);
1404   assert(DevirtFn && "coro.devirt.trigger function not found");
1405 #endif
1406 
1407   F.addFnAttr(CORO_PRESPLIT_ATTR, PREPARED_FOR_SPLIT);
1408 
1409   // Insert an indirect call sequence that will be devirtualized by CoroElide
1410   // pass:
1411   //    %0 = call i8* @llvm.coro.subfn.addr(i8* null, i8 -1)
1412   //    %1 = bitcast i8* %0 to void(i8*)*
1413   //    call void %1(i8* null)
1414   coro::LowererBase Lowerer(M);
1415   Instruction *InsertPt = F.getEntryBlock().getTerminator();
1416   auto *Null = ConstantPointerNull::get(Type::getInt8PtrTy(Context));
1417   auto *DevirtFnAddr =
1418       Lowerer.makeSubFnCall(Null, CoroSubFnInst::RestartTrigger, InsertPt);
1419   FunctionType *FnTy = FunctionType::get(Type::getVoidTy(Context),
1420                                          {Type::getInt8PtrTy(Context)}, false);
1421   auto *IndirectCall = CallInst::Create(FnTy, DevirtFnAddr, Null, "", InsertPt);
1422 
1423   // Update CG graph with an indirect call we just added.
1424   CG[&F]->addCalledFunction(IndirectCall, CG.getCallsExternalNode());
1425 }
1426 
1427 // Make sure that there is a devirtualization trigger function that the
1428 // coro-split pass uses to force a restart of the CGSCC pipeline. If the devirt
1429 // trigger function is not found, we will create one and add it to the current
1430 // SCC.
1431 static void createDevirtTriggerFunc(CallGraph &CG, CallGraphSCC &SCC) {
1432   Module &M = CG.getModule();
1433   if (M.getFunction(CORO_DEVIRT_TRIGGER_FN))
1434     return;
1435 
1436   LLVMContext &C = M.getContext();
1437   auto *FnTy = FunctionType::get(Type::getVoidTy(C), Type::getInt8PtrTy(C),
1438                                  /*isVarArg=*/false);
1439   Function *DevirtFn =
1440       Function::Create(FnTy, GlobalValue::LinkageTypes::PrivateLinkage,
1441                        CORO_DEVIRT_TRIGGER_FN, &M);
1442   DevirtFn->addFnAttr(Attribute::AlwaysInline);
1443   auto *Entry = BasicBlock::Create(C, "entry", DevirtFn);
1444   ReturnInst::Create(C, Entry);
1445 
1446   auto *Node = CG.getOrInsertFunction(DevirtFn);
1447 
1448   SmallVector<CallGraphNode *, 8> Nodes(SCC.begin(), SCC.end());
1449   Nodes.push_back(Node);
1450   SCC.initialize(Nodes);
1451 }
1452 
/// Replace a call to llvm.coro.prepare.retcon.
///
/// Bitcasts of the call's result back to the type of the prepared value are
/// replaced by that value directly; any remaining uses get the original i8*
/// operand. The call is then erased, along with bitcasts feeding it that
/// have become dead. When the prepared value is a Function, the call graph
/// is updated for call sites that become direct calls.
static void replacePrepare(CallInst *Prepare, CallGraph &CG) {
  auto CastFn = Prepare->getArgOperand(0); // as an i8*
  auto Fn = CastFn->stripPointerCasts(); // as its original type

  // Find call graph nodes for the preparation.
  // Both stay null when the operand is not a concrete Function, in which case
  // no call graph update is attempted below.
  CallGraphNode *PrepareUserNode = nullptr, *FnNode = nullptr;
  if (auto ConcreteFn = dyn_cast<Function>(Fn)) {
    PrepareUserNode = CG[Prepare->getFunction()];
    FnNode = CG[ConcreteFn];
  }

  // Attempt to peephole this pattern:
  //    %0 = bitcast [[TYPE]] @some_function to i8*
  //    %1 = call @llvm.coro.prepare.retcon(i8* %0)
  //    %2 = bitcast %1 to [[TYPE]]
  // ==>
  //    %2 = @some_function
  for (auto UI = Prepare->use_begin(), UE = Prepare->use_end();
         UI != UE; ) {
    // Look for bitcasts back to the original function type.
    // The iterator is advanced before the user is touched, because the user
    // may be erased below.
    auto *Cast = dyn_cast<BitCastInst>((UI++)->getUser());
    if (!Cast || Cast->getType() != Fn->getType()) continue;

    // Check whether the replacement will introduce new direct calls.
    // If so, we'll need to update the call graph.
    if (PrepareUserNode) {
      for (auto &Use : Cast->uses()) {
        if (auto *CB = dyn_cast<CallBase>(Use.getUser())) {
          // Only uses in callee position turn into direct calls.
          if (!CB->isCallee(&Use))
            continue;
          PrepareUserNode->removeCallEdgeFor(*CB);
          PrepareUserNode->addCalledFunction(CB, FnNode);
        }
      }
    }

    // Replace and remove the cast.
    Cast->replaceAllUsesWith(Fn);
    Cast->eraseFromParent();
  }

  // Replace any remaining uses with the function as an i8*.
  // This can never directly be a callee, so we don't need to update CG.
  Prepare->replaceAllUsesWith(CastFn);
  Prepare->eraseFromParent();

  // Kill dead bitcasts.
  // Walks up the chain of bitcast instructions that fed the call, stopping at
  // the first one that still has a use.
  while (auto *Cast = dyn_cast<BitCastInst>(CastFn)) {
    if (!Cast->use_empty()) break;
    CastFn = Cast->getOperand(0);
    Cast->eraseFromParent();
  }
}
1507 
1508 /// Remove calls to llvm.coro.prepare.retcon, a barrier meant to prevent
1509 /// IPO from operating on calls to a retcon coroutine before it's been
1510 /// split.  This is only safe to do after we've split all retcon
1511 /// coroutines in the module.  We can do that this in this pass because
1512 /// this pass does promise to split all retcon coroutines (as opposed to
1513 /// switch coroutines, which are lowered in multiple stages).
1514 static bool replaceAllPrepares(Function *PrepareFn, CallGraph &CG) {
1515   bool Changed = false;
1516   for (auto PI = PrepareFn->use_begin(), PE = PrepareFn->use_end();
1517          PI != PE; ) {
1518     // Intrinsics can only be used in calls.
1519     auto *Prepare = cast<CallInst>((PI++)->getUser());
1520     replacePrepare(Prepare, CG);
1521     Changed = true;
1522   }
1523 
1524   return Changed;
1525 }
1526 
1527 //===----------------------------------------------------------------------===//
1528 //                              Top Level Driver
1529 //===----------------------------------------------------------------------===//
1530 
1531 namespace {
1532 
1533 struct CoroSplitLegacy : public CallGraphSCCPass {
1534   static char ID; // Pass identification, replacement for typeid
1535 
1536   CoroSplitLegacy() : CallGraphSCCPass(ID) {
1537     initializeCoroSplitLegacyPass(*PassRegistry::getPassRegistry());
1538   }
1539 
1540   bool Run = false;
1541 
1542   // A coroutine is identified by the presence of coro.begin intrinsic, if
1543   // we don't have any, this pass has nothing to do.
1544   bool doInitialization(CallGraph &CG) override {
1545     Run = coro::declaresIntrinsics(CG.getModule(),
1546                                    {"llvm.coro.begin",
1547                                     "llvm.coro.prepare.retcon"});
1548     return CallGraphSCCPass::doInitialization(CG);
1549   }
1550 
1551   bool runOnSCC(CallGraphSCC &SCC) override {
1552     if (!Run)
1553       return false;
1554 
1555     // Check for uses of llvm.coro.prepare.retcon.
1556     auto PrepareFn =
1557       SCC.getCallGraph().getModule().getFunction("llvm.coro.prepare.retcon");
1558     if (PrepareFn && PrepareFn->use_empty())
1559       PrepareFn = nullptr;
1560 
1561     // Find coroutines for processing.
1562     SmallVector<Function *, 4> Coroutines;
1563     for (CallGraphNode *CGN : SCC)
1564       if (auto *F = CGN->getFunction())
1565         if (F->hasFnAttribute(CORO_PRESPLIT_ATTR))
1566           Coroutines.push_back(F);
1567 
1568     if (Coroutines.empty() && !PrepareFn)
1569       return false;
1570 
1571     CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph();
1572 
1573     if (Coroutines.empty())
1574       return replaceAllPrepares(PrepareFn, CG);
1575 
1576     createDevirtTriggerFunc(CG, SCC);
1577 
1578     // Split all the coroutines.
1579     for (Function *F : Coroutines) {
1580       Attribute Attr = F->getFnAttribute(CORO_PRESPLIT_ATTR);
1581       StringRef Value = Attr.getValueAsString();
1582       LLVM_DEBUG(dbgs() << "CoroSplit: Processing coroutine '" << F->getName()
1583                         << "' state: " << Value << "\n");
1584       if (Value == UNPREPARED_FOR_SPLIT) {
1585         prepareForSplit(*F, CG);
1586         continue;
1587       }
1588       F->removeFnAttr(CORO_PRESPLIT_ATTR);
1589       splitCoroutine(*F, CG, SCC);
1590     }
1591 
1592     if (PrepareFn)
1593       replaceAllPrepares(PrepareFn, CG);
1594 
1595     return true;
1596   }
1597 
1598   void getAnalysisUsage(AnalysisUsage &AU) const override {
1599     CallGraphSCCPass::getAnalysisUsage(AU);
1600   }
1601 
1602   StringRef getPassName() const override { return "Coroutine Splitting"; }
1603 };
1604 
1605 } // end anonymous namespace
1606 
// Definition of the pass identifier declared in CoroSplitLegacy.
char CoroSplitLegacy::ID = 0;

// Register the pass under the name "coro-split" and declare its dependency on
// CallGraphWrapperPass, which runOnSCC queries through getAnalysis.
INITIALIZE_PASS_BEGIN(
    CoroSplitLegacy, "coro-split",
    "Split coroutine into a set of functions driving its state machine", false,
    false)
INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass)
INITIALIZE_PASS_END(
    CoroSplitLegacy, "coro-split",
    "Split coroutine into a set of functions driving its state machine", false,
    false)

// Factory for the legacy coroutine-splitting pass; returns a newly allocated
// CoroSplitLegacy.
Pass *llvm::createCoroSplitLegacyPass() { return new CoroSplitLegacy(); }
1620