1 //===- CoroSplit.cpp - Converts a coroutine into a state machine ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 // This pass builds the coroutine frame and outlines resume and destroy parts
9 // of the coroutine into separate functions.
10 //
11 // We present a coroutine to LLVM as an ordinary function with suspension
12 // points marked up with intrinsics. We let the optimizer party on the coroutine
13 // as a single function for as long as possible. Shortly before the coroutine is
14 // eligible to be inlined into its callers, we split up the coroutine into parts
15 // corresponding to an initial, resume and destroy invocations of the coroutine,
16 // add them to the current SCC and restart the IPO pipeline to optimize the
17 // coroutine subfunctions we extracted before proceeding to the caller of the
18 // coroutine.
19 //===----------------------------------------------------------------------===//
20
21 #include "llvm/Transforms/Coroutines/CoroSplit.h"
22 #include "CoroInstr.h"
23 #include "CoroInternal.h"
24 #include "llvm/ADT/DenseMap.h"
25 #include "llvm/ADT/PriorityWorklist.h"
26 #include "llvm/ADT/SmallPtrSet.h"
27 #include "llvm/ADT/SmallVector.h"
28 #include "llvm/ADT/StringRef.h"
29 #include "llvm/ADT/Twine.h"
30 #include "llvm/Analysis/CFG.h"
31 #include "llvm/Analysis/CallGraph.h"
32 #include "llvm/Analysis/ConstantFolding.h"
33 #include "llvm/Analysis/LazyCallGraph.h"
34 #include "llvm/Analysis/TargetTransformInfo.h"
35 #include "llvm/BinaryFormat/Dwarf.h"
36 #include "llvm/IR/Argument.h"
37 #include "llvm/IR/Attributes.h"
38 #include "llvm/IR/BasicBlock.h"
39 #include "llvm/IR/CFG.h"
40 #include "llvm/IR/CallingConv.h"
41 #include "llvm/IR/Constants.h"
42 #include "llvm/IR/DataLayout.h"
43 #include "llvm/IR/DerivedTypes.h"
44 #include "llvm/IR/Dominators.h"
45 #include "llvm/IR/Function.h"
46 #include "llvm/IR/GlobalValue.h"
47 #include "llvm/IR/GlobalVariable.h"
48 #include "llvm/IR/IRBuilder.h"
49 #include "llvm/IR/InstIterator.h"
50 #include "llvm/IR/InstrTypes.h"
51 #include "llvm/IR/Instruction.h"
52 #include "llvm/IR/Instructions.h"
53 #include "llvm/IR/IntrinsicInst.h"
54 #include "llvm/IR/LLVMContext.h"
55 #include "llvm/IR/Module.h"
56 #include "llvm/IR/Type.h"
57 #include "llvm/IR/Value.h"
58 #include "llvm/IR/Verifier.h"
59 #include "llvm/Support/Casting.h"
60 #include "llvm/Support/Debug.h"
61 #include "llvm/Support/PrettyStackTrace.h"
62 #include "llvm/Support/raw_ostream.h"
63 #include "llvm/Transforms/Scalar.h"
64 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
65 #include "llvm/Transforms/Utils/CallGraphUpdater.h"
66 #include "llvm/Transforms/Utils/Cloning.h"
67 #include "llvm/Transforms/Utils/Local.h"
68 #include "llvm/Transforms/Utils/ValueMapper.h"
69 #include <cassert>
70 #include <cstddef>
71 #include <cstdint>
72 #include <initializer_list>
73 #include <iterator>
74
75 using namespace llvm;
76
77 #define DEBUG_TYPE "coro-split"
78
79 namespace {
80
81 /// A little helper class for building
82 class CoroCloner {
83 public:
84 enum class Kind {
85 /// The shared resume function for a switch lowering.
86 SwitchResume,
87
88 /// The shared unwind function for a switch lowering.
89 SwitchUnwind,
90
91 /// The shared cleanup function for a switch lowering.
92 SwitchCleanup,
93
94 /// An individual continuation function.
95 Continuation,
96
97 /// An async resume function.
98 Async,
99 };
100
101 private:
102 Function &OrigF;
103 Function *NewF;
104 const Twine &Suffix;
105 coro::Shape &Shape;
106 Kind FKind;
107 ValueToValueMapTy VMap;
108 IRBuilder<> Builder;
109 Value *NewFramePtr = nullptr;
110
111 /// The active suspend instruction; meaningful only for continuation and async
112 /// ABIs.
113 AnyCoroSuspendInst *ActiveSuspend = nullptr;
114
115 public:
116 /// Create a cloner for a switch lowering.
CoroCloner(Function & OrigF,const Twine & Suffix,coro::Shape & Shape,Kind FKind)117 CoroCloner(Function &OrigF, const Twine &Suffix, coro::Shape &Shape,
118 Kind FKind)
119 : OrigF(OrigF), NewF(nullptr), Suffix(Suffix), Shape(Shape),
120 FKind(FKind), Builder(OrigF.getContext()) {
121 assert(Shape.ABI == coro::ABI::Switch);
122 }
123
124 /// Create a cloner for a continuation lowering.
CoroCloner(Function & OrigF,const Twine & Suffix,coro::Shape & Shape,Function * NewF,AnyCoroSuspendInst * ActiveSuspend)125 CoroCloner(Function &OrigF, const Twine &Suffix, coro::Shape &Shape,
126 Function *NewF, AnyCoroSuspendInst *ActiveSuspend)
127 : OrigF(OrigF), NewF(NewF), Suffix(Suffix), Shape(Shape),
128 FKind(Shape.ABI == coro::ABI::Async ? Kind::Async : Kind::Continuation),
129 Builder(OrigF.getContext()), ActiveSuspend(ActiveSuspend) {
130 assert(Shape.ABI == coro::ABI::Retcon ||
131 Shape.ABI == coro::ABI::RetconOnce || Shape.ABI == coro::ABI::Async);
132 assert(NewF && "need existing function for continuation");
133 assert(ActiveSuspend && "need active suspend point for continuation");
134 }
135
getFunction() const136 Function *getFunction() const {
137 assert(NewF != nullptr && "declaration not yet set");
138 return NewF;
139 }
140
141 void create();
142
143 private:
isSwitchDestroyFunction()144 bool isSwitchDestroyFunction() {
145 switch (FKind) {
146 case Kind::Async:
147 case Kind::Continuation:
148 case Kind::SwitchResume:
149 return false;
150 case Kind::SwitchUnwind:
151 case Kind::SwitchCleanup:
152 return true;
153 }
154 llvm_unreachable("Unknown CoroCloner::Kind enum");
155 }
156
157 void replaceEntryBlock();
158 Value *deriveNewFramePointer();
159 void replaceRetconOrAsyncSuspendUses();
160 void replaceCoroSuspends();
161 void replaceCoroEnds();
162 void replaceSwiftErrorOps();
163 void salvageDebugInfo();
164 void handleFinalSuspend();
165 };
166
167 } // end anonymous namespace
168
maybeFreeRetconStorage(IRBuilder<> & Builder,const coro::Shape & Shape,Value * FramePtr,CallGraph * CG)169 static void maybeFreeRetconStorage(IRBuilder<> &Builder,
170 const coro::Shape &Shape, Value *FramePtr,
171 CallGraph *CG) {
172 assert(Shape.ABI == coro::ABI::Retcon ||
173 Shape.ABI == coro::ABI::RetconOnce);
174 if (Shape.RetconLowering.IsFrameInlineInStorage)
175 return;
176
177 Shape.emitDealloc(Builder, FramePtr, CG);
178 }
179
180 /// Replace an llvm.coro.end.async.
181 /// Will inline the must tail call function call if there is one.
182 /// \returns true if cleanup of the coro.end block is needed, false otherwise.
replaceCoroEndAsync(AnyCoroEndInst * End)183 static bool replaceCoroEndAsync(AnyCoroEndInst *End) {
184 IRBuilder<> Builder(End);
185
186 auto *EndAsync = dyn_cast<CoroAsyncEndInst>(End);
187 if (!EndAsync) {
188 Builder.CreateRetVoid();
189 return true /*needs cleanup of coro.end block*/;
190 }
191
192 auto *MustTailCallFunc = EndAsync->getMustTailCallFunction();
193 if (!MustTailCallFunc) {
194 Builder.CreateRetVoid();
195 return true /*needs cleanup of coro.end block*/;
196 }
197
198 // Move the must tail call from the predecessor block into the end block.
199 auto *CoroEndBlock = End->getParent();
200 auto *MustTailCallFuncBlock = CoroEndBlock->getSinglePredecessor();
201 assert(MustTailCallFuncBlock && "Must have a single predecessor block");
202 auto It = MustTailCallFuncBlock->getTerminator()->getIterator();
203 auto *MustTailCall = cast<CallInst>(&*std::prev(It));
204 CoroEndBlock->splice(End->getIterator(), MustTailCallFuncBlock,
205 MustTailCall->getIterator());
206
207 // Insert the return instruction.
208 Builder.SetInsertPoint(End);
209 Builder.CreateRetVoid();
210 InlineFunctionInfo FnInfo;
211
212 // Remove the rest of the block, by splitting it into an unreachable block.
213 auto *BB = End->getParent();
214 BB->splitBasicBlock(End);
215 BB->getTerminator()->eraseFromParent();
216
217 auto InlineRes = InlineFunction(*MustTailCall, FnInfo);
218 assert(InlineRes.isSuccess() && "Expected inlining to succeed");
219 (void)InlineRes;
220
221 // We have cleaned up the coro.end block above.
222 return false;
223 }
224
225 /// Replace a non-unwind call to llvm.coro.end.
replaceFallthroughCoroEnd(AnyCoroEndInst * End,const coro::Shape & Shape,Value * FramePtr,bool InResume,CallGraph * CG)226 static void replaceFallthroughCoroEnd(AnyCoroEndInst *End,
227 const coro::Shape &Shape, Value *FramePtr,
228 bool InResume, CallGraph *CG) {
229 // Start inserting right before the coro.end.
230 IRBuilder<> Builder(End);
231
232 // Create the return instruction.
233 switch (Shape.ABI) {
234 // The cloned functions in switch-lowering always return void.
235 case coro::ABI::Switch:
236 // coro.end doesn't immediately end the coroutine in the main function
237 // in this lowering, because we need to deallocate the coroutine.
238 if (!InResume)
239 return;
240 Builder.CreateRetVoid();
241 break;
242
243 // In async lowering this returns.
244 case coro::ABI::Async: {
245 bool CoroEndBlockNeedsCleanup = replaceCoroEndAsync(End);
246 if (!CoroEndBlockNeedsCleanup)
247 return;
248 break;
249 }
250
251 // In unique continuation lowering, the continuations always return void.
252 // But we may have implicitly allocated storage.
253 case coro::ABI::RetconOnce:
254 maybeFreeRetconStorage(Builder, Shape, FramePtr, CG);
255 Builder.CreateRetVoid();
256 break;
257
258 // In non-unique continuation lowering, we signal completion by returning
259 // a null continuation.
260 case coro::ABI::Retcon: {
261 maybeFreeRetconStorage(Builder, Shape, FramePtr, CG);
262 auto RetTy = Shape.getResumeFunctionType()->getReturnType();
263 auto RetStructTy = dyn_cast<StructType>(RetTy);
264 PointerType *ContinuationTy =
265 cast<PointerType>(RetStructTy ? RetStructTy->getElementType(0) : RetTy);
266
267 Value *ReturnValue = ConstantPointerNull::get(ContinuationTy);
268 if (RetStructTy) {
269 ReturnValue = Builder.CreateInsertValue(UndefValue::get(RetStructTy),
270 ReturnValue, 0);
271 }
272 Builder.CreateRet(ReturnValue);
273 break;
274 }
275 }
276
277 // Remove the rest of the block, by splitting it into an unreachable block.
278 auto *BB = End->getParent();
279 BB->splitBasicBlock(End);
280 BB->getTerminator()->eraseFromParent();
281 }
282
283 // Mark a coroutine as done, which implies that the coroutine is finished and
284 // never get resumed.
285 //
286 // In resume-switched ABI, the done state is represented by storing zero in
287 // ResumeFnAddr.
288 //
289 // NOTE: We couldn't omit the argument `FramePtr`. It is necessary because the
290 // pointer to the frame in splitted function is not stored in `Shape`.
markCoroutineAsDone(IRBuilder<> & Builder,const coro::Shape & Shape,Value * FramePtr)291 static void markCoroutineAsDone(IRBuilder<> &Builder, const coro::Shape &Shape,
292 Value *FramePtr) {
293 assert(
294 Shape.ABI == coro::ABI::Switch &&
295 "markCoroutineAsDone is only supported for Switch-Resumed ABI for now.");
296 auto *GepIndex = Builder.CreateStructGEP(
297 Shape.FrameTy, FramePtr, coro::Shape::SwitchFieldIndex::Resume,
298 "ResumeFn.addr");
299 auto *NullPtr = ConstantPointerNull::get(cast<PointerType>(
300 Shape.FrameTy->getTypeAtIndex(coro::Shape::SwitchFieldIndex::Resume)));
301 Builder.CreateStore(NullPtr, GepIndex);
302 }
303
304 /// Replace an unwind call to llvm.coro.end.
replaceUnwindCoroEnd(AnyCoroEndInst * End,const coro::Shape & Shape,Value * FramePtr,bool InResume,CallGraph * CG)305 static void replaceUnwindCoroEnd(AnyCoroEndInst *End, const coro::Shape &Shape,
306 Value *FramePtr, bool InResume,
307 CallGraph *CG) {
308 IRBuilder<> Builder(End);
309
310 switch (Shape.ABI) {
311 // In switch-lowering, this does nothing in the main function.
312 case coro::ABI::Switch: {
313 // In C++'s specification, the coroutine should be marked as done
314 // if promise.unhandled_exception() throws. The frontend will
315 // call coro.end(true) along this path.
316 //
317 // FIXME: We should refactor this once there is other language
318 // which uses Switch-Resumed style other than C++.
319 markCoroutineAsDone(Builder, Shape, FramePtr);
320 if (!InResume)
321 return;
322 break;
323 }
324 // In async lowering this does nothing.
325 case coro::ABI::Async:
326 break;
327 // In continuation-lowering, this frees the continuation storage.
328 case coro::ABI::Retcon:
329 case coro::ABI::RetconOnce:
330 maybeFreeRetconStorage(Builder, Shape, FramePtr, CG);
331 break;
332 }
333
334 // If coro.end has an associated bundle, add cleanupret instruction.
335 if (auto Bundle = End->getOperandBundle(LLVMContext::OB_funclet)) {
336 auto *FromPad = cast<CleanupPadInst>(Bundle->Inputs[0]);
337 auto *CleanupRet = Builder.CreateCleanupRet(FromPad, nullptr);
338 End->getParent()->splitBasicBlock(End);
339 CleanupRet->getParent()->getTerminator()->eraseFromParent();
340 }
341 }
342
replaceCoroEnd(AnyCoroEndInst * End,const coro::Shape & Shape,Value * FramePtr,bool InResume,CallGraph * CG)343 static void replaceCoroEnd(AnyCoroEndInst *End, const coro::Shape &Shape,
344 Value *FramePtr, bool InResume, CallGraph *CG) {
345 if (End->isUnwind())
346 replaceUnwindCoroEnd(End, Shape, FramePtr, InResume, CG);
347 else
348 replaceFallthroughCoroEnd(End, Shape, FramePtr, InResume, CG);
349
350 auto &Context = End->getContext();
351 End->replaceAllUsesWith(InResume ? ConstantInt::getTrue(Context)
352 : ConstantInt::getFalse(Context));
353 End->eraseFromParent();
354 }
355
356 // Create an entry block for a resume function with a switch that will jump to
357 // suspend points.
createResumeEntryBlock(Function & F,coro::Shape & Shape)358 static void createResumeEntryBlock(Function &F, coro::Shape &Shape) {
359 assert(Shape.ABI == coro::ABI::Switch);
360 LLVMContext &C = F.getContext();
361
362 // resume.entry:
363 // %index.addr = getelementptr inbounds %f.Frame, %f.Frame* %FramePtr, i32 0,
364 // i32 2
365 // % index = load i32, i32* %index.addr
366 // switch i32 %index, label %unreachable [
367 // i32 0, label %resume.0
368 // i32 1, label %resume.1
369 // ...
370 // ]
371
372 auto *NewEntry = BasicBlock::Create(C, "resume.entry", &F);
373 auto *UnreachBB = BasicBlock::Create(C, "unreachable", &F);
374
375 IRBuilder<> Builder(NewEntry);
376 auto *FramePtr = Shape.FramePtr;
377 auto *FrameTy = Shape.FrameTy;
378 auto *GepIndex = Builder.CreateStructGEP(
379 FrameTy, FramePtr, Shape.getSwitchIndexField(), "index.addr");
380 auto *Index = Builder.CreateLoad(Shape.getIndexType(), GepIndex, "index");
381 auto *Switch =
382 Builder.CreateSwitch(Index, UnreachBB, Shape.CoroSuspends.size());
383 Shape.SwitchLowering.ResumeSwitch = Switch;
384
385 size_t SuspendIndex = 0;
386 for (auto *AnyS : Shape.CoroSuspends) {
387 auto *S = cast<CoroSuspendInst>(AnyS);
388 ConstantInt *IndexVal = Shape.getIndex(SuspendIndex);
389
390 // Replace CoroSave with a store to Index:
391 // %index.addr = getelementptr %f.frame... (index field number)
392 // store i32 %IndexVal, i32* %index.addr1
393 auto *Save = S->getCoroSave();
394 Builder.SetInsertPoint(Save);
395 if (S->isFinal()) {
396 // The coroutine should be marked done if it reaches the final suspend
397 // point.
398 markCoroutineAsDone(Builder, Shape, FramePtr);
399 }
400
401 // If the coroutine don't have unwind coro end, we could omit the store to
402 // the final suspend point since we could infer the coroutine is suspended
403 // at the final suspend point by the nullness of ResumeFnAddr.
404 // However, we can't skip it if the coroutine have unwind coro end. Since
405 // the coroutine reaches unwind coro end is considered suspended at the
406 // final suspend point (the ResumeFnAddr is null) but in fact the coroutine
407 // didn't complete yet. We need the IndexVal for the final suspend point
408 // to make the states clear.
409 if (!S->isFinal() || Shape.SwitchLowering.HasUnwindCoroEnd) {
410 auto *GepIndex = Builder.CreateStructGEP(
411 FrameTy, FramePtr, Shape.getSwitchIndexField(), "index.addr");
412 Builder.CreateStore(IndexVal, GepIndex);
413 }
414
415 Save->replaceAllUsesWith(ConstantTokenNone::get(C));
416 Save->eraseFromParent();
417
418 // Split block before and after coro.suspend and add a jump from an entry
419 // switch:
420 //
421 // whateverBB:
422 // whatever
423 // %0 = call i8 @llvm.coro.suspend(token none, i1 false)
424 // switch i8 %0, label %suspend[i8 0, label %resume
425 // i8 1, label %cleanup]
426 // becomes:
427 //
428 // whateverBB:
429 // whatever
430 // br label %resume.0.landing
431 //
432 // resume.0: ; <--- jump from the switch in the resume.entry
433 // %0 = tail call i8 @llvm.coro.suspend(token none, i1 false)
434 // br label %resume.0.landing
435 //
436 // resume.0.landing:
437 // %1 = phi i8[-1, %whateverBB], [%0, %resume.0]
438 // switch i8 % 1, label %suspend [i8 0, label %resume
439 // i8 1, label %cleanup]
440
441 auto *SuspendBB = S->getParent();
442 auto *ResumeBB =
443 SuspendBB->splitBasicBlock(S, "resume." + Twine(SuspendIndex));
444 auto *LandingBB = ResumeBB->splitBasicBlock(
445 S->getNextNode(), ResumeBB->getName() + Twine(".landing"));
446 Switch->addCase(IndexVal, ResumeBB);
447
448 cast<BranchInst>(SuspendBB->getTerminator())->setSuccessor(0, LandingBB);
449 auto *PN = PHINode::Create(Builder.getInt8Ty(), 2, "", &LandingBB->front());
450 S->replaceAllUsesWith(PN);
451 PN->addIncoming(Builder.getInt8(-1), SuspendBB);
452 PN->addIncoming(S, ResumeBB);
453
454 ++SuspendIndex;
455 }
456
457 Builder.SetInsertPoint(UnreachBB);
458 Builder.CreateUnreachable();
459
460 Shape.SwitchLowering.ResumeEntryBlock = NewEntry;
461 }
462
463 // In the resume function, we remove the last case (when coro::Shape is built,
464 // the final suspend point (if present) is always the last element of
465 // CoroSuspends array) since it is an undefined behavior to resume a coroutine
466 // suspended at the final suspend point.
467 // In the destroy function, if it isn't possible that the ResumeFnAddr is NULL
468 // and the coroutine doesn't suspend at the final suspend point actually (this
469 // is possible since the coroutine is considered suspended at the final suspend
470 // point if promise.unhandled_exception() exits via an exception), we can
471 // remove the last case.
handleFinalSuspend()472 void CoroCloner::handleFinalSuspend() {
473 assert(Shape.ABI == coro::ABI::Switch &&
474 Shape.SwitchLowering.HasFinalSuspend);
475
476 if (isSwitchDestroyFunction() && Shape.SwitchLowering.HasUnwindCoroEnd)
477 return;
478
479 auto *Switch = cast<SwitchInst>(VMap[Shape.SwitchLowering.ResumeSwitch]);
480 auto FinalCaseIt = std::prev(Switch->case_end());
481 BasicBlock *ResumeBB = FinalCaseIt->getCaseSuccessor();
482 Switch->removeCase(FinalCaseIt);
483 if (isSwitchDestroyFunction()) {
484 BasicBlock *OldSwitchBB = Switch->getParent();
485 auto *NewSwitchBB = OldSwitchBB->splitBasicBlock(Switch, "Switch");
486 Builder.SetInsertPoint(OldSwitchBB->getTerminator());
487 auto *GepIndex = Builder.CreateStructGEP(Shape.FrameTy, NewFramePtr,
488 coro::Shape::SwitchFieldIndex::Resume,
489 "ResumeFn.addr");
490 auto *Load = Builder.CreateLoad(Shape.getSwitchResumePointerType(),
491 GepIndex);
492 auto *Cond = Builder.CreateIsNull(Load);
493 Builder.CreateCondBr(Cond, ResumeBB, NewSwitchBB);
494 OldSwitchBB->getTerminator()->eraseFromParent();
495 }
496 }
497
498 static FunctionType *
getFunctionTypeFromAsyncSuspend(AnyCoroSuspendInst * Suspend)499 getFunctionTypeFromAsyncSuspend(AnyCoroSuspendInst *Suspend) {
500 auto *AsyncSuspend = cast<CoroSuspendAsyncInst>(Suspend);
501 auto *StructTy = cast<StructType>(AsyncSuspend->getType());
502 auto &Context = Suspend->getParent()->getParent()->getContext();
503 auto *VoidTy = Type::getVoidTy(Context);
504 return FunctionType::get(VoidTy, StructTy->elements(), false);
505 }
506
createCloneDeclaration(Function & OrigF,coro::Shape & Shape,const Twine & Suffix,Module::iterator InsertBefore,AnyCoroSuspendInst * ActiveSuspend)507 static Function *createCloneDeclaration(Function &OrigF, coro::Shape &Shape,
508 const Twine &Suffix,
509 Module::iterator InsertBefore,
510 AnyCoroSuspendInst *ActiveSuspend) {
511 Module *M = OrigF.getParent();
512 auto *FnTy = (Shape.ABI != coro::ABI::Async)
513 ? Shape.getResumeFunctionType()
514 : getFunctionTypeFromAsyncSuspend(ActiveSuspend);
515
516 Function *NewF =
517 Function::Create(FnTy, GlobalValue::LinkageTypes::InternalLinkage,
518 OrigF.getName() + Suffix);
519
520 M->getFunctionList().insert(InsertBefore, NewF);
521
522 return NewF;
523 }
524
525 /// Replace uses of the active llvm.coro.suspend.retcon/async call with the
526 /// arguments to the continuation function.
527 ///
528 /// This assumes that the builder has a meaningful insertion point.
replaceRetconOrAsyncSuspendUses()529 void CoroCloner::replaceRetconOrAsyncSuspendUses() {
530 assert(Shape.ABI == coro::ABI::Retcon || Shape.ABI == coro::ABI::RetconOnce ||
531 Shape.ABI == coro::ABI::Async);
532
533 auto NewS = VMap[ActiveSuspend];
534 if (NewS->use_empty()) return;
535
536 // Copy out all the continuation arguments after the buffer pointer into
537 // an easily-indexed data structure for convenience.
538 SmallVector<Value*, 8> Args;
539 // The async ABI includes all arguments -- including the first argument.
540 bool IsAsyncABI = Shape.ABI == coro::ABI::Async;
541 for (auto I = IsAsyncABI ? NewF->arg_begin() : std::next(NewF->arg_begin()),
542 E = NewF->arg_end();
543 I != E; ++I)
544 Args.push_back(&*I);
545
546 // If the suspend returns a single scalar value, we can just do a simple
547 // replacement.
548 if (!isa<StructType>(NewS->getType())) {
549 assert(Args.size() == 1);
550 NewS->replaceAllUsesWith(Args.front());
551 return;
552 }
553
554 // Try to peephole extracts of an aggregate return.
555 for (Use &U : llvm::make_early_inc_range(NewS->uses())) {
556 auto *EVI = dyn_cast<ExtractValueInst>(U.getUser());
557 if (!EVI || EVI->getNumIndices() != 1)
558 continue;
559
560 EVI->replaceAllUsesWith(Args[EVI->getIndices().front()]);
561 EVI->eraseFromParent();
562 }
563
564 // If we have no remaining uses, we're done.
565 if (NewS->use_empty()) return;
566
567 // Otherwise, we need to create an aggregate.
568 Value *Agg = UndefValue::get(NewS->getType());
569 for (size_t I = 0, E = Args.size(); I != E; ++I)
570 Agg = Builder.CreateInsertValue(Agg, Args[I], I);
571
572 NewS->replaceAllUsesWith(Agg);
573 }
574
replaceCoroSuspends()575 void CoroCloner::replaceCoroSuspends() {
576 Value *SuspendResult;
577
578 switch (Shape.ABI) {
579 // In switch lowering, replace coro.suspend with the appropriate value
580 // for the type of function we're extracting.
581 // Replacing coro.suspend with (0) will result in control flow proceeding to
582 // a resume label associated with a suspend point, replacing it with (1) will
583 // result in control flow proceeding to a cleanup label associated with this
584 // suspend point.
585 case coro::ABI::Switch:
586 SuspendResult = Builder.getInt8(isSwitchDestroyFunction() ? 1 : 0);
587 break;
588
589 // In async lowering there are no uses of the result.
590 case coro::ABI::Async:
591 return;
592
593 // In returned-continuation lowering, the arguments from earlier
594 // continuations are theoretically arbitrary, and they should have been
595 // spilled.
596 case coro::ABI::RetconOnce:
597 case coro::ABI::Retcon:
598 return;
599 }
600
601 for (AnyCoroSuspendInst *CS : Shape.CoroSuspends) {
602 // The active suspend was handled earlier.
603 if (CS == ActiveSuspend) continue;
604
605 auto *MappedCS = cast<AnyCoroSuspendInst>(VMap[CS]);
606 MappedCS->replaceAllUsesWith(SuspendResult);
607 MappedCS->eraseFromParent();
608 }
609 }
610
replaceCoroEnds()611 void CoroCloner::replaceCoroEnds() {
612 for (AnyCoroEndInst *CE : Shape.CoroEnds) {
613 // We use a null call graph because there's no call graph node for
614 // the cloned function yet. We'll just be rebuilding that later.
615 auto *NewCE = cast<AnyCoroEndInst>(VMap[CE]);
616 replaceCoroEnd(NewCE, Shape, NewFramePtr, /*in resume*/ true, nullptr);
617 }
618 }
619
replaceSwiftErrorOps(Function & F,coro::Shape & Shape,ValueToValueMapTy * VMap)620 static void replaceSwiftErrorOps(Function &F, coro::Shape &Shape,
621 ValueToValueMapTy *VMap) {
622 if (Shape.ABI == coro::ABI::Async && Shape.CoroSuspends.empty())
623 return;
624 Value *CachedSlot = nullptr;
625 auto getSwiftErrorSlot = [&](Type *ValueTy) -> Value * {
626 if (CachedSlot) {
627 assert(cast<PointerType>(CachedSlot->getType())
628 ->isOpaqueOrPointeeTypeMatches(ValueTy) &&
629 "multiple swifterror slots in function with different types");
630 return CachedSlot;
631 }
632
633 // Check if the function has a swifterror argument.
634 for (auto &Arg : F.args()) {
635 if (Arg.isSwiftError()) {
636 CachedSlot = &Arg;
637 assert(cast<PointerType>(Arg.getType())
638 ->isOpaqueOrPointeeTypeMatches(ValueTy) &&
639 "swifterror argument does not have expected type");
640 return &Arg;
641 }
642 }
643
644 // Create a swifterror alloca.
645 IRBuilder<> Builder(F.getEntryBlock().getFirstNonPHIOrDbg());
646 auto Alloca = Builder.CreateAlloca(ValueTy);
647 Alloca->setSwiftError(true);
648
649 CachedSlot = Alloca;
650 return Alloca;
651 };
652
653 for (CallInst *Op : Shape.SwiftErrorOps) {
654 auto MappedOp = VMap ? cast<CallInst>((*VMap)[Op]) : Op;
655 IRBuilder<> Builder(MappedOp);
656
657 // If there are no arguments, this is a 'get' operation.
658 Value *MappedResult;
659 if (Op->arg_empty()) {
660 auto ValueTy = Op->getType();
661 auto Slot = getSwiftErrorSlot(ValueTy);
662 MappedResult = Builder.CreateLoad(ValueTy, Slot);
663 } else {
664 assert(Op->arg_size() == 1);
665 auto Value = MappedOp->getArgOperand(0);
666 auto ValueTy = Value->getType();
667 auto Slot = getSwiftErrorSlot(ValueTy);
668 Builder.CreateStore(Value, Slot);
669 MappedResult = Slot;
670 }
671
672 MappedOp->replaceAllUsesWith(MappedResult);
673 MappedOp->eraseFromParent();
674 }
675
676 // If we're updating the original function, we've invalidated SwiftErrorOps.
677 if (VMap == nullptr) {
678 Shape.SwiftErrorOps.clear();
679 }
680 }
681
replaceSwiftErrorOps()682 void CoroCloner::replaceSwiftErrorOps() {
683 ::replaceSwiftErrorOps(*NewF, Shape, &VMap);
684 }
685
salvageDebugInfo()686 void CoroCloner::salvageDebugInfo() {
687 SmallVector<DbgVariableIntrinsic *, 8> Worklist;
688 SmallDenseMap<llvm::Value *, llvm::AllocaInst *, 4> DbgPtrAllocaCache;
689 for (auto &BB : *NewF)
690 for (auto &I : BB)
691 if (auto *DVI = dyn_cast<DbgVariableIntrinsic>(&I))
692 Worklist.push_back(DVI);
693 for (DbgVariableIntrinsic *DVI : Worklist)
694 coro::salvageDebugInfo(DbgPtrAllocaCache, DVI, Shape.OptimizeFrame);
695
696 // Remove all salvaged dbg.declare intrinsics that became
697 // either unreachable or stale due to the CoroSplit transformation.
698 DominatorTree DomTree(*NewF);
699 auto IsUnreachableBlock = [&](BasicBlock *BB) {
700 return !isPotentiallyReachable(&NewF->getEntryBlock(), BB, nullptr,
701 &DomTree);
702 };
703 for (DbgVariableIntrinsic *DVI : Worklist) {
704 if (IsUnreachableBlock(DVI->getParent()))
705 DVI->eraseFromParent();
706 else if (isa_and_nonnull<AllocaInst>(DVI->getVariableLocationOp(0))) {
707 // Count all non-debuginfo uses in reachable blocks.
708 unsigned Uses = 0;
709 for (auto *User : DVI->getVariableLocationOp(0)->users())
710 if (auto *I = dyn_cast<Instruction>(User))
711 if (!isa<AllocaInst>(I) && !IsUnreachableBlock(I->getParent()))
712 ++Uses;
713 if (!Uses)
714 DVI->eraseFromParent();
715 }
716 }
717 }
718
replaceEntryBlock()719 void CoroCloner::replaceEntryBlock() {
720 // In the original function, the AllocaSpillBlock is a block immediately
721 // following the allocation of the frame object which defines GEPs for
722 // all the allocas that have been moved into the frame, and it ends by
723 // branching to the original beginning of the coroutine. Make this
724 // the entry block of the cloned function.
725 auto *Entry = cast<BasicBlock>(VMap[Shape.AllocaSpillBlock]);
726 auto *OldEntry = &NewF->getEntryBlock();
727 Entry->setName("entry" + Suffix);
728 Entry->moveBefore(OldEntry);
729 Entry->getTerminator()->eraseFromParent();
730
731 // Clear all predecessors of the new entry block. There should be
732 // exactly one predecessor, which we created when splitting out
733 // AllocaSpillBlock to begin with.
734 assert(Entry->hasOneUse());
735 auto BranchToEntry = cast<BranchInst>(Entry->user_back());
736 assert(BranchToEntry->isUnconditional());
737 Builder.SetInsertPoint(BranchToEntry);
738 Builder.CreateUnreachable();
739 BranchToEntry->eraseFromParent();
740
741 // Branch from the entry to the appropriate place.
742 Builder.SetInsertPoint(Entry);
743 switch (Shape.ABI) {
744 case coro::ABI::Switch: {
745 // In switch-lowering, we built a resume-entry block in the original
746 // function. Make the entry block branch to this.
747 auto *SwitchBB =
748 cast<BasicBlock>(VMap[Shape.SwitchLowering.ResumeEntryBlock]);
749 Builder.CreateBr(SwitchBB);
750 break;
751 }
752 case coro::ABI::Async:
753 case coro::ABI::Retcon:
754 case coro::ABI::RetconOnce: {
755 // In continuation ABIs, we want to branch to immediately after the
756 // active suspend point. Earlier phases will have put the suspend in its
757 // own basic block, so just thread our jump directly to its successor.
758 assert((Shape.ABI == coro::ABI::Async &&
759 isa<CoroSuspendAsyncInst>(ActiveSuspend)) ||
760 ((Shape.ABI == coro::ABI::Retcon ||
761 Shape.ABI == coro::ABI::RetconOnce) &&
762 isa<CoroSuspendRetconInst>(ActiveSuspend)));
763 auto *MappedCS = cast<AnyCoroSuspendInst>(VMap[ActiveSuspend]);
764 auto Branch = cast<BranchInst>(MappedCS->getNextNode());
765 assert(Branch->isUnconditional());
766 Builder.CreateBr(Branch->getSuccessor(0));
767 break;
768 }
769 }
770
771 // Any static alloca that's still being used but not reachable from the new
772 // entry needs to be moved to the new entry.
773 Function *F = OldEntry->getParent();
774 DominatorTree DT{*F};
775 for (Instruction &I : llvm::make_early_inc_range(instructions(F))) {
776 auto *Alloca = dyn_cast<AllocaInst>(&I);
777 if (!Alloca || I.use_empty())
778 continue;
779 if (DT.isReachableFromEntry(I.getParent()) ||
780 !isa<ConstantInt>(Alloca->getArraySize()))
781 continue;
782 I.moveBefore(*Entry, Entry->getFirstInsertionPt());
783 }
784 }
785
786 /// Derive the value of the new frame pointer.
deriveNewFramePointer()787 Value *CoroCloner::deriveNewFramePointer() {
788 // Builder should be inserting to the front of the new entry block.
789
790 switch (Shape.ABI) {
791 // In switch-lowering, the argument is the frame pointer.
792 case coro::ABI::Switch:
793 return &*NewF->arg_begin();
794 // In async-lowering, one of the arguments is an async context as determined
795 // by the `llvm.coro.id.async` intrinsic. We can retrieve the async context of
796 // the resume function from the async context projection function associated
797 // with the active suspend. The frame is located as a tail to the async
798 // context header.
799 case coro::ABI::Async: {
800 auto *ActiveAsyncSuspend = cast<CoroSuspendAsyncInst>(ActiveSuspend);
801 auto ContextIdx = ActiveAsyncSuspend->getStorageArgumentIndex() & 0xff;
802 auto *CalleeContext = NewF->getArg(ContextIdx);
803 auto *FramePtrTy = Shape.FrameTy->getPointerTo();
804 auto *ProjectionFunc =
805 ActiveAsyncSuspend->getAsyncContextProjectionFunction();
806 auto DbgLoc =
807 cast<CoroSuspendAsyncInst>(VMap[ActiveSuspend])->getDebugLoc();
808 // Calling i8* (i8*)
809 auto *CallerContext = Builder.CreateCall(ProjectionFunc->getFunctionType(),
810 ProjectionFunc, CalleeContext);
811 CallerContext->setCallingConv(ProjectionFunc->getCallingConv());
812 CallerContext->setDebugLoc(DbgLoc);
813 // The frame is located after the async_context header.
814 auto &Context = Builder.getContext();
815 auto *FramePtrAddr = Builder.CreateConstInBoundsGEP1_32(
816 Type::getInt8Ty(Context), CallerContext,
817 Shape.AsyncLowering.FrameOffset, "async.ctx.frameptr");
818 // Inline the projection function.
819 InlineFunctionInfo InlineInfo;
820 auto InlineRes = InlineFunction(*CallerContext, InlineInfo);
821 assert(InlineRes.isSuccess());
822 (void)InlineRes;
823 return Builder.CreateBitCast(FramePtrAddr, FramePtrTy);
824 }
825 // In continuation-lowering, the argument is the opaque storage.
826 case coro::ABI::Retcon:
827 case coro::ABI::RetconOnce: {
828 Argument *NewStorage = &*NewF->arg_begin();
829 auto FramePtrTy = Shape.FrameTy->getPointerTo();
830
831 // If the storage is inline, just bitcast to the storage to the frame type.
832 if (Shape.RetconLowering.IsFrameInlineInStorage)
833 return Builder.CreateBitCast(NewStorage, FramePtrTy);
834
835 // Otherwise, load the real frame from the opaque storage.
836 auto FramePtrPtr =
837 Builder.CreateBitCast(NewStorage, FramePtrTy->getPointerTo());
838 return Builder.CreateLoad(FramePtrTy, FramePtrPtr);
839 }
840 }
841 llvm_unreachable("bad ABI");
842 }
843
addFramePointerAttrs(AttributeList & Attrs,LLVMContext & Context,unsigned ParamIndex,uint64_t Size,Align Alignment,bool NoAlias)844 static void addFramePointerAttrs(AttributeList &Attrs, LLVMContext &Context,
845 unsigned ParamIndex, uint64_t Size,
846 Align Alignment, bool NoAlias) {
847 AttrBuilder ParamAttrs(Context);
848 ParamAttrs.addAttribute(Attribute::NonNull);
849 ParamAttrs.addAttribute(Attribute::NoUndef);
850
851 if (NoAlias)
852 ParamAttrs.addAttribute(Attribute::NoAlias);
853
854 ParamAttrs.addAlignmentAttr(Alignment);
855 ParamAttrs.addDereferenceableAttr(Size);
856 Attrs = Attrs.addParamAttributes(Context, ParamIndex, ParamAttrs);
857 }
858
addAsyncContextAttrs(AttributeList & Attrs,LLVMContext & Context,unsigned ParamIndex)859 static void addAsyncContextAttrs(AttributeList &Attrs, LLVMContext &Context,
860 unsigned ParamIndex) {
861 AttrBuilder ParamAttrs(Context);
862 ParamAttrs.addAttribute(Attribute::SwiftAsync);
863 Attrs = Attrs.addParamAttributes(Context, ParamIndex, ParamAttrs);
864 }
865
addSwiftSelfAttrs(AttributeList & Attrs,LLVMContext & Context,unsigned ParamIndex)866 static void addSwiftSelfAttrs(AttributeList &Attrs, LLVMContext &Context,
867 unsigned ParamIndex) {
868 AttrBuilder ParamAttrs(Context);
869 ParamAttrs.addAttribute(Attribute::SwiftSelf);
870 Attrs = Attrs.addParamAttributes(Context, ParamIndex, ParamAttrs);
871 }
872
/// Clone the body of the original function into a resume function of
/// some sort.
///
/// The steps, in order, are:
///  1. Create the clone declaration if NewF is not set yet.
///  2. Map every original argument to a temporary placeholder instruction and
///     clone the body of OrigF into NewF.
///  3. Fix up debug info, then restore the linkage/visibility/unnamed_addr/
///     DLL storage class that NewF had before cloning.
///  4. Compute the ABI-specific attribute list and calling convention.
///  5. Install the new entry block, derive the new frame pointer and redirect
///     uses of the old frame pointer and of coro.begin to it.
///  6. Drop the argument placeholders and lower the remaining coroutine
///     intrinsics (suspends, swifterror, coro.end, coro.free).
void CoroCloner::create() {
  // Create the new function if we don't already have one.
  if (!NewF) {
    NewF = createCloneDeclaration(OrigF, Shape, Suffix,
                                  OrigF.getParent()->end(), ActiveSuspend);
  }

  // Replace all args with dummy instructions. If an argument is the old frame
  // pointer, the dummy will be replaced by the new frame pointer once it is
  // computed below. Uses of all other arguments should have already been
  // rewritten by buildCoroutineFrame() to use loads/stores on the coroutine
  // frame.
  // Note: the dummies are not inserted into any block; they are deleted with
  // deleteValue() further down.
  SmallVector<Instruction *> DummyArgs;
  for (Argument &A : OrigF.args()) {
    DummyArgs.push_back(new FreezeInst(UndefValue::get(A.getType())));
    VMap[&A] = DummyArgs.back();
  }

  SmallVector<ReturnInst *, 4> Returns;

  // Ignore attempts to change certain attributes of the function.
  // TODO: maybe there should be a way to suppress this during cloning?
  auto savedVisibility = NewF->getVisibility();
  auto savedUnnamedAddr = NewF->getUnnamedAddr();
  auto savedDLLStorageClass = NewF->getDLLStorageClass();

  // NewF's linkage (which CloneFunctionInto does *not* change) might not
  // be compatible with the visibility of OrigF (which it *does* change),
  // so protect against that.
  auto savedLinkage = NewF->getLinkage();
  NewF->setLinkage(llvm::GlobalValue::ExternalLinkage);

  // Returns is filled with the return instructions of the clone.
  CloneFunctionInto(NewF, &OrigF, VMap,
                    CloneFunctionChangeType::LocalChangesOnly, Returns);

  auto &Context = NewF->getContext();

  // For async functions / continuations, adjust the scope line of the
  // clone to the line number of the suspend point. However, only
  // adjust the scope line when the files are the same. This ensures
  // line number and file name belong together. The scope line is
  // associated with all pre-prologue instructions. This avoids a jump
  // in the linetable from the function declaration to the suspend point.
  if (DISubprogram *SP = NewF->getSubprogram()) {
    assert(SP != OrigF.getSubprogram() && SP->isDistinct());
    if (ActiveSuspend)
      if (auto DL = ActiveSuspend->getDebugLoc())
        if (SP->getFile() == DL->getFile())
          SP->setScopeLine(DL->getLine());
    // Update the linkage name to reflect the modified symbol name. It
    // is necessary to update the linkage name in Swift, since the
    // mangling changes for resume functions. It might also be the
    // right thing to do in C++, but due to a limitation in LLVM's
    // AsmPrinter we can only do this if the function doesn't have an
    // abstract specification, since the DWARF backend expects the
    // abstract specification to contain the linkage name and asserts
    // that they are identical.
    if (!SP->getDeclaration() && SP->getUnit() &&
        SP->getUnit()->getSourceLanguage() == dwarf::DW_LANG_Swift)
      SP->replaceLinkageName(MDString::get(Context, NewF->getName()));
  }

  // Undo whatever cloning did to the properties saved above.
  NewF->setLinkage(savedLinkage);
  NewF->setVisibility(savedVisibility);
  NewF->setUnnamedAddr(savedUnnamedAddr);
  NewF->setDLLStorageClass(savedDLLStorageClass);
  // The function sanitizer metadata needs to match the signature of the
  // function it is being attached to. However this does not hold for split
  // functions here. Thus remove the metadata for split functions.
  if (Shape.ABI == coro::ABI::Switch &&
      NewF->hasMetadata(LLVMContext::MD_func_sanitize))
    NewF->eraseMetadata(LLVMContext::MD_func_sanitize);

  // Replace the attributes of the new function:
  auto OrigAttrs = NewF->getAttributes();
  auto NewAttrs = AttributeList();

  switch (Shape.ABI) {
  case coro::ABI::Switch:
    // Bootstrap attributes by copying function attributes from the
    // original function. This should include optimization settings and so on.
    NewAttrs = NewAttrs.addFnAttributes(
        Context, AttrBuilder(Context, OrigAttrs.getFnAttrs()));

    addFramePointerAttrs(NewAttrs, Context, 0, Shape.FrameSize,
                         Shape.FrameAlign, /*NoAlias=*/false);
    break;
  case coro::ABI::Async: {
    auto *ActiveAsyncSuspend = cast<CoroSuspendAsyncInst>(ActiveSuspend);
    if (OrigF.hasParamAttribute(Shape.AsyncLowering.ContextArgNo,
                                Attribute::SwiftAsync)) {
      // The storage argument index packs two parameter indices: the low byte
      // is the async context parameter, the remaining bits the swiftself
      // parameter.
      uint32_t ArgAttributeIndices =
          ActiveAsyncSuspend->getStorageArgumentIndex();
      auto ContextArgIndex = ArgAttributeIndices & 0xff;
      addAsyncContextAttrs(NewAttrs, Context, ContextArgIndex);

      // `swiftasync` must precede `swiftself` so 0 is not a valid index for
      // `swiftself`.
      auto SwiftSelfIndex = ArgAttributeIndices >> 8;
      if (SwiftSelfIndex)
        addSwiftSelfAttrs(NewAttrs, Context, SwiftSelfIndex);
    }

    // Transfer the original function's attributes.
    auto FnAttrs = OrigF.getAttributes().getFnAttrs();
    NewAttrs = NewAttrs.addFnAttributes(Context, AttrBuilder(Context, FnAttrs));
    break;
  }
  case coro::ABI::Retcon:
  case coro::ABI::RetconOnce:
    // If we have a continuation prototype, just use its attributes,
    // full-stop.
    NewAttrs = Shape.RetconLowering.ResumePrototype->getAttributes();

    /// FIXME: Is it really good to add the NoAlias attribute?
    addFramePointerAttrs(NewAttrs, Context, 0,
                         Shape.getRetconCoroId()->getStorageSize(),
                         Shape.getRetconCoroId()->getStorageAlignment(),
                         /*NoAlias=*/true);

    break;
  }

  switch (Shape.ABI) {
  // In these ABIs, the cloned functions always return 'void', and the
  // existing return sites are meaningless.  Note that for unique
  // continuations, this includes the returns associated with suspends;
  // this is fine because we can't suspend twice.
  case coro::ABI::Switch:
  case coro::ABI::RetconOnce:
    // Remove old returns.
    for (ReturnInst *Return : Returns)
      changeToUnreachable(Return);
    break;

  // With multi-suspend continuations, we'll already have eliminated the
  // original returns and inserted returns before all the suspend points,
  // so we want to leave any returns in place.
  case coro::ABI::Retcon:
    break;
  // Async lowering will insert musttail call functions at all suspend points
  // followed by a return.
  // Don't change returns to unreachable because that will trip up the verifier.
  // These returns should be unreachable from the clone.
  case coro::ABI::Async:
    break;
  }

  NewF->setAttributes(NewAttrs);
  NewF->setCallingConv(Shape.getResumeFunctionCC());

  // Set up the new entry block.
  replaceEntryBlock();

  // Emit the frame-pointer computation at the very start of the new entry
  // block.
  Builder.SetInsertPoint(&NewF->getEntryBlock().front());
  NewFramePtr = deriveNewFramePointer();

  // Remap frame pointer.
  Value *OldFramePtr = VMap[Shape.FramePtr];
  NewFramePtr->takeName(OldFramePtr);
  OldFramePtr->replaceAllUsesWith(NewFramePtr);

  // Remap vFrame pointer.
  auto *NewVFrame = Builder.CreateBitCast(
      NewFramePtr, Type::getInt8PtrTy(Builder.getContext()), "vFrame");
  Value *OldVFrame = cast<Value>(VMap[Shape.CoroBegin]);
  // Skip the replacement when both are already the same value.
  if (OldVFrame != NewVFrame)
    OldVFrame->replaceAllUsesWith(NewVFrame);

  // All uses of the arguments should have been resolved by this point,
  // so we can safely remove the dummy values.
  for (Instruction *DummyArg : DummyArgs) {
    DummyArg->replaceAllUsesWith(UndefValue::get(DummyArg->getType()));
    DummyArg->deleteValue();
  }

  switch (Shape.ABI) {
  case coro::ABI::Switch:
    // Rewrite final suspend handling as it is not done via switch (allows to
    // remove final case from the switch, since it is undefined behavior to
    // resume the coroutine suspended at the final suspend point.
    if (Shape.SwitchLowering.HasFinalSuspend)
      handleFinalSuspend();
    break;
  case coro::ABI::Async:
  case coro::ABI::Retcon:
  case coro::ABI::RetconOnce:
    // Replace uses of the active suspend with the corresponding
    // continuation-function arguments.
    assert(ActiveSuspend != nullptr &&
           "no active suspend when lowering a continuation-style coroutine");
    replaceRetconOrAsyncSuspendUses();
    break;
  }

  // Handle suspends.
  replaceCoroSuspends();

  // Handle swifterror.
  replaceSwiftErrorOps();

  // Remove coro.end intrinsics.
  replaceCoroEnds();

  // Salvage debug info that points into the coroutine frame.
  salvageDebugInfo();

  // Eliminate coro.free from the clones, replacing it with 'null' in cleanup,
  // to suppress deallocation code.
  if (Shape.ABI == coro::ABI::Switch)
    coro::replaceCoroFree(cast<CoroIdInst>(VMap[Shape.CoroBegin->getId()]),
                          /*Elide=*/ FKind == CoroCloner::Kind::SwitchCleanup);
}
1088
1089 // Create a resume clone by cloning the body of the original function, setting
1090 // new entry block and replacing coro.suspend an appropriate value to force
1091 // resume or cleanup pass for every suspend point.
createClone(Function & F,const Twine & Suffix,coro::Shape & Shape,CoroCloner::Kind FKind)1092 static Function *createClone(Function &F, const Twine &Suffix,
1093 coro::Shape &Shape, CoroCloner::Kind FKind) {
1094 CoroCloner Cloner(F, Suffix, Shape, FKind);
1095 Cloner.create();
1096 return Cloner.getFunction();
1097 }
1098
updateAsyncFuncPointerContextSize(coro::Shape & Shape)1099 static void updateAsyncFuncPointerContextSize(coro::Shape &Shape) {
1100 assert(Shape.ABI == coro::ABI::Async);
1101
1102 auto *FuncPtrStruct = cast<ConstantStruct>(
1103 Shape.AsyncLowering.AsyncFuncPointer->getInitializer());
1104 auto *OrigRelativeFunOffset = FuncPtrStruct->getOperand(0);
1105 auto *OrigContextSize = FuncPtrStruct->getOperand(1);
1106 auto *NewContextSize = ConstantInt::get(OrigContextSize->getType(),
1107 Shape.AsyncLowering.ContextSize);
1108 auto *NewFuncPtrStruct = ConstantStruct::get(
1109 FuncPtrStruct->getType(), OrigRelativeFunOffset, NewContextSize);
1110
1111 Shape.AsyncLowering.AsyncFuncPointer->setInitializer(NewFuncPtrStruct);
1112 }
1113
replaceFrameSizeAndAlignment(coro::Shape & Shape)1114 static void replaceFrameSizeAndAlignment(coro::Shape &Shape) {
1115 if (Shape.ABI == coro::ABI::Async)
1116 updateAsyncFuncPointerContextSize(Shape);
1117
1118 for (CoroAlignInst *CA : Shape.CoroAligns) {
1119 CA->replaceAllUsesWith(
1120 ConstantInt::get(CA->getType(), Shape.FrameAlign.value()));
1121 CA->eraseFromParent();
1122 }
1123
1124 if (Shape.CoroSizes.empty())
1125 return;
1126
1127 // In the same function all coro.sizes should have the same result type.
1128 auto *SizeIntrin = Shape.CoroSizes.back();
1129 Module *M = SizeIntrin->getModule();
1130 const DataLayout &DL = M->getDataLayout();
1131 auto Size = DL.getTypeAllocSize(Shape.FrameTy);
1132 auto *SizeConstant = ConstantInt::get(SizeIntrin->getType(), Size);
1133
1134 for (CoroSizeInst *CS : Shape.CoroSizes) {
1135 CS->replaceAllUsesWith(SizeConstant);
1136 CS->eraseFromParent();
1137 }
1138 }
1139
1140 // Create a global constant array containing pointers to functions provided and
1141 // set Info parameter of CoroBegin to point at this constant. Example:
1142 //
1143 // @f.resumers = internal constant [2 x void(%f.frame*)*]
1144 // [void(%f.frame*)* @f.resume, void(%f.frame*)* @f.destroy]
1145 // define void @f() {
1146 // ...
1147 // call i8* @llvm.coro.begin(i8* null, i32 0, i8* null,
1148 // i8* bitcast([2 x void(%f.frame*)*] * @f.resumers to i8*))
1149 //
1150 // Assumes that all the functions have the same signature.
setCoroInfo(Function & F,coro::Shape & Shape,ArrayRef<Function * > Fns)1151 static void setCoroInfo(Function &F, coro::Shape &Shape,
1152 ArrayRef<Function *> Fns) {
1153 // This only works under the switch-lowering ABI because coro elision
1154 // only works on the switch-lowering ABI.
1155 assert(Shape.ABI == coro::ABI::Switch);
1156
1157 SmallVector<Constant *, 4> Args(Fns.begin(), Fns.end());
1158 assert(!Args.empty());
1159 Function *Part = *Fns.begin();
1160 Module *M = Part->getParent();
1161 auto *ArrTy = ArrayType::get(Part->getType(), Args.size());
1162
1163 auto *ConstVal = ConstantArray::get(ArrTy, Args);
1164 auto *GV = new GlobalVariable(*M, ConstVal->getType(), /*isConstant=*/true,
1165 GlobalVariable::PrivateLinkage, ConstVal,
1166 F.getName() + Twine(".resumers"));
1167
1168 // Update coro.begin instruction to refer to this constant.
1169 LLVMContext &C = F.getContext();
1170 auto *BC = ConstantExpr::getPointerCast(GV, Type::getInt8PtrTy(C));
1171 Shape.getSwitchCoroId()->setInfo(BC);
1172 }
1173
1174 // Store addresses of Resume/Destroy/Cleanup functions in the coroutine frame.
updateCoroFrame(coro::Shape & Shape,Function * ResumeFn,Function * DestroyFn,Function * CleanupFn)1175 static void updateCoroFrame(coro::Shape &Shape, Function *ResumeFn,
1176 Function *DestroyFn, Function *CleanupFn) {
1177 assert(Shape.ABI == coro::ABI::Switch);
1178
1179 IRBuilder<> Builder(Shape.getInsertPtAfterFramePtr());
1180
1181 auto *ResumeAddr = Builder.CreateStructGEP(
1182 Shape.FrameTy, Shape.FramePtr, coro::Shape::SwitchFieldIndex::Resume,
1183 "resume.addr");
1184 Builder.CreateStore(ResumeFn, ResumeAddr);
1185
1186 Value *DestroyOrCleanupFn = DestroyFn;
1187
1188 CoroIdInst *CoroId = Shape.getSwitchCoroId();
1189 if (CoroAllocInst *CA = CoroId->getCoroAlloc()) {
1190 // If there is a CoroAlloc and it returns false (meaning we elide the
1191 // allocation, use CleanupFn instead of DestroyFn).
1192 DestroyOrCleanupFn = Builder.CreateSelect(CA, DestroyFn, CleanupFn);
1193 }
1194
1195 auto *DestroyAddr = Builder.CreateStructGEP(
1196 Shape.FrameTy, Shape.FramePtr, coro::Shape::SwitchFieldIndex::Destroy,
1197 "destroy.addr");
1198 Builder.CreateStore(DestroyOrCleanupFn, DestroyAddr);
1199 }
1200
postSplitCleanup(Function & F)1201 static void postSplitCleanup(Function &F) {
1202 removeUnreachableBlocks(F);
1203
1204 #ifndef NDEBUG
1205 // For now, we do a mandatory verification step because we don't
1206 // entirely trust this pass. Note that we don't want to add a verifier
1207 // pass to FPM below because it will also verify all the global data.
1208 if (verifyFunction(F, &errs()))
1209 report_fatal_error("Broken function");
1210 #endif
1211 }
1212
1213 // Assuming we arrived at the block NewBlock from Prev instruction, store
1214 // PHI's incoming values in the ResolvedValues map.
1215 static void
scanPHIsAndUpdateValueMap(Instruction * Prev,BasicBlock * NewBlock,DenseMap<Value *,Value * > & ResolvedValues)1216 scanPHIsAndUpdateValueMap(Instruction *Prev, BasicBlock *NewBlock,
1217 DenseMap<Value *, Value *> &ResolvedValues) {
1218 auto *PrevBB = Prev->getParent();
1219 for (PHINode &PN : NewBlock->phis()) {
1220 auto V = PN.getIncomingValueForBlock(PrevBB);
1221 // See if we already resolved it.
1222 auto VI = ResolvedValues.find(V);
1223 if (VI != ResolvedValues.end())
1224 V = VI->second;
1225 // Remember the value.
1226 ResolvedValues[&PN] = V;
1227 }
1228 }
1229
// Replace a sequence of branches leading to a ret, with a clone of a ret
// instruction. Suspend instruction represented by a switch, track the PHI
// values and select the correct case successor when possible.
//
// Starting at \p InitialInst, the walk follows unconditional branches,
// conditional branches that fold to a constant, compares whose operands
// resolve to constants, and switches whose condition resolves to a constant.
// Returns true if the walk reaches a ret; in that case, unless InitialInst is
// itself that ret, InitialInst is replaced by a clone of it. Returns false as
// soon as anything else is encountered.
//
// Note that the IR may be modified even when false is returned: trivially
// dead instructions are erased, terminators are constant-folded and resolved
// compares are replaced by constants along the way.
static bool simplifyTerminatorLeadingToRet(Instruction *InitialInst) {
  // Values (PHIs) whose incoming value along the walked path is known.
  DenseMap<Value *, Value *> ResolvedValues;
  // Successor of InitialInst if InitialInst is an unconditional branch.
  BasicBlock *UnconditionalSucc = nullptr;
  assert(InitialInst->getModule());
  const DataLayout &DL = InitialInst->getModule()->getDataLayout();

  // Skip over instructions that do not matter for the walk (bitcasts, debug
  // or pseudo instructions, lifetime markers) and erase trivially dead ones,
  // returning the first instruction that remains.
  auto GetFirstValidInstruction = [](Instruction *I) {
    while (I) {
      // BitCastInst wouldn't generate actual code so that we could skip it.
      if (isa<BitCastInst>(I) || I->isDebugOrPseudoInst() ||
          I->isLifetimeStartOrEnd())
        I = I->getNextNode();
      else if (isInstructionTriviallyDead(I))
        // Since we are in the middle of the transformation, we need to erase
        // the dead instruction manually.
        I = &*I->eraseFromParent();
      else
        break;
    }
    return I;
  };

  // Return V, or the value it was resolved to, as a ConstantInt; null if it
  // is not a constant integer.
  auto TryResolveConstant = [&ResolvedValues](Value *V) {
    auto It = ResolvedValues.find(V);
    if (It != ResolvedValues.end())
      V = It->second;
    return dyn_cast<ConstantInt>(V);
  };

  Instruction *I = InitialInst;
  while (I->isTerminator() || isa<CmpInst>(I)) {
    if (isa<ReturnInst>(I)) {
      if (I != InitialInst) {
        // If InitialInst is an unconditional branch,
        // remove PHI values that come from basic block of InitialInst
        if (UnconditionalSucc)
          UnconditionalSucc->removePredecessor(InitialInst->getParent(), true);
        ReplaceInstWithInst(InitialInst, I->clone());
      }
      return true;
    }
    if (auto *BR = dyn_cast<BranchInst>(I)) {
      if (BR->isUnconditional()) {
        BasicBlock *Succ = BR->getSuccessor(0);
        if (I == InitialInst)
          UnconditionalSucc = Succ;
        scanPHIsAndUpdateValueMap(I, Succ, ResolvedValues);
        I = GetFirstValidInstruction(Succ->getFirstNonPHIOrDbgOrLifetime());
        continue;
      }

      BasicBlock *BB = BR->getParent();
      // Handle the case the condition of the conditional branch is constant.
      // e.g.,
      //
      //     br i1 false, label %cleanup, label %CoroEnd
      //
      // It is possible during the transformation. We could continue the
      // simplifying in this case.
      if (ConstantFoldTerminator(BB, /*DeleteDeadConditions=*/true)) {
        // Handle this branch in next iteration.
        I = BB->getTerminator();
        continue;
      }
      // A conditional branch that does not fold falls through to the
      // `return false` at the bottom of the loop body.
    } else if (auto *CondCmp = dyn_cast<CmpInst>(I)) {
      // If the case number of suspended switch instruction is reduced to
      // 1, then it is simplified to CmpInst in llvm::ConstantFoldTerminator.
      auto *BR = dyn_cast<BranchInst>(
          GetFirstValidInstruction(CondCmp->getNextNode()));
      if (!BR || !BR->isConditional() || CondCmp != BR->getCondition())
        return false;

      // And the comparison looks like : %cond = icmp eq i8 %V, constant.
      // So we try to resolve constant for the first operand only since the
      // second operand should be literal constant by design.
      ConstantInt *Cond0 = TryResolveConstant(CondCmp->getOperand(0));
      auto *Cond1 = dyn_cast<ConstantInt>(CondCmp->getOperand(1));
      if (!Cond0 || !Cond1)
        return false;

      // Both operands of the CmpInst are Constant. So that we could evaluate
      // it immediately to get the destination.
      auto *ConstResult =
          dyn_cast_or_null<ConstantInt>(ConstantFoldCompareInstOperands(
              CondCmp->getPredicate(), Cond0, Cond1, DL));
      if (!ConstResult)
        return false;

      CondCmp->replaceAllUsesWith(ConstResult);
      CondCmp->eraseFromParent();

      // Handle this branch in next iteration.
      I = BR;
      continue;
    } else if (auto *SI = dyn_cast<SwitchInst>(I)) {
      ConstantInt *Cond = TryResolveConstant(SI->getCondition());
      if (!Cond)
        return false;

      // Follow the case selected by the resolved condition.
      BasicBlock *BB = SI->findCaseValue(Cond)->getCaseSuccessor();
      scanPHIsAndUpdateValueMap(I, BB, ResolvedValues);
      I = GetFirstValidInstruction(BB->getFirstNonPHIOrDbgOrLifetime());
      continue;
    }

    return false;
  }
  return false;
}
1342
1343 // Check whether CI obeys the rules of musttail attribute.
shouldBeMustTail(const CallInst & CI,const Function & F)1344 static bool shouldBeMustTail(const CallInst &CI, const Function &F) {
1345 if (CI.isInlineAsm())
1346 return false;
1347
1348 // Match prototypes and calling conventions of resume function.
1349 FunctionType *CalleeTy = CI.getFunctionType();
1350 if (!CalleeTy->getReturnType()->isVoidTy() || (CalleeTy->getNumParams() != 1))
1351 return false;
1352
1353 Type *CalleeParmTy = CalleeTy->getParamType(0);
1354 if (!CalleeParmTy->isPointerTy() ||
1355 (CalleeParmTy->getPointerAddressSpace() != 0))
1356 return false;
1357
1358 if (CI.getCallingConv() != F.getCallingConv())
1359 return false;
1360
1361 // CI should not has any ABI-impacting function attributes.
1362 static const Attribute::AttrKind ABIAttrs[] = {
1363 Attribute::StructRet, Attribute::ByVal, Attribute::InAlloca,
1364 Attribute::Preallocated, Attribute::InReg, Attribute::Returned,
1365 Attribute::SwiftSelf, Attribute::SwiftError};
1366 AttributeList Attrs = CI.getAttributes();
1367 for (auto AK : ABIAttrs)
1368 if (Attrs.hasParamAttr(0, AK))
1369 return false;
1370
1371 return true;
1372 }
1373
1374 // Add musttail to any resume instructions that is immediately followed by a
1375 // suspend (i.e. ret). We do this even in -O0 to support guaranteed tail call
1376 // for symmetrical coroutine control transfer (C++ Coroutines TS extension).
1377 // This transformation is done only in the resume part of the coroutine that has
1378 // identical signature and calling convention as the coro.resume call.
addMustTailToCoroResumes(Function & F,TargetTransformInfo & TTI)1379 static void addMustTailToCoroResumes(Function &F, TargetTransformInfo &TTI) {
1380 bool changed = false;
1381
1382 // Collect potential resume instructions.
1383 SmallVector<CallInst *, 4> Resumes;
1384 for (auto &I : instructions(F))
1385 if (auto *Call = dyn_cast<CallInst>(&I))
1386 if (shouldBeMustTail(*Call, F))
1387 Resumes.push_back(Call);
1388
1389 // Set musttail on those that are followed by a ret instruction.
1390 for (CallInst *Call : Resumes)
1391 // Skip targets which don't support tail call on the specific case.
1392 if (TTI.supportsTailCallFor(Call) &&
1393 simplifyTerminatorLeadingToRet(Call->getNextNode())) {
1394 Call->setTailCallKind(CallInst::TCK_MustTail);
1395 changed = true;
1396 }
1397
1398 if (changed)
1399 removeUnreachableBlocks(F);
1400 }
1401
1402 // Coroutine has no suspend points. Remove heap allocation for the coroutine
1403 // frame if possible.
handleNoSuspendCoroutine(coro::Shape & Shape)1404 static void handleNoSuspendCoroutine(coro::Shape &Shape) {
1405 auto *CoroBegin = Shape.CoroBegin;
1406 auto *CoroId = CoroBegin->getId();
1407 auto *AllocInst = CoroId->getCoroAlloc();
1408 switch (Shape.ABI) {
1409 case coro::ABI::Switch: {
1410 auto SwitchId = cast<CoroIdInst>(CoroId);
1411 coro::replaceCoroFree(SwitchId, /*Elide=*/AllocInst != nullptr);
1412 if (AllocInst) {
1413 IRBuilder<> Builder(AllocInst);
1414 auto *Frame = Builder.CreateAlloca(Shape.FrameTy);
1415 Frame->setAlignment(Shape.FrameAlign);
1416 auto *VFrame = Builder.CreateBitCast(Frame, Builder.getInt8PtrTy());
1417 AllocInst->replaceAllUsesWith(Builder.getFalse());
1418 AllocInst->eraseFromParent();
1419 CoroBegin->replaceAllUsesWith(VFrame);
1420 } else {
1421 CoroBegin->replaceAllUsesWith(CoroBegin->getMem());
1422 }
1423
1424 break;
1425 }
1426 case coro::ABI::Async:
1427 case coro::ABI::Retcon:
1428 case coro::ABI::RetconOnce:
1429 CoroBegin->replaceAllUsesWith(UndefValue::get(CoroBegin->getType()));
1430 break;
1431 }
1432
1433 CoroBegin->eraseFromParent();
1434 }
1435
1436 // SimplifySuspendPoint needs to check that there is no calls between
1437 // coro_save and coro_suspend, since any of the calls may potentially resume
1438 // the coroutine and if that is the case we cannot eliminate the suspend point.
hasCallsInBlockBetween(Instruction * From,Instruction * To)1439 static bool hasCallsInBlockBetween(Instruction *From, Instruction *To) {
1440 for (Instruction *I = From; I != To; I = I->getNextNode()) {
1441 // Assume that no intrinsic can resume the coroutine.
1442 if (isa<IntrinsicInst>(I))
1443 continue;
1444
1445 if (isa<CallBase>(I))
1446 return true;
1447 }
1448 return false;
1449 }
1450
hasCallsInBlocksBetween(BasicBlock * SaveBB,BasicBlock * ResDesBB)1451 static bool hasCallsInBlocksBetween(BasicBlock *SaveBB, BasicBlock *ResDesBB) {
1452 SmallPtrSet<BasicBlock *, 8> Set;
1453 SmallVector<BasicBlock *, 8> Worklist;
1454
1455 Set.insert(SaveBB);
1456 Worklist.push_back(ResDesBB);
1457
1458 // Accumulate all blocks between SaveBB and ResDesBB. Because CoroSaveIntr
1459 // returns a token consumed by suspend instruction, all blocks in between
1460 // will have to eventually hit SaveBB when going backwards from ResDesBB.
1461 while (!Worklist.empty()) {
1462 auto *BB = Worklist.pop_back_val();
1463 Set.insert(BB);
1464 for (auto *Pred : predecessors(BB))
1465 if (!Set.contains(Pred))
1466 Worklist.push_back(Pred);
1467 }
1468
1469 // SaveBB and ResDesBB are checked separately in hasCallsBetween.
1470 Set.erase(SaveBB);
1471 Set.erase(ResDesBB);
1472
1473 for (auto *BB : Set)
1474 if (hasCallsInBlockBetween(BB->getFirstNonPHI(), nullptr))
1475 return true;
1476
1477 return false;
1478 }
1479
hasCallsBetween(Instruction * Save,Instruction * ResumeOrDestroy)1480 static bool hasCallsBetween(Instruction *Save, Instruction *ResumeOrDestroy) {
1481 auto *SaveBB = Save->getParent();
1482 auto *ResumeOrDestroyBB = ResumeOrDestroy->getParent();
1483
1484 if (SaveBB == ResumeOrDestroyBB)
1485 return hasCallsInBlockBetween(Save->getNextNode(), ResumeOrDestroy);
1486
1487 // Any calls from Save to the end of the block?
1488 if (hasCallsInBlockBetween(Save->getNextNode(), nullptr))
1489 return true;
1490
1491 // Any calls from begging of the block up to ResumeOrDestroy?
1492 if (hasCallsInBlockBetween(ResumeOrDestroyBB->getFirstNonPHI(),
1493 ResumeOrDestroy))
1494 return true;
1495
1496 // Any calls in all of the blocks between SaveBB and ResumeOrDestroyBB?
1497 if (hasCallsInBlocksBetween(SaveBB, ResumeOrDestroyBB))
1498 return true;
1499
1500 return false;
1501 }
1502
// If a SuspendIntrin is preceded by Resume or Destroy, we can eliminate the
// suspend point and replace it with normal control flow.
//
// Returns true if \p Suspend was eliminated. In that case the suspend, its
// coro.save and the preceding resume/destroy call have been erased, and uses
// of the suspend have been replaced with the raw index of the coro.subfn
// being called. Returns false, leaving the IR untouched, if the pattern does
// not match or the transformation is not known to be safe.
static bool simplifySuspendPoint(CoroSuspendInst *Suspend,
                                 CoroBeginInst *CoroBegin) {
  // Find the instruction executed right before the suspend. If the suspend is
  // the first instruction of its block, use the terminator of the single
  // predecessor, if there is one.
  Instruction *Prev = Suspend->getPrevNode();
  if (!Prev) {
    auto *Pred = Suspend->getParent()->getSinglePredecessor();
    if (!Pred)
      return false;
    Prev = Pred->getTerminator();
  }

  CallBase *CB = dyn_cast<CallBase>(Prev);
  if (!CB)
    return false;

  auto *Callee = CB->getCalledOperand()->stripPointerCasts();

  // See if the callsite is for resumption or destruction of the coroutine.
  auto *SubFn = dyn_cast<CoroSubFnInst>(Callee);
  if (!SubFn)
    return false;

  // Does not refer to the current coroutine, we cannot do anything with it.
  if (SubFn->getFrame() != CoroBegin)
    return false;

  // See if the transformation is safe.  Specifically, see if there are any
  // calls in between Save and CallInstr. They can potentially resume the
  // coroutine rendering this optimization unsafe.
  auto *Save = Suspend->getCoroSave();
  if (hasCallsBetween(Save, CB))
    return false;

  // Replace llvm.coro.suspend with the value that results in resumption over
  // the resume or cleanup path.
  Suspend->replaceAllUsesWith(SubFn->getRawIndex());
  Suspend->eraseFromParent();
  Save->eraseFromParent();

  // No longer need a call to coro.resume or coro.destroy.
  // An invoke is also a terminator, so branch to its normal destination
  // before it is erased below.
  if (auto *Invoke = dyn_cast<InvokeInst>(CB)) {
    BranchInst::Create(Invoke->getNormalDest(), Invoke);
  }

  // Grab the CalledValue from CB before erasing the CallInstr.
  auto *CalledValue = CB->getCalledOperand();
  CB->eraseFromParent();

  // If no more users remove it. Usually it is a bitcast of SubFn.
  if (CalledValue != SubFn && CalledValue->user_empty())
    if (auto *I = dyn_cast<Instruction>(CalledValue))
      I->eraseFromParent();

  // Now we are good to remove SubFn.
  if (SubFn->user_empty())
    SubFn->eraseFromParent();

  return true;
}
1563
// Remove suspend points that are simplified.
//
// Shape.CoroSuspends is compacted in place: every suspend that
// simplifySuspendPoint manages to eliminate is dropped from the list. Final
// suspends are never simplified here, and if there is one it is kept as the
// last element of the list.
static void simplifySuspendPoints(coro::Shape &Shape) {
  // Currently, the only simplification we do is switch-lowering-specific.
  if (Shape.ABI != coro::ABI::Switch)
    return;

  auto &S = Shape.CoroSuspends;
  // I is the index of the suspend being examined; N is the number of suspends
  // still alive, which occupy S[0..N).
  size_t I = 0, N = S.size();
  if (N == 0)
    return;

  // Index at which a final suspend ended up after being swapped away from the
  // end of the list; max() means that this did not happen.
  size_t ChangedFinalIndex = std::numeric_limits<size_t>::max();
  while (true) {
    auto SI = cast<CoroSuspendInst>(S[I]);
    // Leave final.suspend to handleFinalSuspend since it is undefined behavior
    // to resume a coroutine suspended at the final suspend point.
    if (!SI->isFinal() && simplifySuspendPoint(SI, Shape.CoroBegin)) {
      // S[I] is gone. Shrink the live range; if the removed suspend was the
      // last live one there is nothing left to examine.
      if (--N == I)
        break;

      // Fill the hole with the last live suspend and examine it next without
      // advancing I.
      std::swap(S[I], S[N]);

      if (cast<CoroSuspendInst>(S[I])->isFinal()) {
        assert(Shape.SwitchLowering.HasFinalSuspend);
        ChangedFinalIndex = I;
      }

      continue;
    }
    if (++I == N)
      break;
  }
  // Drop the slots of the eliminated suspends.
  S.resize(N);

  // Maintain final.suspend in case final suspend was swapped.
  // This is needed because we require the final suspend to be the last
  // element of CoroSuspends.
  if (ChangedFinalIndex < N) {
    assert(cast<CoroSuspendInst>(S[ChangedFinalIndex])->isFinal());
    std::swap(S[ChangedFinalIndex], S.back());
  }
}
1605
splitSwitchCoroutine(Function & F,coro::Shape & Shape,SmallVectorImpl<Function * > & Clones,TargetTransformInfo & TTI)1606 static void splitSwitchCoroutine(Function &F, coro::Shape &Shape,
1607 SmallVectorImpl<Function *> &Clones,
1608 TargetTransformInfo &TTI) {
1609 assert(Shape.ABI == coro::ABI::Switch);
1610
1611 createResumeEntryBlock(F, Shape);
1612 auto ResumeClone = createClone(F, ".resume", Shape,
1613 CoroCloner::Kind::SwitchResume);
1614 auto DestroyClone = createClone(F, ".destroy", Shape,
1615 CoroCloner::Kind::SwitchUnwind);
1616 auto CleanupClone = createClone(F, ".cleanup", Shape,
1617 CoroCloner::Kind::SwitchCleanup);
1618
1619 postSplitCleanup(*ResumeClone);
1620 postSplitCleanup(*DestroyClone);
1621 postSplitCleanup(*CleanupClone);
1622
1623 // Adding musttail call to support symmetric transfer.
1624 // Skip targets which don't support tail call.
1625 //
1626 // FIXME: Could we support symmetric transfer effectively without musttail
1627 // call?
1628 if (TTI.supportsTailCalls())
1629 addMustTailToCoroResumes(*ResumeClone, TTI);
1630
1631 // Store addresses resume/destroy/cleanup functions in the coroutine frame.
1632 updateCoroFrame(Shape, ResumeClone, DestroyClone, CleanupClone);
1633
1634 assert(Clones.empty());
1635 Clones.push_back(ResumeClone);
1636 Clones.push_back(DestroyClone);
1637 Clones.push_back(CleanupClone);
1638
1639 // Create a constant array referring to resume/destroy/clone functions pointed
1640 // by the last argument of @llvm.coro.info, so that CoroElide pass can
1641 // determined correct function to call.
1642 setCoroInfo(F, Shape, Clones);
1643 }
1644
replaceAsyncResumeFunction(CoroSuspendAsyncInst * Suspend,Value * Continuation)1645 static void replaceAsyncResumeFunction(CoroSuspendAsyncInst *Suspend,
1646 Value *Continuation) {
1647 auto *ResumeIntrinsic = Suspend->getResumeFunction();
1648 auto &Context = Suspend->getParent()->getParent()->getContext();
1649 auto *Int8PtrTy = Type::getInt8PtrTy(Context);
1650
1651 IRBuilder<> Builder(ResumeIntrinsic);
1652 auto *Val = Builder.CreateBitOrPointerCast(Continuation, Int8PtrTy);
1653 ResumeIntrinsic->replaceAllUsesWith(Val);
1654 ResumeIntrinsic->eraseFromParent();
1655 Suspend->setOperand(CoroSuspendAsyncInst::ResumeFunctionArg,
1656 UndefValue::get(Int8PtrTy));
1657 }
1658
1659 /// Coerce the arguments in \p FnArgs according to \p FnTy in \p CallArgs.
coerceArguments(IRBuilder<> & Builder,FunctionType * FnTy,ArrayRef<Value * > FnArgs,SmallVectorImpl<Value * > & CallArgs)1660 static void coerceArguments(IRBuilder<> &Builder, FunctionType *FnTy,
1661 ArrayRef<Value *> FnArgs,
1662 SmallVectorImpl<Value *> &CallArgs) {
1663 size_t ArgIdx = 0;
1664 for (auto *paramTy : FnTy->params()) {
1665 assert(ArgIdx < FnArgs.size());
1666 if (paramTy != FnArgs[ArgIdx]->getType())
1667 CallArgs.push_back(
1668 Builder.CreateBitOrPointerCast(FnArgs[ArgIdx], paramTy));
1669 else
1670 CallArgs.push_back(FnArgs[ArgIdx]);
1671 ++ArgIdx;
1672 }
1673 }
1674
createMustTailCall(DebugLoc Loc,Function * MustTailCallFn,ArrayRef<Value * > Arguments,IRBuilder<> & Builder)1675 CallInst *coro::createMustTailCall(DebugLoc Loc, Function *MustTailCallFn,
1676 ArrayRef<Value *> Arguments,
1677 IRBuilder<> &Builder) {
1678 auto *FnTy = MustTailCallFn->getFunctionType();
1679 // Coerce the arguments, llvm optimizations seem to ignore the types in
1680 // vaarg functions and throws away casts in optimized mode.
1681 SmallVector<Value *, 8> CallArgs;
1682 coerceArguments(Builder, FnTy, Arguments, CallArgs);
1683
1684 auto *TailCall = Builder.CreateCall(FnTy, MustTailCallFn, CallArgs);
1685 TailCall->setTailCallKind(CallInst::TCK_MustTail);
1686 TailCall->setDebugLoc(Loc);
1687 TailCall->setCallingConv(MustTailCallFn->getCallingConv());
1688 return TailCall;
1689 }
1690
splitAsyncCoroutine(Function & F,coro::Shape & Shape,SmallVectorImpl<Function * > & Clones)1691 static void splitAsyncCoroutine(Function &F, coro::Shape &Shape,
1692 SmallVectorImpl<Function *> &Clones) {
1693 assert(Shape.ABI == coro::ABI::Async);
1694 assert(Clones.empty());
1695 // Reset various things that the optimizer might have decided it
1696 // "knows" about the coroutine function due to not seeing a return.
1697 F.removeFnAttr(Attribute::NoReturn);
1698 F.removeRetAttr(Attribute::NoAlias);
1699 F.removeRetAttr(Attribute::NonNull);
1700
1701 auto &Context = F.getContext();
1702 auto *Int8PtrTy = Type::getInt8PtrTy(Context);
1703
1704 auto *Id = cast<CoroIdAsyncInst>(Shape.CoroBegin->getId());
1705 IRBuilder<> Builder(Id);
1706
1707 auto *FramePtr = Id->getStorage();
1708 FramePtr = Builder.CreateBitOrPointerCast(FramePtr, Int8PtrTy);
1709 FramePtr = Builder.CreateConstInBoundsGEP1_32(
1710 Type::getInt8Ty(Context), FramePtr, Shape.AsyncLowering.FrameOffset,
1711 "async.ctx.frameptr");
1712
1713 // Map all uses of llvm.coro.begin to the allocated frame pointer.
1714 {
1715 // Make sure we don't invalidate Shape.FramePtr.
1716 TrackingVH<Value> Handle(Shape.FramePtr);
1717 Shape.CoroBegin->replaceAllUsesWith(FramePtr);
1718 Shape.FramePtr = Handle.getValPtr();
1719 }
1720
1721 // Create all the functions in order after the main function.
1722 auto NextF = std::next(F.getIterator());
1723
1724 // Create a continuation function for each of the suspend points.
1725 Clones.reserve(Shape.CoroSuspends.size());
1726 for (size_t Idx = 0, End = Shape.CoroSuspends.size(); Idx != End; ++Idx) {
1727 auto *Suspend = cast<CoroSuspendAsyncInst>(Shape.CoroSuspends[Idx]);
1728
1729 // Create the clone declaration.
1730 auto ResumeNameSuffix = ".resume.";
1731 auto ProjectionFunctionName =
1732 Suspend->getAsyncContextProjectionFunction()->getName();
1733 bool UseSwiftMangling = false;
1734 if (ProjectionFunctionName.equals("__swift_async_resume_project_context")) {
1735 ResumeNameSuffix = "TQ";
1736 UseSwiftMangling = true;
1737 } else if (ProjectionFunctionName.equals(
1738 "__swift_async_resume_get_context")) {
1739 ResumeNameSuffix = "TY";
1740 UseSwiftMangling = true;
1741 }
1742 auto *Continuation = createCloneDeclaration(
1743 F, Shape,
1744 UseSwiftMangling ? ResumeNameSuffix + Twine(Idx) + "_"
1745 : ResumeNameSuffix + Twine(Idx),
1746 NextF, Suspend);
1747 Clones.push_back(Continuation);
1748
1749 // Insert a branch to a new return block immediately before the suspend
1750 // point.
1751 auto *SuspendBB = Suspend->getParent();
1752 auto *NewSuspendBB = SuspendBB->splitBasicBlock(Suspend);
1753 auto *Branch = cast<BranchInst>(SuspendBB->getTerminator());
1754
1755 // Place it before the first suspend.
1756 auto *ReturnBB =
1757 BasicBlock::Create(F.getContext(), "coro.return", &F, NewSuspendBB);
1758 Branch->setSuccessor(0, ReturnBB);
1759
1760 IRBuilder<> Builder(ReturnBB);
1761
1762 // Insert the call to the tail call function and inline it.
1763 auto *Fn = Suspend->getMustTailCallFunction();
1764 SmallVector<Value *, 8> Args(Suspend->args());
1765 auto FnArgs = ArrayRef<Value *>(Args).drop_front(
1766 CoroSuspendAsyncInst::MustTailCallFuncArg + 1);
1767 auto *TailCall =
1768 coro::createMustTailCall(Suspend->getDebugLoc(), Fn, FnArgs, Builder);
1769 Builder.CreateRetVoid();
1770 InlineFunctionInfo FnInfo;
1771 auto InlineRes = InlineFunction(*TailCall, FnInfo);
1772 assert(InlineRes.isSuccess() && "Expected inlining to succeed");
1773 (void)InlineRes;
1774
1775 // Replace the lvm.coro.async.resume intrisic call.
1776 replaceAsyncResumeFunction(Suspend, Continuation);
1777 }
1778
1779 assert(Clones.size() == Shape.CoroSuspends.size());
1780 for (size_t Idx = 0, End = Shape.CoroSuspends.size(); Idx != End; ++Idx) {
1781 auto *Suspend = Shape.CoroSuspends[Idx];
1782 auto *Clone = Clones[Idx];
1783
1784 CoroCloner(F, "resume." + Twine(Idx), Shape, Clone, Suspend).create();
1785 }
1786 }
1787
splitRetconCoroutine(Function & F,coro::Shape & Shape,SmallVectorImpl<Function * > & Clones)1788 static void splitRetconCoroutine(Function &F, coro::Shape &Shape,
1789 SmallVectorImpl<Function *> &Clones) {
1790 assert(Shape.ABI == coro::ABI::Retcon ||
1791 Shape.ABI == coro::ABI::RetconOnce);
1792 assert(Clones.empty());
1793
1794 // Reset various things that the optimizer might have decided it
1795 // "knows" about the coroutine function due to not seeing a return.
1796 F.removeFnAttr(Attribute::NoReturn);
1797 F.removeRetAttr(Attribute::NoAlias);
1798 F.removeRetAttr(Attribute::NonNull);
1799
1800 // Allocate the frame.
1801 auto *Id = cast<AnyCoroIdRetconInst>(Shape.CoroBegin->getId());
1802 Value *RawFramePtr;
1803 if (Shape.RetconLowering.IsFrameInlineInStorage) {
1804 RawFramePtr = Id->getStorage();
1805 } else {
1806 IRBuilder<> Builder(Id);
1807
1808 // Determine the size of the frame.
1809 const DataLayout &DL = F.getParent()->getDataLayout();
1810 auto Size = DL.getTypeAllocSize(Shape.FrameTy);
1811
1812 // Allocate. We don't need to update the call graph node because we're
1813 // going to recompute it from scratch after splitting.
1814 // FIXME: pass the required alignment
1815 RawFramePtr = Shape.emitAlloc(Builder, Builder.getInt64(Size), nullptr);
1816 RawFramePtr =
1817 Builder.CreateBitCast(RawFramePtr, Shape.CoroBegin->getType());
1818
1819 // Stash the allocated frame pointer in the continuation storage.
1820 auto Dest = Builder.CreateBitCast(Id->getStorage(),
1821 RawFramePtr->getType()->getPointerTo());
1822 Builder.CreateStore(RawFramePtr, Dest);
1823 }
1824
1825 // Map all uses of llvm.coro.begin to the allocated frame pointer.
1826 {
1827 // Make sure we don't invalidate Shape.FramePtr.
1828 TrackingVH<Value> Handle(Shape.FramePtr);
1829 Shape.CoroBegin->replaceAllUsesWith(RawFramePtr);
1830 Shape.FramePtr = Handle.getValPtr();
1831 }
1832
1833 // Create a unique return block.
1834 BasicBlock *ReturnBB = nullptr;
1835 SmallVector<PHINode *, 4> ReturnPHIs;
1836
1837 // Create all the functions in order after the main function.
1838 auto NextF = std::next(F.getIterator());
1839
1840 // Create a continuation function for each of the suspend points.
1841 Clones.reserve(Shape.CoroSuspends.size());
1842 for (size_t i = 0, e = Shape.CoroSuspends.size(); i != e; ++i) {
1843 auto Suspend = cast<CoroSuspendRetconInst>(Shape.CoroSuspends[i]);
1844
1845 // Create the clone declaration.
1846 auto Continuation =
1847 createCloneDeclaration(F, Shape, ".resume." + Twine(i), NextF, nullptr);
1848 Clones.push_back(Continuation);
1849
1850 // Insert a branch to the unified return block immediately before
1851 // the suspend point.
1852 auto SuspendBB = Suspend->getParent();
1853 auto NewSuspendBB = SuspendBB->splitBasicBlock(Suspend);
1854 auto Branch = cast<BranchInst>(SuspendBB->getTerminator());
1855
1856 // Create the unified return block.
1857 if (!ReturnBB) {
1858 // Place it before the first suspend.
1859 ReturnBB = BasicBlock::Create(F.getContext(), "coro.return", &F,
1860 NewSuspendBB);
1861 Shape.RetconLowering.ReturnBlock = ReturnBB;
1862
1863 IRBuilder<> Builder(ReturnBB);
1864
1865 // Create PHIs for all the return values.
1866 assert(ReturnPHIs.empty());
1867
1868 // First, the continuation.
1869 ReturnPHIs.push_back(Builder.CreatePHI(Continuation->getType(),
1870 Shape.CoroSuspends.size()));
1871
1872 // Next, all the directly-yielded values.
1873 for (auto *ResultTy : Shape.getRetconResultTypes())
1874 ReturnPHIs.push_back(Builder.CreatePHI(ResultTy,
1875 Shape.CoroSuspends.size()));
1876
1877 // Build the return value.
1878 auto RetTy = F.getReturnType();
1879
1880 // Cast the continuation value if necessary.
1881 // We can't rely on the types matching up because that type would
1882 // have to be infinite.
1883 auto CastedContinuationTy =
1884 (ReturnPHIs.size() == 1 ? RetTy : RetTy->getStructElementType(0));
1885 auto *CastedContinuation =
1886 Builder.CreateBitCast(ReturnPHIs[0], CastedContinuationTy);
1887
1888 Value *RetV;
1889 if (ReturnPHIs.size() == 1) {
1890 RetV = CastedContinuation;
1891 } else {
1892 RetV = UndefValue::get(RetTy);
1893 RetV = Builder.CreateInsertValue(RetV, CastedContinuation, 0);
1894 for (size_t I = 1, E = ReturnPHIs.size(); I != E; ++I)
1895 RetV = Builder.CreateInsertValue(RetV, ReturnPHIs[I], I);
1896 }
1897
1898 Builder.CreateRet(RetV);
1899 }
1900
1901 // Branch to the return block.
1902 Branch->setSuccessor(0, ReturnBB);
1903 ReturnPHIs[0]->addIncoming(Continuation, SuspendBB);
1904 size_t NextPHIIndex = 1;
1905 for (auto &VUse : Suspend->value_operands())
1906 ReturnPHIs[NextPHIIndex++]->addIncoming(&*VUse, SuspendBB);
1907 assert(NextPHIIndex == ReturnPHIs.size());
1908 }
1909
1910 assert(Clones.size() == Shape.CoroSuspends.size());
1911 for (size_t i = 0, e = Shape.CoroSuspends.size(); i != e; ++i) {
1912 auto Suspend = Shape.CoroSuspends[i];
1913 auto Clone = Clones[i];
1914
1915 CoroCloner(F, "resume." + Twine(i), Shape, Clone, Suspend).create();
1916 }
1917 }
1918
1919 namespace {
1920 class PrettyStackTraceFunction : public PrettyStackTraceEntry {
1921 Function &F;
1922 public:
PrettyStackTraceFunction(Function & F)1923 PrettyStackTraceFunction(Function &F) : F(F) {}
print(raw_ostream & OS) const1924 void print(raw_ostream &OS) const override {
1925 OS << "While splitting coroutine ";
1926 F.printAsOperand(OS, /*print type*/ false, F.getParent());
1927 OS << "\n";
1928 }
1929 };
1930 }
1931
splitCoroutine(Function & F,SmallVectorImpl<Function * > & Clones,TargetTransformInfo & TTI,bool OptimizeFrame)1932 static coro::Shape splitCoroutine(Function &F,
1933 SmallVectorImpl<Function *> &Clones,
1934 TargetTransformInfo &TTI,
1935 bool OptimizeFrame) {
1936 PrettyStackTraceFunction prettyStackTrace(F);
1937
1938 // The suspend-crossing algorithm in buildCoroutineFrame get tripped
1939 // up by uses in unreachable blocks, so remove them as a first pass.
1940 removeUnreachableBlocks(F);
1941
1942 coro::Shape Shape(F, OptimizeFrame);
1943 if (!Shape.CoroBegin)
1944 return Shape;
1945
1946 simplifySuspendPoints(Shape);
1947 buildCoroutineFrame(F, Shape);
1948 replaceFrameSizeAndAlignment(Shape);
1949
1950 // If there are no suspend points, no split required, just remove
1951 // the allocation and deallocation blocks, they are not needed.
1952 if (Shape.CoroSuspends.empty()) {
1953 handleNoSuspendCoroutine(Shape);
1954 } else {
1955 switch (Shape.ABI) {
1956 case coro::ABI::Switch:
1957 splitSwitchCoroutine(F, Shape, Clones, TTI);
1958 break;
1959 case coro::ABI::Async:
1960 splitAsyncCoroutine(F, Shape, Clones);
1961 break;
1962 case coro::ABI::Retcon:
1963 case coro::ABI::RetconOnce:
1964 splitRetconCoroutine(F, Shape, Clones);
1965 break;
1966 }
1967 }
1968
1969 // Replace all the swifterror operations in the original function.
1970 // This invalidates SwiftErrorOps in the Shape.
1971 replaceSwiftErrorOps(F, Shape, nullptr);
1972
1973 // Finally, salvage the llvm.dbg.{declare,addr} in our original function that
1974 // point into the coroutine frame. We only do this for the current function
1975 // since the Cloner salvaged debug info for us in the new coroutine funclets.
1976 SmallVector<DbgVariableIntrinsic *, 8> Worklist;
1977 SmallDenseMap<llvm::Value *, llvm::AllocaInst *, 4> DbgPtrAllocaCache;
1978 for (auto &BB : F) {
1979 for (auto &I : BB) {
1980 if (auto *DDI = dyn_cast<DbgDeclareInst>(&I)) {
1981 Worklist.push_back(DDI);
1982 continue;
1983 }
1984 if (auto *DDI = dyn_cast<DbgAddrIntrinsic>(&I)) {
1985 Worklist.push_back(DDI);
1986 continue;
1987 }
1988 }
1989 }
1990 for (auto *DDI : Worklist)
1991 coro::salvageDebugInfo(DbgPtrAllocaCache, DDI, Shape.OptimizeFrame);
1992
1993 return Shape;
1994 }
1995
1996 /// Remove calls to llvm.coro.end in the original function.
removeCoroEnds(const coro::Shape & Shape)1997 static void removeCoroEnds(const coro::Shape &Shape) {
1998 for (auto *End : Shape.CoroEnds) {
1999 replaceCoroEnd(End, Shape, Shape.FramePtr, /*in resume*/ false, nullptr);
2000 }
2001 }
2002
updateCallGraphAfterCoroutineSplit(LazyCallGraph::Node & N,const coro::Shape & Shape,const SmallVectorImpl<Function * > & Clones,LazyCallGraph::SCC & C,LazyCallGraph & CG,CGSCCAnalysisManager & AM,CGSCCUpdateResult & UR,FunctionAnalysisManager & FAM)2003 static void updateCallGraphAfterCoroutineSplit(
2004 LazyCallGraph::Node &N, const coro::Shape &Shape,
2005 const SmallVectorImpl<Function *> &Clones, LazyCallGraph::SCC &C,
2006 LazyCallGraph &CG, CGSCCAnalysisManager &AM, CGSCCUpdateResult &UR,
2007 FunctionAnalysisManager &FAM) {
2008 if (!Shape.CoroBegin)
2009 return;
2010
2011 if (Shape.ABI != coro::ABI::Switch)
2012 removeCoroEnds(Shape);
2013 else {
2014 for (llvm::AnyCoroEndInst *End : Shape.CoroEnds) {
2015 auto &Context = End->getContext();
2016 End->replaceAllUsesWith(ConstantInt::getFalse(Context));
2017 End->eraseFromParent();
2018 }
2019 }
2020
2021 if (!Clones.empty()) {
2022 switch (Shape.ABI) {
2023 case coro::ABI::Switch:
2024 // Each clone in the Switch lowering is independent of the other clones.
2025 // Let the LazyCallGraph know about each one separately.
2026 for (Function *Clone : Clones)
2027 CG.addSplitFunction(N.getFunction(), *Clone);
2028 break;
2029 case coro::ABI::Async:
2030 case coro::ABI::Retcon:
2031 case coro::ABI::RetconOnce:
2032 // Each clone in the Async/Retcon lowering references of the other clones.
2033 // Let the LazyCallGraph know about all of them at once.
2034 if (!Clones.empty())
2035 CG.addSplitRefRecursiveFunctions(N.getFunction(), Clones);
2036 break;
2037 }
2038
2039 // Let the CGSCC infra handle the changes to the original function.
2040 updateCGAndAnalysisManagerForCGSCCPass(CG, C, N, AM, UR, FAM);
2041 }
2042
2043 // Do some cleanup and let the CGSCC infra see if we've cleaned up any edges
2044 // to the split functions.
2045 postSplitCleanup(N.getFunction());
2046 updateCGAndAnalysisManagerForFunctionPass(CG, C, N, AM, UR, FAM);
2047 }
2048
2049 /// Replace a call to llvm.coro.prepare.retcon.
replacePrepare(CallInst * Prepare,LazyCallGraph & CG,LazyCallGraph::SCC & C)2050 static void replacePrepare(CallInst *Prepare, LazyCallGraph &CG,
2051 LazyCallGraph::SCC &C) {
2052 auto CastFn = Prepare->getArgOperand(0); // as an i8*
2053 auto Fn = CastFn->stripPointerCasts(); // as its original type
2054
2055 // Attempt to peephole this pattern:
2056 // %0 = bitcast [[TYPE]] @some_function to i8*
2057 // %1 = call @llvm.coro.prepare.retcon(i8* %0)
2058 // %2 = bitcast %1 to [[TYPE]]
2059 // ==>
2060 // %2 = @some_function
2061 for (Use &U : llvm::make_early_inc_range(Prepare->uses())) {
2062 // Look for bitcasts back to the original function type.
2063 auto *Cast = dyn_cast<BitCastInst>(U.getUser());
2064 if (!Cast || Cast->getType() != Fn->getType())
2065 continue;
2066
2067 // Replace and remove the cast.
2068 Cast->replaceAllUsesWith(Fn);
2069 Cast->eraseFromParent();
2070 }
2071
2072 // Replace any remaining uses with the function as an i8*.
2073 // This can never directly be a callee, so we don't need to update CG.
2074 Prepare->replaceAllUsesWith(CastFn);
2075 Prepare->eraseFromParent();
2076
2077 // Kill dead bitcasts.
2078 while (auto *Cast = dyn_cast<BitCastInst>(CastFn)) {
2079 if (!Cast->use_empty())
2080 break;
2081 CastFn = Cast->getOperand(0);
2082 Cast->eraseFromParent();
2083 }
2084 }
2085
replaceAllPrepares(Function * PrepareFn,LazyCallGraph & CG,LazyCallGraph::SCC & C)2086 static bool replaceAllPrepares(Function *PrepareFn, LazyCallGraph &CG,
2087 LazyCallGraph::SCC &C) {
2088 bool Changed = false;
2089 for (Use &P : llvm::make_early_inc_range(PrepareFn->uses())) {
2090 // Intrinsics can only be used in calls.
2091 auto *Prepare = cast<CallInst>(P.getUser());
2092 replacePrepare(Prepare, CG, C);
2093 Changed = true;
2094 }
2095
2096 return Changed;
2097 }
2098
addPrepareFunction(const Module & M,SmallVectorImpl<Function * > & Fns,StringRef Name)2099 static void addPrepareFunction(const Module &M,
2100 SmallVectorImpl<Function *> &Fns,
2101 StringRef Name) {
2102 auto *PrepareFn = M.getFunction(Name);
2103 if (PrepareFn && !PrepareFn->use_empty())
2104 Fns.push_back(PrepareFn);
2105 }
2106
run(LazyCallGraph::SCC & C,CGSCCAnalysisManager & AM,LazyCallGraph & CG,CGSCCUpdateResult & UR)2107 PreservedAnalyses CoroSplitPass::run(LazyCallGraph::SCC &C,
2108 CGSCCAnalysisManager &AM,
2109 LazyCallGraph &CG, CGSCCUpdateResult &UR) {
2110 // NB: One invariant of a valid LazyCallGraph::SCC is that it must contain a
2111 // non-zero number of nodes, so we assume that here and grab the first
2112 // node's function's module.
2113 Module &M = *C.begin()->getFunction().getParent();
2114 auto &FAM =
2115 AM.getResult<FunctionAnalysisManagerCGSCCProxy>(C, CG).getManager();
2116
2117 // Check for uses of llvm.coro.prepare.retcon/async.
2118 SmallVector<Function *, 2> PrepareFns;
2119 addPrepareFunction(M, PrepareFns, "llvm.coro.prepare.retcon");
2120 addPrepareFunction(M, PrepareFns, "llvm.coro.prepare.async");
2121
2122 // Find coroutines for processing.
2123 SmallVector<LazyCallGraph::Node *> Coroutines;
2124 for (LazyCallGraph::Node &N : C)
2125 if (N.getFunction().isPresplitCoroutine())
2126 Coroutines.push_back(&N);
2127
2128 if (Coroutines.empty() && PrepareFns.empty())
2129 return PreservedAnalyses::all();
2130
2131 if (Coroutines.empty()) {
2132 for (auto *PrepareFn : PrepareFns) {
2133 replaceAllPrepares(PrepareFn, CG, C);
2134 }
2135 }
2136
2137 // Split all the coroutines.
2138 for (LazyCallGraph::Node *N : Coroutines) {
2139 Function &F = N->getFunction();
2140 LLVM_DEBUG(dbgs() << "CoroSplit: Processing coroutine '" << F.getName()
2141 << "\n");
2142 F.setSplittedCoroutine();
2143
2144 SmallVector<Function *, 4> Clones;
2145 const coro::Shape Shape = splitCoroutine(
2146 F, Clones, FAM.getResult<TargetIRAnalysis>(F), OptimizeFrame);
2147 updateCallGraphAfterCoroutineSplit(*N, Shape, Clones, C, CG, AM, UR, FAM);
2148
2149 if (!Shape.CoroSuspends.empty()) {
2150 // Run the CGSCC pipeline on the original and newly split functions.
2151 UR.CWorklist.insert(&C);
2152 for (Function *Clone : Clones)
2153 UR.CWorklist.insert(CG.lookupSCC(CG.get(*Clone)));
2154 }
2155 }
2156
2157 if (!PrepareFns.empty()) {
2158 for (auto *PrepareFn : PrepareFns) {
2159 replaceAllPrepares(PrepareFn, CG, C);
2160 }
2161 }
2162
2163 return PreservedAnalyses::none();
2164 }
2165