//===- CoroSplit.cpp - Converts a coroutine into a state machine ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// This pass builds the coroutine frame and outlines resume and destroy parts
// of the coroutine into separate functions.
//
// We present a coroutine to LLVM as an ordinary function with suspension
// points marked up with intrinsics. We let the optimizer party on the coroutine
// as a single function for as long as possible. Shortly before the coroutine is
// eligible to be inlined into its callers, we split up the coroutine into parts
// corresponding to the initial, resume, and destroy invocations of the
// coroutine, add them to the current SCC, and restart the IPO pipeline to
// optimize the coroutine subfunctions we extracted before proceeding to the
// caller of the coroutine.
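//
// As a rough sketch (simplified; the names are illustrative, not the exact IR
// this pass emits), a coroutine such as:
//
//   define ptr @f(i32 %n) {
//     ...
//     %0 = call i8 @llvm.coro.suspend(token none, i1 false)
//     ...
//   }
//
// is split into a ramp function @f plus outlined clones along the lines of
// @f.resume, @f.destroy, and @f.cleanup, each taking the frame pointer as its
// argument.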
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Coroutines/CoroSplit.h"
#include "CoroInstr.h"
#include "CoroInternal.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/PriorityWorklist.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/LazyCallGraph.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/Verifier.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/PrettyStackTrace.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/CallGraphUpdater.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <initializer_list>
#include <iterator>

using namespace llvm;

#define DEBUG_TYPE "coro-split"

namespace {

/// A little helper class for building the coroutine function clones.
class CoroCloner {
public:
  enum class Kind {
    /// The shared resume function for a switch lowering.
    SwitchResume,

    /// The shared unwind function for a switch lowering.
    SwitchUnwind,

    /// The shared cleanup function for a switch lowering.
    SwitchCleanup,

    /// An individual continuation function.
    Continuation,

    /// An async resume function.
    Async,
  };

private:
  Function &OrigF;
  Function *NewF;
  const Twine &Suffix;
  coro::Shape &Shape;
  Kind FKind;
  ValueToValueMapTy VMap;
  IRBuilder<> Builder;
  Value *NewFramePtr = nullptr;

  /// The active suspend instruction; meaningful only for continuation and
  /// async ABIs.
  AnyCoroSuspendInst *ActiveSuspend = nullptr;

public:
  /// Create a cloner for a switch lowering.
  CoroCloner(Function &OrigF, const Twine &Suffix, coro::Shape &Shape,
             Kind FKind)
      : OrigF(OrigF), NewF(nullptr), Suffix(Suffix), Shape(Shape),
        FKind(FKind), Builder(OrigF.getContext()) {
    assert(Shape.ABI == coro::ABI::Switch);
  }

  /// Create a cloner for a continuation lowering.
  CoroCloner(Function &OrigF, const Twine &Suffix, coro::Shape &Shape,
             Function *NewF, AnyCoroSuspendInst *ActiveSuspend)
      : OrigF(OrigF), NewF(NewF), Suffix(Suffix), Shape(Shape),
        FKind(Shape.ABI == coro::ABI::Async ? Kind::Async : Kind::Continuation),
        Builder(OrigF.getContext()), ActiveSuspend(ActiveSuspend) {
    assert(Shape.ABI == coro::ABI::Retcon ||
           Shape.ABI == coro::ABI::RetconOnce || Shape.ABI == coro::ABI::Async);
    assert(NewF && "need existing function for continuation");
    assert(ActiveSuspend && "need active suspend point for continuation");
  }

  Function *getFunction() const {
    assert(NewF != nullptr && "declaration not yet set");
    return NewF;
  }

  void create();

private:
  bool isSwitchDestroyFunction() {
    switch (FKind) {
    case Kind::Async:
    case Kind::Continuation:
    case Kind::SwitchResume:
      return false;
    case Kind::SwitchUnwind:
    case Kind::SwitchCleanup:
      return true;
    }
    llvm_unreachable("Unknown CoroCloner::Kind enum");
  }

  void replaceEntryBlock();
  Value *deriveNewFramePointer();
  void replaceRetconOrAsyncSuspendUses();
  void replaceCoroSuspends();
  void replaceCoroEnds();
  void replaceSwiftErrorOps();
  void salvageDebugInfo();
  void handleFinalSuspend();
};

} // end anonymous namespace

static void maybeFreeRetconStorage(IRBuilder<> &Builder,
                                   const coro::Shape &Shape, Value *FramePtr,
                                   CallGraph *CG) {
  assert(Shape.ABI == coro::ABI::Retcon ||
         Shape.ABI == coro::ABI::RetconOnce);
  if (Shape.RetconLowering.IsFrameInlineInStorage)
    return;

  Shape.emitDealloc(Builder, FramePtr, CG);
}

/// Replace an llvm.coro.end.async.
/// Will inline the musttail call, if there is one.
/// \returns true if cleanup of the coro.end block is needed, false otherwise.
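///
/// For example (a simplified sketch; names are illustrative), given:
///
///   musttail call swiftcc void @finish(ptr %ctx)
///   br label %coro.end
/// coro.end:
///   call i1 @llvm.coro.end.async(...)
///
/// the musttail call is moved into %coro.end, a 'ret void' is inserted right
/// after it, and the call is then inlined.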
static bool replaceCoroEndAsync(AnyCoroEndInst *End) {
  IRBuilder<> Builder(End);

  auto *EndAsync = dyn_cast<CoroAsyncEndInst>(End);
  if (!EndAsync) {
    Builder.CreateRetVoid();
    return true /*needs cleanup of coro.end block*/;
  }

  auto *MustTailCallFunc = EndAsync->getMustTailCallFunction();
  if (!MustTailCallFunc) {
    Builder.CreateRetVoid();
    return true /*needs cleanup of coro.end block*/;
  }

  // Move the must tail call from the predecessor block into the end block.
  auto *CoroEndBlock = End->getParent();
  auto *MustTailCallFuncBlock = CoroEndBlock->getSinglePredecessor();
  assert(MustTailCallFuncBlock && "Must have a single predecessor block");
  auto It = MustTailCallFuncBlock->getTerminator()->getIterator();
  auto *MustTailCall = cast<CallInst>(&*std::prev(It));
  CoroEndBlock->splice(End->getIterator(), MustTailCallFuncBlock,
                       MustTailCall->getIterator());

  // Insert the return instruction.
  Builder.SetInsertPoint(End);
  Builder.CreateRetVoid();
  InlineFunctionInfo FnInfo;

  // Remove the rest of the block, by splitting it into an unreachable block.
  auto *BB = End->getParent();
  BB->splitBasicBlock(End);
  BB->getTerminator()->eraseFromParent();

  auto InlineRes = InlineFunction(*MustTailCall, FnInfo);
  assert(InlineRes.isSuccess() && "Expected inlining to succeed");
  (void)InlineRes;

  // We have cleaned up the coro.end block above.
  return false;
}

/// Replace a non-unwind call to llvm.coro.end.
static void replaceFallthroughCoroEnd(AnyCoroEndInst *End,
                                      const coro::Shape &Shape, Value *FramePtr,
                                      bool InResume, CallGraph *CG) {
  // Start inserting right before the coro.end.
  IRBuilder<> Builder(End);

  // Create the return instruction.
  switch (Shape.ABI) {
  // The cloned functions in switch-lowering always return void.
  case coro::ABI::Switch:
    assert(!cast<CoroEndInst>(End)->hasResults() &&
           "switch coroutine should not return any values");
    // coro.end doesn't immediately end the coroutine in the main function
    // in this lowering, because we need to deallocate the coroutine.
    if (!InResume)
      return;
    Builder.CreateRetVoid();
    break;

  // In async lowering this returns.
  case coro::ABI::Async: {
    bool CoroEndBlockNeedsCleanup = replaceCoroEndAsync(End);
    if (!CoroEndBlockNeedsCleanup)
      return;
    break;
  }

  // In unique continuation lowering, the continuations always return void.
  // But we may have implicitly allocated storage.
  case coro::ABI::RetconOnce: {
    maybeFreeRetconStorage(Builder, Shape, FramePtr, CG);
    auto *CoroEnd = cast<CoroEndInst>(End);
    auto *RetTy = Shape.getResumeFunctionType()->getReturnType();

    if (!CoroEnd->hasResults()) {
      assert(RetTy->isVoidTy());
      Builder.CreateRetVoid();
      break;
    }

    auto *CoroResults = CoroEnd->getResults();
    unsigned NumReturns = CoroResults->numReturns();

    if (auto *RetStructTy = dyn_cast<StructType>(RetTy)) {
      assert(RetStructTy->getNumElements() == NumReturns &&
             "number of returns should match resume function signature");
      Value *ReturnValue = UndefValue::get(RetStructTy);
      unsigned Idx = 0;
      for (Value *RetValEl : CoroResults->return_values())
        ReturnValue = Builder.CreateInsertValue(ReturnValue, RetValEl, Idx++);
      Builder.CreateRet(ReturnValue);
    } else if (NumReturns == 0) {
      assert(RetTy->isVoidTy());
      Builder.CreateRetVoid();
    } else {
      assert(NumReturns == 1);
      Builder.CreateRet(*CoroResults->retval_begin());
    }
    CoroResults->replaceAllUsesWith(
        ConstantTokenNone::get(CoroResults->getContext()));
    CoroResults->eraseFromParent();
    break;
  }

  // In non-unique continuation lowering, we signal completion by returning
  // a null continuation.
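  //
  // For instance (a sketch; the exact struct shape depends on the resume
  // function type), a completed retcon coroutine might return:
  //
  //   ret { ptr, i32 } { ptr null, i32 undef }  ; null continuation => done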
  case coro::ABI::Retcon: {
    assert(!cast<CoroEndInst>(End)->hasResults() &&
           "retcon coroutine should not return any values");
    maybeFreeRetconStorage(Builder, Shape, FramePtr, CG);
    auto RetTy = Shape.getResumeFunctionType()->getReturnType();
    auto RetStructTy = dyn_cast<StructType>(RetTy);
    PointerType *ContinuationTy =
        cast<PointerType>(RetStructTy ? RetStructTy->getElementType(0) : RetTy);

    Value *ReturnValue = ConstantPointerNull::get(ContinuationTy);
    if (RetStructTy) {
      ReturnValue = Builder.CreateInsertValue(UndefValue::get(RetStructTy),
                                              ReturnValue, 0);
    }
    Builder.CreateRet(ReturnValue);
    break;
  }
  }

  // Remove the rest of the block, by splitting it into an unreachable block.
  auto *BB = End->getParent();
  BB->splitBasicBlock(End);
  BB->getTerminator()->eraseFromParent();
}

// Mark a coroutine as done, which implies that the coroutine is finished and
// never gets resumed.
//
// In the switch-resumed ABI, the done state is represented by storing zero in
// ResumeFnAddr.
//
// NOTE: We cannot omit the argument `FramePtr`. It is necessary because the
// pointer to the frame in the split function is not stored in `Shape`.
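//
// As a rough sketch of the emitted IR (field indices assumed; under this ABI
// the resume function pointer lives in the first frame slot):
//
//   %ResumeFn.addr = getelementptr inbounds %f.Frame, ptr %FramePtr,
//                                           i32 0, i32 0
//   store ptr null, ptr %ResumeFn.addr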
static void markCoroutineAsDone(IRBuilder<> &Builder, const coro::Shape &Shape,
                                Value *FramePtr) {
  assert(
      Shape.ABI == coro::ABI::Switch &&
      "markCoroutineAsDone is only supported for Switch-Resumed ABI for now.");
  auto *GepIndex = Builder.CreateStructGEP(
      Shape.FrameTy, FramePtr, coro::Shape::SwitchFieldIndex::Resume,
      "ResumeFn.addr");
  auto *NullPtr = ConstantPointerNull::get(cast<PointerType>(
      Shape.FrameTy->getTypeAtIndex(coro::Shape::SwitchFieldIndex::Resume)));
  Builder.CreateStore(NullPtr, GepIndex);

  // If the coroutine doesn't have an unwind coro.end, we can omit the store
  // to the final suspend point, since we can infer that the coroutine is
  // suspended at the final suspend point from the nullness of ResumeFnAddr.
  // However, we can't skip it if the coroutine has an unwind coro.end: a
  // coroutine that reaches an unwind coro.end is considered suspended at the
  // final suspend point (its ResumeFnAddr is null) even though it didn't
  // actually complete. We need the IndexVal for the final suspend point to
  // keep the states distinct.
  if (Shape.SwitchLowering.HasUnwindCoroEnd &&
      Shape.SwitchLowering.HasFinalSuspend) {
    assert(cast<CoroSuspendInst>(Shape.CoroSuspends.back())->isFinal() &&
           "The final suspend should only live in the last position of "
           "CoroSuspends.");
    ConstantInt *IndexVal = Shape.getIndex(Shape.CoroSuspends.size() - 1);
    auto *FinalIndex = Builder.CreateStructGEP(
        Shape.FrameTy, FramePtr, Shape.getSwitchIndexField(), "index.addr");

    Builder.CreateStore(IndexVal, FinalIndex);
  }
}

/// Replace an unwind call to llvm.coro.end.
static void replaceUnwindCoroEnd(AnyCoroEndInst *End, const coro::Shape &Shape,
                                 Value *FramePtr, bool InResume,
                                 CallGraph *CG) {
  IRBuilder<> Builder(End);

  switch (Shape.ABI) {
  // In switch-lowering, this does nothing in the main function.
  case coro::ABI::Switch: {
    // In the C++ specification, the coroutine should be marked as done
    // if promise.unhandled_exception() throws. The frontend will
    // call coro.end(true) along this path.
    //
    // FIXME: We should refactor this once there are other languages
    // that use the switch-resumed style besides C++.
    markCoroutineAsDone(Builder, Shape, FramePtr);
    if (!InResume)
      return;
    break;
  }
  // In async lowering this does nothing.
  case coro::ABI::Async:
    break;
  // In continuation-lowering, this frees the continuation storage.
  case coro::ABI::Retcon:
  case coro::ABI::RetconOnce:
    maybeFreeRetconStorage(Builder, Shape, FramePtr, CG);
    break;
  }

  // If coro.end has an associated bundle, add cleanupret instruction.
  if (auto Bundle = End->getOperandBundle(LLVMContext::OB_funclet)) {
    auto *FromPad = cast<CleanupPadInst>(Bundle->Inputs[0]);
    auto *CleanupRet = Builder.CreateCleanupRet(FromPad, nullptr);
    End->getParent()->splitBasicBlock(End);
    CleanupRet->getParent()->getTerminator()->eraseFromParent();
  }
}

static void replaceCoroEnd(AnyCoroEndInst *End, const coro::Shape &Shape,
                           Value *FramePtr, bool InResume, CallGraph *CG) {
  if (End->isUnwind())
    replaceUnwindCoroEnd(End, Shape, FramePtr, InResume, CG);
  else
    replaceFallthroughCoroEnd(End, Shape, FramePtr, InResume, CG);

  auto &Context = End->getContext();
  End->replaceAllUsesWith(InResume ? ConstantInt::getTrue(Context)
                                   : ConstantInt::getFalse(Context));
  End->eraseFromParent();
}

// Create an entry block for a resume function with a switch that will jump to
// suspend points.
static void createResumeEntryBlock(Function &F, coro::Shape &Shape) {
  assert(Shape.ABI == coro::ABI::Switch);
  LLVMContext &C = F.getContext();

  // resume.entry:
  //   %index.addr = getelementptr inbounds %f.Frame, %f.Frame* %FramePtr,
  //                 i32 0, i32 2
  //   %index = load i32, i32* %index.addr
  //   switch i32 %index, label %unreachable [
  //     i32 0, label %resume.0
  //     i32 1, label %resume.1
  //     ...
  //   ]

  auto *NewEntry = BasicBlock::Create(C, "resume.entry", &F);
  auto *UnreachBB = BasicBlock::Create(C, "unreachable", &F);

  IRBuilder<> Builder(NewEntry);
  auto *FramePtr = Shape.FramePtr;
  auto *FrameTy = Shape.FrameTy;
  auto *GepIndex = Builder.CreateStructGEP(
      FrameTy, FramePtr, Shape.getSwitchIndexField(), "index.addr");
  auto *Index = Builder.CreateLoad(Shape.getIndexType(), GepIndex, "index");
  auto *Switch =
      Builder.CreateSwitch(Index, UnreachBB, Shape.CoroSuspends.size());
  Shape.SwitchLowering.ResumeSwitch = Switch;

  size_t SuspendIndex = 0;
  for (auto *AnyS : Shape.CoroSuspends) {
    auto *S = cast<CoroSuspendInst>(AnyS);
    ConstantInt *IndexVal = Shape.getIndex(SuspendIndex);

    // Replace CoroSave with a store to Index:
    //   %index.addr = getelementptr %f.frame... (index field number)
    //   store i32 %IndexVal, i32* %index.addr1
    auto *Save = S->getCoroSave();
    Builder.SetInsertPoint(Save);
    if (S->isFinal()) {
      // The coroutine should be marked done if it reaches the final suspend
      // point.
      markCoroutineAsDone(Builder, Shape, FramePtr);
    } else {
      auto *GepIndex = Builder.CreateStructGEP(
          FrameTy, FramePtr, Shape.getSwitchIndexField(), "index.addr");
      Builder.CreateStore(IndexVal, GepIndex);
    }

    Save->replaceAllUsesWith(ConstantTokenNone::get(C));
    Save->eraseFromParent();

    // Split block before and after coro.suspend and add a jump from an entry
    // switch:
    //
    //  whateverBB:
    //    whatever
    //    %0 = call i8 @llvm.coro.suspend(token none, i1 false)
    //    switch i8 %0, label %suspend [i8 0, label %resume
    //                                  i8 1, label %cleanup]
    // becomes:
    //
    //  whateverBB:
    //    whatever
    //    br label %resume.0.landing
    //
    //  resume.0: ; <--- jump from the switch in the resume.entry
    //    %0 = tail call i8 @llvm.coro.suspend(token none, i1 false)
    //    br label %resume.0.landing
    //
    //  resume.0.landing:
    //    %1 = phi i8 [-1, %whateverBB], [%0, %resume.0]
    //    switch i8 %1, label %suspend [i8 0, label %resume
    //                                  i8 1, label %cleanup]

    auto *SuspendBB = S->getParent();
    auto *ResumeBB =
        SuspendBB->splitBasicBlock(S, "resume." + Twine(SuspendIndex));
    auto *LandingBB = ResumeBB->splitBasicBlock(
        S->getNextNode(), ResumeBB->getName() + Twine(".landing"));
    Switch->addCase(IndexVal, ResumeBB);

    cast<BranchInst>(SuspendBB->getTerminator())->setSuccessor(0, LandingBB);
    auto *PN = PHINode::Create(Builder.getInt8Ty(), 2, "");
    PN->insertBefore(LandingBB->begin());
    S->replaceAllUsesWith(PN);
    PN->addIncoming(Builder.getInt8(-1), SuspendBB);
    PN->addIncoming(S, ResumeBB);

    ++SuspendIndex;
  }

  Builder.SetInsertPoint(UnreachBB);
  Builder.CreateUnreachable();

  Shape.SwitchLowering.ResumeEntryBlock = NewEntry;
}

// In the resume function, we remove the last case (when coro::Shape is built,
// the final suspend point, if present, is always the last element of the
// CoroSuspends array), since it is undefined behavior to resume a coroutine
// suspended at the final suspend point.
// In the destroy function, we can also remove the last case, provided it is
// impossible for ResumeFnAddr to be null while the coroutine is not actually
// suspended at the final suspend point (that situation is possible, since a
// coroutine whose promise.unhandled_exception() exits via an exception is
// considered suspended at the final suspend point).
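//
// After the final case is removed in the destroy function, reaching the final
// suspend point is instead detected by a null check on the resume function
// pointer, emitted below roughly as (value names illustrative):
//
//   %ResumeFn = load ptr, ptr %ResumeFn.addr
//   %is.done = icmp eq ptr %ResumeFn, null
//   br i1 %is.done, label %resume.N, label %Switch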
void CoroCloner::handleFinalSuspend() {
  assert(Shape.ABI == coro::ABI::Switch &&
         Shape.SwitchLowering.HasFinalSuspend);

  if (isSwitchDestroyFunction() && Shape.SwitchLowering.HasUnwindCoroEnd)
    return;

  auto *Switch = cast<SwitchInst>(VMap[Shape.SwitchLowering.ResumeSwitch]);
  auto FinalCaseIt = std::prev(Switch->case_end());
  BasicBlock *ResumeBB = FinalCaseIt->getCaseSuccessor();
  Switch->removeCase(FinalCaseIt);
  if (isSwitchDestroyFunction()) {
    BasicBlock *OldSwitchBB = Switch->getParent();
    auto *NewSwitchBB = OldSwitchBB->splitBasicBlock(Switch, "Switch");
    Builder.SetInsertPoint(OldSwitchBB->getTerminator());

    if (NewF->isCoroOnlyDestroyWhenComplete()) {
      // When the coroutine can only be destroyed when complete, we don't need
      // to generate code for other cases.
      Builder.CreateBr(ResumeBB);
    } else {
      auto *GepIndex = Builder.CreateStructGEP(
          Shape.FrameTy, NewFramePtr, coro::Shape::SwitchFieldIndex::Resume,
          "ResumeFn.addr");
      auto *Load =
          Builder.CreateLoad(Shape.getSwitchResumePointerType(), GepIndex);
      auto *Cond = Builder.CreateIsNull(Load);
      Builder.CreateCondBr(Cond, ResumeBB, NewSwitchBB);
    }
    OldSwitchBB->getTerminator()->eraseFromParent();
  }
}

static FunctionType *
getFunctionTypeFromAsyncSuspend(AnyCoroSuspendInst *Suspend) {
  auto *AsyncSuspend = cast<CoroSuspendAsyncInst>(Suspend);
  auto *StructTy = cast<StructType>(AsyncSuspend->getType());
  auto &Context = Suspend->getParent()->getParent()->getContext();
  auto *VoidTy = Type::getVoidTy(Context);
  return FunctionType::get(VoidTy, StructTy->elements(), false);
}

static Function *createCloneDeclaration(Function &OrigF, coro::Shape &Shape,
                                        const Twine &Suffix,
                                        Module::iterator InsertBefore,
                                        AnyCoroSuspendInst *ActiveSuspend) {
  Module *M = OrigF.getParent();
  auto *FnTy = (Shape.ABI != coro::ABI::Async)
                   ? Shape.getResumeFunctionType()
                   : getFunctionTypeFromAsyncSuspend(ActiveSuspend);

  Function *NewF =
      Function::Create(FnTy, GlobalValue::LinkageTypes::InternalLinkage,
                       OrigF.getName() + Suffix);

  M->getFunctionList().insert(InsertBefore, NewF);

  return NewF;
}

/// Replace uses of the active llvm.coro.suspend.retcon/async call with the
/// arguments to the continuation function.
///
/// This assumes that the builder has a meaningful insertion point.
void CoroCloner::replaceRetconOrAsyncSuspendUses() {
  assert(Shape.ABI == coro::ABI::Retcon || Shape.ABI == coro::ABI::RetconOnce ||
         Shape.ABI == coro::ABI::Async);

  auto NewS = VMap[ActiveSuspend];
  if (NewS->use_empty()) return;

  // Copy out all the continuation arguments after the buffer pointer into
  // an easily-indexed data structure for convenience.
  SmallVector<Value *, 8> Args;
  // The async ABI includes all arguments -- including the first argument.
  bool IsAsyncABI = Shape.ABI == coro::ABI::Async;
  for (auto I = IsAsyncABI ? NewF->arg_begin() : std::next(NewF->arg_begin()),
            E = NewF->arg_end();
       I != E; ++I)
    Args.push_back(&*I);

  // If the suspend returns a single scalar value, we can just do a simple
  // replacement.
  if (!isa<StructType>(NewS->getType())) {
    assert(Args.size() == 1);
    NewS->replaceAllUsesWith(Args.front());
    return;
  }

  // Try to peephole extracts of an aggregate return.
  for (Use &U : llvm::make_early_inc_range(NewS->uses())) {
    auto *EVI = dyn_cast<ExtractValueInst>(U.getUser());
    if (!EVI || EVI->getNumIndices() != 1)
      continue;

    EVI->replaceAllUsesWith(Args[EVI->getIndices().front()]);
    EVI->eraseFromParent();
  }

  // If we have no remaining uses, we're done.
  if (NewS->use_empty()) return;

  // Otherwise, we need to create an aggregate.
  Value *Agg = PoisonValue::get(NewS->getType());
  for (size_t I = 0, E = Args.size(); I != E; ++I)
    Agg = Builder.CreateInsertValue(Agg, Args[I], I);

  NewS->replaceAllUsesWith(Agg);
}

void CoroCloner::replaceCoroSuspends() {
  Value *SuspendResult;

  switch (Shape.ABI) {
  // In switch lowering, replace coro.suspend with the appropriate value
  // for the type of function we're extracting.
  // Replacing coro.suspend with (0) will result in control flow proceeding to
  // a resume label associated with a suspend point, replacing it with (1) will
  // result in control flow proceeding to a cleanup label associated with this
  // suspend point.
  case coro::ABI::Switch:
    SuspendResult = Builder.getInt8(isSwitchDestroyFunction() ? 1 : 0);
    break;

  // In async lowering there are no uses of the result.
  case coro::ABI::Async:
    return;

  // In returned-continuation lowering, the arguments from earlier
  // continuations are theoretically arbitrary, and they should have been
  // spilled.
  case coro::ABI::RetconOnce:
  case coro::ABI::Retcon:
    return;
  }

  for (AnyCoroSuspendInst *CS : Shape.CoroSuspends) {
    // The active suspend was handled earlier.
    if (CS == ActiveSuspend) continue;

    auto *MappedCS = cast<AnyCoroSuspendInst>(VMap[CS]);
    MappedCS->replaceAllUsesWith(SuspendResult);
    MappedCS->eraseFromParent();
  }
}

void CoroCloner::replaceCoroEnds() {
  for (AnyCoroEndInst *CE : Shape.CoroEnds) {
    // We use a null call graph because there's no call graph node for
    // the cloned function yet. We'll just be rebuilding that later.
    auto *NewCE = cast<AnyCoroEndInst>(VMap[CE]);
    replaceCoroEnd(NewCE, Shape, NewFramePtr, /*in resume*/ true, nullptr);
  }
}

static void replaceSwiftErrorOps(Function &F, coro::Shape &Shape,
                                 ValueToValueMapTy *VMap) {
  if (Shape.ABI == coro::ABI::Async && Shape.CoroSuspends.empty())
    return;
  Value *CachedSlot = nullptr;
  auto getSwiftErrorSlot = [&](Type *ValueTy) -> Value * {
    if (CachedSlot)
      return CachedSlot;

    // Check if the function has a swifterror argument.
    for (auto &Arg : F.args()) {
      if (Arg.isSwiftError()) {
        CachedSlot = &Arg;
        return &Arg;
      }
    }

    // Create a swifterror alloca.
    IRBuilder<> Builder(F.getEntryBlock().getFirstNonPHIOrDbg());
    auto Alloca = Builder.CreateAlloca(ValueTy);
    Alloca->setSwiftError(true);

    CachedSlot = Alloca;
    return Alloca;
  };

  for (CallInst *Op : Shape.SwiftErrorOps) {
    auto MappedOp = VMap ? cast<CallInst>((*VMap)[Op]) : Op;
    IRBuilder<> Builder(MappedOp);

    // If there are no arguments, this is a 'get' operation.
    Value *MappedResult;
    if (Op->arg_empty()) {
      auto ValueTy = Op->getType();
      auto Slot = getSwiftErrorSlot(ValueTy);
      MappedResult = Builder.CreateLoad(ValueTy, Slot);
    } else {
      assert(Op->arg_size() == 1);
      auto Value = MappedOp->getArgOperand(0);
      auto ValueTy = Value->getType();
      auto Slot = getSwiftErrorSlot(ValueTy);
      Builder.CreateStore(Value, Slot);
      MappedResult = Slot;
    }

    MappedOp->replaceAllUsesWith(MappedResult);
    MappedOp->eraseFromParent();
  }

  // If we're updating the original function, we've invalidated SwiftErrorOps.
  if (VMap == nullptr) {
    Shape.SwiftErrorOps.clear();
  }
}

/// Returns all DbgVariableIntrinsics and DPValues in F.
static std::pair<SmallVector<DbgVariableIntrinsic *, 8>, SmallVector<DPValue *>>
collectDbgVariableIntrinsics(Function &F) {
  SmallVector<DbgVariableIntrinsic *, 8> Intrinsics;
  SmallVector<DPValue *> DPValues;
  for (auto &I : instructions(F)) {
    for (DPValue &DPV : I.getDbgValueRange())
      DPValues.push_back(&DPV);
    if (auto *DVI = dyn_cast<DbgVariableIntrinsic>(&I))
      Intrinsics.push_back(DVI);
  }
  return {Intrinsics, DPValues};
}

void CoroCloner::replaceSwiftErrorOps() {
  ::replaceSwiftErrorOps(*NewF, Shape, &VMap);
}

void CoroCloner::salvageDebugInfo() {
  auto [Worklist, DPValues] = collectDbgVariableIntrinsics(*NewF);
  SmallDenseMap<Argument *, AllocaInst *, 4> ArgToAllocaMap;

  // Only 64-bit ABIs have a register we can refer to with the entry value.
  bool UseEntryValue =
      llvm::Triple(OrigF.getParent()->getTargetTriple()).isArch64Bit();
  for (DbgVariableIntrinsic *DVI : Worklist)
    coro::salvageDebugInfo(ArgToAllocaMap, *DVI, Shape.OptimizeFrame,
                           UseEntryValue);
  for (DPValue *DPV : DPValues)
    coro::salvageDebugInfo(ArgToAllocaMap, *DPV, Shape.OptimizeFrame,
                           UseEntryValue);

  // Remove all salvaged dbg.declare intrinsics that became
  // either unreachable or stale due to the CoroSplit transformation.
  DominatorTree DomTree(*NewF);
  auto IsUnreachableBlock = [&](BasicBlock *BB) {
    return !isPotentiallyReachable(&NewF->getEntryBlock(), BB, nullptr,
                                   &DomTree);
  };
  auto RemoveOne = [&](auto *DVI) {
    if (IsUnreachableBlock(DVI->getParent()))
      DVI->eraseFromParent();
    else if (isa_and_nonnull<AllocaInst>(DVI->getVariableLocationOp(0))) {
      // Count all non-debuginfo uses in reachable blocks.
      unsigned Uses = 0;
      for (auto *User : DVI->getVariableLocationOp(0)->users())
        if (auto *I = dyn_cast<Instruction>(User))
          if (!isa<AllocaInst>(I) && !IsUnreachableBlock(I->getParent()))
            ++Uses;
      if (!Uses)
        DVI->eraseFromParent();
    }
  };
  for_each(Worklist, RemoveOne);
  for_each(DPValues, RemoveOne);
}

void CoroCloner::replaceEntryBlock() {
  // In the original function, the AllocaSpillBlock is a block immediately
  // following the allocation of the frame object which defines GEPs for
  // all the allocas that have been moved into the frame, and it ends by
  // branching to the original beginning of the coroutine. Make this
  // the entry block of the cloned function.
  auto *Entry = cast<BasicBlock>(VMap[Shape.AllocaSpillBlock]);
  auto *OldEntry = &NewF->getEntryBlock();
  Entry->setName("entry" + Suffix);
  Entry->moveBefore(OldEntry);
  Entry->getTerminator()->eraseFromParent();

  // Clear all predecessors of the new entry block. There should be
  // exactly one predecessor, which we created when splitting out
  // AllocaSpillBlock to begin with.
  assert(Entry->hasOneUse());
  auto BranchToEntry = cast<BranchInst>(Entry->user_back());
  assert(BranchToEntry->isUnconditional());
  Builder.SetInsertPoint(BranchToEntry);
  Builder.CreateUnreachable();
  BranchToEntry->eraseFromParent();

  // Branch from the entry to the appropriate place.
  Builder.SetInsertPoint(Entry);
  switch (Shape.ABI) {
  case coro::ABI::Switch: {
    // In switch-lowering, we built a resume-entry block in the original
    // function. Make the entry block branch to this.
    auto *SwitchBB =
        cast<BasicBlock>(VMap[Shape.SwitchLowering.ResumeEntryBlock]);
    Builder.CreateBr(SwitchBB);
    break;
  }
  case coro::ABI::Async:
  case coro::ABI::Retcon:
  case coro::ABI::RetconOnce: {
    // In continuation ABIs, we want to branch to immediately after the
    // active suspend point. Earlier phases will have put the suspend in its
    // own basic block, so just thread our jump directly to its successor.
    assert((Shape.ABI == coro::ABI::Async &&
            isa<CoroSuspendAsyncInst>(ActiveSuspend)) ||
           ((Shape.ABI == coro::ABI::Retcon ||
             Shape.ABI == coro::ABI::RetconOnce) &&
            isa<CoroSuspendRetconInst>(ActiveSuspend)));
    auto *MappedCS = cast<AnyCoroSuspendInst>(VMap[ActiveSuspend]);
    auto Branch = cast<BranchInst>(MappedCS->getNextNode());
    assert(Branch->isUnconditional());
    Builder.CreateBr(Branch->getSuccessor(0));
    break;
  }
  }

  // Any static alloca that's still being used but not reachable from the new
  // entry needs to be moved to the new entry.
  Function *F = OldEntry->getParent();
  DominatorTree DT{*F};
  for (Instruction &I : llvm::make_early_inc_range(instructions(F))) {
    auto *Alloca = dyn_cast<AllocaInst>(&I);
    if (!Alloca || I.use_empty())
      continue;
    if (DT.isReachableFromEntry(I.getParent()) ||
        !isa<ConstantInt>(Alloca->getArraySize()))
      continue;
    I.moveBefore(*Entry, Entry->getFirstInsertionPt());
  }
}

/// Derive the value of the new frame pointer.
Value *CoroCloner::deriveNewFramePointer() {
  // Builder should be inserting to the front of the new entry block.

  switch (Shape.ABI) {
  // In switch-lowering, the argument is the frame pointer.
  case coro::ABI::Switch:
    return &*NewF->arg_begin();
  // In async-lowering, one of the arguments is an async context as determined
  // by the `llvm.coro.id.async` intrinsic. We can retrieve the async context
  // of the resume function from the async context projection function
  // associated with the active suspend. The frame is located as a tail to
  // the async context header.
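  //
  // Roughly (a sketch; value names are illustrative):
  //
  //   %caller.ctx = call ptr %project.fn(ptr %callee.ctx)
  //   %async.ctx.frameptr = getelementptr inbounds i8, ptr %caller.ctx,
  //                                       i32 <FrameOffset>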
  case coro::ABI::Async: {
    auto *ActiveAsyncSuspend = cast<CoroSuspendAsyncInst>(ActiveSuspend);
    auto ContextIdx = ActiveAsyncSuspend->getStorageArgumentIndex() & 0xff;
    auto *CalleeContext = NewF->getArg(ContextIdx);
    auto *ProjectionFunc =
        ActiveAsyncSuspend->getAsyncContextProjectionFunction();
    auto DbgLoc =
        cast<CoroSuspendAsyncInst>(VMap[ActiveSuspend])->getDebugLoc();
    // Calling i8* (i8*)
    auto *CallerContext = Builder.CreateCall(ProjectionFunc->getFunctionType(),
                                             ProjectionFunc, CalleeContext);
    CallerContext->setCallingConv(ProjectionFunc->getCallingConv());
    CallerContext->setDebugLoc(DbgLoc);
    // The frame is located after the async_context header.
    auto &Context = Builder.getContext();
    auto *FramePtrAddr = Builder.CreateConstInBoundsGEP1_32(
        Type::getInt8Ty(Context), CallerContext,
        Shape.AsyncLowering.FrameOffset, "async.ctx.frameptr");
    // Inline the projection function.
    InlineFunctionInfo InlineInfo;
    auto InlineRes = InlineFunction(*CallerContext, InlineInfo);
    assert(InlineRes.isSuccess());
    (void)InlineRes;
    return FramePtrAddr;
  }
  // In continuation-lowering, the argument is the opaque storage.
  case coro::ABI::Retcon:
  case coro::ABI::RetconOnce: {
    Argument *NewStorage = &*NewF->arg_begin();
    auto FramePtrTy = PointerType::getUnqual(Shape.FrameTy->getContext());

    // If the storage is inline, just bitcast the storage to the frame type.
    if (Shape.RetconLowering.IsFrameInlineInStorage)
      return NewStorage;

    // Otherwise, load the real frame from the opaque storage.
    return Builder.CreateLoad(FramePtrTy, NewStorage);
  }
  }
  llvm_unreachable("bad ABI");
}

static void addFramePointerAttrs(AttributeList &Attrs, LLVMContext &Context,
                                 unsigned ParamIndex, uint64_t Size,
                                 Align Alignment, bool NoAlias) {
  AttrBuilder ParamAttrs(Context);
  ParamAttrs.addAttribute(Attribute::NonNull);
  ParamAttrs.addAttribute(Attribute::NoUndef);

  if (NoAlias)
    ParamAttrs.addAttribute(Attribute::NoAlias);

  ParamAttrs.addAlignmentAttr(Alignment);
  ParamAttrs.addDereferenceableAttr(Size);
  Attrs = Attrs.addParamAttributes(Context, ParamIndex, ParamAttrs);
}

static void addAsyncContextAttrs(AttributeList &Attrs, LLVMContext &Context,
                                 unsigned ParamIndex) {
  AttrBuilder ParamAttrs(Context);
  ParamAttrs.addAttribute(Attribute::SwiftAsync);
  Attrs = Attrs.addParamAttributes(Context, ParamIndex, ParamAttrs);
}

static void addSwiftSelfAttrs(AttributeList &Attrs, LLVMContext &Context,
                              unsigned ParamIndex) {
  AttrBuilder ParamAttrs(Context);
  ParamAttrs.addAttribute(Attribute::SwiftSelf);
  Attrs = Attrs.addParamAttributes(Context, ParamIndex, ParamAttrs);
}

/// Clone the body of the original function into a resume function of
/// some sort.
void CoroCloner::create() {
  // Create the new function if we don't already have one.
  if (!NewF) {
    NewF = createCloneDeclaration(OrigF, Shape, Suffix,
                                  OrigF.getParent()->end(), ActiveSuspend);
  }

  // Replace all args with dummy instructions. If an argument is the old frame
  // pointer, the dummy will be replaced by the new frame pointer once it is
  // computed below. Uses of all other arguments should have already been
  // rewritten by buildCoroutineFrame() to use loads/stores on the coroutine
  // frame.
  SmallVector<Instruction *> DummyArgs;
  for (Argument &A : OrigF.args()) {
    DummyArgs.push_back(new FreezeInst(PoisonValue::get(A.getType())));
    VMap[&A] = DummyArgs.back();
  }

  SmallVector<ReturnInst *, 4> Returns;

  // Ignore attempts to change certain attributes of the function.
  // TODO: maybe there should be a way to suppress this during cloning?
  auto savedVisibility = NewF->getVisibility();
  auto savedUnnamedAddr = NewF->getUnnamedAddr();
  auto savedDLLStorageClass = NewF->getDLLStorageClass();

  // NewF's linkage (which CloneFunctionInto does *not* change) might not
  // be compatible with the visibility of OrigF (which it *does* change),
  // so protect against that.
  auto savedLinkage = NewF->getLinkage();
  NewF->setLinkage(llvm::GlobalValue::ExternalLinkage);

  CloneFunctionInto(NewF, &OrigF, VMap,
                    CloneFunctionChangeType::LocalChangesOnly, Returns);

  auto &Context = NewF->getContext();

  // For async functions / continuations, adjust the scope line of the
  // clone to the line number of the suspend point. However, only
  // adjust the scope line when the files are the same. This ensures
  // line number and file name belong together. The scope line is
  // associated with all pre-prologue instructions. This avoids a jump
  // in the linetable from the function declaration to the suspend point.
  if (DISubprogram *SP = NewF->getSubprogram()) {
    assert(SP != OrigF.getSubprogram() && SP->isDistinct());
    if (ActiveSuspend)
      if (auto DL = ActiveSuspend->getDebugLoc())
        if (SP->getFile() == DL->getFile())
          SP->setScopeLine(DL->getLine());
    // Update the linkage name to reflect the modified symbol name. It
    // is necessary to update the linkage name in Swift, since the
    // mangling changes for resume functions. It might also be the
    // right thing to do in C++, but due to a limitation in LLVM's
    // AsmPrinter we can only do this if the function doesn't have an
    // abstract specification, since the DWARF backend expects the
    // abstract specification to contain the linkage name and asserts
    // that they are identical.
    if (SP->getUnit() &&
        SP->getUnit()->getSourceLanguage() == dwarf::DW_LANG_Swift) {
      SP->replaceLinkageName(MDString::get(Context, NewF->getName()));
      if (auto *Decl = SP->getDeclaration()) {
        auto *NewDecl = DISubprogram::get(
            Decl->getContext(), Decl->getScope(), Decl->getName(),
            NewF->getName(), Decl->getFile(), Decl->getLine(), Decl->getType(),
            Decl->getScopeLine(), Decl->getContainingType(),
            Decl->getVirtualIndex(), Decl->getThisAdjustment(),
            Decl->getFlags(), Decl->getSPFlags(), Decl->getUnit(),
            Decl->getTemplateParams(), nullptr, Decl->getRetainedNodes(),
            Decl->getThrownTypes(), Decl->getAnnotations(),
            Decl->getTargetFuncName());
        SP->replaceDeclaration(NewDecl);
      }
    }
  }

  NewF->setLinkage(savedLinkage);
  NewF->setVisibility(savedVisibility);
  NewF->setUnnamedAddr(savedUnnamedAddr);
  NewF->setDLLStorageClass(savedDLLStorageClass);
  // The function sanitizer metadata needs to match the signature of the
  // function it is being attached to. However this does not hold for split
  // functions here. Thus remove the metadata for split functions.
  if (Shape.ABI == coro::ABI::Switch &&
      NewF->hasMetadata(LLVMContext::MD_func_sanitize))
    NewF->eraseMetadata(LLVMContext::MD_func_sanitize);

  // Replace the attributes of the new function:
  auto OrigAttrs = NewF->getAttributes();
  auto NewAttrs = AttributeList();

  switch (Shape.ABI) {
  case coro::ABI::Switch:
    // Bootstrap attributes by copying function attributes from the
    // original function. This should include optimization settings and so on.
    NewAttrs = NewAttrs.addFnAttributes(
        Context, AttrBuilder(Context, OrigAttrs.getFnAttrs()));

    addFramePointerAttrs(NewAttrs, Context, 0, Shape.FrameSize,
                         Shape.FrameAlign, /*NoAlias=*/false);
    break;
  case coro::ABI::Async: {
    auto *ActiveAsyncSuspend = cast<CoroSuspendAsyncInst>(ActiveSuspend);
    if (OrigF.hasParamAttribute(Shape.AsyncLowering.ContextArgNo,
                                Attribute::SwiftAsync)) {
      uint32_t ArgAttributeIndices =
          ActiveAsyncSuspend->getStorageArgumentIndex();
      auto ContextArgIndex = ArgAttributeIndices & 0xff;
      addAsyncContextAttrs(NewAttrs, Context, ContextArgIndex);

      // `swiftasync` must precede `swiftself`, so 0 is not a valid index for
      // `swiftself`.
      auto SwiftSelfIndex = ArgAttributeIndices >> 8;
      if (SwiftSelfIndex)
        addSwiftSelfAttrs(NewAttrs, Context, SwiftSelfIndex);
    }

    // Transfer the original function's attributes.
    auto FnAttrs = OrigF.getAttributes().getFnAttrs();
    NewAttrs = NewAttrs.addFnAttributes(Context, AttrBuilder(Context, FnAttrs));
    break;
  }
  case coro::ABI::Retcon:
  case coro::ABI::RetconOnce:
    // If we have a continuation prototype, just use its attributes,
    // full-stop.
    NewAttrs = Shape.RetconLowering.ResumePrototype->getAttributes();

    /// FIXME: Is it really good to add the NoAlias attribute?
    addFramePointerAttrs(NewAttrs, Context, 0,
                         Shape.getRetconCoroId()->getStorageSize(),
                         Shape.getRetconCoroId()->getStorageAlignment(),
                         /*NoAlias=*/true);

    break;
  }

  switch (Shape.ABI) {
  // In these ABIs, the cloned functions always return 'void', and the
  // existing return sites are meaningless. Note that for unique
  // continuations, this includes the returns associated with suspends;
  // this is fine because we can't suspend twice.
  case coro::ABI::Switch:
  case coro::ABI::RetconOnce:
    // Remove old returns.
    for (ReturnInst *Return : Returns)
      changeToUnreachable(Return);
    break;

  // With multi-suspend continuations, we'll already have eliminated the
  // original returns and inserted returns before all the suspend points,
  // so we want to leave any returns in place.
  case coro::ABI::Retcon:
    break;
  // Async lowering will insert musttail call functions at all suspend points
  // followed by a return.
  // Don't change returns to unreachable because that will trip up the
  // verifier. These returns should be unreachable from the clone.
  case coro::ABI::Async:
    break;
  }

  NewF->setAttributes(NewAttrs);
  NewF->setCallingConv(Shape.getResumeFunctionCC());

  // Set up the new entry block.
  replaceEntryBlock();

  Builder.SetInsertPoint(&NewF->getEntryBlock().front());
  NewFramePtr = deriveNewFramePointer();

  // Remap frame pointer.
  Value *OldFramePtr = VMap[Shape.FramePtr];
  NewFramePtr->takeName(OldFramePtr);
  OldFramePtr->replaceAllUsesWith(NewFramePtr);

  // Remap vFrame pointer.
  auto *NewVFrame = Builder.CreateBitCast(
      NewFramePtr, PointerType::getUnqual(Builder.getContext()), "vFrame");
  Value *OldVFrame = cast<Value>(VMap[Shape.CoroBegin]);
  if (OldVFrame != NewVFrame)
    OldVFrame->replaceAllUsesWith(NewVFrame);

  // All uses of the arguments should have been resolved by this point,
  // so we can safely remove the dummy values.
  for (Instruction *DummyArg : DummyArgs) {
    DummyArg->replaceAllUsesWith(PoisonValue::get(DummyArg->getType()));
    DummyArg->deleteValue();
  }

  switch (Shape.ABI) {
  case coro::ABI::Switch:
    // Rewrite final suspend handling as it is not done via switch (this
    // allows removing the final case from the switch, since it is undefined
    // behavior to resume a coroutine suspended at the final suspend point).
    if (Shape.SwitchLowering.HasFinalSuspend)
      handleFinalSuspend();
    break;
  case coro::ABI::Async:
  case coro::ABI::Retcon:
  case coro::ABI::RetconOnce:
    // Replace uses of the active suspend with the corresponding
    // continuation-function arguments.
    assert(ActiveSuspend != nullptr &&
           "no active suspend when lowering a continuation-style coroutine");
    replaceRetconOrAsyncSuspendUses();
    break;
  }

  // Handle suspends.
  replaceCoroSuspends();

  // Handle swifterror.
  replaceSwiftErrorOps();

  // Remove coro.end intrinsics.
  replaceCoroEnds();

  // Salvage debug info that points into the coroutine frame.
  salvageDebugInfo();

  // Eliminate coro.free from the clones, replacing it with 'null' in cleanup,
  // to suppress deallocation code.
  if (Shape.ABI == coro::ABI::Switch)
    coro::replaceCoroFree(cast<CoroIdInst>(VMap[Shape.CoroBegin->getId()]),
                          /*Elide=*/FKind == CoroCloner::Kind::SwitchCleanup);
}

// Create a resume clone by cloning the body of the original function, setting
// a new entry block and replacing coro.suspend with an appropriate value to
// force the resume or cleanup path for every suspend point.
static Function *createClone(Function &F, const Twine &Suffix,
                             coro::Shape &Shape, CoroCloner::Kind FKind) {
  CoroCloner Cloner(F, Suffix, Shape, FKind);
  Cloner.create();
  return Cloner.getFunction();
}

static void updateAsyncFuncPointerContextSize(coro::Shape &Shape) {
  assert(Shape.ABI == coro::ABI::Async);

  auto *FuncPtrStruct = cast<ConstantStruct>(
      Shape.AsyncLowering.AsyncFuncPointer->getInitializer());
  auto *OrigRelativeFunOffset = FuncPtrStruct->getOperand(0);
  auto *OrigContextSize = FuncPtrStruct->getOperand(1);
  auto *NewContextSize = ConstantInt::get(OrigContextSize->getType(),
                                          Shape.AsyncLowering.ContextSize);
  auto *NewFuncPtrStruct = ConstantStruct::get(
      FuncPtrStruct->getType(), OrigRelativeFunOffset, NewContextSize);

  Shape.AsyncLowering.AsyncFuncPointer->setInitializer(NewFuncPtrStruct);
}

static void replaceFrameSizeAndAlignment(coro::Shape &Shape) {
  if (Shape.ABI == coro::ABI::Async)
    updateAsyncFuncPointerContextSize(Shape);

  for (CoroAlignInst *CA : Shape.CoroAligns) {
    CA->replaceAllUsesWith(
        ConstantInt::get(CA->getType(), Shape.FrameAlign.value()));
    CA->eraseFromParent();
  }

  if (Shape.CoroSizes.empty())
    return;

  // In the same function all coro.sizes should have the same result type.
  auto *SizeIntrin = Shape.CoroSizes.back();
  Module *M = SizeIntrin->getModule();
  const DataLayout &DL = M->getDataLayout();
  auto Size = DL.getTypeAllocSize(Shape.FrameTy);
  auto *SizeConstant = ConstantInt::get(SizeIntrin->getType(), Size);

  for (CoroSizeInst *CS : Shape.CoroSizes) {
    CS->replaceAllUsesWith(SizeConstant);
    CS->eraseFromParent();
  }
}

// Create a global constant array containing pointers to functions provided and
// set Info parameter of CoroBegin to point at this constant. Example:
//
//   @f.resumers = internal constant [2 x void(%f.frame*)*]
//                   [void(%f.frame*)* @f.resume, void(%f.frame*)* @f.destroy]
//   define void @f() {
//     ...
//     call i8* @llvm.coro.begin(i8* null, i32 0, i8* null,
//                    i8* bitcast([2 x void(%f.frame*)*] * @f.resumers to i8*))
//
// Assumes that all the functions have the same signature.
static void setCoroInfo(Function &F, coro::Shape &Shape,
                        ArrayRef<Function *> Fns) {
  // This only works under the switch-lowering ABI because coro elision
  // only works on the switch-lowering ABI.
  assert(Shape.ABI == coro::ABI::Switch);

  SmallVector<Constant *, 4> Args(Fns.begin(), Fns.end());
  assert(!Args.empty());
  Function *Part = *Fns.begin();
  Module *M = Part->getParent();
  auto *ArrTy = ArrayType::get(Part->getType(), Args.size());

  auto *ConstVal = ConstantArray::get(ArrTy, Args);
  auto *GV = new GlobalVariable(*M, ConstVal->getType(), /*isConstant=*/true,
                                GlobalVariable::PrivateLinkage, ConstVal,
                                F.getName() + Twine(".resumers"));

  // Update coro.begin instruction to refer to this constant.
  LLVMContext &C = F.getContext();
  auto *BC = ConstantExpr::getPointerCast(GV, PointerType::getUnqual(C));
  Shape.getSwitchCoroId()->setInfo(BC);
}

// Store addresses of Resume/Destroy/Cleanup functions in the coroutine frame.
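//
// Sketch of the stores this emits (value names illustrative; the select only
// appears when a coro.alloc exists):
//
//   store ptr @f.resume, ptr %resume.addr
//   %sel = select i1 %coro.alloc, ptr @f.destroy, ptr @f.cleanup
//   store ptr %sel, ptr %destroy.addr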
static void updateCoroFrame(coro::Shape &Shape, Function *ResumeFn,
                            Function *DestroyFn, Function *CleanupFn) {
  assert(Shape.ABI == coro::ABI::Switch);

  IRBuilder<> Builder(&*Shape.getInsertPtAfterFramePtr());

  auto *ResumeAddr = Builder.CreateStructGEP(
      Shape.FrameTy, Shape.FramePtr, coro::Shape::SwitchFieldIndex::Resume,
      "resume.addr");
  Builder.CreateStore(ResumeFn, ResumeAddr);

  Value *DestroyOrCleanupFn = DestroyFn;

  CoroIdInst *CoroId = Shape.getSwitchCoroId();
  if (CoroAllocInst *CA = CoroId->getCoroAlloc()) {
    // If there is a CoroAlloc and it returns false (meaning we elided the
    // allocation), use CleanupFn instead of DestroyFn.
    DestroyOrCleanupFn = Builder.CreateSelect(CA, DestroyFn, CleanupFn);
  }

  auto *DestroyAddr = Builder.CreateStructGEP(
      Shape.FrameTy, Shape.FramePtr, coro::Shape::SwitchFieldIndex::Destroy,
      "destroy.addr");
  Builder.CreateStore(DestroyOrCleanupFn, DestroyAddr);
}

static void postSplitCleanup(Function &F) {
  removeUnreachableBlocks(F);

#ifndef NDEBUG
  // For now, we do a mandatory verification step because we don't
  // entirely trust this pass. Note that we don't want to add a verifier
  // pass to FPM below because it will also verify all the global data.
  if (verifyFunction(F, &errs()))
    report_fatal_error("Broken function");
#endif
}

// Assuming we arrived at the block NewBlock from the Prev instruction, store
// the PHIs' incoming values in the ResolvedValues map.
static void
scanPHIsAndUpdateValueMap(Instruction *Prev, BasicBlock *NewBlock,
                          DenseMap<Value *, Value *> &ResolvedValues) {
  auto *PrevBB = Prev->getParent();
  for (PHINode &PN : NewBlock->phis()) {
    auto V = PN.getIncomingValueForBlock(PrevBB);
    // See if we already resolved it.
    auto VI = ResolvedValues.find(V);
    if (VI != ResolvedValues.end())
      V = VI->second;
    // Remember the value.
    ResolvedValues[&PN] = V;
  }
}

// Replace a sequence of branches leading to a ret with a clone of that ret
// instruction. The suspend point is represented by a switch; track the PHI
// values and select the correct case successor when possible.
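//
// For example (a simplified sketch), once %index is known to be constant, a
// chain such as:
//
//   %cmp = icmp eq i8 %index, 0
//   br i1 %cmp, label %exit, label %other
// exit:
//   ret void
//
// collapses into a direct 'ret void' at the original position.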
static bool simplifyTerminatorLeadingToRet(Instruction *InitialInst) {
  // There is nothing to simplify.
  if (isa<ReturnInst>(InitialInst))
    return false;

  DenseMap<Value *, Value *> ResolvedValues;
  assert(InitialInst->getModule());
  const DataLayout &DL = InitialInst->getModule()->getDataLayout();

  auto GetFirstValidInstruction = [](Instruction *I) {
    while (I) {
      // A BitCastInst doesn't generate actual code, so we can skip it.
      if (isa<BitCastInst>(I) || I->isDebugOrPseudoInst() ||
          I->isLifetimeStartOrEnd())
        I = I->getNextNode();
      else if (isInstructionTriviallyDead(I))
        // Since we are in the middle of the transformation, we need to erase
        // the dead instruction manually.
        I = &*I->eraseFromParent();
      else
        break;
    }
    return I;
  };

  auto TryResolveConstant = [&ResolvedValues](Value *V) {
    auto It = ResolvedValues.find(V);
    if (It != ResolvedValues.end())
      V = It->second;
    return dyn_cast<ConstantInt>(V);
  };

  Instruction *I = InitialInst;
  while (I->isTerminator() || isa<CmpInst>(I)) {
    if (isa<ReturnInst>(I)) {
      ReplaceInstWithInst(InitialInst, I->clone());
      return true;
    }

    if (auto *BR = dyn_cast<BranchInst>(I)) {
      unsigned SuccIndex = 0;
      if (BR->isConditional()) {
        // Handle the case where the condition of the conditional branch is
        // constant, e.g.,
        //
        //   br i1 false, label %cleanup, label %CoroEnd
        //
        // This is possible during the transformation, and we can continue
        // simplifying in this case.
        ConstantInt *Cond = TryResolveConstant(BR->getCondition());
        if (!Cond)
          return false;

        SuccIndex = Cond->isOne() ? 0 : 1;
      }

      BasicBlock *Succ = BR->getSuccessor(SuccIndex);
      scanPHIsAndUpdateValueMap(I, Succ, ResolvedValues);
      I = GetFirstValidInstruction(Succ->getFirstNonPHIOrDbgOrLifetime());

      continue;
    }

    if (auto *CondCmp = dyn_cast<CmpInst>(I)) {
      // If the number of cases in the suspend switch instruction is reduced
      // to 1, it is simplified to a CmpInst by llvm::ConstantFoldTerminator.
      auto *BR = dyn_cast<BranchInst>(
          GetFirstValidInstruction(CondCmp->getNextNode()));
      if (!BR || !BR->isConditional() || CondCmp != BR->getCondition())
        return false;

      // The comparison looks like: %cond = icmp eq i8 %V, constant.
      // So we try to resolve a constant for the first operand only, since the
      // second operand should be a literal constant by design.
      ConstantInt *Cond0 = TryResolveConstant(CondCmp->getOperand(0));
      auto *Cond1 = dyn_cast<ConstantInt>(CondCmp->getOperand(1));
      if (!Cond0 || !Cond1)
        return false;

      // Both operands of the CmpInst are Constant, so we can evaluate it
      // immediately to get the destination.
      auto *ConstResult =
          dyn_cast_or_null<ConstantInt>(ConstantFoldCompareInstOperands(
              CondCmp->getPredicate(), Cond0, Cond1, DL));
      if (!ConstResult)
        return false;

      ResolvedValues[BR->getCondition()] = ConstResult;

      // Handle this branch in the next iteration.
      I = BR;
      continue;
    }

    if (auto *SI = dyn_cast<SwitchInst>(I)) {
      ConstantInt *Cond = TryResolveConstant(SI->getCondition());
      if (!Cond)
        return false;

      BasicBlock *BB = SI->findCaseValue(Cond)->getCaseSuccessor();
      scanPHIsAndUpdateValueMap(I, BB, ResolvedValues);
      I = GetFirstValidInstruction(BB->getFirstNonPHIOrDbgOrLifetime());
      continue;
    }

    return false;
  }

  return false;
}

1419 // Check whether CI obeys the rules of musttail attribute.
shouldBeMustTail(const CallInst & CI,const Function & F)1420 static bool shouldBeMustTail(const CallInst &CI, const Function &F) {
1421 if (CI.isInlineAsm())
1422 return false;
1423
1424 // Match prototypes and calling conventions of resume function.
1425 FunctionType *CalleeTy = CI.getFunctionType();
1426 if (!CalleeTy->getReturnType()->isVoidTy() || (CalleeTy->getNumParams() != 1))
1427 return false;
1428
1429 Type *CalleeParmTy = CalleeTy->getParamType(0);
1430 if (!CalleeParmTy->isPointerTy() ||
1431 (CalleeParmTy->getPointerAddressSpace() != 0))
1432 return false;
1433
1434 if (CI.getCallingConv() != F.getCallingConv())
1435 return false;
1436
1437 // CI should not has any ABI-impacting function attributes.
1438 static const Attribute::AttrKind ABIAttrs[] = {
1439 Attribute::StructRet, Attribute::ByVal, Attribute::InAlloca,
1440 Attribute::Preallocated, Attribute::InReg, Attribute::Returned,
1441 Attribute::SwiftSelf, Attribute::SwiftError};
1442 AttributeList Attrs = CI.getAttributes();
1443 for (auto AK : ABIAttrs)
1444 if (Attrs.hasParamAttr(0, AK))
1445 return false;
1446
1447 return true;
1448 }
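
// For illustration, a callsite of the following shape would qualify
// (hypothetical names), provided the enclosing function also returns void,
// takes a single address-space-0 pointer, and uses the same calling
// convention:
//
//   %addr = load ptr, ptr %resume.slot
//   call fastcc void %addr(ptr %other.hdl)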

// Add musttail to any resume instructions that are immediately followed by a
// suspend (i.e. ret). We do this even in -O0 to support guaranteed tail calls
// for symmetric coroutine control transfer (C++ Coroutines TS extension).
// This transformation is done only in the resume part of the coroutine that
// has an identical signature and calling convention to the coro.resume call.
static void addMustTailToCoroResumes(Function &F, TargetTransformInfo &TTI) {
  bool Changed = false;

  // Collect potential resume instructions.
  SmallVector<CallInst *, 4> Resumes;
  for (auto &I : instructions(F))
    if (auto *Call = dyn_cast<CallInst>(&I))
      if (shouldBeMustTail(*Call, F))
        Resumes.push_back(Call);

  // Set musttail on those that are followed by a ret instruction.
  for (CallInst *Call : Resumes)
    // Skip targets that don't support tail calls for this particular call.
    if (TTI.supportsTailCallFor(Call) &&
        simplifyTerminatorLeadingToRet(Call->getNextNode())) {
      Call->setTailCallKind(CallInst::TCK_MustTail);
      Changed = true;
    }

  if (Changed)
    removeUnreachableBlocks(F);
}
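
// After this transformation a symmetric-transfer site conceptually becomes
// (a sketch, hypothetical names):
//
//   %addr = load ptr, ptr %resume.slot
//   musttail call fastcc void %addr(ptr %other.hdl)
//   ret void
//
// The guaranteed tail call keeps chains of mutually-resuming coroutines from
// growing the stack.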

// Coroutine has no suspend points. Remove heap allocation for the coroutine
// frame if possible.
static void handleNoSuspendCoroutine(coro::Shape &Shape) {
  auto *CoroBegin = Shape.CoroBegin;
  auto *CoroId = CoroBegin->getId();
  auto *AllocInst = CoroId->getCoroAlloc();
  switch (Shape.ABI) {
  case coro::ABI::Switch: {
    auto SwitchId = cast<CoroIdInst>(CoroId);
    coro::replaceCoroFree(SwitchId, /*Elide=*/AllocInst != nullptr);
    if (AllocInst) {
      IRBuilder<> Builder(AllocInst);
      auto *Frame = Builder.CreateAlloca(Shape.FrameTy);
      Frame->setAlignment(Shape.FrameAlign);
      AllocInst->replaceAllUsesWith(Builder.getFalse());
      AllocInst->eraseFromParent();
      CoroBegin->replaceAllUsesWith(Frame);
    } else {
      CoroBegin->replaceAllUsesWith(CoroBegin->getMem());
    }

    break;
  }
  case coro::ABI::Async:
  case coro::ABI::Retcon:
  case coro::ABI::RetconOnce:
    CoroBegin->replaceAllUsesWith(UndefValue::get(CoroBegin->getType()));
    break;
  }

  CoroBegin->eraseFromParent();
}
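
// A sketch of the switch-ABI rewrite performed above (hypothetical names):
//
//   %alloc = call i1 @llvm.coro.alloc(token %id)   ; folded to `false`
//   %hdl = call ptr @llvm.coro.begin(token %id, ptr %mem)
//
// The frame becomes a plain `%frame = alloca %f.Frame` whose address replaces
// all uses of %hdl, and the paired @llvm.coro.free calls are elided via
// replaceCoroFree.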

// simplifySuspendPoint needs to check that there are no calls between
// coro.save and coro.suspend, since any of those calls may potentially resume
// the coroutine, in which case we cannot eliminate the suspend point.
static bool hasCallsInBlockBetween(Instruction *From, Instruction *To) {
  for (Instruction *I = From; I != To; I = I->getNextNode()) {
    // Assume that no intrinsic can resume the coroutine.
    if (isa<IntrinsicInst>(I))
      continue;

    if (isa<CallBase>(I))
      return true;
  }
  return false;
}

static bool hasCallsInBlocksBetween(BasicBlock *SaveBB, BasicBlock *ResDesBB) {
  SmallPtrSet<BasicBlock *, 8> Set;
  SmallVector<BasicBlock *, 8> Worklist;

  Set.insert(SaveBB);
  Worklist.push_back(ResDesBB);

  // Accumulate all blocks between SaveBB and ResDesBB. Because the coro.save
  // intrinsic returns a token consumed by the suspend instruction, all blocks
  // in between will have to eventually hit SaveBB when going backwards from
  // ResDesBB.
  while (!Worklist.empty()) {
    auto *BB = Worklist.pop_back_val();
    Set.insert(BB);
    for (auto *Pred : predecessors(BB))
      if (!Set.contains(Pred))
        Worklist.push_back(Pred);
  }

  // SaveBB and ResDesBB are checked separately in hasCallsBetween.
  Set.erase(SaveBB);
  Set.erase(ResDesBB);

  for (auto *BB : Set)
    if (hasCallsInBlockBetween(BB->getFirstNonPHI(), nullptr))
      return true;

  return false;
}

static bool hasCallsBetween(Instruction *Save, Instruction *ResumeOrDestroy) {
  auto *SaveBB = Save->getParent();
  auto *ResumeOrDestroyBB = ResumeOrDestroy->getParent();

  if (SaveBB == ResumeOrDestroyBB)
    return hasCallsInBlockBetween(Save->getNextNode(), ResumeOrDestroy);

  // Any calls from Save to the end of the block?
  if (hasCallsInBlockBetween(Save->getNextNode(), nullptr))
    return true;

  // Any calls from the beginning of the block up to ResumeOrDestroy?
  if (hasCallsInBlockBetween(ResumeOrDestroyBB->getFirstNonPHI(),
                             ResumeOrDestroy))
    return true;

  // Any calls in all of the blocks between SaveBB and ResumeOrDestroyBB?
  if (hasCallsInBlocksBetween(SaveBB, ResumeOrDestroyBB))
    return true;

  return false;
}

// If a SuspendIntrin is preceded by Resume or Destroy, we can eliminate the
// suspend point and replace it with normal control flow.
static bool simplifySuspendPoint(CoroSuspendInst *Suspend,
                                 CoroBeginInst *CoroBegin) {
  Instruction *Prev = Suspend->getPrevNode();
  if (!Prev) {
    auto *Pred = Suspend->getParent()->getSinglePredecessor();
    if (!Pred)
      return false;
    Prev = Pred->getTerminator();
  }

  CallBase *CB = dyn_cast<CallBase>(Prev);
  if (!CB)
    return false;

  auto *Callee = CB->getCalledOperand()->stripPointerCasts();

  // See if the callsite is for resumption or destruction of the coroutine.
  auto *SubFn = dyn_cast<CoroSubFnInst>(Callee);
  if (!SubFn)
    return false;

  // If it does not refer to the current coroutine, we cannot do anything
  // with it.
  if (SubFn->getFrame() != CoroBegin)
    return false;

  // See if the transformation is safe. Specifically, see if there are any
  // calls in between Save and CB. They can potentially resume the coroutine,
  // rendering this optimization unsafe.
  auto *Save = Suspend->getCoroSave();
  if (hasCallsBetween(Save, CB))
    return false;

  // Replace llvm.coro.suspend with the value that results in resumption over
  // the resume or cleanup path.
  Suspend->replaceAllUsesWith(SubFn->getRawIndex());
  Suspend->eraseFromParent();
  Save->eraseFromParent();

  // No longer need a call to coro.resume or coro.destroy.
  if (auto *Invoke = dyn_cast<InvokeInst>(CB)) {
    BranchInst::Create(Invoke->getNormalDest(), Invoke);
  }

  // Grab the CalledValue from CB before erasing the call.
  auto *CalledValue = CB->getCalledOperand();
  CB->eraseFromParent();

  // If it has no more users, remove it. Usually it is a bitcast of SubFn.
  if (CalledValue != SubFn && CalledValue->user_empty())
    if (auto *I = dyn_cast<Instruction>(CalledValue))
      I->eraseFromParent();

  // Now we are good to remove SubFn.
  if (SubFn->user_empty())
    SubFn->eraseFromParent();

  return true;
}
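
// A sketch of the pattern being collapsed (hypothetical names; i8 0 selects
// the resume funclet, i8 1 the destroy funclet):
//
//   %fn = call ptr @llvm.coro.subfn.addr(ptr %hdl, i8 0)
//   call fastcc void %fn(ptr %hdl)
//   %sp = call i8 @llvm.coro.suspend(token %save, i1 false)
//   switch i8 %sp, label %suspend.bb [i8 0, label %resume.bb
//                                     i8 1, label %cleanup.bb]
//
// When %hdl is this coroutine's own handle, the suspend/resume round trip
// folds into a direct transfer to %resume.bb (or %cleanup.bb for a destroy).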

// Remove suspend points that are simplified.
static void simplifySuspendPoints(coro::Shape &Shape) {
  // Currently, the only simplification we do is switch-lowering-specific.
  if (Shape.ABI != coro::ABI::Switch)
    return;

  auto &S = Shape.CoroSuspends;
  size_t I = 0, N = S.size();
  if (N == 0)
    return;

  size_t ChangedFinalIndex = std::numeric_limits<size_t>::max();
  while (true) {
    auto SI = cast<CoroSuspendInst>(S[I]);
    // Leave final.suspend to handleFinalSuspend since it is undefined behavior
    // to resume a coroutine suspended at the final suspend point.
    if (!SI->isFinal() && simplifySuspendPoint(SI, Shape.CoroBegin)) {
      if (--N == I)
        break;

      std::swap(S[I], S[N]);

      if (cast<CoroSuspendInst>(S[I])->isFinal()) {
        assert(Shape.SwitchLowering.HasFinalSuspend);
        ChangedFinalIndex = I;
      }

      continue;
    }
    if (++I == N)
      break;
  }
  S.resize(N);

  // Restore the position of the final suspend in case it was swapped, because
  // we require the final suspend to be the last element of CoroSuspends.
  if (ChangedFinalIndex < N) {
    assert(cast<CoroSuspendInst>(S[ChangedFinalIndex])->isFinal());
    std::swap(S[ChangedFinalIndex], S.back());
  }
}

static void splitSwitchCoroutine(Function &F, coro::Shape &Shape,
                                 SmallVectorImpl<Function *> &Clones,
                                 TargetTransformInfo &TTI) {
  assert(Shape.ABI == coro::ABI::Switch);

  createResumeEntryBlock(F, Shape);
  auto ResumeClone = createClone(F, ".resume", Shape,
                                 CoroCloner::Kind::SwitchResume);
  auto DestroyClone = createClone(F, ".destroy", Shape,
                                  CoroCloner::Kind::SwitchUnwind);
  auto CleanupClone = createClone(F, ".cleanup", Shape,
                                  CoroCloner::Kind::SwitchCleanup);

  postSplitCleanup(*ResumeClone);
  postSplitCleanup(*DestroyClone);
  postSplitCleanup(*CleanupClone);

  // Add musttail calls to support symmetric transfer.
  // Skip targets which don't support tail calls.
  //
  // FIXME: Could we support symmetric transfer effectively without musttail
  // calls?
  if (TTI.supportsTailCalls())
    addMustTailToCoroResumes(*ResumeClone, TTI);

  // Store addresses of the resume/destroy/cleanup functions in the coroutine
  // frame.
  updateCoroFrame(Shape, ResumeClone, DestroyClone, CleanupClone);

  assert(Clones.empty());
  Clones.push_back(ResumeClone);
  Clones.push_back(DestroyClone);
  Clones.push_back(CleanupClone);

  // Create a constant array referring to the resume/destroy/cleanup functions
  // pointed to by the last argument of @llvm.coro.info, so that the CoroElide
  // pass can determine the correct function to call.
  setCoroInfo(F, Shape, Clones);
}
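
// Conceptually, a switch-ABI coroutine f is split into f.resume, f.destroy
// and f.cleanup funclets, and the info argument of the coro.id ends up
// referring to something like (a sketch; the exact symbol naming is an
// implementation detail):
//
//   @f.resumers = internal constant [3 x ptr]
//       [ptr @f.resume, ptr @f.destroy, ptr @f.cleanup]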

static void replaceAsyncResumeFunction(CoroSuspendAsyncInst *Suspend,
                                       Value *Continuation) {
  auto *ResumeIntrinsic = Suspend->getResumeFunction();
  auto &Context = Suspend->getParent()->getParent()->getContext();
  auto *Int8PtrTy = PointerType::getUnqual(Context);

  IRBuilder<> Builder(ResumeIntrinsic);
  auto *Val = Builder.CreateBitOrPointerCast(Continuation, Int8PtrTy);
  ResumeIntrinsic->replaceAllUsesWith(Val);
  ResumeIntrinsic->eraseFromParent();
  Suspend->setOperand(CoroSuspendAsyncInst::ResumeFunctionArg,
                      UndefValue::get(Int8PtrTy));
}
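
// Sketch of the rewrite above (hypothetical names): every use of
//
//   %resume.fn = call ptr @llvm.coro.async.resume()
//
// is redirected to the concrete continuation (e.g. @f.resume.0), cast to ptr
// if necessary, and the suspend's resume-function operand is reset to undef.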

/// Coerce the arguments in \p FnArgs according to \p FnTy in \p CallArgs.
static void coerceArguments(IRBuilder<> &Builder, FunctionType *FnTy,
                            ArrayRef<Value *> FnArgs,
                            SmallVectorImpl<Value *> &CallArgs) {
  size_t ArgIdx = 0;
  for (auto *paramTy : FnTy->params()) {
    assert(ArgIdx < FnArgs.size());
    if (paramTy != FnArgs[ArgIdx]->getType())
      CallArgs.push_back(
          Builder.CreateBitOrPointerCast(FnArgs[ArgIdx], paramTy));
    else
      CallArgs.push_back(FnArgs[ArgIdx]);
    ++ArgIdx;
  }
}
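
// For example, if the callee prototype expects a ptr where an i64 was
// collected, the coercion above would emit (a sketch):
//
//   %arg.coerced = inttoptr i64 %arg to ptr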

CallInst *coro::createMustTailCall(DebugLoc Loc, Function *MustTailCallFn,
                                   ArrayRef<Value *> Arguments,
                                   IRBuilder<> &Builder) {
  auto *FnTy = MustTailCallFn->getFunctionType();
  // Coerce the arguments; LLVM optimizations seem to ignore the types in
  // vararg functions and throw away casts in optimized mode.
  SmallVector<Value *, 8> CallArgs;
  coerceArguments(Builder, FnTy, Arguments, CallArgs);

  auto *TailCall = Builder.CreateCall(FnTy, MustTailCallFn, CallArgs);
  TailCall->setTailCallKind(CallInst::TCK_MustTail);
  TailCall->setDebugLoc(Loc);
  TailCall->setCallingConv(MustTailCallFn->getCallingConv());
  return TailCall;
}
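
// A typical result, as used for the async ABI below (hypothetical names and
// calling convention):
//
//   musttail call swiftcc void @continuation(ptr %async.ctx)
//   ret void
//
// The caller is responsible for emitting the ret immediately after, as
// musttail requires.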

static void splitAsyncCoroutine(Function &F, coro::Shape &Shape,
                                SmallVectorImpl<Function *> &Clones) {
  assert(Shape.ABI == coro::ABI::Async);
  assert(Clones.empty());
  // Reset various things that the optimizer might have decided it
  // "knows" about the coroutine function due to not seeing a return.
  F.removeFnAttr(Attribute::NoReturn);
  F.removeRetAttr(Attribute::NoAlias);
  F.removeRetAttr(Attribute::NonNull);

  auto &Context = F.getContext();
  auto *Int8PtrTy = PointerType::getUnqual(Context);

  auto *Id = cast<CoroIdAsyncInst>(Shape.CoroBegin->getId());
  IRBuilder<> Builder(Id);

  auto *FramePtr = Id->getStorage();
  FramePtr = Builder.CreateBitOrPointerCast(FramePtr, Int8PtrTy);
  FramePtr = Builder.CreateConstInBoundsGEP1_32(
      Type::getInt8Ty(Context), FramePtr, Shape.AsyncLowering.FrameOffset,
      "async.ctx.frameptr");

  // Map all uses of llvm.coro.begin to the allocated frame pointer.
  {
    // Make sure we don't invalidate Shape.FramePtr.
    TrackingVH<Value> Handle(Shape.FramePtr);
    Shape.CoroBegin->replaceAllUsesWith(FramePtr);
    Shape.FramePtr = Handle.getValPtr();
  }

  // Create all the functions in order after the main function.
  auto NextF = std::next(F.getIterator());

  // Create a continuation function for each of the suspend points.
  Clones.reserve(Shape.CoroSuspends.size());
  for (size_t Idx = 0, End = Shape.CoroSuspends.size(); Idx != End; ++Idx) {
    auto *Suspend = cast<CoroSuspendAsyncInst>(Shape.CoroSuspends[Idx]);

    // Create the clone declaration.
    auto ResumeNameSuffix = ".resume.";
    auto ProjectionFunctionName =
        Suspend->getAsyncContextProjectionFunction()->getName();
    bool UseSwiftMangling = false;
    if (ProjectionFunctionName.equals("__swift_async_resume_project_context")) {
      ResumeNameSuffix = "TQ";
      UseSwiftMangling = true;
    } else if (ProjectionFunctionName.equals(
                   "__swift_async_resume_get_context")) {
      ResumeNameSuffix = "TY";
      UseSwiftMangling = true;
    }
    auto *Continuation = createCloneDeclaration(
        F, Shape,
        UseSwiftMangling ? ResumeNameSuffix + Twine(Idx) + "_"
                         : ResumeNameSuffix + Twine(Idx),
        NextF, Suspend);
    Clones.push_back(Continuation);

    // Insert a branch to a new return block immediately before the suspend
    // point.
    auto *SuspendBB = Suspend->getParent();
    auto *NewSuspendBB = SuspendBB->splitBasicBlock(Suspend);
    auto *Branch = cast<BranchInst>(SuspendBB->getTerminator());

    // Place it before the first suspend.
    auto *ReturnBB =
        BasicBlock::Create(F.getContext(), "coro.return", &F, NewSuspendBB);
    Branch->setSuccessor(0, ReturnBB);

    IRBuilder<> Builder(ReturnBB);

    // Insert the call to the tail call function and inline it.
    auto *Fn = Suspend->getMustTailCallFunction();
    SmallVector<Value *, 8> Args(Suspend->args());
    auto FnArgs = ArrayRef<Value *>(Args).drop_front(
        CoroSuspendAsyncInst::MustTailCallFuncArg + 1);
    auto *TailCall =
        coro::createMustTailCall(Suspend->getDebugLoc(), Fn, FnArgs, Builder);
    Builder.CreateRetVoid();
    InlineFunctionInfo FnInfo;
    auto InlineRes = InlineFunction(*TailCall, FnInfo);
    assert(InlineRes.isSuccess() && "Expected inlining to succeed");
    (void)InlineRes;

    // Replace the llvm.coro.async.resume intrinsic call.
    replaceAsyncResumeFunction(Suspend, Continuation);
  }

  assert(Clones.size() == Shape.CoroSuspends.size());
  for (size_t Idx = 0, End = Shape.CoroSuspends.size(); Idx != End; ++Idx) {
    auto *Suspend = Shape.CoroSuspends[Idx];
    auto *Clone = Clones[Idx];

    CoroCloner(F, "resume." + Twine(Idx), Shape, Clone, Suspend).create();
  }
}
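
// For Swift async lowerings the continuation clones pick up the mangling
// suffixes chosen above (a sketch; the mangled prefix and calling convention
// are frontend-defined):
//
//   define internal swiftcc void @"<mangled>TQ0_"(ptr %ctx)  ; suspend 0
//   define internal swiftcc void @"<mangled>TY1_"(ptr %ctx)  ; suspend 1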

static void splitRetconCoroutine(Function &F, coro::Shape &Shape,
                                 SmallVectorImpl<Function *> &Clones) {
  assert(Shape.ABI == coro::ABI::Retcon ||
         Shape.ABI == coro::ABI::RetconOnce);
  assert(Clones.empty());

  // Reset various things that the optimizer might have decided it
  // "knows" about the coroutine function due to not seeing a return.
  F.removeFnAttr(Attribute::NoReturn);
  F.removeRetAttr(Attribute::NoAlias);
  F.removeRetAttr(Attribute::NonNull);

  // Allocate the frame.
  auto *Id = cast<AnyCoroIdRetconInst>(Shape.CoroBegin->getId());
  Value *RawFramePtr;
  if (Shape.RetconLowering.IsFrameInlineInStorage) {
    RawFramePtr = Id->getStorage();
  } else {
    IRBuilder<> Builder(Id);

    // Determine the size of the frame.
    const DataLayout &DL = F.getParent()->getDataLayout();
    auto Size = DL.getTypeAllocSize(Shape.FrameTy);

    // Allocate. We don't need to update the call graph node because we're
    // going to recompute it from scratch after splitting.
    // FIXME: pass the required alignment
    RawFramePtr = Shape.emitAlloc(Builder, Builder.getInt64(Size), nullptr);
    RawFramePtr =
        Builder.CreateBitCast(RawFramePtr, Shape.CoroBegin->getType());

    // Stash the allocated frame pointer in the continuation storage.
    Builder.CreateStore(RawFramePtr, Id->getStorage());
  }

  // Map all uses of llvm.coro.begin to the allocated frame pointer.
  {
    // Make sure we don't invalidate Shape.FramePtr.
    TrackingVH<Value> Handle(Shape.FramePtr);
    Shape.CoroBegin->replaceAllUsesWith(RawFramePtr);
    Shape.FramePtr = Handle.getValPtr();
  }

  // Create a unique return block.
  BasicBlock *ReturnBB = nullptr;
  SmallVector<PHINode *, 4> ReturnPHIs;

  // Create all the functions in order after the main function.
  auto NextF = std::next(F.getIterator());

  // Create a continuation function for each of the suspend points.
  Clones.reserve(Shape.CoroSuspends.size());
  for (size_t i = 0, e = Shape.CoroSuspends.size(); i != e; ++i) {
    auto Suspend = cast<CoroSuspendRetconInst>(Shape.CoroSuspends[i]);

    // Create the clone declaration.
    auto Continuation =
        createCloneDeclaration(F, Shape, ".resume." + Twine(i), NextF, nullptr);
    Clones.push_back(Continuation);

    // Insert a branch to the unified return block immediately before
    // the suspend point.
    auto SuspendBB = Suspend->getParent();
    auto NewSuspendBB = SuspendBB->splitBasicBlock(Suspend);
    auto Branch = cast<BranchInst>(SuspendBB->getTerminator());

    // Create the unified return block.
    if (!ReturnBB) {
      // Place it before the first suspend.
      ReturnBB = BasicBlock::Create(F.getContext(), "coro.return", &F,
                                    NewSuspendBB);
      Shape.RetconLowering.ReturnBlock = ReturnBB;

      IRBuilder<> Builder(ReturnBB);

      // Create PHIs for all the return values.
      assert(ReturnPHIs.empty());

      // First, the continuation.
      ReturnPHIs.push_back(Builder.CreatePHI(Continuation->getType(),
                                             Shape.CoroSuspends.size()));

      // Next, all the directly-yielded values.
      for (auto *ResultTy : Shape.getRetconResultTypes())
        ReturnPHIs.push_back(Builder.CreatePHI(ResultTy,
                                               Shape.CoroSuspends.size()));

      // Build the return value.
      auto RetTy = F.getReturnType();

      // Cast the continuation value if necessary.
      // We can't rely on the types matching up because that type would
      // have to be infinite.
      auto CastedContinuationTy =
          (ReturnPHIs.size() == 1 ? RetTy : RetTy->getStructElementType(0));
      auto *CastedContinuation =
          Builder.CreateBitCast(ReturnPHIs[0], CastedContinuationTy);

      Value *RetV;
      if (ReturnPHIs.size() == 1) {
        RetV = CastedContinuation;
      } else {
        RetV = PoisonValue::get(RetTy);
        RetV = Builder.CreateInsertValue(RetV, CastedContinuation, 0);
        for (size_t I = 1, E = ReturnPHIs.size(); I != E; ++I)
          RetV = Builder.CreateInsertValue(RetV, ReturnPHIs[I], I);
      }

      Builder.CreateRet(RetV);
    }

    // Branch to the return block.
    Branch->setSuccessor(0, ReturnBB);
    ReturnPHIs[0]->addIncoming(Continuation, SuspendBB);
    size_t NextPHIIndex = 1;
    for (auto &VUse : Suspend->value_operands())
      ReturnPHIs[NextPHIIndex++]->addIncoming(&*VUse, SuspendBB);
    assert(NextPHIIndex == ReturnPHIs.size());
  }

  assert(Clones.size() == Shape.CoroSuspends.size());
  for (size_t i = 0, e = Shape.CoroSuspends.size(); i != e; ++i) {
    auto Suspend = Shape.CoroSuspends[i];
    auto Clone = Clones[i];

    CoroCloner(F, "resume." + Twine(i), Shape, Clone, Suspend).create();
  }
}
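
// The unified return block built above yields the continuation pointer plus
// any directly-yielded values. For a retcon coroutine whose return type is
// { ptr, i32 }, it emits roughly (hypothetical names):
//
//   %agg0 = insertvalue { ptr, i32 } poison, ptr %casted.continuation, 0
//   %agg1 = insertvalue { ptr, i32 } %agg0, i32 %yielded, 1
//   ret { ptr, i32 } %agg1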

namespace {
class PrettyStackTraceFunction : public PrettyStackTraceEntry {
  Function &F;

public:
  PrettyStackTraceFunction(Function &F) : F(F) {}
  void print(raw_ostream &OS) const override {
    OS << "While splitting coroutine ";
    F.printAsOperand(OS, /*print type*/ false, F.getParent());
    OS << "\n";
  }
};
} // namespace

static coro::Shape
splitCoroutine(Function &F, SmallVectorImpl<Function *> &Clones,
               TargetTransformInfo &TTI, bool OptimizeFrame,
               std::function<bool(Instruction &)> MaterializableCallback) {
  PrettyStackTraceFunction prettyStackTrace(F);

  // The suspend-crossing algorithm in buildCoroutineFrame gets tripped
  // up by uses in unreachable blocks, so remove them as a first pass.
  removeUnreachableBlocks(F);

  coro::Shape Shape(F, OptimizeFrame);
  if (!Shape.CoroBegin)
    return Shape;

  simplifySuspendPoints(Shape);
  buildCoroutineFrame(F, Shape, MaterializableCallback);
  replaceFrameSizeAndAlignment(Shape);

  // If there are no suspend points, no split is required; just remove
  // the allocation and deallocation blocks, as they are not needed.
  if (Shape.CoroSuspends.empty()) {
    handleNoSuspendCoroutine(Shape);
  } else {
    switch (Shape.ABI) {
    case coro::ABI::Switch:
      splitSwitchCoroutine(F, Shape, Clones, TTI);
      break;
    case coro::ABI::Async:
      splitAsyncCoroutine(F, Shape, Clones);
      break;
    case coro::ABI::Retcon:
    case coro::ABI::RetconOnce:
      splitRetconCoroutine(F, Shape, Clones);
      break;
    }
  }

  // Replace all the swifterror operations in the original function.
  // This invalidates SwiftErrorOps in the Shape.
  replaceSwiftErrorOps(F, Shape, nullptr);

  // Salvage debug intrinsics that point into the coroutine frame in the
  // original function. The Cloner has already salvaged debug info in the new
  // coroutine funclets.
  SmallDenseMap<Argument *, AllocaInst *, 4> ArgToAllocaMap;
  auto [DbgInsts, DPValues] = collectDbgVariableIntrinsics(F);
  for (auto *DDI : DbgInsts)
    coro::salvageDebugInfo(ArgToAllocaMap, *DDI, Shape.OptimizeFrame,
                           false /*UseEntryValue*/);
  for (DPValue *DPV : DPValues)
    coro::salvageDebugInfo(ArgToAllocaMap, *DPV, Shape.OptimizeFrame,
                           false /*UseEntryValue*/);
  return Shape;
}

/// Remove calls to llvm.coro.end in the original function.
static void removeCoroEnds(const coro::Shape &Shape) {
  for (auto *End : Shape.CoroEnds) {
    replaceCoroEnd(End, Shape, Shape.FramePtr, /*in resume*/ false, nullptr);
  }
}

static void updateCallGraphAfterCoroutineSplit(
    LazyCallGraph::Node &N, const coro::Shape &Shape,
    const SmallVectorImpl<Function *> &Clones, LazyCallGraph::SCC &C,
    LazyCallGraph &CG, CGSCCAnalysisManager &AM, CGSCCUpdateResult &UR,
    FunctionAnalysisManager &FAM) {
  if (!Shape.CoroBegin)
    return;

  if (Shape.ABI != coro::ABI::Switch)
    removeCoroEnds(Shape);
  else {
    for (llvm::AnyCoroEndInst *End : Shape.CoroEnds) {
      auto &Context = End->getContext();
      End->replaceAllUsesWith(ConstantInt::getFalse(Context));
      End->eraseFromParent();
    }
  }

  if (!Clones.empty()) {
    switch (Shape.ABI) {
    case coro::ABI::Switch:
      // Each clone in the Switch lowering is independent of the other clones.
      // Let the LazyCallGraph know about each one separately.
      for (Function *Clone : Clones)
        CG.addSplitFunction(N.getFunction(), *Clone);
      break;
    case coro::ABI::Async:
    case coro::ABI::Retcon:
    case coro::ABI::RetconOnce:
      // Each clone in the Async/Retcon lowering references the other clones.
      // Let the LazyCallGraph know about all of them at once.
      if (!Clones.empty())
        CG.addSplitRefRecursiveFunctions(N.getFunction(), Clones);
      break;
    }

    // Let the CGSCC infra handle the changes to the original function.
    updateCGAndAnalysisManagerForCGSCCPass(CG, C, N, AM, UR, FAM);
  }

  // Do some cleanup and let the CGSCC infra see if we've cleaned up any edges
  // to the split functions.
  postSplitCleanup(N.getFunction());
  updateCGAndAnalysisManagerForFunctionPass(CG, C, N, AM, UR, FAM);
}

/// Replace a call to llvm.coro.prepare.retcon.
static void replacePrepare(CallInst *Prepare, LazyCallGraph &CG,
                           LazyCallGraph::SCC &C) {
  auto CastFn = Prepare->getArgOperand(0); // as an i8*
  auto Fn = CastFn->stripPointerCasts();   // as its original type

  // Attempt to peephole this pattern:
  //    %0 = bitcast [[TYPE]] @some_function to i8*
  //    %1 = call @llvm.coro.prepare.retcon(i8* %0)
  //    %2 = bitcast %1 to [[TYPE]]
  // ==>
  //    %2 = @some_function
  for (Use &U : llvm::make_early_inc_range(Prepare->uses())) {
    // Look for bitcasts back to the original function type.
    auto *Cast = dyn_cast<BitCastInst>(U.getUser());
    if (!Cast || Cast->getType() != Fn->getType())
      continue;

    // Replace and remove the cast.
    Cast->replaceAllUsesWith(Fn);
    Cast->eraseFromParent();
  }

  // Replace any remaining uses with the function as an i8*.
  // This can never directly be a callee, so we don't need to update CG.
  Prepare->replaceAllUsesWith(CastFn);
  Prepare->eraseFromParent();

  // Kill dead bitcasts.
  while (auto *Cast = dyn_cast<BitCastInst>(CastFn)) {
    if (!Cast->use_empty())
      break;
    CastFn = Cast->getOperand(0);
    Cast->eraseFromParent();
  }
}

static bool replaceAllPrepares(Function *PrepareFn, LazyCallGraph &CG,
                               LazyCallGraph::SCC &C) {
  bool Changed = false;
  for (Use &P : llvm::make_early_inc_range(PrepareFn->uses())) {
    // Intrinsics can only be used in calls.
    auto *Prepare = cast<CallInst>(P.getUser());
    replacePrepare(Prepare, CG, C);
    Changed = true;
  }

  return Changed;
}

static void addPrepareFunction(const Module &M,
                               SmallVectorImpl<Function *> &Fns,
                               StringRef Name) {
  auto *PrepareFn = M.getFunction(Name);
  if (PrepareFn && !PrepareFn->use_empty())
    Fns.push_back(PrepareFn);
}

CoroSplitPass::CoroSplitPass(bool OptimizeFrame)
    : MaterializableCallback(coro::defaultMaterializable),
      OptimizeFrame(OptimizeFrame) {}

PreservedAnalyses CoroSplitPass::run(LazyCallGraph::SCC &C,
                                     CGSCCAnalysisManager &AM,
                                     LazyCallGraph &CG, CGSCCUpdateResult &UR) {
  // NB: One invariant of a valid LazyCallGraph::SCC is that it must contain a
  //     non-zero number of nodes, so we assume that here and grab the first
  //     node's function's module.
  Module &M = *C.begin()->getFunction().getParent();
  auto &FAM =
      AM.getResult<FunctionAnalysisManagerCGSCCProxy>(C, CG).getManager();

  // Check for uses of llvm.coro.prepare.retcon/async.
  SmallVector<Function *, 2> PrepareFns;
  addPrepareFunction(M, PrepareFns, "llvm.coro.prepare.retcon");
  addPrepareFunction(M, PrepareFns, "llvm.coro.prepare.async");

  // Find coroutines for processing.
  SmallVector<LazyCallGraph::Node *> Coroutines;
  for (LazyCallGraph::Node &N : C)
    if (N.getFunction().isPresplitCoroutine())
      Coroutines.push_back(&N);

  if (Coroutines.empty() && PrepareFns.empty())
    return PreservedAnalyses::all();

  if (Coroutines.empty()) {
    for (auto *PrepareFn : PrepareFns) {
      replaceAllPrepares(PrepareFn, CG, C);
    }
  }

  // Split all the coroutines.
  for (LazyCallGraph::Node *N : Coroutines) {
    Function &F = N->getFunction();
    LLVM_DEBUG(dbgs() << "CoroSplit: Processing coroutine '" << F.getName()
                      << "'\n");
    F.setSplittedCoroutine();

    SmallVector<Function *, 4> Clones;
    auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
    const coro::Shape Shape =
        splitCoroutine(F, Clones, FAM.getResult<TargetIRAnalysis>(F),
                       OptimizeFrame, MaterializableCallback);
    updateCallGraphAfterCoroutineSplit(*N, Shape, Clones, C, CG, AM, UR, FAM);

    ORE.emit([&]() {
      return OptimizationRemark(DEBUG_TYPE, "CoroSplit", &F)
             << "Split '" << ore::NV("function", F.getName())
             << "' (frame_size=" << ore::NV("frame_size", Shape.FrameSize)
             << ", align=" << ore::NV("align", Shape.FrameAlign.value()) << ")";
    });

    if (!Shape.CoroSuspends.empty()) {
      // Run the CGSCC pipeline on the original and newly split functions.
      UR.CWorklist.insert(&C);
      for (Function *Clone : Clones)
        UR.CWorklist.insert(CG.lookupSCC(CG.get(*Clone)));
    }
  }

  if (!PrepareFns.empty()) {
    for (auto *PrepareFn : PrepareFns) {
      replaceAllPrepares(PrepareFn, CG, C);
    }
  }

  return PreservedAnalyses::none();
}