1 //===- CoroSplit.cpp - Converts a coroutine into a state machine ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 // This pass builds the coroutine frame and outlines resume and destroy parts
9 // of the coroutine into separate functions.
10 //
// We present a coroutine to LLVM as an ordinary function with suspension
// points marked up with intrinsics. We let the optimizer party on the coroutine
// as a single function for as long as possible. Shortly before the coroutine is
// eligible to be inlined into its callers, we split up the coroutine into parts
// corresponding to the initial, resume and destroy invocations of the
// coroutine, add them to the current SCC and restart the IPO pipeline to
// optimize the coroutine subfunctions we extracted before proceeding to the
// caller of the coroutine.
19 //===----------------------------------------------------------------------===//
20
21 #include "llvm/Transforms/Coroutines/CoroSplit.h"
22 #include "CoroInstr.h"
23 #include "CoroInternal.h"
24 #include "llvm/ADT/DenseMap.h"
25 #include "llvm/ADT/SmallPtrSet.h"
26 #include "llvm/ADT/SmallVector.h"
27 #include "llvm/ADT/StringRef.h"
28 #include "llvm/ADT/Twine.h"
29 #include "llvm/Analysis/CallGraph.h"
30 #include "llvm/Analysis/CallGraphSCCPass.h"
31 #include "llvm/IR/Argument.h"
32 #include "llvm/IR/Attributes.h"
33 #include "llvm/IR/BasicBlock.h"
34 #include "llvm/IR/CFG.h"
35 #include "llvm/IR/CallingConv.h"
36 #include "llvm/IR/Constants.h"
37 #include "llvm/IR/DataLayout.h"
38 #include "llvm/IR/DerivedTypes.h"
39 #include "llvm/IR/Function.h"
40 #include "llvm/IR/GlobalValue.h"
41 #include "llvm/IR/GlobalVariable.h"
42 #include "llvm/IR/IRBuilder.h"
43 #include "llvm/IR/InstIterator.h"
44 #include "llvm/IR/InstrTypes.h"
45 #include "llvm/IR/Instruction.h"
46 #include "llvm/IR/Instructions.h"
47 #include "llvm/IR/IntrinsicInst.h"
48 #include "llvm/IR/LLVMContext.h"
49 #include "llvm/IR/LegacyPassManager.h"
50 #include "llvm/IR/Module.h"
51 #include "llvm/IR/Type.h"
52 #include "llvm/IR/Value.h"
53 #include "llvm/IR/Verifier.h"
54 #include "llvm/InitializePasses.h"
55 #include "llvm/Pass.h"
56 #include "llvm/Support/Casting.h"
57 #include "llvm/Support/Debug.h"
58 #include "llvm/Support/PrettyStackTrace.h"
59 #include "llvm/Support/raw_ostream.h"
60 #include "llvm/Transforms/Scalar.h"
61 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
62 #include "llvm/Transforms/Utils/CallGraphUpdater.h"
63 #include "llvm/Transforms/Utils/Cloning.h"
64 #include "llvm/Transforms/Utils/Local.h"
65 #include "llvm/Transforms/Utils/ValueMapper.h"
66 #include <cassert>
67 #include <cstddef>
68 #include <cstdint>
69 #include <initializer_list>
70 #include <iterator>
71
72 using namespace llvm;
73
74 #define DEBUG_TYPE "coro-split"
75
76 namespace {
77
78 /// A little helper class for building
79 class CoroCloner {
80 public:
81 enum class Kind {
82 /// The shared resume function for a switch lowering.
83 SwitchResume,
84
85 /// The shared unwind function for a switch lowering.
86 SwitchUnwind,
87
88 /// The shared cleanup function for a switch lowering.
89 SwitchCleanup,
90
91 /// An individual continuation function.
92 Continuation,
93 };
94 private:
95 Function &OrigF;
96 Function *NewF;
97 const Twine &Suffix;
98 coro::Shape &Shape;
99 Kind FKind;
100 ValueToValueMapTy VMap;
101 IRBuilder<> Builder;
102 Value *NewFramePtr = nullptr;
103 Value *SwiftErrorSlot = nullptr;
104
105 /// The active suspend instruction; meaningful only for continuation ABIs.
106 AnyCoroSuspendInst *ActiveSuspend = nullptr;
107
108 public:
109 /// Create a cloner for a switch lowering.
CoroCloner(Function & OrigF,const Twine & Suffix,coro::Shape & Shape,Kind FKind)110 CoroCloner(Function &OrigF, const Twine &Suffix, coro::Shape &Shape,
111 Kind FKind)
112 : OrigF(OrigF), NewF(nullptr), Suffix(Suffix), Shape(Shape),
113 FKind(FKind), Builder(OrigF.getContext()) {
114 assert(Shape.ABI == coro::ABI::Switch);
115 }
116
117 /// Create a cloner for a continuation lowering.
CoroCloner(Function & OrigF,const Twine & Suffix,coro::Shape & Shape,Function * NewF,AnyCoroSuspendInst * ActiveSuspend)118 CoroCloner(Function &OrigF, const Twine &Suffix, coro::Shape &Shape,
119 Function *NewF, AnyCoroSuspendInst *ActiveSuspend)
120 : OrigF(OrigF), NewF(NewF), Suffix(Suffix), Shape(Shape),
121 FKind(Kind::Continuation), Builder(OrigF.getContext()),
122 ActiveSuspend(ActiveSuspend) {
123 assert(Shape.ABI == coro::ABI::Retcon ||
124 Shape.ABI == coro::ABI::RetconOnce);
125 assert(NewF && "need existing function for continuation");
126 assert(ActiveSuspend && "need active suspend point for continuation");
127 }
128
getFunction() const129 Function *getFunction() const {
130 assert(NewF != nullptr && "declaration not yet set");
131 return NewF;
132 }
133
134 void create();
135
136 private:
isSwitchDestroyFunction()137 bool isSwitchDestroyFunction() {
138 switch (FKind) {
139 case Kind::Continuation:
140 case Kind::SwitchResume:
141 return false;
142 case Kind::SwitchUnwind:
143 case Kind::SwitchCleanup:
144 return true;
145 }
146 llvm_unreachable("Unknown CoroCloner::Kind enum");
147 }
148
149 void createDeclaration();
150 void replaceEntryBlock();
151 Value *deriveNewFramePointer();
152 void replaceRetconSuspendUses();
153 void replaceCoroSuspends();
154 void replaceCoroEnds();
155 void replaceSwiftErrorOps();
156 void handleFinalSuspend();
157 void maybeFreeContinuationStorage();
158 };
159
160 } // end anonymous namespace
161
maybeFreeRetconStorage(IRBuilder<> & Builder,const coro::Shape & Shape,Value * FramePtr,CallGraph * CG)162 static void maybeFreeRetconStorage(IRBuilder<> &Builder,
163 const coro::Shape &Shape, Value *FramePtr,
164 CallGraph *CG) {
165 assert(Shape.ABI == coro::ABI::Retcon ||
166 Shape.ABI == coro::ABI::RetconOnce);
167 if (Shape.RetconLowering.IsFrameInlineInStorage)
168 return;
169
170 Shape.emitDealloc(Builder, FramePtr, CG);
171 }
172
173 /// Replace a non-unwind call to llvm.coro.end.
replaceFallthroughCoroEnd(CoroEndInst * End,const coro::Shape & Shape,Value * FramePtr,bool InResume,CallGraph * CG)174 static void replaceFallthroughCoroEnd(CoroEndInst *End,
175 const coro::Shape &Shape, Value *FramePtr,
176 bool InResume, CallGraph *CG) {
177 // Start inserting right before the coro.end.
178 IRBuilder<> Builder(End);
179
180 // Create the return instruction.
181 switch (Shape.ABI) {
182 // The cloned functions in switch-lowering always return void.
183 case coro::ABI::Switch:
184 // coro.end doesn't immediately end the coroutine in the main function
185 // in this lowering, because we need to deallocate the coroutine.
186 if (!InResume)
187 return;
188 Builder.CreateRetVoid();
189 break;
190
191 // In unique continuation lowering, the continuations always return void.
192 // But we may have implicitly allocated storage.
193 case coro::ABI::RetconOnce:
194 maybeFreeRetconStorage(Builder, Shape, FramePtr, CG);
195 Builder.CreateRetVoid();
196 break;
197
198 // In non-unique continuation lowering, we signal completion by returning
199 // a null continuation.
200 case coro::ABI::Retcon: {
201 maybeFreeRetconStorage(Builder, Shape, FramePtr, CG);
202 auto RetTy = Shape.getResumeFunctionType()->getReturnType();
203 auto RetStructTy = dyn_cast<StructType>(RetTy);
204 PointerType *ContinuationTy =
205 cast<PointerType>(RetStructTy ? RetStructTy->getElementType(0) : RetTy);
206
207 Value *ReturnValue = ConstantPointerNull::get(ContinuationTy);
208 if (RetStructTy) {
209 ReturnValue = Builder.CreateInsertValue(UndefValue::get(RetStructTy),
210 ReturnValue, 0);
211 }
212 Builder.CreateRet(ReturnValue);
213 break;
214 }
215 }
216
217 // Remove the rest of the block, by splitting it into an unreachable block.
218 auto *BB = End->getParent();
219 BB->splitBasicBlock(End);
220 BB->getTerminator()->eraseFromParent();
221 }
222
223 /// Replace an unwind call to llvm.coro.end.
replaceUnwindCoroEnd(CoroEndInst * End,const coro::Shape & Shape,Value * FramePtr,bool InResume,CallGraph * CG)224 static void replaceUnwindCoroEnd(CoroEndInst *End, const coro::Shape &Shape,
225 Value *FramePtr, bool InResume, CallGraph *CG){
226 IRBuilder<> Builder(End);
227
228 switch (Shape.ABI) {
229 // In switch-lowering, this does nothing in the main function.
230 case coro::ABI::Switch:
231 if (!InResume)
232 return;
233 break;
234
235 // In continuation-lowering, this frees the continuation storage.
236 case coro::ABI::Retcon:
237 case coro::ABI::RetconOnce:
238 maybeFreeRetconStorage(Builder, Shape, FramePtr, CG);
239 break;
240 }
241
242 // If coro.end has an associated bundle, add cleanupret instruction.
243 if (auto Bundle = End->getOperandBundle(LLVMContext::OB_funclet)) {
244 auto *FromPad = cast<CleanupPadInst>(Bundle->Inputs[0]);
245 auto *CleanupRet = Builder.CreateCleanupRet(FromPad, nullptr);
246 End->getParent()->splitBasicBlock(End);
247 CleanupRet->getParent()->getTerminator()->eraseFromParent();
248 }
249 }
250
replaceCoroEnd(CoroEndInst * End,const coro::Shape & Shape,Value * FramePtr,bool InResume,CallGraph * CG)251 static void replaceCoroEnd(CoroEndInst *End, const coro::Shape &Shape,
252 Value *FramePtr, bool InResume, CallGraph *CG) {
253 if (End->isUnwind())
254 replaceUnwindCoroEnd(End, Shape, FramePtr, InResume, CG);
255 else
256 replaceFallthroughCoroEnd(End, Shape, FramePtr, InResume, CG);
257
258 auto &Context = End->getContext();
259 End->replaceAllUsesWith(InResume ? ConstantInt::getTrue(Context)
260 : ConstantInt::getFalse(Context));
261 End->eraseFromParent();
262 }
263
264 // Create an entry block for a resume function with a switch that will jump to
265 // suspend points.
createResumeEntryBlock(Function & F,coro::Shape & Shape)266 static void createResumeEntryBlock(Function &F, coro::Shape &Shape) {
267 assert(Shape.ABI == coro::ABI::Switch);
268 LLVMContext &C = F.getContext();
269
270 // resume.entry:
271 // %index.addr = getelementptr inbounds %f.Frame, %f.Frame* %FramePtr, i32 0,
272 // i32 2
273 // % index = load i32, i32* %index.addr
274 // switch i32 %index, label %unreachable [
275 // i32 0, label %resume.0
276 // i32 1, label %resume.1
277 // ...
278 // ]
279
280 auto *NewEntry = BasicBlock::Create(C, "resume.entry", &F);
281 auto *UnreachBB = BasicBlock::Create(C, "unreachable", &F);
282
283 IRBuilder<> Builder(NewEntry);
284 auto *FramePtr = Shape.FramePtr;
285 auto *FrameTy = Shape.FrameTy;
286 auto *GepIndex = Builder.CreateStructGEP(
287 FrameTy, FramePtr, Shape.getSwitchIndexField(), "index.addr");
288 auto *Index = Builder.CreateLoad(Shape.getIndexType(), GepIndex, "index");
289 auto *Switch =
290 Builder.CreateSwitch(Index, UnreachBB, Shape.CoroSuspends.size());
291 Shape.SwitchLowering.ResumeSwitch = Switch;
292
293 size_t SuspendIndex = 0;
294 for (auto *AnyS : Shape.CoroSuspends) {
295 auto *S = cast<CoroSuspendInst>(AnyS);
296 ConstantInt *IndexVal = Shape.getIndex(SuspendIndex);
297
298 // Replace CoroSave with a store to Index:
299 // %index.addr = getelementptr %f.frame... (index field number)
300 // store i32 0, i32* %index.addr1
301 auto *Save = S->getCoroSave();
302 Builder.SetInsertPoint(Save);
303 if (S->isFinal()) {
304 // Final suspend point is represented by storing zero in ResumeFnAddr.
305 auto *GepIndex = Builder.CreateStructGEP(FrameTy, FramePtr,
306 coro::Shape::SwitchFieldIndex::Resume,
307 "ResumeFn.addr");
308 auto *NullPtr = ConstantPointerNull::get(cast<PointerType>(
309 cast<PointerType>(GepIndex->getType())->getElementType()));
310 Builder.CreateStore(NullPtr, GepIndex);
311 } else {
312 auto *GepIndex = Builder.CreateStructGEP(
313 FrameTy, FramePtr, Shape.getSwitchIndexField(), "index.addr");
314 Builder.CreateStore(IndexVal, GepIndex);
315 }
316 Save->replaceAllUsesWith(ConstantTokenNone::get(C));
317 Save->eraseFromParent();
318
319 // Split block before and after coro.suspend and add a jump from an entry
320 // switch:
321 //
322 // whateverBB:
323 // whatever
324 // %0 = call i8 @llvm.coro.suspend(token none, i1 false)
325 // switch i8 %0, label %suspend[i8 0, label %resume
326 // i8 1, label %cleanup]
327 // becomes:
328 //
329 // whateverBB:
330 // whatever
331 // br label %resume.0.landing
332 //
333 // resume.0: ; <--- jump from the switch in the resume.entry
334 // %0 = tail call i8 @llvm.coro.suspend(token none, i1 false)
335 // br label %resume.0.landing
336 //
337 // resume.0.landing:
338 // %1 = phi i8[-1, %whateverBB], [%0, %resume.0]
339 // switch i8 % 1, label %suspend [i8 0, label %resume
340 // i8 1, label %cleanup]
341
342 auto *SuspendBB = S->getParent();
343 auto *ResumeBB =
344 SuspendBB->splitBasicBlock(S, "resume." + Twine(SuspendIndex));
345 auto *LandingBB = ResumeBB->splitBasicBlock(
346 S->getNextNode(), ResumeBB->getName() + Twine(".landing"));
347 Switch->addCase(IndexVal, ResumeBB);
348
349 cast<BranchInst>(SuspendBB->getTerminator())->setSuccessor(0, LandingBB);
350 auto *PN = PHINode::Create(Builder.getInt8Ty(), 2, "", &LandingBB->front());
351 S->replaceAllUsesWith(PN);
352 PN->addIncoming(Builder.getInt8(-1), SuspendBB);
353 PN->addIncoming(S, ResumeBB);
354
355 ++SuspendIndex;
356 }
357
358 Builder.SetInsertPoint(UnreachBB);
359 Builder.CreateUnreachable();
360
361 Shape.SwitchLowering.ResumeEntryBlock = NewEntry;
362 }
363
364
365 // Rewrite final suspend point handling. We do not use suspend index to
366 // represent the final suspend point. Instead we zero-out ResumeFnAddr in the
367 // coroutine frame, since it is undefined behavior to resume a coroutine
368 // suspended at the final suspend point. Thus, in the resume function, we can
369 // simply remove the last case (when coro::Shape is built, the final suspend
370 // point (if present) is always the last element of CoroSuspends array).
371 // In the destroy function, we add a code sequence to check if ResumeFnAddress
372 // is Null, and if so, jump to the appropriate label to handle cleanup from the
373 // final suspend point.
handleFinalSuspend()374 void CoroCloner::handleFinalSuspend() {
375 assert(Shape.ABI == coro::ABI::Switch &&
376 Shape.SwitchLowering.HasFinalSuspend);
377 auto *Switch = cast<SwitchInst>(VMap[Shape.SwitchLowering.ResumeSwitch]);
378 auto FinalCaseIt = std::prev(Switch->case_end());
379 BasicBlock *ResumeBB = FinalCaseIt->getCaseSuccessor();
380 Switch->removeCase(FinalCaseIt);
381 if (isSwitchDestroyFunction()) {
382 BasicBlock *OldSwitchBB = Switch->getParent();
383 auto *NewSwitchBB = OldSwitchBB->splitBasicBlock(Switch, "Switch");
384 Builder.SetInsertPoint(OldSwitchBB->getTerminator());
385 auto *GepIndex = Builder.CreateStructGEP(Shape.FrameTy, NewFramePtr,
386 coro::Shape::SwitchFieldIndex::Resume,
387 "ResumeFn.addr");
388 auto *Load = Builder.CreateLoad(Shape.getSwitchResumePointerType(),
389 GepIndex);
390 auto *Cond = Builder.CreateIsNull(Load);
391 Builder.CreateCondBr(Cond, ResumeBB, NewSwitchBB);
392 OldSwitchBB->getTerminator()->eraseFromParent();
393 }
394 }
395
createCloneDeclaration(Function & OrigF,coro::Shape & Shape,const Twine & Suffix,Module::iterator InsertBefore)396 static Function *createCloneDeclaration(Function &OrigF, coro::Shape &Shape,
397 const Twine &Suffix,
398 Module::iterator InsertBefore) {
399 Module *M = OrigF.getParent();
400 auto *FnTy = Shape.getResumeFunctionType();
401
402 Function *NewF =
403 Function::Create(FnTy, GlobalValue::LinkageTypes::InternalLinkage,
404 OrigF.getAddressSpace(), OrigF.getName() + Suffix);
405 NewF->addParamAttr(0, Attribute::NonNull);
406 NewF->addParamAttr(0, Attribute::NoAlias);
407
408 M->getFunctionList().insert(InsertBefore, NewF);
409
410 return NewF;
411 }
412
413 /// Replace uses of the active llvm.coro.suspend.retcon call with the
414 /// arguments to the continuation function.
415 ///
416 /// This assumes that the builder has a meaningful insertion point.
replaceRetconSuspendUses()417 void CoroCloner::replaceRetconSuspendUses() {
418 assert(Shape.ABI == coro::ABI::Retcon ||
419 Shape.ABI == coro::ABI::RetconOnce);
420
421 auto NewS = VMap[ActiveSuspend];
422 if (NewS->use_empty()) return;
423
424 // Copy out all the continuation arguments after the buffer pointer into
425 // an easily-indexed data structure for convenience.
426 SmallVector<Value*, 8> Args;
427 for (auto I = std::next(NewF->arg_begin()), E = NewF->arg_end(); I != E; ++I)
428 Args.push_back(&*I);
429
430 // If the suspend returns a single scalar value, we can just do a simple
431 // replacement.
432 if (!isa<StructType>(NewS->getType())) {
433 assert(Args.size() == 1);
434 NewS->replaceAllUsesWith(Args.front());
435 return;
436 }
437
438 // Try to peephole extracts of an aggregate return.
439 for (auto UI = NewS->use_begin(), UE = NewS->use_end(); UI != UE; ) {
440 auto EVI = dyn_cast<ExtractValueInst>((UI++)->getUser());
441 if (!EVI || EVI->getNumIndices() != 1)
442 continue;
443
444 EVI->replaceAllUsesWith(Args[EVI->getIndices().front()]);
445 EVI->eraseFromParent();
446 }
447
448 // If we have no remaining uses, we're done.
449 if (NewS->use_empty()) return;
450
451 // Otherwise, we need to create an aggregate.
452 Value *Agg = UndefValue::get(NewS->getType());
453 for (size_t I = 0, E = Args.size(); I != E; ++I)
454 Agg = Builder.CreateInsertValue(Agg, Args[I], I);
455
456 NewS->replaceAllUsesWith(Agg);
457 }
458
replaceCoroSuspends()459 void CoroCloner::replaceCoroSuspends() {
460 Value *SuspendResult;
461
462 switch (Shape.ABI) {
463 // In switch lowering, replace coro.suspend with the appropriate value
464 // for the type of function we're extracting.
465 // Replacing coro.suspend with (0) will result in control flow proceeding to
466 // a resume label associated with a suspend point, replacing it with (1) will
467 // result in control flow proceeding to a cleanup label associated with this
468 // suspend point.
469 case coro::ABI::Switch:
470 SuspendResult = Builder.getInt8(isSwitchDestroyFunction() ? 1 : 0);
471 break;
472
473 // In returned-continuation lowering, the arguments from earlier
474 // continuations are theoretically arbitrary, and they should have been
475 // spilled.
476 case coro::ABI::RetconOnce:
477 case coro::ABI::Retcon:
478 return;
479 }
480
481 for (AnyCoroSuspendInst *CS : Shape.CoroSuspends) {
482 // The active suspend was handled earlier.
483 if (CS == ActiveSuspend) continue;
484
485 auto *MappedCS = cast<AnyCoroSuspendInst>(VMap[CS]);
486 MappedCS->replaceAllUsesWith(SuspendResult);
487 MappedCS->eraseFromParent();
488 }
489 }
490
replaceCoroEnds()491 void CoroCloner::replaceCoroEnds() {
492 for (CoroEndInst *CE : Shape.CoroEnds) {
493 // We use a null call graph because there's no call graph node for
494 // the cloned function yet. We'll just be rebuilding that later.
495 auto NewCE = cast<CoroEndInst>(VMap[CE]);
496 replaceCoroEnd(NewCE, Shape, NewFramePtr, /*in resume*/ true, nullptr);
497 }
498 }
499
replaceSwiftErrorOps(Function & F,coro::Shape & Shape,ValueToValueMapTy * VMap)500 static void replaceSwiftErrorOps(Function &F, coro::Shape &Shape,
501 ValueToValueMapTy *VMap) {
502 Value *CachedSlot = nullptr;
503 auto getSwiftErrorSlot = [&](Type *ValueTy) -> Value * {
504 if (CachedSlot) {
505 assert(CachedSlot->getType()->getPointerElementType() == ValueTy &&
506 "multiple swifterror slots in function with different types");
507 return CachedSlot;
508 }
509
510 // Check if the function has a swifterror argument.
511 for (auto &Arg : F.args()) {
512 if (Arg.isSwiftError()) {
513 CachedSlot = &Arg;
514 assert(Arg.getType()->getPointerElementType() == ValueTy &&
515 "swifterror argument does not have expected type");
516 return &Arg;
517 }
518 }
519
520 // Create a swifterror alloca.
521 IRBuilder<> Builder(F.getEntryBlock().getFirstNonPHIOrDbg());
522 auto Alloca = Builder.CreateAlloca(ValueTy);
523 Alloca->setSwiftError(true);
524
525 CachedSlot = Alloca;
526 return Alloca;
527 };
528
529 for (CallInst *Op : Shape.SwiftErrorOps) {
530 auto MappedOp = VMap ? cast<CallInst>((*VMap)[Op]) : Op;
531 IRBuilder<> Builder(MappedOp);
532
533 // If there are no arguments, this is a 'get' operation.
534 Value *MappedResult;
535 if (Op->getNumArgOperands() == 0) {
536 auto ValueTy = Op->getType();
537 auto Slot = getSwiftErrorSlot(ValueTy);
538 MappedResult = Builder.CreateLoad(ValueTy, Slot);
539 } else {
540 assert(Op->getNumArgOperands() == 1);
541 auto Value = MappedOp->getArgOperand(0);
542 auto ValueTy = Value->getType();
543 auto Slot = getSwiftErrorSlot(ValueTy);
544 Builder.CreateStore(Value, Slot);
545 MappedResult = Slot;
546 }
547
548 MappedOp->replaceAllUsesWith(MappedResult);
549 MappedOp->eraseFromParent();
550 }
551
552 // If we're updating the original function, we've invalidated SwiftErrorOps.
553 if (VMap == nullptr) {
554 Shape.SwiftErrorOps.clear();
555 }
556 }
557
replaceSwiftErrorOps()558 void CoroCloner::replaceSwiftErrorOps() {
559 ::replaceSwiftErrorOps(*NewF, Shape, &VMap);
560 }
561
replaceEntryBlock()562 void CoroCloner::replaceEntryBlock() {
563 // In the original function, the AllocaSpillBlock is a block immediately
564 // following the allocation of the frame object which defines GEPs for
565 // all the allocas that have been moved into the frame, and it ends by
566 // branching to the original beginning of the coroutine. Make this
567 // the entry block of the cloned function.
568 auto *Entry = cast<BasicBlock>(VMap[Shape.AllocaSpillBlock]);
569 auto *OldEntry = &NewF->getEntryBlock();
570 Entry->setName("entry" + Suffix);
571 Entry->moveBefore(OldEntry);
572 Entry->getTerminator()->eraseFromParent();
573
574 // Clear all predecessors of the new entry block. There should be
575 // exactly one predecessor, which we created when splitting out
576 // AllocaSpillBlock to begin with.
577 assert(Entry->hasOneUse());
578 auto BranchToEntry = cast<BranchInst>(Entry->user_back());
579 assert(BranchToEntry->isUnconditional());
580 Builder.SetInsertPoint(BranchToEntry);
581 Builder.CreateUnreachable();
582 BranchToEntry->eraseFromParent();
583
584 // Move any allocas into Entry that weren't moved into the frame.
585 for (auto IT = OldEntry->begin(), End = OldEntry->end(); IT != End;) {
586 Instruction &I = *IT++;
587 if (!isa<AllocaInst>(&I) || I.use_empty())
588 continue;
589
590 I.moveBefore(*Entry, Entry->getFirstInsertionPt());
591 }
592
593 // Branch from the entry to the appropriate place.
594 Builder.SetInsertPoint(Entry);
595 switch (Shape.ABI) {
596 case coro::ABI::Switch: {
597 // In switch-lowering, we built a resume-entry block in the original
598 // function. Make the entry block branch to this.
599 auto *SwitchBB =
600 cast<BasicBlock>(VMap[Shape.SwitchLowering.ResumeEntryBlock]);
601 Builder.CreateBr(SwitchBB);
602 break;
603 }
604
605 case coro::ABI::Retcon:
606 case coro::ABI::RetconOnce: {
607 // In continuation ABIs, we want to branch to immediately after the
608 // active suspend point. Earlier phases will have put the suspend in its
609 // own basic block, so just thread our jump directly to its successor.
610 auto MappedCS = cast<CoroSuspendRetconInst>(VMap[ActiveSuspend]);
611 auto Branch = cast<BranchInst>(MappedCS->getNextNode());
612 assert(Branch->isUnconditional());
613 Builder.CreateBr(Branch->getSuccessor(0));
614 break;
615 }
616 }
617 }
618
619 /// Derive the value of the new frame pointer.
deriveNewFramePointer()620 Value *CoroCloner::deriveNewFramePointer() {
621 // Builder should be inserting to the front of the new entry block.
622
623 switch (Shape.ABI) {
624 // In switch-lowering, the argument is the frame pointer.
625 case coro::ABI::Switch:
626 return &*NewF->arg_begin();
627
628 // In continuation-lowering, the argument is the opaque storage.
629 case coro::ABI::Retcon:
630 case coro::ABI::RetconOnce: {
631 Argument *NewStorage = &*NewF->arg_begin();
632 auto FramePtrTy = Shape.FrameTy->getPointerTo();
633
634 // If the storage is inline, just bitcast to the storage to the frame type.
635 if (Shape.RetconLowering.IsFrameInlineInStorage)
636 return Builder.CreateBitCast(NewStorage, FramePtrTy);
637
638 // Otherwise, load the real frame from the opaque storage.
639 auto FramePtrPtr =
640 Builder.CreateBitCast(NewStorage, FramePtrTy->getPointerTo());
641 return Builder.CreateLoad(FramePtrTy, FramePtrPtr);
642 }
643 }
644 llvm_unreachable("bad ABI");
645 }
646
addFramePointerAttrs(AttributeList & Attrs,LLVMContext & Context,unsigned ParamIndex,uint64_t Size,Align Alignment)647 static void addFramePointerAttrs(AttributeList &Attrs, LLVMContext &Context,
648 unsigned ParamIndex,
649 uint64_t Size, Align Alignment) {
650 AttrBuilder ParamAttrs;
651 ParamAttrs.addAttribute(Attribute::NonNull);
652 ParamAttrs.addAttribute(Attribute::NoAlias);
653 ParamAttrs.addAlignmentAttr(Alignment);
654 ParamAttrs.addDereferenceableAttr(Size);
655 Attrs = Attrs.addParamAttributes(Context, ParamIndex, ParamAttrs);
656 }
657
658 /// Clone the body of the original function into a resume function of
659 /// some sort.
create()660 void CoroCloner::create() {
661 // Create the new function if we don't already have one.
662 if (!NewF) {
663 NewF = createCloneDeclaration(OrigF, Shape, Suffix,
664 OrigF.getParent()->end());
665 }
666
667 // Replace all args with undefs. The buildCoroutineFrame algorithm already
668 // rewritten access to the args that occurs after suspend points with loads
669 // and stores to/from the coroutine frame.
670 for (Argument &A : OrigF.args())
671 VMap[&A] = UndefValue::get(A.getType());
672
673 SmallVector<ReturnInst *, 4> Returns;
674
675 // Ignore attempts to change certain attributes of the function.
676 // TODO: maybe there should be a way to suppress this during cloning?
677 auto savedVisibility = NewF->getVisibility();
678 auto savedUnnamedAddr = NewF->getUnnamedAddr();
679 auto savedDLLStorageClass = NewF->getDLLStorageClass();
680
681 // NewF's linkage (which CloneFunctionInto does *not* change) might not
682 // be compatible with the visibility of OrigF (which it *does* change),
683 // so protect against that.
684 auto savedLinkage = NewF->getLinkage();
685 NewF->setLinkage(llvm::GlobalValue::ExternalLinkage);
686
687 CloneFunctionInto(NewF, &OrigF, VMap, /*ModuleLevelChanges=*/true, Returns);
688
689 NewF->setLinkage(savedLinkage);
690 NewF->setVisibility(savedVisibility);
691 NewF->setUnnamedAddr(savedUnnamedAddr);
692 NewF->setDLLStorageClass(savedDLLStorageClass);
693
694 auto &Context = NewF->getContext();
695
696 // Replace the attributes of the new function:
697 auto OrigAttrs = NewF->getAttributes();
698 auto NewAttrs = AttributeList();
699
700 switch (Shape.ABI) {
701 case coro::ABI::Switch:
702 // Bootstrap attributes by copying function attributes from the
703 // original function. This should include optimization settings and so on.
704 NewAttrs = NewAttrs.addAttributes(Context, AttributeList::FunctionIndex,
705 OrigAttrs.getFnAttributes());
706
707 addFramePointerAttrs(NewAttrs, Context, 0,
708 Shape.FrameSize, Shape.FrameAlign);
709 break;
710
711 case coro::ABI::Retcon:
712 case coro::ABI::RetconOnce:
713 // If we have a continuation prototype, just use its attributes,
714 // full-stop.
715 NewAttrs = Shape.RetconLowering.ResumePrototype->getAttributes();
716
717 addFramePointerAttrs(NewAttrs, Context, 0,
718 Shape.getRetconCoroId()->getStorageSize(),
719 Shape.getRetconCoroId()->getStorageAlignment());
720 break;
721 }
722
723 switch (Shape.ABI) {
724 // In these ABIs, the cloned functions always return 'void', and the
725 // existing return sites are meaningless. Note that for unique
726 // continuations, this includes the returns associated with suspends;
727 // this is fine because we can't suspend twice.
728 case coro::ABI::Switch:
729 case coro::ABI::RetconOnce:
730 // Remove old returns.
731 for (ReturnInst *Return : Returns)
732 changeToUnreachable(Return, /*UseLLVMTrap=*/false);
733 break;
734
735 // With multi-suspend continuations, we'll already have eliminated the
736 // original returns and inserted returns before all the suspend points,
737 // so we want to leave any returns in place.
738 case coro::ABI::Retcon:
739 break;
740 }
741
742 NewF->setAttributes(NewAttrs);
743 NewF->setCallingConv(Shape.getResumeFunctionCC());
744
745 // Set up the new entry block.
746 replaceEntryBlock();
747
748 Builder.SetInsertPoint(&NewF->getEntryBlock().front());
749 NewFramePtr = deriveNewFramePointer();
750
751 // Remap frame pointer.
752 Value *OldFramePtr = VMap[Shape.FramePtr];
753 NewFramePtr->takeName(OldFramePtr);
754 OldFramePtr->replaceAllUsesWith(NewFramePtr);
755
756 // Remap vFrame pointer.
757 auto *NewVFrame = Builder.CreateBitCast(
758 NewFramePtr, Type::getInt8PtrTy(Builder.getContext()), "vFrame");
759 Value *OldVFrame = cast<Value>(VMap[Shape.CoroBegin]);
760 OldVFrame->replaceAllUsesWith(NewVFrame);
761
762 switch (Shape.ABI) {
763 case coro::ABI::Switch:
764 // Rewrite final suspend handling as it is not done via switch (allows to
765 // remove final case from the switch, since it is undefined behavior to
766 // resume the coroutine suspended at the final suspend point.
767 if (Shape.SwitchLowering.HasFinalSuspend)
768 handleFinalSuspend();
769 break;
770
771 case coro::ABI::Retcon:
772 case coro::ABI::RetconOnce:
773 // Replace uses of the active suspend with the corresponding
774 // continuation-function arguments.
775 assert(ActiveSuspend != nullptr &&
776 "no active suspend when lowering a continuation-style coroutine");
777 replaceRetconSuspendUses();
778 break;
779 }
780
781 // Handle suspends.
782 replaceCoroSuspends();
783
784 // Handle swifterror.
785 replaceSwiftErrorOps();
786
787 // Remove coro.end intrinsics.
788 replaceCoroEnds();
789
790 // Eliminate coro.free from the clones, replacing it with 'null' in cleanup,
791 // to suppress deallocation code.
792 if (Shape.ABI == coro::ABI::Switch)
793 coro::replaceCoroFree(cast<CoroIdInst>(VMap[Shape.CoroBegin->getId()]),
794 /*Elide=*/ FKind == CoroCloner::Kind::SwitchCleanup);
795 }
796
797 // Create a resume clone by cloning the body of the original function, setting
798 // new entry block and replacing coro.suspend an appropriate value to force
799 // resume or cleanup pass for every suspend point.
createClone(Function & F,const Twine & Suffix,coro::Shape & Shape,CoroCloner::Kind FKind)800 static Function *createClone(Function &F, const Twine &Suffix,
801 coro::Shape &Shape, CoroCloner::Kind FKind) {
802 CoroCloner Cloner(F, Suffix, Shape, FKind);
803 Cloner.create();
804 return Cloner.getFunction();
805 }
806
807 /// Remove calls to llvm.coro.end in the original function.
removeCoroEnds(const coro::Shape & Shape,CallGraph * CG)808 static void removeCoroEnds(const coro::Shape &Shape, CallGraph *CG) {
809 for (auto End : Shape.CoroEnds) {
810 replaceCoroEnd(End, Shape, Shape.FramePtr, /*in resume*/ false, CG);
811 }
812 }
813
replaceFrameSize(coro::Shape & Shape)814 static void replaceFrameSize(coro::Shape &Shape) {
815 if (Shape.CoroSizes.empty())
816 return;
817
818 // In the same function all coro.sizes should have the same result type.
819 auto *SizeIntrin = Shape.CoroSizes.back();
820 Module *M = SizeIntrin->getModule();
821 const DataLayout &DL = M->getDataLayout();
822 auto Size = DL.getTypeAllocSize(Shape.FrameTy);
823 auto *SizeConstant = ConstantInt::get(SizeIntrin->getType(), Size);
824
825 for (CoroSizeInst *CS : Shape.CoroSizes) {
826 CS->replaceAllUsesWith(SizeConstant);
827 CS->eraseFromParent();
828 }
829 }
830
831 // Create a global constant array containing pointers to functions provided and
832 // set Info parameter of CoroBegin to point at this constant. Example:
833 //
834 // @f.resumers = internal constant [2 x void(%f.frame*)*]
835 // [void(%f.frame*)* @f.resume, void(%f.frame*)* @f.destroy]
836 // define void @f() {
837 // ...
838 // call i8* @llvm.coro.begin(i8* null, i32 0, i8* null,
839 // i8* bitcast([2 x void(%f.frame*)*] * @f.resumers to i8*))
840 //
841 // Assumes that all the functions have the same signature.
setCoroInfo(Function & F,coro::Shape & Shape,ArrayRef<Function * > Fns)842 static void setCoroInfo(Function &F, coro::Shape &Shape,
843 ArrayRef<Function *> Fns) {
844 // This only works under the switch-lowering ABI because coro elision
845 // only works on the switch-lowering ABI.
846 assert(Shape.ABI == coro::ABI::Switch);
847
848 SmallVector<Constant *, 4> Args(Fns.begin(), Fns.end());
849 assert(!Args.empty());
850 Function *Part = *Fns.begin();
851 Module *M = Part->getParent();
852 auto *ArrTy = ArrayType::get(Part->getType(), Args.size());
853
854 auto *ConstVal = ConstantArray::get(ArrTy, Args);
855 auto *GV = new GlobalVariable(*M, ConstVal->getType(), /*isConstant=*/true,
856 GlobalVariable::PrivateLinkage, ConstVal,
857 F.getName() + Twine(".resumers"));
858
859 // Update coro.begin instruction to refer to this constant.
860 LLVMContext &C = F.getContext();
861 auto *BC = ConstantExpr::getPointerCast(GV, Type::getInt8PtrTy(C));
862 Shape.getSwitchCoroId()->setInfo(BC);
863 }
864
865 // Store addresses of Resume/Destroy/Cleanup functions in the coroutine frame.
updateCoroFrame(coro::Shape & Shape,Function * ResumeFn,Function * DestroyFn,Function * CleanupFn)866 static void updateCoroFrame(coro::Shape &Shape, Function *ResumeFn,
867 Function *DestroyFn, Function *CleanupFn) {
868 assert(Shape.ABI == coro::ABI::Switch);
869
870 IRBuilder<> Builder(Shape.FramePtr->getNextNode());
871 auto *ResumeAddr = Builder.CreateStructGEP(
872 Shape.FrameTy, Shape.FramePtr, coro::Shape::SwitchFieldIndex::Resume,
873 "resume.addr");
874 Builder.CreateStore(ResumeFn, ResumeAddr);
875
876 Value *DestroyOrCleanupFn = DestroyFn;
877
878 CoroIdInst *CoroId = Shape.getSwitchCoroId();
879 if (CoroAllocInst *CA = CoroId->getCoroAlloc()) {
880 // If there is a CoroAlloc and it returns false (meaning we elide the
881 // allocation, use CleanupFn instead of DestroyFn).
882 DestroyOrCleanupFn = Builder.CreateSelect(CA, DestroyFn, CleanupFn);
883 }
884
885 auto *DestroyAddr = Builder.CreateStructGEP(
886 Shape.FrameTy, Shape.FramePtr, coro::Shape::SwitchFieldIndex::Destroy,
887 "destroy.addr");
888 Builder.CreateStore(DestroyOrCleanupFn, DestroyAddr);
889 }
890
postSplitCleanup(Function & F)891 static void postSplitCleanup(Function &F) {
892 removeUnreachableBlocks(F);
893
894 // For now, we do a mandatory verification step because we don't
895 // entirely trust this pass. Note that we don't want to add a verifier
896 // pass to FPM below because it will also verify all the global data.
897 if (verifyFunction(F, &errs()))
898 report_fatal_error("Broken function");
899
900 legacy::FunctionPassManager FPM(F.getParent());
901
902 FPM.add(createSCCPPass());
903 FPM.add(createCFGSimplificationPass());
904 FPM.add(createEarlyCSEPass());
905 FPM.add(createCFGSimplificationPass());
906
907 FPM.doInitialization();
908 FPM.run(F);
909 FPM.doFinalization();
910 }
911
912 // Assuming we arrived at the block NewBlock from Prev instruction, store
913 // PHI's incoming values in the ResolvedValues map.
914 static void
scanPHIsAndUpdateValueMap(Instruction * Prev,BasicBlock * NewBlock,DenseMap<Value *,Value * > & ResolvedValues)915 scanPHIsAndUpdateValueMap(Instruction *Prev, BasicBlock *NewBlock,
916 DenseMap<Value *, Value *> &ResolvedValues) {
917 auto *PrevBB = Prev->getParent();
918 for (PHINode &PN : NewBlock->phis()) {
919 auto V = PN.getIncomingValueForBlock(PrevBB);
920 // See if we already resolved it.
921 auto VI = ResolvedValues.find(V);
922 if (VI != ResolvedValues.end())
923 V = VI->second;
924 // Remember the value.
925 ResolvedValues[&PN] = V;
926 }
927 }
928
929 // Replace a sequence of branches leading to a ret, with a clone of a ret
930 // instruction. Suspend instruction represented by a switch, track the PHI
931 // values and select the correct case successor when possible.
simplifyTerminatorLeadingToRet(Instruction * InitialInst)932 static bool simplifyTerminatorLeadingToRet(Instruction *InitialInst) {
933 DenseMap<Value *, Value *> ResolvedValues;
934 BasicBlock *UnconditionalSucc = nullptr;
935
936 Instruction *I = InitialInst;
937 while (I->isTerminator() ||
938 (isa<CmpInst>(I) && I->getNextNode()->isTerminator())) {
939 if (isa<ReturnInst>(I)) {
940 if (I != InitialInst) {
941 // If InitialInst is an unconditional branch,
942 // remove PHI values that come from basic block of InitialInst
943 if (UnconditionalSucc)
944 UnconditionalSucc->removePredecessor(InitialInst->getParent(), true);
945 ReplaceInstWithInst(InitialInst, I->clone());
946 }
947 return true;
948 }
949 if (auto *BR = dyn_cast<BranchInst>(I)) {
950 if (BR->isUnconditional()) {
951 BasicBlock *BB = BR->getSuccessor(0);
952 if (I == InitialInst)
953 UnconditionalSucc = BB;
954 scanPHIsAndUpdateValueMap(I, BB, ResolvedValues);
955 I = BB->getFirstNonPHIOrDbgOrLifetime();
956 continue;
957 }
958 } else if (auto *CondCmp = dyn_cast<CmpInst>(I)) {
959 auto *BR = dyn_cast<BranchInst>(I->getNextNode());
960 if (BR && BR->isConditional() && CondCmp == BR->getCondition()) {
961 // If the case number of suspended switch instruction is reduced to
962 // 1, then it is simplified to CmpInst in llvm::ConstantFoldTerminator.
963 // And the comparsion looks like : %cond = icmp eq i8 %V, constant.
964 ConstantInt *CondConst = dyn_cast<ConstantInt>(CondCmp->getOperand(1));
965 if (CondConst && CondCmp->getPredicate() == CmpInst::ICMP_EQ) {
966 Value *V = CondCmp->getOperand(0);
967 auto it = ResolvedValues.find(V);
968 if (it != ResolvedValues.end())
969 V = it->second;
970
971 if (ConstantInt *Cond0 = dyn_cast<ConstantInt>(V)) {
972 BasicBlock *BB = Cond0->equalsInt(CondConst->getZExtValue())
973 ? BR->getSuccessor(0)
974 : BR->getSuccessor(1);
975 scanPHIsAndUpdateValueMap(I, BB, ResolvedValues);
976 I = BB->getFirstNonPHIOrDbgOrLifetime();
977 continue;
978 }
979 }
980 }
981 } else if (auto *SI = dyn_cast<SwitchInst>(I)) {
982 Value *V = SI->getCondition();
983 auto it = ResolvedValues.find(V);
984 if (it != ResolvedValues.end())
985 V = it->second;
986 if (ConstantInt *Cond = dyn_cast<ConstantInt>(V)) {
987 BasicBlock *BB = SI->findCaseValue(Cond)->getCaseSuccessor();
988 scanPHIsAndUpdateValueMap(I, BB, ResolvedValues);
989 I = BB->getFirstNonPHIOrDbgOrLifetime();
990 continue;
991 }
992 }
993 return false;
994 }
995 return false;
996 }
997
998 // Check whether CI obeys the rules of musttail attribute.
shouldBeMustTail(const CallInst & CI,const Function & F)999 static bool shouldBeMustTail(const CallInst &CI, const Function &F) {
1000 if (CI.isInlineAsm())
1001 return false;
1002
1003 // Match prototypes and calling conventions of resume function.
1004 FunctionType *CalleeTy = CI.getFunctionType();
1005 if (!CalleeTy->getReturnType()->isVoidTy() || (CalleeTy->getNumParams() != 1))
1006 return false;
1007
1008 Type *CalleeParmTy = CalleeTy->getParamType(0);
1009 if (!CalleeParmTy->isPointerTy() ||
1010 (CalleeParmTy->getPointerAddressSpace() != 0))
1011 return false;
1012
1013 if (CI.getCallingConv() != F.getCallingConv())
1014 return false;
1015
1016 // CI should not has any ABI-impacting function attributes.
1017 static const Attribute::AttrKind ABIAttrs[] = {
1018 Attribute::StructRet, Attribute::ByVal, Attribute::InAlloca,
1019 Attribute::Preallocated, Attribute::InReg, Attribute::Returned,
1020 Attribute::SwiftSelf, Attribute::SwiftError};
1021 AttributeList Attrs = CI.getAttributes();
1022 for (auto AK : ABIAttrs)
1023 if (Attrs.hasParamAttribute(0, AK))
1024 return false;
1025
1026 return true;
1027 }
1028
1029 // Add musttail to any resume instructions that is immediately followed by a
1030 // suspend (i.e. ret). We do this even in -O0 to support guaranteed tail call
1031 // for symmetrical coroutine control transfer (C++ Coroutines TS extension).
1032 // This transformation is done only in the resume part of the coroutine that has
1033 // identical signature and calling convention as the coro.resume call.
addMustTailToCoroResumes(Function & F)1034 static void addMustTailToCoroResumes(Function &F) {
1035 bool changed = false;
1036
1037 // Collect potential resume instructions.
1038 SmallVector<CallInst *, 4> Resumes;
1039 for (auto &I : instructions(F))
1040 if (auto *Call = dyn_cast<CallInst>(&I))
1041 if (shouldBeMustTail(*Call, F))
1042 Resumes.push_back(Call);
1043
1044 // Set musttail on those that are followed by a ret instruction.
1045 for (CallInst *Call : Resumes)
1046 if (simplifyTerminatorLeadingToRet(Call->getNextNode())) {
1047 Call->setTailCallKind(CallInst::TCK_MustTail);
1048 changed = true;
1049 }
1050
1051 if (changed)
1052 removeUnreachableBlocks(F);
1053 }
1054
1055 // Coroutine has no suspend points. Remove heap allocation for the coroutine
1056 // frame if possible.
handleNoSuspendCoroutine(coro::Shape & Shape)1057 static void handleNoSuspendCoroutine(coro::Shape &Shape) {
1058 auto *CoroBegin = Shape.CoroBegin;
1059 auto *CoroId = CoroBegin->getId();
1060 auto *AllocInst = CoroId->getCoroAlloc();
1061 switch (Shape.ABI) {
1062 case coro::ABI::Switch: {
1063 auto SwitchId = cast<CoroIdInst>(CoroId);
1064 coro::replaceCoroFree(SwitchId, /*Elide=*/AllocInst != nullptr);
1065 if (AllocInst) {
1066 IRBuilder<> Builder(AllocInst);
1067 auto *Frame = Builder.CreateAlloca(Shape.FrameTy);
1068 Frame->setAlignment(Shape.FrameAlign);
1069 auto *VFrame = Builder.CreateBitCast(Frame, Builder.getInt8PtrTy());
1070 AllocInst->replaceAllUsesWith(Builder.getFalse());
1071 AllocInst->eraseFromParent();
1072 CoroBegin->replaceAllUsesWith(VFrame);
1073 } else {
1074 CoroBegin->replaceAllUsesWith(CoroBegin->getMem());
1075 }
1076 break;
1077 }
1078
1079 case coro::ABI::Retcon:
1080 case coro::ABI::RetconOnce:
1081 CoroBegin->replaceAllUsesWith(UndefValue::get(CoroBegin->getType()));
1082 break;
1083 }
1084
1085 CoroBegin->eraseFromParent();
1086 }
1087
1088 // SimplifySuspendPoint needs to check that there is no calls between
1089 // coro_save and coro_suspend, since any of the calls may potentially resume
1090 // the coroutine and if that is the case we cannot eliminate the suspend point.
hasCallsInBlockBetween(Instruction * From,Instruction * To)1091 static bool hasCallsInBlockBetween(Instruction *From, Instruction *To) {
1092 for (Instruction *I = From; I != To; I = I->getNextNode()) {
1093 // Assume that no intrinsic can resume the coroutine.
1094 if (isa<IntrinsicInst>(I))
1095 continue;
1096
1097 if (isa<CallBase>(I))
1098 return true;
1099 }
1100 return false;
1101 }
1102
hasCallsInBlocksBetween(BasicBlock * SaveBB,BasicBlock * ResDesBB)1103 static bool hasCallsInBlocksBetween(BasicBlock *SaveBB, BasicBlock *ResDesBB) {
1104 SmallPtrSet<BasicBlock *, 8> Set;
1105 SmallVector<BasicBlock *, 8> Worklist;
1106
1107 Set.insert(SaveBB);
1108 Worklist.push_back(ResDesBB);
1109
1110 // Accumulate all blocks between SaveBB and ResDesBB. Because CoroSaveIntr
1111 // returns a token consumed by suspend instruction, all blocks in between
1112 // will have to eventually hit SaveBB when going backwards from ResDesBB.
1113 while (!Worklist.empty()) {
1114 auto *BB = Worklist.pop_back_val();
1115 Set.insert(BB);
1116 for (auto *Pred : predecessors(BB))
1117 if (Set.count(Pred) == 0)
1118 Worklist.push_back(Pred);
1119 }
1120
1121 // SaveBB and ResDesBB are checked separately in hasCallsBetween.
1122 Set.erase(SaveBB);
1123 Set.erase(ResDesBB);
1124
1125 for (auto *BB : Set)
1126 if (hasCallsInBlockBetween(BB->getFirstNonPHI(), nullptr))
1127 return true;
1128
1129 return false;
1130 }
1131
hasCallsBetween(Instruction * Save,Instruction * ResumeOrDestroy)1132 static bool hasCallsBetween(Instruction *Save, Instruction *ResumeOrDestroy) {
1133 auto *SaveBB = Save->getParent();
1134 auto *ResumeOrDestroyBB = ResumeOrDestroy->getParent();
1135
1136 if (SaveBB == ResumeOrDestroyBB)
1137 return hasCallsInBlockBetween(Save->getNextNode(), ResumeOrDestroy);
1138
1139 // Any calls from Save to the end of the block?
1140 if (hasCallsInBlockBetween(Save->getNextNode(), nullptr))
1141 return true;
1142
1143 // Any calls from begging of the block up to ResumeOrDestroy?
1144 if (hasCallsInBlockBetween(ResumeOrDestroyBB->getFirstNonPHI(),
1145 ResumeOrDestroy))
1146 return true;
1147
1148 // Any calls in all of the blocks between SaveBB and ResumeOrDestroyBB?
1149 if (hasCallsInBlocksBetween(SaveBB, ResumeOrDestroyBB))
1150 return true;
1151
1152 return false;
1153 }
1154
1155 // If a SuspendIntrin is preceded by Resume or Destroy, we can eliminate the
1156 // suspend point and replace it with nornal control flow.
simplifySuspendPoint(CoroSuspendInst * Suspend,CoroBeginInst * CoroBegin)1157 static bool simplifySuspendPoint(CoroSuspendInst *Suspend,
1158 CoroBeginInst *CoroBegin) {
1159 Instruction *Prev = Suspend->getPrevNode();
1160 if (!Prev) {
1161 auto *Pred = Suspend->getParent()->getSinglePredecessor();
1162 if (!Pred)
1163 return false;
1164 Prev = Pred->getTerminator();
1165 }
1166
1167 CallBase *CB = dyn_cast<CallBase>(Prev);
1168 if (!CB)
1169 return false;
1170
1171 auto *Callee = CB->getCalledOperand()->stripPointerCasts();
1172
1173 // See if the callsite is for resumption or destruction of the coroutine.
1174 auto *SubFn = dyn_cast<CoroSubFnInst>(Callee);
1175 if (!SubFn)
1176 return false;
1177
1178 // Does not refer to the current coroutine, we cannot do anything with it.
1179 if (SubFn->getFrame() != CoroBegin)
1180 return false;
1181
1182 // See if the transformation is safe. Specifically, see if there are any
1183 // calls in between Save and CallInstr. They can potenitally resume the
1184 // coroutine rendering this optimization unsafe.
1185 auto *Save = Suspend->getCoroSave();
1186 if (hasCallsBetween(Save, CB))
1187 return false;
1188
1189 // Replace llvm.coro.suspend with the value that results in resumption over
1190 // the resume or cleanup path.
1191 Suspend->replaceAllUsesWith(SubFn->getRawIndex());
1192 Suspend->eraseFromParent();
1193 Save->eraseFromParent();
1194
1195 // No longer need a call to coro.resume or coro.destroy.
1196 if (auto *Invoke = dyn_cast<InvokeInst>(CB)) {
1197 BranchInst::Create(Invoke->getNormalDest(), Invoke);
1198 }
1199
1200 // Grab the CalledValue from CB before erasing the CallInstr.
1201 auto *CalledValue = CB->getCalledOperand();
1202 CB->eraseFromParent();
1203
1204 // If no more users remove it. Usually it is a bitcast of SubFn.
1205 if (CalledValue != SubFn && CalledValue->user_empty())
1206 if (auto *I = dyn_cast<Instruction>(CalledValue))
1207 I->eraseFromParent();
1208
1209 // Now we are good to remove SubFn.
1210 if (SubFn->user_empty())
1211 SubFn->eraseFromParent();
1212
1213 return true;
1214 }
1215
1216 // Remove suspend points that are simplified.
simplifySuspendPoints(coro::Shape & Shape)1217 static void simplifySuspendPoints(coro::Shape &Shape) {
1218 // Currently, the only simplification we do is switch-lowering-specific.
1219 if (Shape.ABI != coro::ABI::Switch)
1220 return;
1221
1222 auto &S = Shape.CoroSuspends;
1223 size_t I = 0, N = S.size();
1224 if (N == 0)
1225 return;
1226 while (true) {
1227 auto SI = cast<CoroSuspendInst>(S[I]);
1228 // Leave final.suspend to handleFinalSuspend since it is undefined behavior
1229 // to resume a coroutine suspended at the final suspend point.
1230 if (!SI->isFinal() && simplifySuspendPoint(SI, Shape.CoroBegin)) {
1231 if (--N == I)
1232 break;
1233 std::swap(S[I], S[N]);
1234 continue;
1235 }
1236 if (++I == N)
1237 break;
1238 }
1239 S.resize(N);
1240 }
1241
splitSwitchCoroutine(Function & F,coro::Shape & Shape,SmallVectorImpl<Function * > & Clones)1242 static void splitSwitchCoroutine(Function &F, coro::Shape &Shape,
1243 SmallVectorImpl<Function *> &Clones) {
1244 assert(Shape.ABI == coro::ABI::Switch);
1245
1246 createResumeEntryBlock(F, Shape);
1247 auto ResumeClone = createClone(F, ".resume", Shape,
1248 CoroCloner::Kind::SwitchResume);
1249 auto DestroyClone = createClone(F, ".destroy", Shape,
1250 CoroCloner::Kind::SwitchUnwind);
1251 auto CleanupClone = createClone(F, ".cleanup", Shape,
1252 CoroCloner::Kind::SwitchCleanup);
1253
1254 postSplitCleanup(*ResumeClone);
1255 postSplitCleanup(*DestroyClone);
1256 postSplitCleanup(*CleanupClone);
1257
1258 addMustTailToCoroResumes(*ResumeClone);
1259
1260 // Store addresses resume/destroy/cleanup functions in the coroutine frame.
1261 updateCoroFrame(Shape, ResumeClone, DestroyClone, CleanupClone);
1262
1263 assert(Clones.empty());
1264 Clones.push_back(ResumeClone);
1265 Clones.push_back(DestroyClone);
1266 Clones.push_back(CleanupClone);
1267
1268 // Create a constant array referring to resume/destroy/clone functions pointed
1269 // by the last argument of @llvm.coro.info, so that CoroElide pass can
1270 // determined correct function to call.
1271 setCoroInfo(F, Shape, Clones);
1272 }
1273
/// Split a coroutine \p F that uses the Retcon or RetconOnce ABI.
///
/// One continuation function is created per entry of Shape.CoroSuspends and
/// appended to \p Clones (which must be empty on entry), so that Clones[i]
/// corresponds to Shape.CoroSuspends[i]. In \p F itself every suspend point is
/// preceded by a branch to a single shared return block, which returns the
/// continuation (and any values passed to the suspend) to the caller.
static void splitRetconCoroutine(Function &F, coro::Shape &Shape,
                                 SmallVectorImpl<Function *> &Clones) {
  assert(Shape.ABI == coro::ABI::Retcon ||
         Shape.ABI == coro::ABI::RetconOnce);
  assert(Clones.empty());

  // Reset various things that the optimizer might have decided it
  // "knows" about the coroutine function due to not seeing a return.
  F.removeFnAttr(Attribute::NoReturn);
  F.removeAttribute(AttributeList::ReturnIndex, Attribute::NoAlias);
  F.removeAttribute(AttributeList::ReturnIndex, Attribute::NonNull);

  // Allocate the frame.
  auto *Id = cast<AnyCoroIdRetconInst>(Shape.CoroBegin->getId());
  Value *RawFramePtr;
  if (Shape.RetconLowering.IsFrameInlineInStorage) {
    // The frame lives directly in the storage supplied through the coro.id.
    RawFramePtr = Id->getStorage();
  } else {
    IRBuilder<> Builder(Id);

    // Determine the size of the frame.
    const DataLayout &DL = F.getParent()->getDataLayout();
    auto Size = DL.getTypeAllocSize(Shape.FrameTy);

    // Allocate.  We don't need to update the call graph node because we're
    // going to recompute it from scratch after splitting.
    // FIXME: pass the required alignment
    RawFramePtr = Shape.emitAlloc(Builder, Builder.getInt64(Size), nullptr);
    RawFramePtr =
      Builder.CreateBitCast(RawFramePtr, Shape.CoroBegin->getType());

    // Stash the allocated frame pointer in the continuation storage.
    auto Dest = Builder.CreateBitCast(Id->getStorage(),
                                      RawFramePtr->getType()->getPointerTo());
    Builder.CreateStore(RawFramePtr, Dest);
  }

  // Map all uses of llvm.coro.begin to the allocated frame pointer.
  {
    // Make sure we don't invalidate Shape.FramePtr.
    TrackingVH<Instruction> Handle(Shape.FramePtr);
    Shape.CoroBegin->replaceAllUsesWith(RawFramePtr);
    Shape.FramePtr = Handle.getValPtr();
  }

  // Create a unique return block. It is built lazily inside the loop below,
  // when the first suspend point is processed.
  BasicBlock *ReturnBB = nullptr;
  // ReturnPHIs[0] carries the continuation; the remaining PHIs carry the
  // directly-yielded values, one per retcon result type.
  SmallVector<PHINode *, 4> ReturnPHIs;

  // Create all the functions in order after the main function.
  auto NextF = std::next(F.getIterator());

  // Create a continuation function for each of the suspend points.
  Clones.reserve(Shape.CoroSuspends.size());
  for (size_t i = 0, e = Shape.CoroSuspends.size(); i != e; ++i) {
    auto Suspend = cast<CoroSuspendRetconInst>(Shape.CoroSuspends[i]);

    // Create the clone declaration.
    auto Continuation =
      createCloneDeclaration(F, Shape, ".resume." + Twine(i), NextF);
    Clones.push_back(Continuation);

    // Insert a branch to the unified return block immediately before
    // the suspend point.
    auto SuspendBB = Suspend->getParent();
    auto NewSuspendBB = SuspendBB->splitBasicBlock(Suspend);
    // splitBasicBlock leaves SuspendBB ending in a branch to NewSuspendBB;
    // that branch is redirected to the return block further down.
    auto Branch = cast<BranchInst>(SuspendBB->getTerminator());

    // Create the unified return block.
    if (!ReturnBB) {
      // Place it before the first suspend.
      ReturnBB = BasicBlock::Create(F.getContext(), "coro.return", &F,
                                    NewSuspendBB);
      Shape.RetconLowering.ReturnBlock = ReturnBB;

      IRBuilder<> Builder(ReturnBB);

      // Create PHIs for all the return values.
      assert(ReturnPHIs.empty());

      // First, the continuation.
      ReturnPHIs.push_back(Builder.CreatePHI(Continuation->getType(),
                                             Shape.CoroSuspends.size()));

      // Next, all the directly-yielded values.
      for (auto ResultTy : Shape.getRetconResultTypes())
        ReturnPHIs.push_back(Builder.CreatePHI(ResultTy,
                                               Shape.CoroSuspends.size()));

      // Build the return value.
      auto RetTy = F.getReturnType();

      // Cast the continuation value if necessary.
      // We can't rely on the types matching up because that type would
      // have to be infinite.
      auto CastedContinuationTy =
        (ReturnPHIs.size() == 1 ? RetTy : RetTy->getStructElementType(0));
      auto *CastedContinuation =
        Builder.CreateBitCast(ReturnPHIs[0], CastedContinuationTy);

      Value *RetV;
      if (ReturnPHIs.size() == 1) {
        // Only the continuation is returned.
        RetV = CastedContinuation;
      } else {
        // Pack the continuation and the yielded values into the aggregate
        // return value, element by element.
        RetV = UndefValue::get(RetTy);
        RetV = Builder.CreateInsertValue(RetV, CastedContinuation, 0);
        for (size_t I = 1, E = ReturnPHIs.size(); I != E; ++I)
          RetV = Builder.CreateInsertValue(RetV, ReturnPHIs[I], I);
      }

      Builder.CreateRet(RetV);
    }

    // Branch to the return block.
    Branch->setSuccessor(0, ReturnBB);
    ReturnPHIs[0]->addIncoming(Continuation, SuspendBB);
    size_t NextPHIIndex = 1;
    for (auto &VUse : Suspend->value_operands())
      ReturnPHIs[NextPHIIndex++]->addIncoming(&*VUse, SuspendBB);
    // Every suspend must supply exactly one value per yielded-value PHI.
    assert(NextPHIIndex == ReturnPHIs.size());
  }

  // Fill in the bodies of the continuation functions declared above.
  assert(Clones.size() == Shape.CoroSuspends.size());
  for (size_t i = 0, e = Shape.CoroSuspends.size(); i != e; ++i) {
    auto Suspend = Shape.CoroSuspends[i];
    auto Clone = Clones[i];

    // NOTE(review): the suffix here ("resume.N") lacks the leading '.' used
    // for the declaration above (".resume.N") -- confirm whether intended.
    CoroCloner(F, "resume." + Twine(i), Shape, Clone, Suspend).create();
  }
}
1404
1405 namespace {
1406 class PrettyStackTraceFunction : public PrettyStackTraceEntry {
1407 Function &F;
1408 public:
PrettyStackTraceFunction(Function & F)1409 PrettyStackTraceFunction(Function &F) : F(F) {}
print(raw_ostream & OS) const1410 void print(raw_ostream &OS) const override {
1411 OS << "While splitting coroutine ";
1412 F.printAsOperand(OS, /*print type*/ false, F.getParent());
1413 OS << "\n";
1414 }
1415 };
1416 }
1417
splitCoroutine(Function & F,SmallVectorImpl<Function * > & Clones)1418 static coro::Shape splitCoroutine(Function &F,
1419 SmallVectorImpl<Function *> &Clones) {
1420 PrettyStackTraceFunction prettyStackTrace(F);
1421
1422 // The suspend-crossing algorithm in buildCoroutineFrame get tripped
1423 // up by uses in unreachable blocks, so remove them as a first pass.
1424 removeUnreachableBlocks(F);
1425
1426 coro::Shape Shape(F);
1427 if (!Shape.CoroBegin)
1428 return Shape;
1429
1430 simplifySuspendPoints(Shape);
1431 buildCoroutineFrame(F, Shape);
1432 replaceFrameSize(Shape);
1433
1434 // If there are no suspend points, no split required, just remove
1435 // the allocation and deallocation blocks, they are not needed.
1436 if (Shape.CoroSuspends.empty()) {
1437 handleNoSuspendCoroutine(Shape);
1438 } else {
1439 switch (Shape.ABI) {
1440 case coro::ABI::Switch:
1441 splitSwitchCoroutine(F, Shape, Clones);
1442 break;
1443 case coro::ABI::Retcon:
1444 case coro::ABI::RetconOnce:
1445 splitRetconCoroutine(F, Shape, Clones);
1446 break;
1447 }
1448 }
1449
1450 // Replace all the swifterror operations in the original function.
1451 // This invalidates SwiftErrorOps in the Shape.
1452 replaceSwiftErrorOps(F, Shape, nullptr);
1453
1454 return Shape;
1455 }
1456
1457 static void
updateCallGraphAfterCoroutineSplit(Function & F,const coro::Shape & Shape,const SmallVectorImpl<Function * > & Clones,CallGraph & CG,CallGraphSCC & SCC)1458 updateCallGraphAfterCoroutineSplit(Function &F, const coro::Shape &Shape,
1459 const SmallVectorImpl<Function *> &Clones,
1460 CallGraph &CG, CallGraphSCC &SCC) {
1461 if (!Shape.CoroBegin)
1462 return;
1463
1464 removeCoroEnds(Shape, &CG);
1465 postSplitCleanup(F);
1466
1467 // Update call graph and add the functions we created to the SCC.
1468 coro::updateCallGraph(F, Clones, CG, SCC);
1469 }
1470
updateCallGraphAfterCoroutineSplit(LazyCallGraph::Node & N,const coro::Shape & Shape,const SmallVectorImpl<Function * > & Clones,LazyCallGraph::SCC & C,LazyCallGraph & CG,CGSCCAnalysisManager & AM,CGSCCUpdateResult & UR,FunctionAnalysisManager & FAM)1471 static void updateCallGraphAfterCoroutineSplit(
1472 LazyCallGraph::Node &N, const coro::Shape &Shape,
1473 const SmallVectorImpl<Function *> &Clones, LazyCallGraph::SCC &C,
1474 LazyCallGraph &CG, CGSCCAnalysisManager &AM, CGSCCUpdateResult &UR,
1475 FunctionAnalysisManager &FAM) {
1476 if (!Shape.CoroBegin)
1477 return;
1478
1479 for (llvm::CoroEndInst *End : Shape.CoroEnds) {
1480 auto &Context = End->getContext();
1481 End->replaceAllUsesWith(ConstantInt::getFalse(Context));
1482 End->eraseFromParent();
1483 }
1484
1485 postSplitCleanup(N.getFunction());
1486
1487 // To insert the newly created coroutine funclets 'f.resume', 'f.destroy', and
1488 // 'f.cleanup' into the same SCC as the coroutine 'f' they were outlined from,
1489 // we make use of the CallGraphUpdater class, which can modify the internal
1490 // state of the LazyCallGraph.
1491 for (Function *Clone : Clones)
1492 CG.addNewFunctionIntoRefSCC(*Clone, C.getOuterRefSCC());
1493
1494 // We've inserted instructions into coroutine 'f' that reference the three new
1495 // coroutine funclets. We must now update the call graph so that reference
1496 // edges between 'f' and its funclets are added to it. LazyCallGraph only
1497 // allows CGSCC passes to insert "trivial" reference edges. We've ensured
1498 // above, by inserting the funclets into the same SCC as the corutine, that
1499 // the edges are trivial.
1500 //
1501 // N.B.: If we didn't update the call graph here, a CGSCCToFunctionPassAdaptor
1502 // later in this CGSCC pass pipeline may be run, triggering a call graph
1503 // update of its own. Function passes run by the adaptor are not permitted to
1504 // add new edges of any kind to the graph, and the new edges inserted by this
1505 // pass would be misattributed to that unrelated function pass.
1506 updateCGAndAnalysisManagerForCGSCCPass(CG, C, N, AM, UR, FAM);
1507 }
1508
1509 // When we see the coroutine the first time, we insert an indirect call to a
1510 // devirt trigger function and mark the coroutine that it is now ready for
1511 // split.
prepareForSplit(Function & F,CallGraph & CG)1512 static void prepareForSplit(Function &F, CallGraph &CG) {
1513 Module &M = *F.getParent();
1514 LLVMContext &Context = F.getContext();
1515 #ifndef NDEBUG
1516 Function *DevirtFn = M.getFunction(CORO_DEVIRT_TRIGGER_FN);
1517 assert(DevirtFn && "coro.devirt.trigger function not found");
1518 #endif
1519
1520 F.addFnAttr(CORO_PRESPLIT_ATTR, PREPARED_FOR_SPLIT);
1521
1522 // Insert an indirect call sequence that will be devirtualized by CoroElide
1523 // pass:
1524 // %0 = call i8* @llvm.coro.subfn.addr(i8* null, i8 -1)
1525 // %1 = bitcast i8* %0 to void(i8*)*
1526 // call void %1(i8* null)
1527 coro::LowererBase Lowerer(M);
1528 Instruction *InsertPt = F.getEntryBlock().getTerminator();
1529 auto *Null = ConstantPointerNull::get(Type::getInt8PtrTy(Context));
1530 auto *DevirtFnAddr =
1531 Lowerer.makeSubFnCall(Null, CoroSubFnInst::RestartTrigger, InsertPt);
1532 FunctionType *FnTy = FunctionType::get(Type::getVoidTy(Context),
1533 {Type::getInt8PtrTy(Context)}, false);
1534 auto *IndirectCall = CallInst::Create(FnTy, DevirtFnAddr, Null, "", InsertPt);
1535
1536 // Update CG graph with an indirect call we just added.
1537 CG[&F]->addCalledFunction(IndirectCall, CG.getCallsExternalNode());
1538 }
1539
1540 // Make sure that there is a devirtualization trigger function that the
1541 // coro-split pass uses to force a restart of the CGSCC pipeline. If the devirt
1542 // trigger function is not found, we will create one and add it to the current
1543 // SCC.
createDevirtTriggerFunc(CallGraph & CG,CallGraphSCC & SCC)1544 static void createDevirtTriggerFunc(CallGraph &CG, CallGraphSCC &SCC) {
1545 Module &M = CG.getModule();
1546 if (M.getFunction(CORO_DEVIRT_TRIGGER_FN))
1547 return;
1548
1549 LLVMContext &C = M.getContext();
1550 auto *FnTy = FunctionType::get(Type::getVoidTy(C), Type::getInt8PtrTy(C),
1551 /*isVarArg=*/false);
1552 Function *DevirtFn =
1553 Function::Create(FnTy, GlobalValue::LinkageTypes::PrivateLinkage,
1554 CORO_DEVIRT_TRIGGER_FN, &M);
1555 DevirtFn->addFnAttr(Attribute::AlwaysInline);
1556 auto *Entry = BasicBlock::Create(C, "entry", DevirtFn);
1557 ReturnInst::Create(C, Entry);
1558
1559 auto *Node = CG.getOrInsertFunction(DevirtFn);
1560
1561 SmallVector<CallGraphNode *, 8> Nodes(SCC.begin(), SCC.end());
1562 Nodes.push_back(Node);
1563 SCC.initialize(Nodes);
1564 }
1565
1566 /// Replace a call to llvm.coro.prepare.retcon.
replacePrepare(CallInst * Prepare,CallGraph & CG)1567 static void replacePrepare(CallInst *Prepare, CallGraph &CG) {
1568 auto CastFn = Prepare->getArgOperand(0); // as an i8*
1569 auto Fn = CastFn->stripPointerCasts(); // as its original type
1570
1571 // Find call graph nodes for the preparation.
1572 CallGraphNode *PrepareUserNode = nullptr, *FnNode = nullptr;
1573 if (auto ConcreteFn = dyn_cast<Function>(Fn)) {
1574 PrepareUserNode = CG[Prepare->getFunction()];
1575 FnNode = CG[ConcreteFn];
1576 }
1577
1578 // Attempt to peephole this pattern:
1579 // %0 = bitcast [[TYPE]] @some_function to i8*
1580 // %1 = call @llvm.coro.prepare.retcon(i8* %0)
1581 // %2 = bitcast %1 to [[TYPE]]
1582 // ==>
1583 // %2 = @some_function
1584 for (auto UI = Prepare->use_begin(), UE = Prepare->use_end();
1585 UI != UE; ) {
1586 // Look for bitcasts back to the original function type.
1587 auto *Cast = dyn_cast<BitCastInst>((UI++)->getUser());
1588 if (!Cast || Cast->getType() != Fn->getType()) continue;
1589
1590 // Check whether the replacement will introduce new direct calls.
1591 // If so, we'll need to update the call graph.
1592 if (PrepareUserNode) {
1593 for (auto &Use : Cast->uses()) {
1594 if (auto *CB = dyn_cast<CallBase>(Use.getUser())) {
1595 if (!CB->isCallee(&Use))
1596 continue;
1597 PrepareUserNode->removeCallEdgeFor(*CB);
1598 PrepareUserNode->addCalledFunction(CB, FnNode);
1599 }
1600 }
1601 }
1602
1603 // Replace and remove the cast.
1604 Cast->replaceAllUsesWith(Fn);
1605 Cast->eraseFromParent();
1606 }
1607
1608 // Replace any remaining uses with the function as an i8*.
1609 // This can never directly be a callee, so we don't need to update CG.
1610 Prepare->replaceAllUsesWith(CastFn);
1611 Prepare->eraseFromParent();
1612
1613 // Kill dead bitcasts.
1614 while (auto *Cast = dyn_cast<BitCastInst>(CastFn)) {
1615 if (!Cast->use_empty()) break;
1616 CastFn = Cast->getOperand(0);
1617 Cast->eraseFromParent();
1618 }
1619 }
1620
1621 /// Remove calls to llvm.coro.prepare.retcon, a barrier meant to prevent
1622 /// IPO from operating on calls to a retcon coroutine before it's been
1623 /// split. This is only safe to do after we've split all retcon
1624 /// coroutines in the module. We can do that this in this pass because
1625 /// this pass does promise to split all retcon coroutines (as opposed to
1626 /// switch coroutines, which are lowered in multiple stages).
replaceAllPrepares(Function * PrepareFn,CallGraph & CG)1627 static bool replaceAllPrepares(Function *PrepareFn, CallGraph &CG) {
1628 bool Changed = false;
1629 for (auto PI = PrepareFn->use_begin(), PE = PrepareFn->use_end();
1630 PI != PE; ) {
1631 // Intrinsics can only be used in calls.
1632 auto *Prepare = cast<CallInst>((PI++)->getUser());
1633 replacePrepare(Prepare, CG);
1634 Changed = true;
1635 }
1636
1637 return Changed;
1638 }
1639
/// Ask coro::declaresIntrinsics about the two intrinsics this pass acts on,
/// llvm.coro.begin and llvm.coro.prepare.retcon, for module \p M and return
/// its answer.
static bool declaresCoroSplitIntrinsics(const Module &M) {
  return coro::declaresIntrinsics(
      M, {"llvm.coro.begin", "llvm.coro.prepare.retcon"});
}
1644
run(LazyCallGraph::SCC & C,CGSCCAnalysisManager & AM,LazyCallGraph & CG,CGSCCUpdateResult & UR)1645 PreservedAnalyses CoroSplitPass::run(LazyCallGraph::SCC &C,
1646 CGSCCAnalysisManager &AM,
1647 LazyCallGraph &CG, CGSCCUpdateResult &UR) {
1648 // NB: One invariant of a valid LazyCallGraph::SCC is that it must contain a
1649 // non-zero number of nodes, so we assume that here and grab the first
1650 // node's function's module.
1651 Module &M = *C.begin()->getFunction().getParent();
1652 auto &FAM =
1653 AM.getResult<FunctionAnalysisManagerCGSCCProxy>(C, CG).getManager();
1654
1655 if (!declaresCoroSplitIntrinsics(M))
1656 return PreservedAnalyses::all();
1657
1658 // Check for uses of llvm.coro.prepare.retcon.
1659 const auto *PrepareFn = M.getFunction("llvm.coro.prepare.retcon");
1660 if (PrepareFn && PrepareFn->use_empty())
1661 PrepareFn = nullptr;
1662
1663 // Find coroutines for processing.
1664 SmallVector<LazyCallGraph::Node *, 4> Coroutines;
1665 for (LazyCallGraph::Node &N : C)
1666 if (N.getFunction().hasFnAttribute(CORO_PRESPLIT_ATTR))
1667 Coroutines.push_back(&N);
1668
1669 if (Coroutines.empty() && !PrepareFn)
1670 return PreservedAnalyses::all();
1671
1672 if (Coroutines.empty())
1673 llvm_unreachable("new pass manager cannot yet handle "
1674 "'llvm.coro.prepare.retcon'");
1675
1676 // Split all the coroutines.
1677 for (LazyCallGraph::Node *N : Coroutines) {
1678 Function &F = N->getFunction();
1679 Attribute Attr = F.getFnAttribute(CORO_PRESPLIT_ATTR);
1680 StringRef Value = Attr.getValueAsString();
1681 LLVM_DEBUG(dbgs() << "CoroSplit: Processing coroutine '" << F.getName()
1682 << "' state: " << Value << "\n");
1683 if (Value == UNPREPARED_FOR_SPLIT) {
1684 // Enqueue a second iteration of the CGSCC pipeline.
1685 // N.B.:
1686 // The CoroSplitLegacy pass "triggers" a restart of the CGSCC pass
1687 // pipeline by inserting an indirect function call that the
1688 // CoroElideLegacy pass then replaces with a direct function call. The
1689 // legacy CGSCC pipeline's implicit behavior was as if wrapped in the new
1690 // pass manager abstraction DevirtSCCRepeatedPass.
1691 //
1692 // This pass does not need to "trigger" another run of the pipeline.
1693 // Instead, it simply enqueues the same RefSCC onto the pipeline's
1694 // worklist.
1695 UR.CWorklist.insert(&C);
1696 F.addFnAttr(CORO_PRESPLIT_ATTR, PREPARED_FOR_SPLIT);
1697 continue;
1698 }
1699 F.removeFnAttr(CORO_PRESPLIT_ATTR);
1700
1701 SmallVector<Function *, 4> Clones;
1702 const coro::Shape Shape = splitCoroutine(F, Clones);
1703 updateCallGraphAfterCoroutineSplit(*N, Shape, Clones, C, CG, AM, UR, FAM);
1704 }
1705
1706 if (PrepareFn)
1707 llvm_unreachable("new pass manager cannot yet handle "
1708 "'llvm.coro.prepare.retcon'");
1709
1710 return PreservedAnalyses::none();
1711 }
1712
1713 namespace {
1714
1715 // We present a coroutine to LLVM as an ordinary function with suspension
1716 // points marked up with intrinsics. We let the optimizer party on the coroutine
1717 // as a single function for as long as possible. Shortly before the coroutine is
1718 // eligible to be inlined into its callers, we split up the coroutine into parts
1719 // corresponding to initial, resume and destroy invocations of the coroutine,
1720 // add them to the current SCC and restart the IPO pipeline to optimize the
1721 // coroutine subfunctions we extracted before proceeding to the caller of the
1722 // coroutine.
1723 struct CoroSplitLegacy : public CallGraphSCCPass {
1724 static char ID; // Pass identification, replacement for typeid
1725
CoroSplitLegacy__anonef49c9d50411::CoroSplitLegacy1726 CoroSplitLegacy() : CallGraphSCCPass(ID) {
1727 initializeCoroSplitLegacyPass(*PassRegistry::getPassRegistry());
1728 }
1729
1730 bool Run = false;
1731
1732 // A coroutine is identified by the presence of coro.begin intrinsic, if
1733 // we don't have any, this pass has nothing to do.
doInitialization__anonef49c9d50411::CoroSplitLegacy1734 bool doInitialization(CallGraph &CG) override {
1735 Run = declaresCoroSplitIntrinsics(CG.getModule());
1736 return CallGraphSCCPass::doInitialization(CG);
1737 }
1738
runOnSCC__anonef49c9d50411::CoroSplitLegacy1739 bool runOnSCC(CallGraphSCC &SCC) override {
1740 if (!Run)
1741 return false;
1742
1743 // Check for uses of llvm.coro.prepare.retcon.
1744 auto PrepareFn =
1745 SCC.getCallGraph().getModule().getFunction("llvm.coro.prepare.retcon");
1746 if (PrepareFn && PrepareFn->use_empty())
1747 PrepareFn = nullptr;
1748
1749 // Find coroutines for processing.
1750 SmallVector<Function *, 4> Coroutines;
1751 for (CallGraphNode *CGN : SCC)
1752 if (auto *F = CGN->getFunction())
1753 if (F->hasFnAttribute(CORO_PRESPLIT_ATTR))
1754 Coroutines.push_back(F);
1755
1756 if (Coroutines.empty() && !PrepareFn)
1757 return false;
1758
1759 CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph();
1760
1761 if (Coroutines.empty())
1762 return replaceAllPrepares(PrepareFn, CG);
1763
1764 createDevirtTriggerFunc(CG, SCC);
1765
1766 // Split all the coroutines.
1767 for (Function *F : Coroutines) {
1768 Attribute Attr = F->getFnAttribute(CORO_PRESPLIT_ATTR);
1769 StringRef Value = Attr.getValueAsString();
1770 LLVM_DEBUG(dbgs() << "CoroSplit: Processing coroutine '" << F->getName()
1771 << "' state: " << Value << "\n");
1772 if (Value == UNPREPARED_FOR_SPLIT) {
1773 prepareForSplit(*F, CG);
1774 continue;
1775 }
1776 F->removeFnAttr(CORO_PRESPLIT_ATTR);
1777
1778 SmallVector<Function *, 4> Clones;
1779 const coro::Shape Shape = splitCoroutine(*F, Clones);
1780 updateCallGraphAfterCoroutineSplit(*F, Shape, Clones, CG, SCC);
1781 }
1782
1783 if (PrepareFn)
1784 replaceAllPrepares(PrepareFn, CG);
1785
1786 return true;
1787 }
1788
getAnalysisUsage__anonef49c9d50411::CoroSplitLegacy1789 void getAnalysisUsage(AnalysisUsage &AU) const override {
1790 CallGraphSCCPass::getAnalysisUsage(AU);
1791 }
1792
getPassName__anonef49c9d50411::CoroSplitLegacy1793 StringRef getPassName() const override { return "Coroutine Splitting"; }
1794 };
1795
1796 } // end anonymous namespace
1797
// Definition of the pass identification member declared in CoroSplitLegacy;
// the constructor passes it to CallGraphSCCPass.
char CoroSplitLegacy::ID = 0;

// Register the legacy pass under the command-line name "coro-split" and
// declare its dependency on CallGraphWrapperPass, which runOnSCC queries via
// getAnalysis.
// NOTE(review): the two trailing `false` arguments are presumably the macro's
// CFG-only and is-analysis flags — confirm against llvm/PassSupport.h.
INITIALIZE_PASS_BEGIN(
    CoroSplitLegacy, "coro-split",
    "Split coroutine into a set of functions driving its state machine", false,
    false)
INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass)
INITIALIZE_PASS_END(
    CoroSplitLegacy, "coro-split",
    "Split coroutine into a set of functions driving its state machine", false,
    false)
1809
1810 Pass *llvm::createCoroSplitLegacyPass() { return new CoroSplitLegacy(); }
1811