1 //===- BlockExtractor.cpp - Extracts blocks into their own functions ------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass extracts the specified basic blocks from the module into their
10 // own functions.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "llvm/Transforms/IPO/BlockExtractor.h"
15 #include "llvm/ADT/STLExtras.h"
16 #include "llvm/ADT/Statistic.h"
17 #include "llvm/IR/Instructions.h"
18 #include "llvm/IR/Module.h"
19 #include "llvm/IR/PassManager.h"
20 #include "llvm/InitializePasses.h"
21 #include "llvm/Pass.h"
22 #include "llvm/Support/CommandLine.h"
23 #include "llvm/Support/Debug.h"
24 #include "llvm/Support/MemoryBuffer.h"
25 #include "llvm/Transforms/IPO.h"
26 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
27 #include "llvm/Transforms/Utils/CodeExtractor.h"
28 
29 using namespace llvm;
30 
31 #define DEBUG_TYPE "block-extractor"
32 
33 STATISTIC(NumExtracted, "Number of basic blocks extracted");
34 
35 static cl::opt<std::string> BlockExtractorFile(
36     "extract-blocks-file", cl::value_desc("filename"),
37     cl::desc("A file containing list of basic blocks to extract"), cl::Hidden);
38 
39 static cl::opt<bool>
40     BlockExtractorEraseFuncs("extract-blocks-erase-funcs",
41                              cl::desc("Erase the existing functions"),
42                              cl::Hidden);
43 namespace {
44 class BlockExtractor {
45 public:
46   BlockExtractor(bool EraseFunctions) : EraseFunctions(EraseFunctions) {}
47   bool runOnModule(Module &M);
48   void init(const SmallVectorImpl<SmallVector<BasicBlock *, 16>>
49                 &GroupsOfBlocksToExtract) {
50     for (const SmallVectorImpl<BasicBlock *> &GroupOfBlocks :
51          GroupsOfBlocksToExtract) {
52       SmallVector<BasicBlock *, 16> NewGroup;
53       NewGroup.append(GroupOfBlocks.begin(), GroupOfBlocks.end());
54       GroupsOfBlocks.emplace_back(NewGroup);
55     }
56     if (!BlockExtractorFile.empty())
57       loadFile();
58   }
59 
60 private:
61   SmallVector<SmallVector<BasicBlock *, 16>, 4> GroupsOfBlocks;
62   bool EraseFunctions;
63   /// Map a function name to groups of blocks.
64   SmallVector<std::pair<std::string, SmallVector<std::string, 4>>, 4>
65       BlocksByName;
66 
67   void loadFile();
68   void splitLandingPadPreds(Function &F);
69 };
70 
71 class BlockExtractorLegacyPass : public ModulePass {
72   BlockExtractor BE;
73   bool runOnModule(Module &M) override;
74 
75 public:
76   static char ID;
77   BlockExtractorLegacyPass(const SmallVectorImpl<BasicBlock *> &BlocksToExtract,
78                            bool EraseFunctions)
79       : ModulePass(ID), BE(EraseFunctions) {
80     // We want one group per element of the input list.
81     SmallVector<SmallVector<BasicBlock *, 16>, 4> MassagedGroupsOfBlocks;
82     for (BasicBlock *BB : BlocksToExtract) {
83       SmallVector<BasicBlock *, 16> NewGroup;
84       NewGroup.push_back(BB);
85       MassagedGroupsOfBlocks.push_back(NewGroup);
86     }
87     BE.init(MassagedGroupsOfBlocks);
88   }
89 
90   BlockExtractorLegacyPass(const SmallVectorImpl<SmallVector<BasicBlock *, 16>>
91                                &GroupsOfBlocksToExtract,
92                            bool EraseFunctions)
93       : ModulePass(ID), BE(EraseFunctions) {
94     BE.init(GroupsOfBlocksToExtract);
95   }
96 
97   BlockExtractorLegacyPass()
98       : BlockExtractorLegacyPass(SmallVector<BasicBlock *, 0>(), false) {}
99 };
100 
101 } // end anonymous namespace
102 
103 char BlockExtractorLegacyPass::ID = 0;
104 INITIALIZE_PASS(BlockExtractorLegacyPass, "extract-blocks",
105                 "Extract basic blocks from module", false, false)
106 
107 ModulePass *llvm::createBlockExtractorPass() {
108   return new BlockExtractorLegacyPass();
109 }
110 ModulePass *llvm::createBlockExtractorPass(
111     const SmallVectorImpl<BasicBlock *> &BlocksToExtract, bool EraseFunctions) {
112   return new BlockExtractorLegacyPass(BlocksToExtract, EraseFunctions);
113 }
114 ModulePass *llvm::createBlockExtractorPass(
115     const SmallVectorImpl<SmallVector<BasicBlock *, 16>>
116         &GroupsOfBlocksToExtract,
117     bool EraseFunctions) {
118   return new BlockExtractorLegacyPass(GroupsOfBlocksToExtract, EraseFunctions);
119 }
120 
121 /// Gets all of the blocks specified in the input file.
122 void BlockExtractor::loadFile() {
123   auto ErrOrBuf = MemoryBuffer::getFile(BlockExtractorFile);
124   if (ErrOrBuf.getError())
125     report_fatal_error("BlockExtractor couldn't load the file.");
126   // Read the file.
127   auto &Buf = *ErrOrBuf;
128   SmallVector<StringRef, 16> Lines;
129   Buf->getBuffer().split(Lines, '\n', /*MaxSplit=*/-1,
130                          /*KeepEmpty=*/false);
131   for (const auto &Line : Lines) {
132     SmallVector<StringRef, 4> LineSplit;
133     Line.split(LineSplit, ' ', /*MaxSplit=*/-1,
134                /*KeepEmpty=*/false);
135     if (LineSplit.empty())
136       continue;
137     if (LineSplit.size()!=2)
138       report_fatal_error("Invalid line format, expecting lines like: 'funcname bb1[;bb2..]'",
139                          /*GenCrashDiag=*/false);
140     SmallVector<StringRef, 4> BBNames;
141     LineSplit[1].split(BBNames, ';', /*MaxSplit=*/-1,
142                        /*KeepEmpty=*/false);
143     if (BBNames.empty())
144       report_fatal_error("Missing bbs name");
145     BlocksByName.push_back(
146         {std::string(LineSplit[0]), {BBNames.begin(), BBNames.end()}});
147   }
148 }
149 
150 /// Extracts the landing pads to make sure all of them have only one
151 /// predecessor.
152 void BlockExtractor::splitLandingPadPreds(Function &F) {
153   for (BasicBlock &BB : F) {
154     for (Instruction &I : BB) {
155       if (!isa<InvokeInst>(&I))
156         continue;
157       InvokeInst *II = cast<InvokeInst>(&I);
158       BasicBlock *Parent = II->getParent();
159       BasicBlock *LPad = II->getUnwindDest();
160 
161       // Look through the landing pad's predecessors. If one of them ends in an
162       // 'invoke', then we want to split the landing pad.
163       bool Split = false;
164       for (auto PredBB : predecessors(LPad)) {
165         if (PredBB->isLandingPad() && PredBB != Parent &&
166             isa<InvokeInst>(Parent->getTerminator())) {
167           Split = true;
168           break;
169         }
170       }
171 
172       if (!Split)
173         continue;
174 
175       SmallVector<BasicBlock *, 2> NewBBs;
176       SplitLandingPadPredecessors(LPad, Parent, ".1", ".2", NewBBs);
177     }
178   }
179 }
180 
181 bool BlockExtractor::runOnModule(Module &M) {
182 
183   bool Changed = false;
184 
185   // Get all the functions.
186   SmallVector<Function *, 4> Functions;
187   for (Function &F : M) {
188     splitLandingPadPreds(F);
189     Functions.push_back(&F);
190   }
191 
192   // Get all the blocks specified in the input file.
193   unsigned NextGroupIdx = GroupsOfBlocks.size();
194   GroupsOfBlocks.resize(NextGroupIdx + BlocksByName.size());
195   for (const auto &BInfo : BlocksByName) {
196     Function *F = M.getFunction(BInfo.first);
197     if (!F)
198       report_fatal_error("Invalid function name specified in the input file",
199                          /*GenCrashDiag=*/false);
200     for (const auto &BBInfo : BInfo.second) {
201       auto Res = llvm::find_if(*F, [&](const BasicBlock &BB) {
202         return BB.getName().equals(BBInfo);
203       });
204       if (Res == F->end())
205         report_fatal_error("Invalid block name specified in the input file",
206                            /*GenCrashDiag=*/false);
207       GroupsOfBlocks[NextGroupIdx].push_back(&*Res);
208     }
209     ++NextGroupIdx;
210   }
211 
212   // Extract each group of basic blocks.
213   for (auto &BBs : GroupsOfBlocks) {
214     SmallVector<BasicBlock *, 32> BlocksToExtractVec;
215     for (BasicBlock *BB : BBs) {
216       // Check if the module contains BB.
217       if (BB->getParent()->getParent() != &M)
218         report_fatal_error("Invalid basic block", /*GenCrashDiag=*/false);
219       LLVM_DEBUG(dbgs() << "BlockExtractor: Extracting "
220                         << BB->getParent()->getName() << ":" << BB->getName()
221                         << "\n");
222       BlocksToExtractVec.push_back(BB);
223       if (const InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator()))
224         BlocksToExtractVec.push_back(II->getUnwindDest());
225       ++NumExtracted;
226       Changed = true;
227     }
228     CodeExtractorAnalysisCache CEAC(*BBs[0]->getParent());
229     Function *F = CodeExtractor(BlocksToExtractVec).extractCodeRegion(CEAC);
230     if (F)
231       LLVM_DEBUG(dbgs() << "Extracted group '" << (*BBs.begin())->getName()
232                         << "' in: " << F->getName() << '\n');
233     else
234       LLVM_DEBUG(dbgs() << "Failed to extract for group '"
235                         << (*BBs.begin())->getName() << "'\n");
236   }
237 
238   // Erase the functions.
239   if (EraseFunctions || BlockExtractorEraseFuncs) {
240     for (Function *F : Functions) {
241       LLVM_DEBUG(dbgs() << "BlockExtractor: Trying to delete " << F->getName()
242                         << "\n");
243       F->deleteBody();
244     }
245     // Set linkage as ExternalLinkage to avoid erasing unreachable functions.
246     for (Function &F : M)
247       F.setLinkage(GlobalValue::ExternalLinkage);
248     Changed = true;
249   }
250 
251   return Changed;
252 }
253 
254 bool BlockExtractorLegacyPass::runOnModule(Module &M) {
255   return BE.runOnModule(M);
256 }
257 
258 PreservedAnalyses BlockExtractorPass::run(Module &M,
259                                           ModuleAnalysisManager &AM) {
260   BlockExtractor BE(false);
261   BE.init(SmallVector<SmallVector<BasicBlock *, 16>, 0>());
262   return BE.runOnModule(M) ? PreservedAnalyses::none()
263                            : PreservedAnalyses::all();
264 }
265