1 //===- GCOVProfiling.cpp - Insert edge counters for gcov profiling --------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
// This pass implements GCOV-style profiling. When this pass is run it emits
// "gcno" files next to the existing source, and instruments the code so that,
// when it runs, it records the edges between the blocks that execute and
// emits a complementary "gcda" file on exit.
13 //
14 //===----------------------------------------------------------------------===//
15 
16 #include "CFGMST.h"
17 #include "llvm/ADT/DenseMap.h"
18 #include "llvm/ADT/Hashing.h"
19 #include "llvm/ADT/MapVector.h"
20 #include "llvm/ADT/STLExtras.h"
21 #include "llvm/ADT/Sequence.h"
22 #include "llvm/ADT/Statistic.h"
23 #include "llvm/ADT/StringExtras.h"
24 #include "llvm/ADT/StringMap.h"
25 #include "llvm/Analysis/BlockFrequencyInfo.h"
26 #include "llvm/Analysis/BranchProbabilityInfo.h"
27 #include "llvm/Analysis/EHPersonalities.h"
28 #include "llvm/Analysis/TargetLibraryInfo.h"
29 #include "llvm/IR/CFG.h"
30 #include "llvm/IR/DebugInfo.h"
31 #include "llvm/IR/DebugLoc.h"
32 #include "llvm/IR/IRBuilder.h"
33 #include "llvm/IR/InstIterator.h"
34 #include "llvm/IR/Instructions.h"
35 #include "llvm/IR/IntrinsicInst.h"
36 #include "llvm/IR/Module.h"
37 #include "llvm/InitializePasses.h"
38 #include "llvm/Pass.h"
39 #include "llvm/Support/CRC.h"
40 #include "llvm/Support/CommandLine.h"
41 #include "llvm/Support/Debug.h"
42 #include "llvm/Support/FileSystem.h"
43 #include "llvm/Support/Path.h"
44 #include "llvm/Support/Regex.h"
45 #include "llvm/Support/raw_ostream.h"
46 #include "llvm/Transforms/Instrumentation.h"
47 #include "llvm/Transforms/Instrumentation/GCOVProfiler.h"
48 #include "llvm/Transforms/Utils/ModuleUtils.h"
49 #include <algorithm>
50 #include <memory>
51 #include <string>
52 #include <utility>
53 
54 using namespace llvm;
55 namespace endian = llvm::support::endian;
56 
57 #define DEBUG_TYPE "insert-gcov-profiling"
58 
// 32-bit constants used while serializing coverage notes: one arc flag bit
// and the tags that introduce each record kind. The writers in this file emit
// a tag with write() and follow it with the record's length in words.
enum : uint32_t {
  // Flag bit stored alongside an arc's destination block number.
  // NOTE(review): presumably marks arcs that lie on the spanning tree and so
  // carry no counter -- confirm against the code that sets it.
  GCOV_ARC_ON_TREE = 1 << 0,

  // Tag opening a function record (see GCOVFunction::writeOut).
  GCOV_TAG_FUNCTION = 0x01000000,
  // Tag opening the block-count record of a function.
  GCOV_TAG_BLOCKS = 0x01410000,
  // Tag opening the list of outgoing arcs of one block.
  GCOV_TAG_ARCS = 0x01430000,
  // Tag opening the per-block line table (see GCOVBlock::writeOut).
  GCOV_TAG_LINES = 0x01450000,
};
67 
// Hidden command-line option holding the gcov version string that
// GCOVOptions::getDefault() copies into GCOVOptions::Version. getDefault()
// aborts with a fatal error unless the value is exactly four characters long.
static cl::opt<std::string> DefaultGCOVVersion("default-gcov-version",
                                               cl::init("408*"), cl::Hidden,
                                               cl::ValueRequired);

// Hidden command-line option that GCOVOptions::getDefault() copies into
// GCOVOptions::Atomic.
static cl::opt<bool> AtomicCounter("gcov-atomic-counter", cl::Hidden,
                                   cl::desc("Make counter updates atomic"));
74 
75 // Returns the number of words which will be used to represent this string.
wordsOfString(StringRef s)76 static unsigned wordsOfString(StringRef s) {
77   // Length + NUL-terminated string + 0~3 padding NULs.
78   return (s.size() / 4) + 2;
79 }
80 
getDefault()81 GCOVOptions GCOVOptions::getDefault() {
82   GCOVOptions Options;
83   Options.EmitNotes = true;
84   Options.EmitData = true;
85   Options.NoRedZone = false;
86   Options.Atomic = AtomicCounter;
87 
88   if (DefaultGCOVVersion.size() != 4) {
89     llvm::report_fatal_error(std::string("Invalid -default-gcov-version: ") +
90                              DefaultGCOVVersion);
91   }
92   memcpy(Options.Version, DefaultGCOVVersion.c_str(), 4);
93   return Options;
94 }
95 
96 namespace {
97 class GCOVFunction;
98 
99 class GCOVProfiler {
100 public:
GCOVProfiler()101   GCOVProfiler() : GCOVProfiler(GCOVOptions::getDefault()) {}
GCOVProfiler(const GCOVOptions & Opts)102   GCOVProfiler(const GCOVOptions &Opts) : Options(Opts) {}
103   bool
104   runOnModule(Module &M, function_ref<BlockFrequencyInfo *(Function &F)> GetBFI,
105               function_ref<BranchProbabilityInfo *(Function &F)> GetBPI,
106               std::function<const TargetLibraryInfo &(Function &F)> GetTLI);
107 
write(uint32_t i)108   void write(uint32_t i) {
109     char Bytes[4];
110     endian::write32(Bytes, i, Endian);
111     os->write(Bytes, 4);
112   }
writeString(StringRef s)113   void writeString(StringRef s) {
114     write(wordsOfString(s) - 1);
115     os->write(s.data(), s.size());
116     os->write_zeros(4 - s.size() % 4);
117   }
writeBytes(const char * Bytes,int Size)118   void writeBytes(const char *Bytes, int Size) { os->write(Bytes, Size); }
119 
120 private:
121   // Create the .gcno files for the Module based on DebugInfo.
122   bool
123   emitProfileNotes(NamedMDNode *CUNode, bool HasExecOrFork,
124                    function_ref<BlockFrequencyInfo *(Function &F)> GetBFI,
125                    function_ref<BranchProbabilityInfo *(Function &F)> GetBPI,
126                    function_ref<const TargetLibraryInfo &(Function &F)> GetTLI);
127 
128   Function *createInternalFunction(FunctionType *FTy, StringRef Name);
129   void emitGlobalConstructor(
130       SmallVectorImpl<std::pair<GlobalVariable *, MDNode *>> &CountersBySP);
131 
132   bool isFunctionInstrumented(const Function &F);
133   std::vector<Regex> createRegexesFromString(StringRef RegexesStr);
134   static bool doesFilenameMatchARegex(StringRef Filename,
135                                       std::vector<Regex> &Regexes);
136 
137   // Get pointers to the functions in the runtime library.
138   FunctionCallee getStartFileFunc(const TargetLibraryInfo *TLI);
139   FunctionCallee getEmitFunctionFunc(const TargetLibraryInfo *TLI);
140   FunctionCallee getEmitArcsFunc(const TargetLibraryInfo *TLI);
141   FunctionCallee getSummaryInfoFunc();
142   FunctionCallee getEndFileFunc();
143 
144   // Add the function to write out all our counters to the global destructor
145   // list.
146   Function *
147   insertCounterWriteout(ArrayRef<std::pair<GlobalVariable *, MDNode *>>);
148   Function *insertReset(ArrayRef<std::pair<GlobalVariable *, MDNode *>>);
149 
150   bool AddFlushBeforeForkAndExec();
151 
152   enum class GCovFileType { GCNO, GCDA };
153   std::string mangleName(const DICompileUnit *CU, GCovFileType FileType);
154 
155   GCOVOptions Options;
156   support::endianness Endian;
157   raw_ostream *os;
158 
159   // Checksum, produced by hash of EdgeDestinations
160   SmallVector<uint32_t, 4> FileChecksums;
161 
162   Module *M = nullptr;
163   std::function<const TargetLibraryInfo &(Function &F)> GetTLI;
164   LLVMContext *Ctx = nullptr;
165   SmallVector<std::unique_ptr<GCOVFunction>, 16> Funcs;
166   std::vector<Regex> FilterRe;
167   std::vector<Regex> ExcludeRe;
168   DenseSet<const BasicBlock *> ExecBlocks;
169   StringMap<bool> InstrumentedFiles;
170 };
171 
172 class GCOVProfilerLegacyPass : public ModulePass {
173 public:
174   static char ID;
GCOVProfilerLegacyPass()175   GCOVProfilerLegacyPass()
176       : GCOVProfilerLegacyPass(GCOVOptions::getDefault()) {}
GCOVProfilerLegacyPass(const GCOVOptions & Opts)177   GCOVProfilerLegacyPass(const GCOVOptions &Opts)
178       : ModulePass(ID), Profiler(Opts) {
179     initializeGCOVProfilerLegacyPassPass(*PassRegistry::getPassRegistry());
180   }
getPassName() const181   StringRef getPassName() const override { return "GCOV Profiler"; }
182 
runOnModule(Module & M)183   bool runOnModule(Module &M) override {
184     auto GetBFI = [this](Function &F) {
185       return &this->getAnalysis<BlockFrequencyInfoWrapperPass>(F).getBFI();
186     };
187     auto GetBPI = [this](Function &F) {
188       return &this->getAnalysis<BranchProbabilityInfoWrapperPass>(F).getBPI();
189     };
190     auto GetTLI = [this](Function &F) -> const TargetLibraryInfo & {
191       return this->getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
192     };
193     return Profiler.runOnModule(M, GetBFI, GetBPI, GetTLI);
194   }
195 
getAnalysisUsage(AnalysisUsage & AU) const196   void getAnalysisUsage(AnalysisUsage &AU) const override {
197     AU.addRequired<BlockFrequencyInfoWrapperPass>();
198     AU.addRequired<TargetLibraryInfoWrapperPass>();
199   }
200 
201 private:
202   GCOVProfiler Profiler;
203 };
204 
205 struct BBInfo {
206   BBInfo *Group;
207   uint32_t Index;
208   uint32_t Rank = 0;
209 
BBInfo__anonea1724380211::BBInfo210   BBInfo(unsigned Index) : Group(this), Index(Index) {}
infoString__anonea1724380211::BBInfo211   std::string infoString() const {
212     return (Twine("Index=") + Twine(Index)).str();
213   }
214 };
215 
216 struct Edge {
217   // This class implements the CFG edges. Note the CFG can be a multi-graph.
218   // So there might be multiple edges with same SrcBB and DestBB.
219   const BasicBlock *SrcBB;
220   const BasicBlock *DestBB;
221   uint64_t Weight;
222   BasicBlock *Place = nullptr;
223   uint32_t SrcNumber, DstNumber;
224   bool InMST = false;
225   bool Removed = false;
226   bool IsCritical = false;
227 
Edge__anonea1724380211::Edge228   Edge(const BasicBlock *Src, const BasicBlock *Dest, uint64_t W = 1)
229       : SrcBB(Src), DestBB(Dest), Weight(W) {}
230 
231   // Return the information string of an edge.
infoString__anonea1724380211::Edge232   std::string infoString() const {
233     return (Twine(Removed ? "-" : " ") + (InMST ? " " : "*") +
234             (IsCritical ? "c" : " ") + "  W=" + Twine(Weight))
235         .str();
236   }
237 };
238 }
239 
// Storage for the legacy pass identifier, followed by the registration of the
// pass under the name "insert-gcov-profiling" together with the analysis
// passes it lists as dependencies.
char GCOVProfilerLegacyPass::ID = 0;
INITIALIZE_PASS_BEGIN(
    GCOVProfilerLegacyPass, "insert-gcov-profiling",
    "Insert instrumentation for GCOV profiling", false, false)
INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_END(
    GCOVProfilerLegacyPass, "insert-gcov-profiling",
    "Insert instrumentation for GCOV profiling", false, false)
250 
/// Factory for the legacy pass manager: returns a newly allocated
/// GCOVProfilerLegacyPass configured with \p Options.
ModulePass *llvm::createGCOVProfilerPass(const GCOVOptions &Options) {
  return new GCOVProfilerLegacyPass(Options);
}
254 
getFunctionName(const DISubprogram * SP)255 static StringRef getFunctionName(const DISubprogram *SP) {
256   if (!SP->getLinkageName().empty())
257     return SP->getLinkageName();
258   return SP->getName();
259 }
260 
261 /// Extract a filename for a DISubprogram.
262 ///
263 /// Prefer relative paths in the coverage notes. Clang also may split
264 /// up absolute paths into a directory and filename component. When
265 /// the relative path doesn't exist, reconstruct the absolute path.
getFilename(const DISubprogram * SP)266 static SmallString<128> getFilename(const DISubprogram *SP) {
267   SmallString<128> Path;
268   StringRef RelPath = SP->getFilename();
269   if (sys::fs::exists(RelPath))
270     Path = RelPath;
271   else
272     sys::path::append(Path, SP->getDirectory(), SP->getFilename());
273   return Path;
274 }
275 
276 namespace {
  // Common base of the record classes below. It keeps a pointer to the
  // GCOVProfiler and forwards every write to it; it adds no framing itself.
  class GCOVRecord {
  protected:
    // Profiler whose write helpers perform the actual output (not owned).
    GCOVProfiler *P;

    GCOVRecord(GCOVProfiler *P) : P(P) {}

    // Forwarders to the profiler's word / string / raw-byte writers.
    void write(uint32_t i) { P->write(i); }
    void writeString(StringRef s) { P->writeString(s); }
    void writeBytes(const char *Bytes, int Size) { P->writeBytes(Bytes, Size); }
  };
287 
288   class GCOVFunction;
289   class GCOVBlock;
290 
291   // Constructed only by requesting it from a GCOVBlock, this object stores a
292   // list of line numbers and a single filename, representing lines that belong
293   // to the block.
294   class GCOVLines : public GCOVRecord {
295    public:
addLine(uint32_t Line)296     void addLine(uint32_t Line) {
297       assert(Line != 0 && "Line zero is not a valid real line number.");
298       Lines.push_back(Line);
299     }
300 
length() const301     uint32_t length() const {
302       return 1 + wordsOfString(Filename) + Lines.size();
303     }
304 
writeOut()305     void writeOut() {
306       write(0);
307       writeString(Filename);
308       for (int i = 0, e = Lines.size(); i != e; ++i)
309         write(Lines[i]);
310     }
311 
GCOVLines(GCOVProfiler * P,StringRef F)312     GCOVLines(GCOVProfiler *P, StringRef F)
313         : GCOVRecord(P), Filename(std::string(F)) {}
314 
315   private:
316     std::string Filename;
317     SmallVector<uint32_t, 32> Lines;
318   };
319 
320 
321   // Represent a basic block in GCOV. Each block has a unique number in the
322   // function, number of lines belonging to each block, and a set of edges to
323   // other blocks.
324   class GCOVBlock : public GCOVRecord {
325    public:
getFile(StringRef Filename)326     GCOVLines &getFile(StringRef Filename) {
327       return LinesByFile.try_emplace(Filename, P, Filename).first->second;
328     }
329 
addEdge(GCOVBlock & Successor,uint32_t Flags)330     void addEdge(GCOVBlock &Successor, uint32_t Flags) {
331       OutEdges.emplace_back(&Successor, Flags);
332     }
333 
writeOut()334     void writeOut() {
335       uint32_t Len = 3;
336       SmallVector<StringMapEntry<GCOVLines> *, 32> SortedLinesByFile;
337       for (auto &I : LinesByFile) {
338         Len += I.second.length();
339         SortedLinesByFile.push_back(&I);
340       }
341 
342       write(GCOV_TAG_LINES);
343       write(Len);
344       write(Number);
345 
346       llvm::sort(SortedLinesByFile, [](StringMapEntry<GCOVLines> *LHS,
347                                        StringMapEntry<GCOVLines> *RHS) {
348         return LHS->getKey() < RHS->getKey();
349       });
350       for (auto &I : SortedLinesByFile)
351         I->getValue().writeOut();
352       write(0);
353       write(0);
354     }
355 
GCOVBlock(const GCOVBlock & RHS)356     GCOVBlock(const GCOVBlock &RHS) : GCOVRecord(RHS), Number(RHS.Number) {
357       // Only allow copy before edges and lines have been added. After that,
358       // there are inter-block pointers (eg: edges) that won't take kindly to
359       // blocks being copied or moved around.
360       assert(LinesByFile.empty());
361       assert(OutEdges.empty());
362     }
363 
364     uint32_t Number;
365     SmallVector<std::pair<GCOVBlock *, uint32_t>, 4> OutEdges;
366 
367   private:
368     friend class GCOVFunction;
369 
GCOVBlock(GCOVProfiler * P,uint32_t Number)370     GCOVBlock(GCOVProfiler *P, uint32_t Number)
371         : GCOVRecord(P), Number(Number) {}
372 
373     StringMap<GCOVLines> LinesByFile;
374   };
375 
376   // A function has a unique identifier, a checksum (we leave as zero) and a
377   // set of blocks and a map of edges between blocks. This is the only GCOV
378   // object users can construct, the blocks and lines will be rooted here.
379   class GCOVFunction : public GCOVRecord {
380   public:
GCOVFunction(GCOVProfiler * P,Function * F,const DISubprogram * SP,unsigned EndLine,uint32_t Ident,int Version)381     GCOVFunction(GCOVProfiler *P, Function *F, const DISubprogram *SP,
382                  unsigned EndLine, uint32_t Ident, int Version)
383         : GCOVRecord(P), SP(SP), EndLine(EndLine), Ident(Ident),
384           Version(Version), EntryBlock(P, 0), ReturnBlock(P, 1) {
385       LLVM_DEBUG(dbgs() << "Function: " << getFunctionName(SP) << "\n");
386       bool ExitBlockBeforeBody = Version >= 48;
387       uint32_t i = ExitBlockBeforeBody ? 2 : 1;
388       for (BasicBlock &BB : *F)
389         Blocks.insert(std::make_pair(&BB, GCOVBlock(P, i++)));
390       if (!ExitBlockBeforeBody)
391         ReturnBlock.Number = i;
392 
393       std::string FunctionNameAndLine;
394       raw_string_ostream FNLOS(FunctionNameAndLine);
395       FNLOS << getFunctionName(SP) << SP->getLine();
396       FNLOS.flush();
397       FuncChecksum = hash_value(FunctionNameAndLine);
398     }
399 
getBlock(const BasicBlock * BB)400     GCOVBlock &getBlock(const BasicBlock *BB) {
401       return Blocks.find(const_cast<BasicBlock *>(BB))->second;
402     }
403 
getEntryBlock()404     GCOVBlock &getEntryBlock() { return EntryBlock; }
getReturnBlock()405     GCOVBlock &getReturnBlock() {
406       return ReturnBlock;
407     }
408 
getFuncChecksum() const409     uint32_t getFuncChecksum() const {
410       return FuncChecksum;
411     }
412 
writeOut(uint32_t CfgChecksum)413     void writeOut(uint32_t CfgChecksum) {
414       write(GCOV_TAG_FUNCTION);
415       SmallString<128> Filename = getFilename(SP);
416       uint32_t BlockLen =
417           2 + (Version >= 47) + wordsOfString(getFunctionName(SP));
418       if (Version < 80)
419         BlockLen += wordsOfString(Filename) + 1;
420       else
421         BlockLen += 1 + wordsOfString(Filename) + 3 + (Version >= 90);
422 
423       write(BlockLen);
424       write(Ident);
425       write(FuncChecksum);
426       if (Version >= 47)
427         write(CfgChecksum);
428       writeString(getFunctionName(SP));
429       if (Version < 80) {
430         writeString(Filename);
431         write(SP->getLine());
432       } else {
433         write(SP->isArtificial()); // artificial
434         writeString(Filename);
435         write(SP->getLine()); // start_line
436         write(0);             // start_column
437         // EndLine is the last line with !dbg. It is not the } line as in GCC,
438         // but good enough.
439         write(EndLine);
440         if (Version >= 90)
441           write(0); // end_column
442       }
443 
444       // Emit count of blocks.
445       write(GCOV_TAG_BLOCKS);
446       if (Version < 80) {
447         write(Blocks.size() + 2);
448         for (int i = Blocks.size() + 2; i; --i)
449           write(0);
450       } else {
451         write(1);
452         write(Blocks.size() + 2);
453       }
454       LLVM_DEBUG(dbgs() << (Blocks.size() + 1) << " blocks\n");
455 
456       // Emit edges between blocks.
457       const uint32_t Outgoing = EntryBlock.OutEdges.size();
458       if (Outgoing) {
459         write(GCOV_TAG_ARCS);
460         write(Outgoing * 2 + 1);
461         write(EntryBlock.Number);
462         for (const auto &E : EntryBlock.OutEdges) {
463           write(E.first->Number);
464           write(E.second);
465         }
466       }
467       for (auto &It : Blocks) {
468         const GCOVBlock &Block = It.second;
469         if (Block.OutEdges.empty()) continue;
470 
471         write(GCOV_TAG_ARCS);
472         write(Block.OutEdges.size() * 2 + 1);
473         write(Block.Number);
474         for (const auto &E : Block.OutEdges) {
475           write(E.first->Number);
476           write(E.second);
477         }
478       }
479 
480       // Emit lines for each block.
481       for (auto &It : Blocks)
482         It.second.writeOut();
483     }
484 
485   public:
486     const DISubprogram *SP;
487     unsigned EndLine;
488     uint32_t Ident;
489     uint32_t FuncChecksum;
490     int Version;
491     MapVector<BasicBlock *, GCOVBlock> Blocks;
492     GCOVBlock EntryBlock;
493     GCOVBlock ReturnBlock;
494   };
495 }
496 
497 // RegexesStr is a string containing differents regex separated by a semi-colon.
498 // For example "foo\..*$;bar\..*$".
createRegexesFromString(StringRef RegexesStr)499 std::vector<Regex> GCOVProfiler::createRegexesFromString(StringRef RegexesStr) {
500   std::vector<Regex> Regexes;
501   while (!RegexesStr.empty()) {
502     std::pair<StringRef, StringRef> HeadTail = RegexesStr.split(';');
503     if (!HeadTail.first.empty()) {
504       Regex Re(HeadTail.first);
505       std::string Err;
506       if (!Re.isValid(Err)) {
507         Ctx->emitError(Twine("Regex ") + HeadTail.first +
508                        " is not valid: " + Err);
509       }
510       Regexes.emplace_back(std::move(Re));
511     }
512     RegexesStr = HeadTail.second;
513   }
514   return Regexes;
515 }
516 
doesFilenameMatchARegex(StringRef Filename,std::vector<Regex> & Regexes)517 bool GCOVProfiler::doesFilenameMatchARegex(StringRef Filename,
518                                            std::vector<Regex> &Regexes) {
519   for (Regex &Re : Regexes)
520     if (Re.match(Filename))
521       return true;
522   return false;
523 }
524 
isFunctionInstrumented(const Function & F)525 bool GCOVProfiler::isFunctionInstrumented(const Function &F) {
526   if (FilterRe.empty() && ExcludeRe.empty()) {
527     return true;
528   }
529   SmallString<128> Filename = getFilename(F.getSubprogram());
530   auto It = InstrumentedFiles.find(Filename);
531   if (It != InstrumentedFiles.end()) {
532     return It->second;
533   }
534 
535   SmallString<256> RealPath;
536   StringRef RealFilename;
537 
538   // Path can be
539   // /usr/lib/gcc/x86_64-linux-gnu/8/../../../../include/c++/8/bits/*.h so for
540   // such a case we must get the real_path.
541   if (sys::fs::real_path(Filename, RealPath)) {
542     // real_path can fail with path like "foo.c".
543     RealFilename = Filename;
544   } else {
545     RealFilename = RealPath;
546   }
547 
548   bool ShouldInstrument;
549   if (FilterRe.empty()) {
550     ShouldInstrument = !doesFilenameMatchARegex(RealFilename, ExcludeRe);
551   } else if (ExcludeRe.empty()) {
552     ShouldInstrument = doesFilenameMatchARegex(RealFilename, FilterRe);
553   } else {
554     ShouldInstrument = doesFilenameMatchARegex(RealFilename, FilterRe) &&
555                        !doesFilenameMatchARegex(RealFilename, ExcludeRe);
556   }
557   InstrumentedFiles[Filename] = ShouldInstrument;
558   return ShouldInstrument;
559 }
560 
mangleName(const DICompileUnit * CU,GCovFileType OutputType)561 std::string GCOVProfiler::mangleName(const DICompileUnit *CU,
562                                      GCovFileType OutputType) {
563   bool Notes = OutputType == GCovFileType::GCNO;
564 
565   if (NamedMDNode *GCov = M->getNamedMetadata("llvm.gcov")) {
566     for (int i = 0, e = GCov->getNumOperands(); i != e; ++i) {
567       MDNode *N = GCov->getOperand(i);
568       bool ThreeElement = N->getNumOperands() == 3;
569       if (!ThreeElement && N->getNumOperands() != 2)
570         continue;
571       if (dyn_cast<MDNode>(N->getOperand(ThreeElement ? 2 : 1)) != CU)
572         continue;
573 
574       if (ThreeElement) {
575         // These nodes have no mangling to apply, it's stored mangled in the
576         // bitcode.
577         MDString *NotesFile = dyn_cast<MDString>(N->getOperand(0));
578         MDString *DataFile = dyn_cast<MDString>(N->getOperand(1));
579         if (!NotesFile || !DataFile)
580           continue;
581         return std::string(Notes ? NotesFile->getString()
582                                  : DataFile->getString());
583       }
584 
585       MDString *GCovFile = dyn_cast<MDString>(N->getOperand(0));
586       if (!GCovFile)
587         continue;
588 
589       SmallString<128> Filename = GCovFile->getString();
590       sys::path::replace_extension(Filename, Notes ? "gcno" : "gcda");
591       return std::string(Filename.str());
592     }
593   }
594 
595   SmallString<128> Filename = CU->getFilename();
596   sys::path::replace_extension(Filename, Notes ? "gcno" : "gcda");
597   StringRef FName = sys::path::filename(Filename);
598   SmallString<128> CurPath;
599   if (sys::fs::current_path(CurPath))
600     return std::string(FName);
601   sys::path::append(CurPath, FName);
602   return std::string(CurPath.str());
603 }
604 
runOnModule(Module & M,function_ref<BlockFrequencyInfo * (Function & F)> GetBFI,function_ref<BranchProbabilityInfo * (Function & F)> GetBPI,std::function<const TargetLibraryInfo & (Function & F)> GetTLI)605 bool GCOVProfiler::runOnModule(
606     Module &M, function_ref<BlockFrequencyInfo *(Function &F)> GetBFI,
607     function_ref<BranchProbabilityInfo *(Function &F)> GetBPI,
608     std::function<const TargetLibraryInfo &(Function &F)> GetTLI) {
609   this->M = &M;
610   this->GetTLI = std::move(GetTLI);
611   Ctx = &M.getContext();
612 
613   NamedMDNode *CUNode = M.getNamedMetadata("llvm.dbg.cu");
614   if (!CUNode || (!Options.EmitNotes && !Options.EmitData))
615     return false;
616 
617   bool HasExecOrFork = AddFlushBeforeForkAndExec();
618 
619   FilterRe = createRegexesFromString(Options.Filter);
620   ExcludeRe = createRegexesFromString(Options.Exclude);
621   emitProfileNotes(CUNode, HasExecOrFork, GetBFI, GetBPI, this->GetTLI);
622   return true;
623 }
624 
run(Module & M,ModuleAnalysisManager & AM)625 PreservedAnalyses GCOVProfilerPass::run(Module &M,
626                                         ModuleAnalysisManager &AM) {
627 
628   GCOVProfiler Profiler(GCOVOpts);
629   FunctionAnalysisManager &FAM =
630       AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
631 
632   auto GetBFI = [&FAM](Function &F) {
633     return &FAM.getResult<BlockFrequencyAnalysis>(F);
634   };
635   auto GetBPI = [&FAM](Function &F) {
636     return &FAM.getResult<BranchProbabilityAnalysis>(F);
637   };
638   auto GetTLI = [&FAM](Function &F) -> const TargetLibraryInfo & {
639     return FAM.getResult<TargetLibraryAnalysis>(F);
640   };
641 
642   if (!Profiler.runOnModule(M, GetBFI, GetBPI, GetTLI))
643     return PreservedAnalyses::all();
644 
645   return PreservedAnalyses::none();
646 }
647 
functionHasLines(const Function & F,unsigned & EndLine)648 static bool functionHasLines(const Function &F, unsigned &EndLine) {
649   // Check whether this function actually has any source lines. Not only
650   // do these waste space, they also can crash gcov.
651   EndLine = 0;
652   for (auto &BB : F) {
653     for (auto &I : BB) {
654       // Debug intrinsic locations correspond to the location of the
655       // declaration, not necessarily any statements or expressions.
656       if (isa<DbgInfoIntrinsic>(&I)) continue;
657 
658       const DebugLoc &Loc = I.getDebugLoc();
659       if (!Loc)
660         continue;
661 
662       // Artificial lines such as calls to the global constructors.
663       if (Loc.getLine() == 0) continue;
664       EndLine = std::max(EndLine, Loc.getLine());
665 
666       return true;
667     }
668   }
669   return false;
670 }
671 
isUsingScopeBasedEH(Function & F)672 static bool isUsingScopeBasedEH(Function &F) {
673   if (!F.hasPersonalityFn()) return false;
674 
675   EHPersonality Personality = classifyEHPersonality(F.getPersonalityFn());
676   return isScopedEHPersonality(Personality);
677 }
678 
// Finds every direct call to fork() and to the exec*() family in the module
// and rewrites the surrounding code:
//  - a fork() call is redirected to "__gcov_fork" and its block is split
//    right after the call;
//  - an exec*() call is preceded by a call to "llvm_writeout_files" and
//    followed by a call to "llvm_reset_counters", its block is recorded in
//    ExecBlocks and split after the exec call.
// Returns true if at least one such call was found.
bool GCOVProfiler::AddFlushBeforeForkAndExec() {
  // Collect the calls first; the rewriting below splits blocks, which must
  // not happen while iterating over the instructions.
  SmallVector<CallInst *, 2> Forks;
  SmallVector<CallInst *, 2> Execs;
  for (auto &F : M->functions()) {
    auto *TLI = &GetTLI(F);
    for (auto &I : instructions(F)) {
      if (CallInst *CI = dyn_cast<CallInst>(&I)) {
        // Only direct calls to known library functions are considered.
        if (Function *Callee = CI->getCalledFunction()) {
          LibFunc LF;
          if (TLI->getLibFunc(*Callee, LF)) {
            if (LF == LibFunc_fork) {
              // fork() calls are left untouched when this file is built for
              // Windows.
#if !defined(_WIN32)
              Forks.push_back(CI);
#endif
            } else if (LF == LibFunc_execl || LF == LibFunc_execle ||
                       LF == LibFunc_execlp || LF == LibFunc_execv ||
                       LF == LibFunc_execvp || LF == LibFunc_execve ||
                       LF == LibFunc_execvpe || LF == LibFunc_execvP) {
              Execs.push_back(CI);
            }
          }
        }
      }
    }
  }

  for (auto F : Forks) {
    IRBuilder<> Builder(F);
    BasicBlock *Parent = F->getParent();
    // Instruction following the fork call; the block is split here.
    auto NextInst = ++F->getIterator();

    // We've a fork so just reset the counters in the child process
    FunctionType *FTy = FunctionType::get(Builder.getInt32Ty(), {}, false);
    FunctionCallee GCOVFork = M->getOrInsertFunction("__gcov_fork", FTy);
    F->setCalledFunction(GCOVFork);

    // We split just after the fork to have a counter for the lines after
    // Anyway there's a bug:
    // void foo() { fork(); }
    // void bar() { foo(); blah(); }
    // then "blah();" will be called 2 times but showed as 1
    // because "blah()" belongs to the same block as "foo();"
    Parent->splitBasicBlock(NextInst);

    // back() is the br instruction created by the split; it carries the
    // debug location of the instruction after the fork. To avoid having
    // that location on two blocks, give the branch the fork's location.
    DebugLoc Loc = F->getDebugLoc();
    Parent->back().setDebugLoc(Loc);
  }

  for (auto E : Execs) {
    // The builder starts out inserting right before the exec call.
    IRBuilder<> Builder(E);
    BasicBlock *Parent = E->getParent();
    auto NextInst = ++E->getIterator();

    // Since the process is replaced by a new one we need to write out gcdas
    // No need to reset the counters since they'll be lost after the exec**
    FunctionType *FTy = FunctionType::get(Builder.getVoidTy(), {}, false);
    FunctionCallee WriteoutF =
        M->getOrInsertFunction("llvm_writeout_files", FTy);
    Builder.CreateCall(WriteoutF);

    DebugLoc Loc = E->getDebugLoc();
    // From here on, insert after the exec call.
    Builder.SetInsertPoint(&*NextInst);
    // If the exec** fails we must reset the counters since they've been
    // dumped
    FunctionCallee ResetF = M->getOrInsertFunction("llvm_reset_counters", FTy);
    Builder.CreateCall(ResetF)->setDebugLoc(Loc);
    // Remember the block holding the exec call; getInstrBB() consults this
    // set when choosing where to place counters.
    ExecBlocks.insert(Parent);
    Parent->splitBasicBlock(NextInst);
    // As for fork: the branch created by the split gets the exec's location.
    Parent->back().setDebugLoc(Loc);
  }

  return !Forks.empty() || !Execs.empty();
}
755 
/// Chooses the basic block that should hold the counter for edge \p E.
///
/// Returns nullptr when the edge needs no counter (it is in the MST or has
/// been removed), when the chosen block has no insertion point, or when a
/// critical edge cannot be split. Splitting a critical edge adds two new
/// edges to \p MST and marks \p E as removed.
///
/// \p ExecBlocks lists blocks containing an exec*() call; such a block is
/// never picked through the single-successor shortcut.
static BasicBlock *getInstrBB(CFGMST<Edge, BBInfo> &MST, Edge &E,
                              const DenseSet<const BasicBlock *> &ExecBlocks) {
  if (E.InMST || E.Removed)
    return nullptr;

  BasicBlock *SrcBB = const_cast<BasicBlock *>(E.SrcBB);
  BasicBlock *DestBB = const_cast<BasicBlock *>(E.DestBB);
  // For a fake edge, instrument the real BB.
  if (SrcBB == nullptr)
    return DestBB;
  if (DestBB == nullptr)
    return SrcBB;

  auto CanInstrument = [](BasicBlock *BB) -> BasicBlock * {
    // There are basic blocks (such as catchswitch) cannot be instrumented.
    // If the returned first insertion point is the end of BB, skip this BB.
    if (BB->getFirstInsertionPt() == BB->end())
      return nullptr;
    return BB;
  };

  // Instrument the SrcBB if it has a single successor,
  // otherwise, the DestBB if this is not a critical edge.
  Instruction *TI = SrcBB->getTerminator();
  if (TI->getNumSuccessors() <= 1 && !ExecBlocks.count(SrcBB))
    return CanInstrument(SrcBB);
  if (!E.IsCritical)
    return CanInstrument(DestBB);

  // Some IndirectBr critical edges cannot be split by the previous
  // SplitIndirectBrCriticalEdges call. Bail out.
  const unsigned SuccNum = GetSuccessorNumber(SrcBB, DestBB);
  BasicBlock *InstrBB =
      isa<IndirectBrInst>(TI) ? nullptr : SplitCriticalEdge(TI, SuccNum);
  if (!InstrBB)
    return nullptr;

  // The split replaced Src->Dest by Src->InstrBB->Dest. Register both new
  // edges, put the second one in the MST, and retire the original edge.
  MST.addEdge(SrcBB, InstrBB, 0);
  MST.addEdge(InstrBB, DestBB, 0).InMST = true;
  E.Removed = true;

  return CanInstrument(InstrBB);
}
799 
800 #ifndef NDEBUG
dumpEdges(CFGMST<Edge,BBInfo> & MST,GCOVFunction & GF)801 static void dumpEdges(CFGMST<Edge, BBInfo> &MST, GCOVFunction &GF) {
802   size_t ID = 0;
803   for (auto &E : make_pointee_range(MST.AllEdges)) {
804     GCOVBlock &Src = E.SrcBB ? GF.getBlock(E.SrcBB) : GF.getEntryBlock();
805     GCOVBlock &Dst = E.DestBB ? GF.getBlock(E.DestBB) : GF.getReturnBlock();
806     dbgs() << "  Edge " << ID++ << ": " << Src.Number << "->" << Dst.Number
807            << E.infoString() << "\n";
808   }
809 }
810 #endif
811 
/// Collects the gcov notes for every compile unit referenced by \p CUNode and,
/// when Options.EmitData is set, instruments the module with edge counters.
///
/// For each non-skeleton compile unit the function walks all functions of the
/// module, builds a spanning tree over the function's CFG (CFGMST), asks
/// getInstrBB() for a block to instrument per edge, records blocks, arcs and
/// source lines in a new GCOVFunction, and folds the arc destination numbers
/// into a CRC that is appended to FileChecksums. When Options.EmitNotes is set
/// the collected functions are written to the file named by
/// mangleName(CU, GCovFileType::GCNO).
///
/// \param CUNode         named metadata whose operands are DICompileUnits.
/// \param HasExecOrFork  when true, InMST is cleared on every edge before
///                       placement, disabling the spanning tree optimization.
/// \param GetBFI         callback yielding the BlockFrequencyInfo of a
///                       function.
/// \param GetBPI         callback yielding the BranchProbabilityInfo of a
///                       function.
/// \param GetTLI         not referenced inside this function body.
/// \returns always true.
bool GCOVProfiler::emitProfileNotes(
    NamedMDNode *CUNode, bool HasExecOrFork,
    function_ref<BlockFrequencyInfo *(Function &F)> GetBFI,
    function_ref<BranchProbabilityInfo *(Function &F)> GetBPI,
    function_ref<const TargetLibraryInfo &(Function &F)> GetTLI) {
  // Turn the first three characters of Options.Version into a number. A
  // leading letter selects the three-component formula; a leading digit
  // combines only the first and third character (the second is ignored), so
  // e.g. "408*" yields 48.
  int Version;
  {
    uint8_t c3 = Options.Version[0];
    uint8_t c2 = Options.Version[1];
    uint8_t c1 = Options.Version[2];
    Version = c3 >= 'A' ? (c3 - 'A') * 100 + (c2 - '0') * 10 + c1 - '0'
                        : (c3 - '0') * 10 + c1 - '0';
  }

  // Counter instrumentation and the global constructor are emitted only while
  // this flag is set; it is cleared after the first compile unit that reaches
  // the end of the loop body.
  bool EmitGCDA = Options.EmitData;
  for (unsigned i = 0, e = CUNode->getNumOperands(); i != e; ++i) {
    // Each compile unit gets its own .gcno file. This means that whether we run
    // this pass over the original .o's as they're produced, or run it after
    // LTO, we'll generate the same .gcno files.

    auto *CU = cast<DICompileUnit>(CUNode->getOperand(i));

    // Skip module skeleton (and module) CUs.
    // NOTE(review): skipped CUs do not push an entry onto FileChecksums.
    if (CU->getDWOId())
      continue;

    // Byte stream of arc destination block numbers; input to the CRC stamp.
    std::vector<uint8_t> EdgeDestinations;
    // (counter array, subprogram) pairs handed to emitGlobalConstructor().
    SmallVector<std::pair<GlobalVariable *, MDNode *>, 8> CountersBySP;

    Endian = M->getDataLayout().isLittleEndian() ? support::endianness::little
                                                 : support::endianness::big;
    // Identifier passed to each GCOVFunction; restarts at 0 for every CU.
    unsigned FunctionIdent = 0;
    // NOTE(review): this iterates over every function of the module for each
    // compile unit, not only the functions belonging to CU — confirm this is
    // intended when the module has several CUs.
    for (auto &F : M->functions()) {
      DISubprogram *SP = F.getSubprogram();
      // Passed to functionHasLines() below; presumably an out-parameter that
      // receives the function's last line — TODO confirm.
      unsigned EndLine;
      if (!SP) continue;
      if (!functionHasLines(F, EndLine) || !isFunctionInstrumented(F))
        continue;
      // TODO: Functions using scope-based EH are currently not supported.
      if (isUsingScopeBasedEH(F)) continue;
      if (F.hasFnAttribute(llvm::Attribute::NoProfile))
        continue;

      // Add the function line number to the lines of the entry block
      // to have a counter for the function definition.
      uint32_t Line = SP->getLine();
      auto Filename = getFilename(SP);

      BranchProbabilityInfo *BPI = GetBPI(F);
      BlockFrequencyInfo *BFI = GetBFI(F);

      // Split indirectbr critical edges here before computing the MST rather
      // than later in getInstrBB() to avoid invalidating it.
      SplitIndirectBrCriticalEdges(F, BPI, BFI);

      CFGMST<Edge, BBInfo> MST(F, /*InstrumentFuncEntry_=*/false, BPI, BFI);

      // getInstrBB can split basic blocks and push elements to AllEdges.
      // The bound is therefore computed once, so edges appended during the
      // loop are not visited, and elements are accessed by index.
      for (size_t I : llvm::seq<size_t>(0, MST.AllEdges.size())) {
        auto &E = *MST.AllEdges[I];
        // For now, disable spanning tree optimization when fork or exec* is
        // used.
        if (HasExecOrFork)
          E.InMST = false;
        E.Place = getInstrBB(MST, E, ExecBlocks);
      }
      // Basic blocks in F are finalized at this point.
      BasicBlock &EntryBlock = F.getEntryBlock();
      Funcs.push_back(std::make_unique<GCOVFunction>(this, &F, SP, EndLine,
                                                     FunctionIdent++, Version));
      GCOVFunction &Func = *Funcs.back();

      // Some non-tree edges are IndirectBr which cannot be split. Ignore them
      // as well.
      // Drops edges flagged Removed and edges that are neither in the MST nor
      // have an instrumentation block.
      llvm::erase_if(MST.AllEdges, [](std::unique_ptr<Edge> &E) {
        return E->Removed || (!E->InMST && !E->Place);
      });
      // Move the edges that have an instrumentation block to the front;
      // Measured is their count and later the length of the counter array.
      const size_t Measured =
          std::stable_partition(
              MST.AllEdges.begin(), MST.AllEdges.end(),
              [](std::unique_ptr<Edge> &E) { return E->Place; }) -
          MST.AllEdges.begin();
      // Cache the GCOV block numbers of both endpoints on each measured edge.
      // A null SrcBB maps to the entry block, a null DestBB to the return
      // block.
      for (size_t I : llvm::seq<size_t>(0, Measured)) {
        Edge &E = *MST.AllEdges[I];
        GCOVBlock &Src =
            E.SrcBB ? Func.getBlock(E.SrcBB) : Func.getEntryBlock();
        GCOVBlock &Dst =
            E.DestBB ? Func.getBlock(E.DestBB) : Func.getReturnBlock();
        E.SrcNumber = Src.Number;
        E.DstNumber = Dst.Number;
      }
      // Order the measured edges by (source number, destination number). The
      // resulting position of an edge is its index into the counter array.
      std::stable_sort(
          MST.AllEdges.begin(), MST.AllEdges.begin() + Measured,
          [](const std::unique_ptr<Edge> &L, const std::unique_ptr<Edge> &R) {
            return L->SrcNumber != R->SrcNumber ? L->SrcNumber < R->SrcNumber
                                                : L->DstNumber < R->DstNumber;
          });

      // Register every remaining edge as an arc. Edges without an
      // instrumentation block are flagged GCOV_ARC_ON_TREE, the others get
      // flag 0.
      for (const Edge &E : make_pointee_range(MST.AllEdges)) {
        GCOVBlock &Src =
            E.SrcBB ? Func.getBlock(E.SrcBB) : Func.getEntryBlock();
        GCOVBlock &Dst =
            E.DestBB ? Func.getBlock(E.DestBB) : Func.getReturnBlock();
        Src.addEdge(Dst, E.Place ? 0 : uint32_t(GCOV_ARC_ON_TREE));
      }

      // Artificial functions such as global initializers
      // do not get the definition line attached to their entry block.
      if (!SP->isArtificial())
        Func.getBlock(&EntryBlock).getFile(Filename).addLine(Line);

      LLVM_DEBUG(dumpEdges(MST, Func));

      for (auto &GB : Func.Blocks) {
        const BasicBlock &BB = *GB.first;
        auto &Block = GB.second;
        // Append each successor's block number to the checksum input, least
        // significant byte first, using as many bytes as the value needs (at
        // least one).
        for (auto Succ : Block.OutEdges) {
          uint32_t Idx = Succ.first->Number;
          do EdgeDestinations.push_back(Idx & 255);
          while ((Idx >>= 8) > 0);
        }

        // Record the source lines of the block. `Line` holds the most
        // recently seen line so that runs of instructions on the same line
        // add it only once.
        for (auto &I : BB) {
          // Debug intrinsic locations correspond to the location of the
          // declaration, not necessarily any statements or expressions.
          if (isa<DbgInfoIntrinsic>(&I)) continue;

          const DebugLoc &Loc = I.getDebugLoc();
          if (!Loc)
            continue;

          // Artificial lines such as calls to the global constructors.
          if (Loc.getLine() == 0 || Loc.isImplicitCode())
            continue;

          if (Line == Loc.getLine()) continue;
          Line = Loc.getLine();
          // Locations whose scope resolves to a different subprogram update
          // `Line` (above) but are not recorded.
          if (SP != getDISubprogram(Loc.getScope()))
            continue;

          GCOVLines &Lines = Block.getFile(Filename);
          Lines.addLine(Loc.getLine());
        }
        // Forget the last line at the block boundary, so the first line of
        // the next block is always a candidate.
        Line = 0;
      }
      if (EmitGCDA) {
        // Note: this redeclares SP with the same value as the outer SP.
        DISubprogram *SP = F.getSubprogram();
        // One zero-initialized i64 counter per measured edge.
        ArrayType *CounterTy = ArrayType::get(Type::getInt64Ty(*Ctx), Measured);
        GlobalVariable *Counters = new GlobalVariable(
            *M, CounterTy, false, GlobalValue::InternalLinkage,
            Constant::getNullValue(CounterTy), "__llvm_gcov_ctr");
        CountersBySP.emplace_back(Counters, SP);

        // Insert "counter[I] += 1" at the first insertion point of each
        // measured edge's instrumentation block.
        for (size_t I : llvm::seq<size_t>(0, Measured)) {
          const Edge &E = *MST.AllEdges[I];
          IRBuilder<> Builder(E.Place, E.Place->getFirstInsertionPt());
          Value *V = Builder.CreateConstInBoundsGEP2_64(
              Counters->getValueType(), Counters, 0, I);
          if (Options.Atomic) {
            // Single atomic add with monotonic ordering.
            Builder.CreateAtomicRMW(AtomicRMWInst::Add, V, Builder.getInt64(1),
                                    MaybeAlign(), AtomicOrdering::Monotonic);
          } else {
            // Plain load / add / store sequence.
            Value *Count =
                Builder.CreateLoad(Builder.getInt64Ty(), V, "gcov_ctr");
            Count = Builder.CreateAdd(Count, Builder.getInt64(1));
            Builder.CreateStore(Count, V);
          }
        }
      }
    }

    // The stamp of this compile unit is the JamCRC of the arc destination
    // bytes gathered above.
    char Tmp[4];
    JamCRC JC;
    JC.update(EdgeDestinations);
    uint32_t Stamp = JC.getCRC();
    FileChecksums.push_back(Stamp);

    if (Options.EmitNotes) {
      std::error_code EC;
      raw_fd_ostream out(mangleName(CU, GCovFileType::GCNO), EC,
                         sys::fs::OF_None);
      if (EC) {
        Ctx->emitError(
            Twine("failed to open coverage notes file for writing: ") +
            EC.message());
        // NOTE(review): this also skips the emitGlobalConstructor() call at
        // the end of the loop body for this compile unit.
        continue;
      }
      // NOTE(review): `os` keeps pointing at this local stream after the
      // enclosing block ends; presumably write()/writeString() use it only
      // while `out` is alive — confirm.
      os = &out;
      // Magic and version are written as-is for big-endian targets and
      // byte-reversed for little-endian ones.
      if (Endian == support::endianness::big) {
        out.write("gcno", 4);
        out.write(Options.Version, 4);
      } else {
        out.write("oncg", 4);
        std::reverse_copy(Options.Version, Options.Version + 4, Tmp);
        out.write(Tmp, 4);
      }
      write(Stamp);
      if (Version >= 90)
        writeString(""); // current_working_directory, left empty
      if (Version >= 80)
        write(0); // has_unexecuted_blocks, always written as 0

      // NOTE(review): Funcs is appended to for every compile unit and written
      // out in full here — confirm it is meant to accumulate across CUs.
      for (auto &Func : Funcs)
        Func->writeOut(Stamp);

      // Two trailing zero words; presumably an end-of-file marker — confirm
      // against the gcno format.
      write(0);
      write(0);
      out.close();
    }

    // Emit the writeout/reset functions and the constructor once.
    if (EmitGCDA) {
      emitGlobalConstructor(CountersBySP);
      EmitGCDA = false;
    }
  }
  return true;
}
1028 
createInternalFunction(FunctionType * FTy,StringRef Name)1029 Function *GCOVProfiler::createInternalFunction(FunctionType *FTy,
1030                                                StringRef Name) {
1031   Function *F = Function::createWithDefaultAttr(
1032       FTy, GlobalValue::InternalLinkage, 0, Name, M);
1033   F->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
1034   F->addFnAttr(Attribute::NoUnwind);
1035   if (Options.NoRedZone)
1036     F->addFnAttr(Attribute::NoRedZone);
1037   return F;
1038 }
1039 
emitGlobalConstructor(SmallVectorImpl<std::pair<GlobalVariable *,MDNode * >> & CountersBySP)1040 void GCOVProfiler::emitGlobalConstructor(
1041     SmallVectorImpl<std::pair<GlobalVariable *, MDNode *>> &CountersBySP) {
1042   Function *WriteoutF = insertCounterWriteout(CountersBySP);
1043   Function *ResetF = insertReset(CountersBySP);
1044 
1045   // Create a small bit of code that registers the "__llvm_gcov_writeout" to
1046   // be executed at exit and the "__llvm_gcov_reset" function to be executed
1047   // when "__gcov_flush" is called.
1048   FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), false);
1049   Function *F = createInternalFunction(FTy, "__llvm_gcov_init");
1050   F->addFnAttr(Attribute::NoInline);
1051 
1052   BasicBlock *BB = BasicBlock::Create(*Ctx, "entry", F);
1053   IRBuilder<> Builder(BB);
1054 
1055   FTy = FunctionType::get(Type::getVoidTy(*Ctx), false);
1056   auto *PFTy = PointerType::get(FTy, 0);
1057   FTy = FunctionType::get(Builder.getVoidTy(), {PFTy, PFTy}, false);
1058 
1059   // Initialize the environment and register the local writeout, flush and
1060   // reset functions.
1061   FunctionCallee GCOVInit = M->getOrInsertFunction("llvm_gcov_init", FTy);
1062   Builder.CreateCall(GCOVInit, {WriteoutF, ResetF});
1063   Builder.CreateRetVoid();
1064 
1065   appendToGlobalCtors(*M, F, 0);
1066 }
1067 
getStartFileFunc(const TargetLibraryInfo * TLI)1068 FunctionCallee GCOVProfiler::getStartFileFunc(const TargetLibraryInfo *TLI) {
1069   Type *Args[] = {
1070       Type::getInt8PtrTy(*Ctx), // const char *orig_filename
1071       Type::getInt32Ty(*Ctx),   // uint32_t version
1072       Type::getInt32Ty(*Ctx),   // uint32_t checksum
1073   };
1074   FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), Args, false);
1075   AttributeList AL;
1076   if (auto AK = TLI->getExtAttrForI32Param(false))
1077     AL = AL.addParamAttribute(*Ctx, 2, AK);
1078   FunctionCallee Res = M->getOrInsertFunction("llvm_gcda_start_file", FTy, AL);
1079   return Res;
1080 }
1081 
getEmitFunctionFunc(const TargetLibraryInfo * TLI)1082 FunctionCallee GCOVProfiler::getEmitFunctionFunc(const TargetLibraryInfo *TLI) {
1083   Type *Args[] = {
1084     Type::getInt32Ty(*Ctx),    // uint32_t ident
1085     Type::getInt32Ty(*Ctx),    // uint32_t func_checksum
1086     Type::getInt32Ty(*Ctx),    // uint32_t cfg_checksum
1087   };
1088   FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), Args, false);
1089   AttributeList AL;
1090   if (auto AK = TLI->getExtAttrForI32Param(false)) {
1091     AL = AL.addParamAttribute(*Ctx, 0, AK);
1092     AL = AL.addParamAttribute(*Ctx, 1, AK);
1093     AL = AL.addParamAttribute(*Ctx, 2, AK);
1094   }
1095   return M->getOrInsertFunction("llvm_gcda_emit_function", FTy);
1096 }
1097 
getEmitArcsFunc(const TargetLibraryInfo * TLI)1098 FunctionCallee GCOVProfiler::getEmitArcsFunc(const TargetLibraryInfo *TLI) {
1099   Type *Args[] = {
1100     Type::getInt32Ty(*Ctx),     // uint32_t num_counters
1101     Type::getInt64PtrTy(*Ctx),  // uint64_t *counters
1102   };
1103   FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), Args, false);
1104   AttributeList AL;
1105   if (auto AK = TLI->getExtAttrForI32Param(false))
1106     AL = AL.addParamAttribute(*Ctx, 0, AK);
1107   return M->getOrInsertFunction("llvm_gcda_emit_arcs", FTy, AL);
1108 }
1109 
getSummaryInfoFunc()1110 FunctionCallee GCOVProfiler::getSummaryInfoFunc() {
1111   FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), false);
1112   return M->getOrInsertFunction("llvm_gcda_summary_info", FTy);
1113 }
1114 
getEndFileFunc()1115 FunctionCallee GCOVProfiler::getEndFileFunc() {
1116   FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), false);
1117   return M->getOrInsertFunction("llvm_gcda_end_file", FTy);
1118 }
1119 
/// Creates (or reuses) the function "__llvm_gcov_writeout" and fills it with
/// code that calls the llvm_gcda_* runtime hooks for every non-skeleton
/// compile unit in "llvm.dbg.cu" and every counter array in \p CountersBySP.
///
/// The call arguments are emitted as constant global arrays (one file_info
/// record per compile unit, each pointing at per-function argument arrays).
/// The generated function is a two-level loop: the outer loop walks the file
/// records, the inner loop walks the counter records of the current file.
///
/// \param CountersBySP  (counter array global, subprogram) pairs; only the
///                      global of each pair is read here.
/// \returns the writeout function.
Function *GCOVProfiler::insertCounterWriteout(
    ArrayRef<std::pair<GlobalVariable *, MDNode *> > CountersBySP) {
  FunctionType *WriteoutFTy = FunctionType::get(Type::getVoidTy(*Ctx), false);
  // Reuse an existing function of that name if the module already has one.
  Function *WriteoutF = M->getFunction("__llvm_gcov_writeout");
  if (!WriteoutF)
    WriteoutF = createInternalFunction(WriteoutFTy, "__llvm_gcov_writeout");
  WriteoutF->addFnAttr(Attribute::NoInline);

  BasicBlock *BB = BasicBlock::Create(*Ctx, "entry", WriteoutF);
  IRBuilder<> Builder(BB);

  auto *TLI = &GetTLI(*WriteoutF);

  // Runtime hooks called from the generated loops.
  FunctionCallee StartFile = getStartFileFunc(TLI);
  FunctionCallee EmitFunction = getEmitFunctionFunc(TLI);
  FunctionCallee EmitArcs = getEmitArcsFunc(TLI);
  FunctionCallee SummaryInfo = getSummaryInfoFunc();
  FunctionCallee EndFile = getEndFileFunc();

  // Without compile units there is nothing to write; emit an empty body.
  NamedMDNode *CUNodes = M->getNamedMetadata("llvm.dbg.cu");
  if (!CUNodes) {
    Builder.CreateRetVoid();
    return WriteoutF;
  }

  // Collect the relevant data into a large constant data structure that we can
  // walk to write out everything.
  // start_file_args_ty:    { i8* filename, i32 version, i32 stamp }
  // emit_function_args_ty: { i32 ident, i32 func_checksum, i32 cfg_checksum }
  // emit_arcs_args_ty:     { i32 num_counters, i64* counters }
  // file_info:             { start_file_args_ty, i32 num_ctrs,
  //                          emit_function_args_ty*, emit_arcs_args_ty* }
  StructType *StartFileCallArgsTy = StructType::create(
      {Builder.getInt8PtrTy(), Builder.getInt32Ty(), Builder.getInt32Ty()},
      "start_file_args_ty");
  StructType *EmitFunctionCallArgsTy = StructType::create(
      {Builder.getInt32Ty(), Builder.getInt32Ty(), Builder.getInt32Ty()},
      "emit_function_args_ty");
  StructType *EmitArcsCallArgsTy = StructType::create(
      {Builder.getInt32Ty(), Builder.getInt64Ty()->getPointerTo()},
      "emit_arcs_args_ty");
  StructType *FileInfoTy =
      StructType::create({StartFileCallArgsTy, Builder.getInt32Ty(),
                          EmitFunctionCallArgsTy->getPointerTo(),
                          EmitArcsCallArgsTy->getPointerTo()},
                         "file_info");

  Constant *Zero32 = Builder.getInt32(0);
  // Build an explicit array of two zeros for use in ConstantExpr GEP building.
  Constant *TwoZero32s[] = {Zero32, Zero32};

  // One file_info constant per compile unit that is not skipped.
  SmallVector<Constant *, 8> FileInfos;
  for (int i : llvm::seq<int>(0, CUNodes->getNumOperands())) {
    auto *CU = cast<DICompileUnit>(CUNodes->getOperand(i));

    // Skip module skeleton (and module) CUs.
    if (CU->getDWOId())
      continue;

    std::string FilenameGcda = mangleName(CU, GCovFileType::GCDA);
    // NOTE(review): FileChecksums is indexed with the llvm.dbg.cu operand
    // index. Only emptiness is checked, so confirm it holds an entry at index
    // i whenever it is non-empty (several CUs, or skipped CUs before this
    // one).
    uint32_t CfgChecksum = FileChecksums.empty() ? 0 : FileChecksums[i];
    // The version is the four bytes of Options.Version read as a big-endian
    // 32-bit integer.
    auto *StartFileCallArgs = ConstantStruct::get(
        StartFileCallArgsTy,
        {Builder.CreateGlobalStringPtr(FilenameGcda),
         Builder.getInt32(endian::read32be(Options.Version)),
         Builder.getInt32(CfgChecksum)});

    // Per-function argument records for this file, one pair per counter
    // array.
    SmallVector<Constant *, 8> EmitFunctionCallArgsArray;
    SmallVector<Constant *, 8> EmitArcsCallArgsArray;
    for (int j : llvm::seq<int>(0, CountersBySP.size())) {
      // NOTE(review): Funcs is indexed with the position in CountersBySP;
      // presumably both are filled in the same order — confirm.
      uint32_t FuncChecksum = Funcs.empty() ? 0 : Funcs[j]->getFuncChecksum();
      // The loop index doubles as the function ident.
      EmitFunctionCallArgsArray.push_back(ConstantStruct::get(
          EmitFunctionCallArgsTy,
          {Builder.getInt32(j),
           Builder.getInt32(FuncChecksum),
           Builder.getInt32(CfgChecksum)}));

      // Arc count is the element count of the counter array; the pointer is
      // the address of its first element.
      GlobalVariable *GV = CountersBySP[j].first;
      unsigned Arcs = cast<ArrayType>(GV->getValueType())->getNumElements();
      EmitArcsCallArgsArray.push_back(ConstantStruct::get(
          EmitArcsCallArgsTy,
          {Builder.getInt32(Arcs), ConstantExpr::getInBoundsGetElementPtr(
                                       GV->getValueType(), GV, TwoZero32s)}));
    }
    // Create global arrays for the two emit calls.
    int CountersSize = CountersBySP.size();
    assert(CountersSize == (int)EmitFunctionCallArgsArray.size() &&
           "Mismatched array size!");
    assert(CountersSize == (int)EmitArcsCallArgsArray.size() &&
           "Mismatched array size!");
    auto *EmitFunctionCallArgsArrayTy =
        ArrayType::get(EmitFunctionCallArgsTy, CountersSize);
    auto *EmitFunctionCallArgsArrayGV = new GlobalVariable(
        *M, EmitFunctionCallArgsArrayTy, /*isConstant*/ true,
        GlobalValue::InternalLinkage,
        ConstantArray::get(EmitFunctionCallArgsArrayTy,
                           EmitFunctionCallArgsArray),
        Twine("__llvm_internal_gcov_emit_function_args.") + Twine(i));
    auto *EmitArcsCallArgsArrayTy =
        ArrayType::get(EmitArcsCallArgsTy, CountersSize);
    EmitFunctionCallArgsArrayGV->setUnnamedAddr(
        GlobalValue::UnnamedAddr::Global);
    auto *EmitArcsCallArgsArrayGV = new GlobalVariable(
        *M, EmitArcsCallArgsArrayTy, /*isConstant*/ true,
        GlobalValue::InternalLinkage,
        ConstantArray::get(EmitArcsCallArgsArrayTy, EmitArcsCallArgsArray),
        Twine("__llvm_internal_gcov_emit_arcs_args.") + Twine(i));
    EmitArcsCallArgsArrayGV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);

    // The file record stores pointers to the first element of each array.
    FileInfos.push_back(ConstantStruct::get(
        FileInfoTy,
        {StartFileCallArgs, Builder.getInt32(CountersSize),
         ConstantExpr::getInBoundsGetElementPtr(EmitFunctionCallArgsArrayTy,
                                                EmitFunctionCallArgsArrayGV,
                                                TwoZero32s),
         ConstantExpr::getInBoundsGetElementPtr(
             EmitArcsCallArgsArrayTy, EmitArcsCallArgsArrayGV, TwoZero32s)}));
  }

  // If we didn't find anything to actually emit, bail on out.
  if (FileInfos.empty()) {
    Builder.CreateRetVoid();
    return WriteoutF;
  }

  // To simplify code, we cap the number of file infos we write out to fit
  // easily in a 32-bit signed integer. This gives consistent behavior between
  // 32-bit and 64-bit systems without requiring (potentially very slow) 64-bit
  // operations on 32-bit systems. It also seems unreasonable to try to handle
  // more than 2 billion files.
  if ((int64_t)FileInfos.size() > (int64_t)INT_MAX)
    FileInfos.resize(INT_MAX);

  // Create a global for the entire data structure so we can walk it more
  // easily.
  auto *FileInfoArrayTy = ArrayType::get(FileInfoTy, FileInfos.size());
  auto *FileInfoArrayGV = new GlobalVariable(
      *M, FileInfoArrayTy, /*isConstant*/ true, GlobalValue::InternalLinkage,
      ConstantArray::get(FileInfoArrayTy, FileInfos),
      "__llvm_internal_gcov_emit_file_info");
  FileInfoArrayGV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);

  // Create the CFG for walking this data structure.
  //   entry -> file.loop.header
  //   file.loop.header -> counter.loop.header | file.loop.latch
  //   counter.loop.header -> counter.loop.header | file.loop.latch
  //   file.loop.latch -> file.loop.header | exit
  auto *FileLoopHeader =
      BasicBlock::Create(*Ctx, "file.loop.header", WriteoutF);
  auto *CounterLoopHeader =
      BasicBlock::Create(*Ctx, "counter.loop.header", WriteoutF);
  auto *FileLoopLatch = BasicBlock::Create(*Ctx, "file.loop.latch", WriteoutF);
  auto *ExitBB = BasicBlock::Create(*Ctx, "exit", WriteoutF);

  // We always have at least one file, so just branch to the header.
  Builder.CreateBr(FileLoopHeader);

  // The index into the files structure is our loop induction variable.
  Builder.SetInsertPoint(FileLoopHeader);
  PHINode *IV = Builder.CreatePHI(Builder.getInt32Ty(), /*NumReservedValues*/ 2,
                                  "file_idx");
  IV->addIncoming(Builder.getInt32(0), BB);
  auto *FileInfoPtr = Builder.CreateInBoundsGEP(
      FileInfoArrayTy, FileInfoArrayGV, {Builder.getInt32(0), IV});
  auto *StartFileCallArgsPtr =
      Builder.CreateStructGEP(FileInfoTy, FileInfoPtr, 0, "start_file_args");
  // Load the three start_file arguments from the current record and call the
  // hook.
  auto *StartFileCall = Builder.CreateCall(
      StartFile,
      {Builder.CreateLoad(StartFileCallArgsTy->getElementType(0),
                          Builder.CreateStructGEP(StartFileCallArgsTy,
                                                  StartFileCallArgsPtr, 0),
                          "filename"),
       Builder.CreateLoad(StartFileCallArgsTy->getElementType(1),
                          Builder.CreateStructGEP(StartFileCallArgsTy,
                                                  StartFileCallArgsPtr, 1),
                          "version"),
       Builder.CreateLoad(StartFileCallArgsTy->getElementType(2),
                          Builder.CreateStructGEP(StartFileCallArgsTy,
                                                  StartFileCallArgsPtr, 2),
                          "stamp")});
  // Mirror the target's i32 extension attribute on the call site (argument 2
  // only).
  if (auto AK = TLI->getExtAttrForI32Param(false))
    StartFileCall->addParamAttr(2, AK);
  // Remaining fields of the file record: counter count and the two argument
  // arrays.
  auto *NumCounters = Builder.CreateLoad(
      FileInfoTy->getElementType(1),
      Builder.CreateStructGEP(FileInfoTy, FileInfoPtr, 1), "num_ctrs");
  auto *EmitFunctionCallArgsArray =
      Builder.CreateLoad(FileInfoTy->getElementType(2),
                         Builder.CreateStructGEP(FileInfoTy, FileInfoPtr, 2),
                         "emit_function_args");
  auto *EmitArcsCallArgsArray = Builder.CreateLoad(
      FileInfoTy->getElementType(3),
      Builder.CreateStructGEP(FileInfoTy, FileInfoPtr, 3), "emit_arcs_args");
  // Enter the counter loop only if the file has at least one counter record.
  auto *EnterCounterLoopCond =
      Builder.CreateICmpSLT(Builder.getInt32(0), NumCounters);
  Builder.CreateCondBr(EnterCounterLoopCond, CounterLoopHeader, FileLoopLatch);

  // Counter loop: one llvm_gcda_emit_function call and one
  // llvm_gcda_emit_arcs call per counter record.
  Builder.SetInsertPoint(CounterLoopHeader);
  auto *JV = Builder.CreatePHI(Builder.getInt32Ty(), /*NumReservedValues*/ 2,
                               "ctr_idx");
  JV->addIncoming(Builder.getInt32(0), FileLoopHeader);
  auto *EmitFunctionCallArgsPtr = Builder.CreateInBoundsGEP(
      EmitFunctionCallArgsTy, EmitFunctionCallArgsArray, JV);
  auto *EmitFunctionCall = Builder.CreateCall(
      EmitFunction,
      {Builder.CreateLoad(EmitFunctionCallArgsTy->getElementType(0),
                          Builder.CreateStructGEP(EmitFunctionCallArgsTy,
                                                  EmitFunctionCallArgsPtr, 0),
                          "ident"),
       Builder.CreateLoad(EmitFunctionCallArgsTy->getElementType(1),
                          Builder.CreateStructGEP(EmitFunctionCallArgsTy,
                                                  EmitFunctionCallArgsPtr, 1),
                          "func_checkssum"),
       Builder.CreateLoad(EmitFunctionCallArgsTy->getElementType(2),
                          Builder.CreateStructGEP(EmitFunctionCallArgsTy,
                                                  EmitFunctionCallArgsPtr, 2),
                          "cfg_checksum")});
  if (auto AK = TLI->getExtAttrForI32Param(false)) {
    EmitFunctionCall->addParamAttr(0, AK);
    EmitFunctionCall->addParamAttr(1, AK);
    EmitFunctionCall->addParamAttr(2, AK);
  }
  auto *EmitArcsCallArgsPtr =
      Builder.CreateInBoundsGEP(EmitArcsCallArgsTy, EmitArcsCallArgsArray, JV);
  auto *EmitArcsCall = Builder.CreateCall(
      EmitArcs,
      {Builder.CreateLoad(
           EmitArcsCallArgsTy->getElementType(0),
           Builder.CreateStructGEP(EmitArcsCallArgsTy, EmitArcsCallArgsPtr, 0),
           "num_counters"),
       Builder.CreateLoad(
           EmitArcsCallArgsTy->getElementType(1),
           Builder.CreateStructGEP(EmitArcsCallArgsTy, EmitArcsCallArgsPtr, 1),
           "counters")});
  if (auto AK = TLI->getExtAttrForI32Param(false))
    EmitArcsCall->addParamAttr(0, AK);
  // Advance the counter index and loop while it is below num_ctrs.
  auto *NextJV = Builder.CreateAdd(JV, Builder.getInt32(1));
  auto *CounterLoopCond = Builder.CreateICmpSLT(NextJV, NumCounters);
  Builder.CreateCondBr(CounterLoopCond, CounterLoopHeader, FileLoopLatch);
  JV->addIncoming(NextJV, CounterLoopHeader);

  // File loop latch: finish the current file, then advance to the next record
  // or leave the loop once all records have been visited.
  Builder.SetInsertPoint(FileLoopLatch);
  Builder.CreateCall(SummaryInfo, {});
  Builder.CreateCall(EndFile, {});
  auto *NextIV = Builder.CreateAdd(IV, Builder.getInt32(1), "next_file_idx");
  auto *FileLoopCond =
      Builder.CreateICmpSLT(NextIV, Builder.getInt32(FileInfos.size()));
  Builder.CreateCondBr(FileLoopCond, FileLoopHeader, ExitBB);
  IV->addIncoming(NextIV, FileLoopLatch);

  Builder.SetInsertPoint(ExitBB);
  Builder.CreateRetVoid();

  return WriteoutF;
}
1365 
insertReset(ArrayRef<std::pair<GlobalVariable *,MDNode * >> CountersBySP)1366 Function *GCOVProfiler::insertReset(
1367     ArrayRef<std::pair<GlobalVariable *, MDNode *>> CountersBySP) {
1368   FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), false);
1369   Function *ResetF = M->getFunction("__llvm_gcov_reset");
1370   if (!ResetF)
1371     ResetF = createInternalFunction(FTy, "__llvm_gcov_reset");
1372   ResetF->addFnAttr(Attribute::NoInline);
1373 
1374   BasicBlock *Entry = BasicBlock::Create(*Ctx, "entry", ResetF);
1375   IRBuilder<> Builder(Entry);
1376 
1377   // Zero out the counters.
1378   for (const auto &I : CountersBySP) {
1379     GlobalVariable *GV = I.first;
1380     Constant *Null = Constant::getNullValue(GV->getValueType());
1381     Builder.CreateStore(Null, GV);
1382   }
1383 
1384   Type *RetTy = ResetF->getReturnType();
1385   if (RetTy->isVoidTy())
1386     Builder.CreateRetVoid();
1387   else if (RetTy->isIntegerTy())
1388     // Used if __llvm_gcov_reset was implicitly declared.
1389     Builder.CreateRet(ConstantInt::get(RetTy, 0));
1390   else
1391     report_fatal_error("invalid return type for __llvm_gcov_reset");
1392 
1393   return ResetF;
1394 }
1395