1 //===- GCOVProfiling.cpp - Insert edge counters for gcov profiling --------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
// This pass implements GCOV-style profiling. When this pass is run it emits
// "gcno" files next to the existing source, and instruments the code so that,
// when it runs, it records the edges between blocks that were executed and
// emits a complementary "gcda" file on exit.
13 //
14 //===----------------------------------------------------------------------===//
15 
16 #include "CFGMST.h"
17 #include "llvm/ADT/Hashing.h"
18 #include "llvm/ADT/MapVector.h"
19 #include "llvm/ADT/STLExtras.h"
20 #include "llvm/ADT/Sequence.h"
21 #include "llvm/ADT/StringMap.h"
22 #include "llvm/Analysis/BlockFrequencyInfo.h"
23 #include "llvm/Analysis/BranchProbabilityInfo.h"
24 #include "llvm/Analysis/EHPersonalities.h"
25 #include "llvm/Analysis/TargetLibraryInfo.h"
26 #include "llvm/IR/DebugInfo.h"
27 #include "llvm/IR/DebugLoc.h"
28 #include "llvm/IR/IRBuilder.h"
29 #include "llvm/IR/InstIterator.h"
30 #include "llvm/IR/Instructions.h"
31 #include "llvm/IR/IntrinsicInst.h"
32 #include "llvm/IR/Module.h"
33 #include "llvm/Support/CRC.h"
34 #include "llvm/Support/CommandLine.h"
35 #include "llvm/Support/Debug.h"
36 #include "llvm/Support/FileSystem.h"
37 #include "llvm/Support/Path.h"
38 #include "llvm/Support/Regex.h"
39 #include "llvm/Support/raw_ostream.h"
40 #include "llvm/Transforms/Instrumentation.h"
41 #include "llvm/Transforms/Instrumentation/GCOVProfiler.h"
42 #include "llvm/Transforms/Utils/ModuleUtils.h"
43 #include <algorithm>
44 #include <memory>
45 #include <string>
46 #include <utility>
47 
48 using namespace llvm;
49 namespace endian = llvm::support::endian;
50 
51 #define DEBUG_TYPE "insert-gcov-profiling"
52 
// Constants written into the coverage notes stream: one arc flag plus the tag
// words that open the function, block-count, arc and line records.
enum : uint32_t {
  // Arc flag written for edges that have no instrumentation block assigned
  // (see the addEdge() call in emitProfileNotes).
  GCOV_ARC_ON_TREE = 1 << 0,

  // First word of a function record (GCOVFunction::writeOut).
  GCOV_TAG_FUNCTION = 0x01000000,
  // Precedes the block count of a function.
  GCOV_TAG_BLOCKS = 0x01410000,
  // Precedes the outgoing arcs of one block.
  GCOV_TAG_ARCS = 0x01430000,
  // Precedes the line table of one block (GCOVBlock::writeOut).
  GCOV_TAG_LINES = 0x01450000,
};
61 
// Hidden option holding the version string that GCOVOptions::getDefault()
// copies into GCOVOptions::Version. getDefault() rejects any value that is not
// exactly four characters long.
static cl::opt<std::string> DefaultGCOVVersion("default-gcov-version",
                                               cl::init("408*"), cl::Hidden,
                                               cl::ValueRequired);

// Hidden flag whose value becomes GCOVOptions::Atomic in
// GCOVOptions::getDefault().
static cl::opt<bool> AtomicCounter("gcov-atomic-counter", cl::Hidden,
                                   cl::desc("Make counter updates atomic"));
68 
69 // Returns the number of words which will be used to represent this string.
70 static unsigned wordsOfString(StringRef s) {
71   // Length + NUL-terminated string + 0~3 padding NULs.
72   return (s.size() / 4) + 2;
73 }
74 
75 GCOVOptions GCOVOptions::getDefault() {
76   GCOVOptions Options;
77   Options.EmitNotes = true;
78   Options.EmitData = true;
79   Options.NoRedZone = false;
80   Options.Atomic = AtomicCounter;
81 
82   if (DefaultGCOVVersion.size() != 4) {
83     llvm::report_fatal_error(Twine("Invalid -default-gcov-version: ") +
84                              DefaultGCOVVersion, /*GenCrashDiag=*/false);
85   }
86   memcpy(Options.Version, DefaultGCOVVersion.c_str(), 4);
87   return Options;
88 }
89 
90 namespace {
91 class GCOVFunction;
92 
93 class GCOVProfiler {
94 public:
95   GCOVProfiler() : GCOVProfiler(GCOVOptions::getDefault()) {}
96   GCOVProfiler(const GCOVOptions &Opts) : Options(Opts) {}
97   bool
98   runOnModule(Module &M, function_ref<BlockFrequencyInfo *(Function &F)> GetBFI,
99               function_ref<BranchProbabilityInfo *(Function &F)> GetBPI,
100               std::function<const TargetLibraryInfo &(Function &F)> GetTLI);
101 
102   void write(uint32_t i) {
103     char Bytes[4];
104     endian::write32(Bytes, i, Endian);
105     os->write(Bytes, 4);
106   }
107   void writeString(StringRef s) {
108     write(wordsOfString(s) - 1);
109     os->write(s.data(), s.size());
110     os->write_zeros(4 - s.size() % 4);
111   }
112   void writeBytes(const char *Bytes, int Size) { os->write(Bytes, Size); }
113 
114 private:
115   // Create the .gcno files for the Module based on DebugInfo.
116   bool
117   emitProfileNotes(NamedMDNode *CUNode, bool HasExecOrFork,
118                    function_ref<BlockFrequencyInfo *(Function &F)> GetBFI,
119                    function_ref<BranchProbabilityInfo *(Function &F)> GetBPI,
120                    function_ref<const TargetLibraryInfo &(Function &F)> GetTLI);
121 
122   Function *createInternalFunction(FunctionType *FTy, StringRef Name);
123   void emitGlobalConstructor(
124       SmallVectorImpl<std::pair<GlobalVariable *, MDNode *>> &CountersBySP);
125 
126   bool isFunctionInstrumented(const Function &F);
127   std::vector<Regex> createRegexesFromString(StringRef RegexesStr);
128   static bool doesFilenameMatchARegex(StringRef Filename,
129                                       std::vector<Regex> &Regexes);
130 
131   // Get pointers to the functions in the runtime library.
132   FunctionCallee getStartFileFunc(const TargetLibraryInfo *TLI);
133   FunctionCallee getEmitFunctionFunc(const TargetLibraryInfo *TLI);
134   FunctionCallee getEmitArcsFunc(const TargetLibraryInfo *TLI);
135   FunctionCallee getSummaryInfoFunc();
136   FunctionCallee getEndFileFunc();
137 
138   // Add the function to write out all our counters to the global destructor
139   // list.
140   Function *
141   insertCounterWriteout(ArrayRef<std::pair<GlobalVariable *, MDNode *>>);
142   Function *insertReset(ArrayRef<std::pair<GlobalVariable *, MDNode *>>);
143 
144   bool AddFlushBeforeForkAndExec();
145 
146   enum class GCovFileType { GCNO, GCDA };
147   std::string mangleName(const DICompileUnit *CU, GCovFileType FileType);
148 
149   GCOVOptions Options;
150   support::endianness Endian;
151   raw_ostream *os;
152 
153   // Checksum, produced by hash of EdgeDestinations
154   SmallVector<uint32_t, 4> FileChecksums;
155 
156   Module *M = nullptr;
157   std::function<const TargetLibraryInfo &(Function &F)> GetTLI;
158   LLVMContext *Ctx = nullptr;
159   SmallVector<std::unique_ptr<GCOVFunction>, 16> Funcs;
160   std::vector<Regex> FilterRe;
161   std::vector<Regex> ExcludeRe;
162   DenseSet<const BasicBlock *> ExecBlocks;
163   StringMap<bool> InstrumentedFiles;
164 };
165 
166 struct BBInfo {
167   BBInfo *Group;
168   uint32_t Index;
169   uint32_t Rank = 0;
170 
171   BBInfo(unsigned Index) : Group(this), Index(Index) {}
172   std::string infoString() const {
173     return (Twine("Index=") + Twine(Index)).str();
174   }
175 };
176 
177 struct Edge {
178   // This class implements the CFG edges. Note the CFG can be a multi-graph.
179   // So there might be multiple edges with same SrcBB and DestBB.
180   const BasicBlock *SrcBB;
181   const BasicBlock *DestBB;
182   uint64_t Weight;
183   BasicBlock *Place = nullptr;
184   uint32_t SrcNumber, DstNumber;
185   bool InMST = false;
186   bool Removed = false;
187   bool IsCritical = false;
188 
189   Edge(const BasicBlock *Src, const BasicBlock *Dest, uint64_t W = 1)
190       : SrcBB(Src), DestBB(Dest), Weight(W) {}
191 
192   // Return the information string of an edge.
193   std::string infoString() const {
194     return (Twine(Removed ? "-" : " ") + (InMST ? " " : "*") +
195             (IsCritical ? "c" : " ") + "  W=" + Twine(Weight))
196         .str();
197   }
198 };
199 }
200 
201 static StringRef getFunctionName(const DISubprogram *SP) {
202   if (!SP->getLinkageName().empty())
203     return SP->getLinkageName();
204   return SP->getName();
205 }
206 
207 /// Extract a filename for a DISubprogram.
208 ///
209 /// Prefer relative paths in the coverage notes. Clang also may split
210 /// up absolute paths into a directory and filename component. When
211 /// the relative path doesn't exist, reconstruct the absolute path.
212 static SmallString<128> getFilename(const DISubprogram *SP) {
213   SmallString<128> Path;
214   StringRef RelPath = SP->getFilename();
215   if (sys::fs::exists(RelPath))
216     Path = RelPath;
217   else
218     sys::path::append(Path, SP->getDirectory(), SP->getFilename());
219   return Path;
220 }
221 
222 namespace {
223   class GCOVRecord {
224   protected:
225     GCOVProfiler *P;
226 
227     GCOVRecord(GCOVProfiler *P) : P(P) {}
228 
229     void write(uint32_t i) { P->write(i); }
230     void writeString(StringRef s) { P->writeString(s); }
231     void writeBytes(const char *Bytes, int Size) { P->writeBytes(Bytes, Size); }
232   };
233 
234   class GCOVFunction;
235   class GCOVBlock;
236 
237   // Constructed only by requesting it from a GCOVBlock, this object stores a
238   // list of line numbers and a single filename, representing lines that belong
239   // to the block.
240   class GCOVLines : public GCOVRecord {
241    public:
242     void addLine(uint32_t Line) {
243       assert(Line != 0 && "Line zero is not a valid real line number.");
244       Lines.push_back(Line);
245     }
246 
247     uint32_t length() const {
248       return 1 + wordsOfString(Filename) + Lines.size();
249     }
250 
251     void writeOut() {
252       write(0);
253       writeString(Filename);
254       for (int i = 0, e = Lines.size(); i != e; ++i)
255         write(Lines[i]);
256     }
257 
258     GCOVLines(GCOVProfiler *P, StringRef F)
259         : GCOVRecord(P), Filename(std::string(F)) {}
260 
261   private:
262     std::string Filename;
263     SmallVector<uint32_t, 32> Lines;
264   };
265 
266 
267   // Represent a basic block in GCOV. Each block has a unique number in the
268   // function, number of lines belonging to each block, and a set of edges to
269   // other blocks.
270   class GCOVBlock : public GCOVRecord {
271    public:
272     GCOVLines &getFile(StringRef Filename) {
273       return LinesByFile.try_emplace(Filename, P, Filename).first->second;
274     }
275 
276     void addEdge(GCOVBlock &Successor, uint32_t Flags) {
277       OutEdges.emplace_back(&Successor, Flags);
278     }
279 
280     void writeOut() {
281       uint32_t Len = 3;
282       SmallVector<StringMapEntry<GCOVLines> *, 32> SortedLinesByFile;
283       for (auto &I : LinesByFile) {
284         Len += I.second.length();
285         SortedLinesByFile.push_back(&I);
286       }
287 
288       write(GCOV_TAG_LINES);
289       write(Len);
290       write(Number);
291 
292       llvm::sort(SortedLinesByFile, [](StringMapEntry<GCOVLines> *LHS,
293                                        StringMapEntry<GCOVLines> *RHS) {
294         return LHS->getKey() < RHS->getKey();
295       });
296       for (auto &I : SortedLinesByFile)
297         I->getValue().writeOut();
298       write(0);
299       write(0);
300     }
301 
302     GCOVBlock(const GCOVBlock &RHS) : GCOVRecord(RHS), Number(RHS.Number) {
303       // Only allow copy before edges and lines have been added. After that,
304       // there are inter-block pointers (eg: edges) that won't take kindly to
305       // blocks being copied or moved around.
306       assert(LinesByFile.empty());
307       assert(OutEdges.empty());
308     }
309 
310     uint32_t Number;
311     SmallVector<std::pair<GCOVBlock *, uint32_t>, 4> OutEdges;
312 
313   private:
314     friend class GCOVFunction;
315 
316     GCOVBlock(GCOVProfiler *P, uint32_t Number)
317         : GCOVRecord(P), Number(Number) {}
318 
319     StringMap<GCOVLines> LinesByFile;
320   };
321 
322   // A function has a unique identifier, a checksum (we leave as zero) and a
323   // set of blocks and a map of edges between blocks. This is the only GCOV
324   // object users can construct, the blocks and lines will be rooted here.
325   class GCOVFunction : public GCOVRecord {
326   public:
327     GCOVFunction(GCOVProfiler *P, Function *F, const DISubprogram *SP,
328                  unsigned EndLine, uint32_t Ident, int Version)
329         : GCOVRecord(P), SP(SP), EndLine(EndLine), Ident(Ident),
330           Version(Version), EntryBlock(P, 0), ReturnBlock(P, 1) {
331       LLVM_DEBUG(dbgs() << "Function: " << getFunctionName(SP) << "\n");
332       bool ExitBlockBeforeBody = Version >= 48;
333       uint32_t i = ExitBlockBeforeBody ? 2 : 1;
334       for (BasicBlock &BB : *F)
335         Blocks.insert(std::make_pair(&BB, GCOVBlock(P, i++)));
336       if (!ExitBlockBeforeBody)
337         ReturnBlock.Number = i;
338 
339       std::string FunctionNameAndLine;
340       raw_string_ostream FNLOS(FunctionNameAndLine);
341       FNLOS << getFunctionName(SP) << SP->getLine();
342       FNLOS.flush();
343       FuncChecksum = hash_value(FunctionNameAndLine);
344     }
345 
346     GCOVBlock &getBlock(const BasicBlock *BB) {
347       return Blocks.find(const_cast<BasicBlock *>(BB))->second;
348     }
349 
350     GCOVBlock &getEntryBlock() { return EntryBlock; }
351     GCOVBlock &getReturnBlock() {
352       return ReturnBlock;
353     }
354 
355     uint32_t getFuncChecksum() const {
356       return FuncChecksum;
357     }
358 
359     void writeOut(uint32_t CfgChecksum) {
360       write(GCOV_TAG_FUNCTION);
361       SmallString<128> Filename = getFilename(SP);
362       uint32_t BlockLen =
363           2 + (Version >= 47) + wordsOfString(getFunctionName(SP));
364       if (Version < 80)
365         BlockLen += wordsOfString(Filename) + 1;
366       else
367         BlockLen += 1 + wordsOfString(Filename) + 3 + (Version >= 90);
368 
369       write(BlockLen);
370       write(Ident);
371       write(FuncChecksum);
372       if (Version >= 47)
373         write(CfgChecksum);
374       writeString(getFunctionName(SP));
375       if (Version < 80) {
376         writeString(Filename);
377         write(SP->getLine());
378       } else {
379         write(SP->isArtificial()); // artificial
380         writeString(Filename);
381         write(SP->getLine()); // start_line
382         write(0);             // start_column
383         // EndLine is the last line with !dbg. It is not the } line as in GCC,
384         // but good enough.
385         write(EndLine);
386         if (Version >= 90)
387           write(0); // end_column
388       }
389 
390       // Emit count of blocks.
391       write(GCOV_TAG_BLOCKS);
392       if (Version < 80) {
393         write(Blocks.size() + 2);
394         for (int i = Blocks.size() + 2; i; --i)
395           write(0);
396       } else {
397         write(1);
398         write(Blocks.size() + 2);
399       }
400       LLVM_DEBUG(dbgs() << (Blocks.size() + 1) << " blocks\n");
401 
402       // Emit edges between blocks.
403       const uint32_t Outgoing = EntryBlock.OutEdges.size();
404       if (Outgoing) {
405         write(GCOV_TAG_ARCS);
406         write(Outgoing * 2 + 1);
407         write(EntryBlock.Number);
408         for (const auto &E : EntryBlock.OutEdges) {
409           write(E.first->Number);
410           write(E.second);
411         }
412       }
413       for (auto &It : Blocks) {
414         const GCOVBlock &Block = It.second;
415         if (Block.OutEdges.empty()) continue;
416 
417         write(GCOV_TAG_ARCS);
418         write(Block.OutEdges.size() * 2 + 1);
419         write(Block.Number);
420         for (const auto &E : Block.OutEdges) {
421           write(E.first->Number);
422           write(E.second);
423         }
424       }
425 
426       // Emit lines for each block.
427       for (auto &It : Blocks)
428         It.second.writeOut();
429     }
430 
431   public:
432     const DISubprogram *SP;
433     unsigned EndLine;
434     uint32_t Ident;
435     uint32_t FuncChecksum;
436     int Version;
437     MapVector<BasicBlock *, GCOVBlock> Blocks;
438     GCOVBlock EntryBlock;
439     GCOVBlock ReturnBlock;
440   };
441 }
442 
// RegexesStr is a string containing different regexes separated by semicolons.
444 // For example "foo\..*$;bar\..*$".
445 std::vector<Regex> GCOVProfiler::createRegexesFromString(StringRef RegexesStr) {
446   std::vector<Regex> Regexes;
447   while (!RegexesStr.empty()) {
448     std::pair<StringRef, StringRef> HeadTail = RegexesStr.split(';');
449     if (!HeadTail.first.empty()) {
450       Regex Re(HeadTail.first);
451       std::string Err;
452       if (!Re.isValid(Err)) {
453         Ctx->emitError(Twine("Regex ") + HeadTail.first +
454                        " is not valid: " + Err);
455       }
456       Regexes.emplace_back(std::move(Re));
457     }
458     RegexesStr = HeadTail.second;
459   }
460   return Regexes;
461 }
462 
463 bool GCOVProfiler::doesFilenameMatchARegex(StringRef Filename,
464                                            std::vector<Regex> &Regexes) {
465   for (Regex &Re : Regexes)
466     if (Re.match(Filename))
467       return true;
468   return false;
469 }
470 
471 bool GCOVProfiler::isFunctionInstrumented(const Function &F) {
472   if (FilterRe.empty() && ExcludeRe.empty()) {
473     return true;
474   }
475   SmallString<128> Filename = getFilename(F.getSubprogram());
476   auto It = InstrumentedFiles.find(Filename);
477   if (It != InstrumentedFiles.end()) {
478     return It->second;
479   }
480 
481   SmallString<256> RealPath;
482   StringRef RealFilename;
483 
484   // Path can be
485   // /usr/lib/gcc/x86_64-linux-gnu/8/../../../../include/c++/8/bits/*.h so for
486   // such a case we must get the real_path.
487   if (sys::fs::real_path(Filename, RealPath)) {
488     // real_path can fail with path like "foo.c".
489     RealFilename = Filename;
490   } else {
491     RealFilename = RealPath;
492   }
493 
494   bool ShouldInstrument;
495   if (FilterRe.empty()) {
496     ShouldInstrument = !doesFilenameMatchARegex(RealFilename, ExcludeRe);
497   } else if (ExcludeRe.empty()) {
498     ShouldInstrument = doesFilenameMatchARegex(RealFilename, FilterRe);
499   } else {
500     ShouldInstrument = doesFilenameMatchARegex(RealFilename, FilterRe) &&
501                        !doesFilenameMatchARegex(RealFilename, ExcludeRe);
502   }
503   InstrumentedFiles[Filename] = ShouldInstrument;
504   return ShouldInstrument;
505 }
506 
507 std::string GCOVProfiler::mangleName(const DICompileUnit *CU,
508                                      GCovFileType OutputType) {
509   bool Notes = OutputType == GCovFileType::GCNO;
510 
511   if (NamedMDNode *GCov = M->getNamedMetadata("llvm.gcov")) {
512     for (int i = 0, e = GCov->getNumOperands(); i != e; ++i) {
513       MDNode *N = GCov->getOperand(i);
514       bool ThreeElement = N->getNumOperands() == 3;
515       if (!ThreeElement && N->getNumOperands() != 2)
516         continue;
517       if (dyn_cast<MDNode>(N->getOperand(ThreeElement ? 2 : 1)) != CU)
518         continue;
519 
520       if (ThreeElement) {
521         // These nodes have no mangling to apply, it's stored mangled in the
522         // bitcode.
523         MDString *NotesFile = dyn_cast<MDString>(N->getOperand(0));
524         MDString *DataFile = dyn_cast<MDString>(N->getOperand(1));
525         if (!NotesFile || !DataFile)
526           continue;
527         return std::string(Notes ? NotesFile->getString()
528                                  : DataFile->getString());
529       }
530 
531       MDString *GCovFile = dyn_cast<MDString>(N->getOperand(0));
532       if (!GCovFile)
533         continue;
534 
535       SmallString<128> Filename = GCovFile->getString();
536       sys::path::replace_extension(Filename, Notes ? "gcno" : "gcda");
537       return std::string(Filename.str());
538     }
539   }
540 
541   SmallString<128> Filename = CU->getFilename();
542   sys::path::replace_extension(Filename, Notes ? "gcno" : "gcda");
543   StringRef FName = sys::path::filename(Filename);
544   SmallString<128> CurPath;
545   if (sys::fs::current_path(CurPath))
546     return std::string(FName);
547   sys::path::append(CurPath, FName);
548   return std::string(CurPath.str());
549 }
550 
551 bool GCOVProfiler::runOnModule(
552     Module &M, function_ref<BlockFrequencyInfo *(Function &F)> GetBFI,
553     function_ref<BranchProbabilityInfo *(Function &F)> GetBPI,
554     std::function<const TargetLibraryInfo &(Function &F)> GetTLI) {
555   this->M = &M;
556   this->GetTLI = std::move(GetTLI);
557   Ctx = &M.getContext();
558 
559   NamedMDNode *CUNode = M.getNamedMetadata("llvm.dbg.cu");
560   if (!CUNode || (!Options.EmitNotes && !Options.EmitData))
561     return false;
562 
563   bool HasExecOrFork = AddFlushBeforeForkAndExec();
564 
565   FilterRe = createRegexesFromString(Options.Filter);
566   ExcludeRe = createRegexesFromString(Options.Exclude);
567   emitProfileNotes(CUNode, HasExecOrFork, GetBFI, GetBPI, this->GetTLI);
568   return true;
569 }
570 
571 PreservedAnalyses GCOVProfilerPass::run(Module &M,
572                                         ModuleAnalysisManager &AM) {
573 
574   GCOVProfiler Profiler(GCOVOpts);
575   FunctionAnalysisManager &FAM =
576       AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
577 
578   auto GetBFI = [&FAM](Function &F) {
579     return &FAM.getResult<BlockFrequencyAnalysis>(F);
580   };
581   auto GetBPI = [&FAM](Function &F) {
582     return &FAM.getResult<BranchProbabilityAnalysis>(F);
583   };
584   auto GetTLI = [&FAM](Function &F) -> const TargetLibraryInfo & {
585     return FAM.getResult<TargetLibraryAnalysis>(F);
586   };
587 
588   if (!Profiler.runOnModule(M, GetBFI, GetBPI, GetTLI))
589     return PreservedAnalyses::all();
590 
591   return PreservedAnalyses::none();
592 }
593 
594 static bool functionHasLines(const Function &F, unsigned &EndLine) {
595   // Check whether this function actually has any source lines. Not only
596   // do these waste space, they also can crash gcov.
597   EndLine = 0;
598   for (auto &BB : F) {
599     for (auto &I : BB) {
600       // Debug intrinsic locations correspond to the location of the
601       // declaration, not necessarily any statements or expressions.
602       if (isa<DbgInfoIntrinsic>(&I)) continue;
603 
604       const DebugLoc &Loc = I.getDebugLoc();
605       if (!Loc)
606         continue;
607 
608       // Artificial lines such as calls to the global constructors.
609       if (Loc.getLine() == 0) continue;
610       EndLine = std::max(EndLine, Loc.getLine());
611 
612       return true;
613     }
614   }
615   return false;
616 }
617 
618 static bool isUsingScopeBasedEH(Function &F) {
619   if (!F.hasPersonalityFn()) return false;
620 
621   EHPersonality Personality = classifyEHPersonality(F.getPersonalityFn());
622   return isScopedEHPersonality(Personality);
623 }
624 
// Finds direct calls to fork and to the exec* family in every function of the
// module and rewrites them:
//  - a fork call is redirected to "__gcov_fork";
//  - an exec* call gets a call to "llvm_writeout_files" inserted before it and
//    a call to "llvm_reset_counters" inserted after it.
// In both cases the containing block is split right after the call. Blocks
// containing an exec* call are recorded in ExecBlocks.
//
// Returns true if at least one fork or exec* call was rewritten.
bool GCOVProfiler::AddFlushBeforeForkAndExec() {
  // Phase 1: collect the calls. The rewriting below inserts instructions and
  // splits blocks, so it is done in a separate pass over the collected calls.
  SmallVector<CallInst *, 2> Forks;
  SmallVector<CallInst *, 2> Execs;
  for (auto &F : M->functions()) {
    auto *TLI = &GetTLI(F);
    for (auto &I : instructions(F)) {
      if (CallInst *CI = dyn_cast<CallInst>(&I)) {
        // Only calls with a known callee can be recognized as library calls.
        if (Function *Callee = CI->getCalledFunction()) {
          LibFunc LF;
          if (TLI->getLibFunc(*Callee, LF)) {
            if (LF == LibFunc_fork) {
              // fork calls are not collected when building for Windows hosts.
#if !defined(_WIN32)
              Forks.push_back(CI);
#endif
            } else if (LF == LibFunc_execl || LF == LibFunc_execle ||
                       LF == LibFunc_execlp || LF == LibFunc_execv ||
                       LF == LibFunc_execvp || LF == LibFunc_execve ||
                       LF == LibFunc_execvpe || LF == LibFunc_execvP) {
              Execs.push_back(CI);
            }
          }
        }
      }
    }
  }

  // Phase 2a: rewrite fork calls.
  for (auto F : Forks) {
    IRBuilder<> Builder(F);
    BasicBlock *Parent = F->getParent();
    auto NextInst = ++F->getIterator();

    // We have a fork, so just reset the counters in the child process.
    FunctionType *FTy = FunctionType::get(Builder.getInt32Ty(), {}, false);
    FunctionCallee GCOVFork = M->getOrInsertFunction("__gcov_fork", FTy);
    F->setCalledFunction(GCOVFork);

    // We split just after the fork so that the lines after it get their own
    // counter. There is still a known limitation:
    //   void foo() { fork(); }
    //   void bar() { foo(); blah(); }
    // Here "blah();" is executed twice but shown as executed once, because
    // "blah()" belongs to the same block as "foo();".
    Parent->splitBasicBlock(NextInst);

    // back() is the br instruction created by the split; it carries the debug
    // location of the instruction after the fork. Give it the fork's location
    // instead so that this location does not appear in two blocks.
    DebugLoc Loc = F->getDebugLoc();
    Parent->back().setDebugLoc(Loc);
  }

  // Phase 2b: rewrite exec* calls.
  for (auto E : Execs) {
    IRBuilder<> Builder(E);
    BasicBlock *Parent = E->getParent();
    auto NextInst = ++E->getIterator();

    // Since the process is replaced by a new one we need to write out the
    // gcda files first. There is no need to reset the counters since they
    // will be lost after the exec**.
    FunctionType *FTy = FunctionType::get(Builder.getVoidTy(), {}, false);
    FunctionCallee WriteoutF =
        M->getOrInsertFunction("llvm_writeout_files", FTy);
    Builder.CreateCall(WriteoutF);

    DebugLoc Loc = E->getDebugLoc();
    Builder.SetInsertPoint(&*NextInst);
    // If the exec** fails we must reset the counters since they have already
    // been dumped.
    FunctionCallee ResetF = M->getOrInsertFunction("llvm_reset_counters", FTy);
    Builder.CreateCall(ResetF)->setDebugLoc(Loc);
    // Remember the block; getInstrBB() consults ExecBlocks when choosing
    // where to instrument.
    ExecBlocks.insert(Parent);
    Parent->splitBasicBlock(NextInst);
    // As for fork: the br created by the split gets the call's location.
    Parent->back().setDebugLoc(Loc);
  }

  return !Forks.empty() || !Execs.empty();
}
701 
// Chooses the basic block to instrument for edge E, or returns nullptr when E
// gets none: it is in the MST, it was removed, the candidate block has no
// insertion point, or it is a critical edge that cannot be split.
//
// Splitting a critical edge creates a new block; in that case two new edges
// are appended to MST and E is marked Removed.
static BasicBlock *getInstrBB(CFGMST<Edge, BBInfo> &MST, Edge &E,
                              const DenseSet<const BasicBlock *> &ExecBlocks) {
  if (E.InMST || E.Removed)
    return nullptr;

  BasicBlock *SrcBB = const_cast<BasicBlock *>(E.SrcBB);
  BasicBlock *DestBB = const_cast<BasicBlock *>(E.DestBB);
  // For a fake edge, instrument the real BB.
  if (SrcBB == nullptr)
    return DestBB;
  if (DestBB == nullptr)
    return SrcBB;

  // Returns BB if it can hold instrumentation, nullptr otherwise.
  auto CanInstrument = [](BasicBlock *BB) -> BasicBlock * {
    // There are basic blocks (such as catchswitch) that cannot be
    // instrumented. If the returned first insertion point is the end of BB,
    // skip this BB.
    if (BB->getFirstInsertionPt() == BB->end())
      return nullptr;
    return BB;
  };

  // Instrument the SrcBB if it has a single successor,
  // otherwise, the DestBB if this is not a critical edge.
  // Blocks recorded in ExecBlocks are never chosen as SrcBB here.
  Instruction *TI = SrcBB->getTerminator();
  if (TI->getNumSuccessors() <= 1 && !ExecBlocks.count(SrcBB))
    return CanInstrument(SrcBB);
  if (!E.IsCritical)
    return CanInstrument(DestBB);

  // Some IndirectBr critical edges cannot be split by the previous
  // SplitIndirectBrCriticalEdges call. Bail out.
  const unsigned SuccNum = GetSuccessorNumber(SrcBB, DestBB);
  BasicBlock *InstrBB =
      isa<IndirectBrInst>(TI) ? nullptr : SplitCriticalEdge(TI, SuccNum);
  if (!InstrBB)
    return nullptr;

  // Replace E by the two edges through the new block. The second half is
  // marked InMST, so only the first half is a candidate for instrumentation.
  MST.addEdge(SrcBB, InstrBB, 0);
  MST.addEdge(InstrBB, DestBB, 0).InMST = true;
  E.Removed = true;

  return CanInstrument(InstrBB);
}
745 
746 #ifndef NDEBUG
747 static void dumpEdges(CFGMST<Edge, BBInfo> &MST, GCOVFunction &GF) {
748   size_t ID = 0;
749   for (auto &E : make_pointee_range(MST.AllEdges)) {
750     GCOVBlock &Src = E.SrcBB ? GF.getBlock(E.SrcBB) : GF.getEntryBlock();
751     GCOVBlock &Dst = E.DestBB ? GF.getBlock(E.DestBB) : GF.getReturnBlock();
752     dbgs() << "  Edge " << ID++ << ": " << Src.Number << "->" << Dst.Number
753            << E.infoString() << "\n";
754   }
755 }
756 #endif
757 
758 bool GCOVProfiler::emitProfileNotes(
759     NamedMDNode *CUNode, bool HasExecOrFork,
760     function_ref<BlockFrequencyInfo *(Function &F)> GetBFI,
761     function_ref<BranchProbabilityInfo *(Function &F)> GetBPI,
762     function_ref<const TargetLibraryInfo &(Function &F)> GetTLI) {
763   int Version;
764   {
765     uint8_t c3 = Options.Version[0];
766     uint8_t c2 = Options.Version[1];
767     uint8_t c1 = Options.Version[2];
768     Version = c3 >= 'A' ? (c3 - 'A') * 100 + (c2 - '0') * 10 + c1 - '0'
769                         : (c3 - '0') * 10 + c1 - '0';
770   }
771 
772   bool EmitGCDA = Options.EmitData;
773   for (unsigned i = 0, e = CUNode->getNumOperands(); i != e; ++i) {
774     // Each compile unit gets its own .gcno file. This means that whether we run
775     // this pass over the original .o's as they're produced, or run it after
776     // LTO, we'll generate the same .gcno files.
777 
778     auto *CU = cast<DICompileUnit>(CUNode->getOperand(i));
779 
780     // Skip module skeleton (and module) CUs.
781     if (CU->getDWOId())
782       continue;
783 
784     std::vector<uint8_t> EdgeDestinations;
785     SmallVector<std::pair<GlobalVariable *, MDNode *>, 8> CountersBySP;
786 
787     Endian = M->getDataLayout().isLittleEndian() ? support::endianness::little
788                                                  : support::endianness::big;
789     unsigned FunctionIdent = 0;
790     for (auto &F : M->functions()) {
791       DISubprogram *SP = F.getSubprogram();
792       unsigned EndLine;
793       if (!SP) continue;
794       if (!functionHasLines(F, EndLine) || !isFunctionInstrumented(F))
795         continue;
796       // TODO: Functions using scope-based EH are currently not supported.
797       if (isUsingScopeBasedEH(F)) continue;
798       if (F.hasFnAttribute(llvm::Attribute::NoProfile))
799         continue;
800 
801       // Add the function line number to the lines of the entry block
802       // to have a counter for the function definition.
803       uint32_t Line = SP->getLine();
804       auto Filename = getFilename(SP);
805 
806       BranchProbabilityInfo *BPI = GetBPI(F);
807       BlockFrequencyInfo *BFI = GetBFI(F);
808 
809       // Split indirectbr critical edges here before computing the MST rather
810       // than later in getInstrBB() to avoid invalidating it.
811       SplitIndirectBrCriticalEdges(F, /*IgnoreBlocksWithoutPHI=*/false, BPI,
812                                    BFI);
813 
814       CFGMST<Edge, BBInfo> MST(F, /*InstrumentFuncEntry_=*/false, BPI, BFI);
815 
816       // getInstrBB can split basic blocks and push elements to AllEdges.
817       for (size_t I : llvm::seq<size_t>(0, MST.AllEdges.size())) {
818         auto &E = *MST.AllEdges[I];
819         // For now, disable spanning tree optimization when fork or exec* is
820         // used.
821         if (HasExecOrFork)
822           E.InMST = false;
823         E.Place = getInstrBB(MST, E, ExecBlocks);
824       }
825       // Basic blocks in F are finalized at this point.
826       BasicBlock &EntryBlock = F.getEntryBlock();
827       Funcs.push_back(std::make_unique<GCOVFunction>(this, &F, SP, EndLine,
828                                                      FunctionIdent++, Version));
829       GCOVFunction &Func = *Funcs.back();
830 
831       // Some non-tree edges are IndirectBr which cannot be split. Ignore them
832       // as well.
833       llvm::erase_if(MST.AllEdges, [](std::unique_ptr<Edge> &E) {
834         return E->Removed || (!E->InMST && !E->Place);
835       });
836       const size_t Measured =
837           std::stable_partition(
838               MST.AllEdges.begin(), MST.AllEdges.end(),
839               [](std::unique_ptr<Edge> &E) { return E->Place; }) -
840           MST.AllEdges.begin();
841       for (size_t I : llvm::seq<size_t>(0, Measured)) {
842         Edge &E = *MST.AllEdges[I];
843         GCOVBlock &Src =
844             E.SrcBB ? Func.getBlock(E.SrcBB) : Func.getEntryBlock();
845         GCOVBlock &Dst =
846             E.DestBB ? Func.getBlock(E.DestBB) : Func.getReturnBlock();
847         E.SrcNumber = Src.Number;
848         E.DstNumber = Dst.Number;
849       }
850       std::stable_sort(
851           MST.AllEdges.begin(), MST.AllEdges.begin() + Measured,
852           [](const std::unique_ptr<Edge> &L, const std::unique_ptr<Edge> &R) {
853             return L->SrcNumber != R->SrcNumber ? L->SrcNumber < R->SrcNumber
854                                                 : L->DstNumber < R->DstNumber;
855           });
856 
857       for (const Edge &E : make_pointee_range(MST.AllEdges)) {
858         GCOVBlock &Src =
859             E.SrcBB ? Func.getBlock(E.SrcBB) : Func.getEntryBlock();
860         GCOVBlock &Dst =
861             E.DestBB ? Func.getBlock(E.DestBB) : Func.getReturnBlock();
862         Src.addEdge(Dst, E.Place ? 0 : uint32_t(GCOV_ARC_ON_TREE));
863       }
864 
865       // Artificial functions such as global initializers
866       if (!SP->isArtificial())
867         Func.getBlock(&EntryBlock).getFile(Filename).addLine(Line);
868 
869       LLVM_DEBUG(dumpEdges(MST, Func));
870 
871       for (auto &GB : Func.Blocks) {
872         const BasicBlock &BB = *GB.first;
873         auto &Block = GB.second;
874         for (auto Succ : Block.OutEdges) {
875           uint32_t Idx = Succ.first->Number;
876           do EdgeDestinations.push_back(Idx & 255);
877           while ((Idx >>= 8) > 0);
878         }
879 
880         for (auto &I : BB) {
881           // Debug intrinsic locations correspond to the location of the
882           // declaration, not necessarily any statements or expressions.
883           if (isa<DbgInfoIntrinsic>(&I)) continue;
884 
885           const DebugLoc &Loc = I.getDebugLoc();
886           if (!Loc)
887             continue;
888 
889           // Artificial lines such as calls to the global constructors.
890           if (Loc.getLine() == 0 || Loc.isImplicitCode())
891             continue;
892 
893           if (Line == Loc.getLine()) continue;
894           Line = Loc.getLine();
895           if (SP != getDISubprogram(Loc.getScope()))
896             continue;
897 
898           GCOVLines &Lines = Block.getFile(Filename);
899           Lines.addLine(Loc.getLine());
900         }
901         Line = 0;
902       }
903       if (EmitGCDA) {
904         DISubprogram *SP = F.getSubprogram();
905         ArrayType *CounterTy = ArrayType::get(Type::getInt64Ty(*Ctx), Measured);
906         GlobalVariable *Counters = new GlobalVariable(
907             *M, CounterTy, false, GlobalValue::InternalLinkage,
908             Constant::getNullValue(CounterTy), "__llvm_gcov_ctr");
909         CountersBySP.emplace_back(Counters, SP);
910 
911         for (size_t I : llvm::seq<size_t>(0, Measured)) {
912           const Edge &E = *MST.AllEdges[I];
913           IRBuilder<> Builder(E.Place, E.Place->getFirstInsertionPt());
914           Value *V = Builder.CreateConstInBoundsGEP2_64(
915               Counters->getValueType(), Counters, 0, I);
916           if (Options.Atomic) {
917             Builder.CreateAtomicRMW(AtomicRMWInst::Add, V, Builder.getInt64(1),
918                                     MaybeAlign(), AtomicOrdering::Monotonic);
919           } else {
920             Value *Count =
921                 Builder.CreateLoad(Builder.getInt64Ty(), V, "gcov_ctr");
922             Count = Builder.CreateAdd(Count, Builder.getInt64(1));
923             Builder.CreateStore(Count, V);
924           }
925         }
926       }
927     }
928 
929     char Tmp[4];
930     JamCRC JC;
931     JC.update(EdgeDestinations);
932     uint32_t Stamp = JC.getCRC();
933     FileChecksums.push_back(Stamp);
934 
935     if (Options.EmitNotes) {
936       std::error_code EC;
937       raw_fd_ostream out(mangleName(CU, GCovFileType::GCNO), EC,
938                          sys::fs::OF_None);
939       if (EC) {
940         Ctx->emitError(
941             Twine("failed to open coverage notes file for writing: ") +
942             EC.message());
943         continue;
944       }
945       os = &out;
946       if (Endian == support::endianness::big) {
947         out.write("gcno", 4);
948         out.write(Options.Version, 4);
949       } else {
950         out.write("oncg", 4);
951         std::reverse_copy(Options.Version, Options.Version + 4, Tmp);
952         out.write(Tmp, 4);
953       }
954       write(Stamp);
955       if (Version >= 90)
956         writeString(""); // unuseful current_working_directory
957       if (Version >= 80)
958         write(0); // unuseful has_unexecuted_blocks
959 
960       for (auto &Func : Funcs)
961         Func->writeOut(Stamp);
962 
963       write(0);
964       write(0);
965       out.close();
966     }
967 
968     if (EmitGCDA) {
969       emitGlobalConstructor(CountersBySP);
970       EmitGCDA = false;
971     }
972   }
973   return true;
974 }
975 
976 Function *GCOVProfiler::createInternalFunction(FunctionType *FTy,
977                                                StringRef Name) {
978   Function *F = Function::createWithDefaultAttr(
979       FTy, GlobalValue::InternalLinkage, 0, Name, M);
980   F->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
981   F->addFnAttr(Attribute::NoUnwind);
982   if (Options.NoRedZone)
983     F->addFnAttr(Attribute::NoRedZone);
984   return F;
985 }
986 
987 void GCOVProfiler::emitGlobalConstructor(
988     SmallVectorImpl<std::pair<GlobalVariable *, MDNode *>> &CountersBySP) {
989   Function *WriteoutF = insertCounterWriteout(CountersBySP);
990   Function *ResetF = insertReset(CountersBySP);
991 
992   // Create a small bit of code that registers the "__llvm_gcov_writeout" to
993   // be executed at exit and the "__llvm_gcov_reset" function to be executed
994   // when "__gcov_flush" is called.
995   FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), false);
996   Function *F = createInternalFunction(FTy, "__llvm_gcov_init");
997   F->addFnAttr(Attribute::NoInline);
998 
999   BasicBlock *BB = BasicBlock::Create(*Ctx, "entry", F);
1000   IRBuilder<> Builder(BB);
1001 
1002   FTy = FunctionType::get(Type::getVoidTy(*Ctx), false);
1003   auto *PFTy = PointerType::get(FTy, 0);
1004   FTy = FunctionType::get(Builder.getVoidTy(), {PFTy, PFTy}, false);
1005 
1006   // Initialize the environment and register the local writeout, flush and
1007   // reset functions.
1008   FunctionCallee GCOVInit = M->getOrInsertFunction("llvm_gcov_init", FTy);
1009   Builder.CreateCall(GCOVInit, {WriteoutF, ResetF});
1010   Builder.CreateRetVoid();
1011 
1012   appendToGlobalCtors(*M, F, 0);
1013 }
1014 
1015 FunctionCallee GCOVProfiler::getStartFileFunc(const TargetLibraryInfo *TLI) {
1016   Type *Args[] = {
1017       Type::getInt8PtrTy(*Ctx), // const char *orig_filename
1018       Type::getInt32Ty(*Ctx),   // uint32_t version
1019       Type::getInt32Ty(*Ctx),   // uint32_t checksum
1020   };
1021   FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), Args, false);
1022   AttributeList AL;
1023   if (auto AK = TLI->getExtAttrForI32Param(false))
1024     AL = AL.addParamAttribute(*Ctx, 2, AK);
1025   FunctionCallee Res = M->getOrInsertFunction("llvm_gcda_start_file", FTy, AL);
1026   return Res;
1027 }
1028 
1029 FunctionCallee GCOVProfiler::getEmitFunctionFunc(const TargetLibraryInfo *TLI) {
1030   Type *Args[] = {
1031     Type::getInt32Ty(*Ctx),    // uint32_t ident
1032     Type::getInt32Ty(*Ctx),    // uint32_t func_checksum
1033     Type::getInt32Ty(*Ctx),    // uint32_t cfg_checksum
1034   };
1035   FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), Args, false);
1036   AttributeList AL;
1037   if (auto AK = TLI->getExtAttrForI32Param(false)) {
1038     AL = AL.addParamAttribute(*Ctx, 0, AK);
1039     AL = AL.addParamAttribute(*Ctx, 1, AK);
1040     AL = AL.addParamAttribute(*Ctx, 2, AK);
1041   }
1042   return M->getOrInsertFunction("llvm_gcda_emit_function", FTy);
1043 }
1044 
1045 FunctionCallee GCOVProfiler::getEmitArcsFunc(const TargetLibraryInfo *TLI) {
1046   Type *Args[] = {
1047     Type::getInt32Ty(*Ctx),     // uint32_t num_counters
1048     Type::getInt64PtrTy(*Ctx),  // uint64_t *counters
1049   };
1050   FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), Args, false);
1051   AttributeList AL;
1052   if (auto AK = TLI->getExtAttrForI32Param(false))
1053     AL = AL.addParamAttribute(*Ctx, 0, AK);
1054   return M->getOrInsertFunction("llvm_gcda_emit_arcs", FTy, AL);
1055 }
1056 
1057 FunctionCallee GCOVProfiler::getSummaryInfoFunc() {
1058   FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), false);
1059   return M->getOrInsertFunction("llvm_gcda_summary_info", FTy);
1060 }
1061 
1062 FunctionCallee GCOVProfiler::getEndFileFunc() {
1063   FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), false);
1064   return M->getOrInsertFunction("llvm_gcda_end_file", FTy);
1065 }
1066 
/// Build the body of "__llvm_gcov_writeout" and return the function.
///
/// For every compile unit listed in "llvm.dbg.cu" (except those with a
/// non-zero DWO id) a constant "file_info" record is emitted holding the
/// arguments for llvm_gcda_start_file, the number of counter arrays, and
/// pointers to two parallel constant arrays with the arguments for
/// llvm_gcda_emit_function and llvm_gcda_emit_arcs. The generated function is
/// a two-level loop over that table:
///
///   for each file:     llvm_gcda_start_file(filename, version, stamp)
///     for each ctr:    llvm_gcda_emit_function(ident, func_cs, cfg_cs)
///                      llvm_gcda_emit_arcs(num_counters, counters)
///                      llvm_gcda_summary_info(); llvm_gcda_end_file()
///
/// \param CountersBySP  (counter array global, subprogram) pairs; only the
///                      global is read here. Every file record references all
///                      entries of this list.
/// \returns the (possibly pre-existing) "__llvm_gcov_writeout" function. If
///          there is no "llvm.dbg.cu" metadata or no file record was
///          produced, the body is just a "ret void".
Function *GCOVProfiler::insertCounterWriteout(
    ArrayRef<std::pair<GlobalVariable *, MDNode *> > CountersBySP) {
  FunctionType *WriteoutFTy = FunctionType::get(Type::getVoidTy(*Ctx), false);
  // Reuse an existing function of that name if the module already has one.
  // NOTE(review): a fresh "entry" block is appended below either way; this
  // presumably expects a pre-existing function to be a bare declaration.
  Function *WriteoutF = M->getFunction("__llvm_gcov_writeout");
  if (!WriteoutF)
    WriteoutF = createInternalFunction(WriteoutFTy, "__llvm_gcov_writeout");
  WriteoutF->addFnAttr(Attribute::NoInline);

  BasicBlock *BB = BasicBlock::Create(*Ctx, "entry", WriteoutF);
  IRBuilder<> Builder(BB);

  auto *TLI = &GetTLI(*WriteoutF);

  // Declarations of the runtime entry points called by the generated loop.
  FunctionCallee StartFile = getStartFileFunc(TLI);
  FunctionCallee EmitFunction = getEmitFunctionFunc(TLI);
  FunctionCallee EmitArcs = getEmitArcsFunc(TLI);
  FunctionCallee SummaryInfo = getSummaryInfoFunc();
  FunctionCallee EndFile = getEndFileFunc();

  // Without compile-unit metadata there is nothing to write out.
  NamedMDNode *CUNodes = M->getNamedMetadata("llvm.dbg.cu");
  if (!CUNodes) {
    Builder.CreateRetVoid();
    return WriteoutF;
  }

  // Collect the relevant data into a large constant data structure that we can
  // walk to write out everything.
  // Each *_args_ty struct mirrors the parameter list of the runtime function
  // it feeds: {i8*, i32, i32}, {i32, i32, i32} and {i32, i64*}.
  StructType *StartFileCallArgsTy = StructType::create(
      {Builder.getInt8PtrTy(), Builder.getInt32Ty(), Builder.getInt32Ty()},
      "start_file_args_ty");
  StructType *EmitFunctionCallArgsTy = StructType::create(
      {Builder.getInt32Ty(), Builder.getInt32Ty(), Builder.getInt32Ty()},
      "emit_function_args_ty");
  StructType *EmitArcsCallArgsTy = StructType::create(
      {Builder.getInt32Ty(), Builder.getInt64Ty()->getPointerTo()},
      "emit_arcs_args_ty");
  // file_info = {start-file args, number of counter arrays,
  //              pointer to emit-function args, pointer to emit-arcs args}.
  StructType *FileInfoTy =
      StructType::create({StartFileCallArgsTy, Builder.getInt32Ty(),
                          EmitFunctionCallArgsTy->getPointerTo(),
                          EmitArcsCallArgsTy->getPointerTo()},
                         "file_info");

  Constant *Zero32 = Builder.getInt32(0);
  // Build an explicit array of two zeros for use in ConstantExpr GEP building.
  Constant *TwoZero32s[] = {Zero32, Zero32};

  SmallVector<Constant *, 8> FileInfos;
  for (int i : llvm::seq<int>(0, CUNodes->getNumOperands())) {
    auto *CU = cast<DICompileUnit>(CUNodes->getOperand(i));

    // Skip module skeleton (and module) CUs.
    if (CU->getDWOId())
      continue;

    std::string FilenameGcda = mangleName(CU, GCovFileType::GCDA);
    // NOTE(review): FileChecksums is indexed by the CU operand index; this
    // assumes it holds one entry per "llvm.dbg.cu" operand up to i whenever
    // it is non-empty - verify against the code that fills it.
    uint32_t CfgChecksum = FileChecksums.empty() ? 0 : FileChecksums[i];
    // The version is passed as the four version bytes read as a big-endian
    // 32-bit integer.
    auto *StartFileCallArgs = ConstantStruct::get(
        StartFileCallArgsTy,
        {Builder.CreateGlobalStringPtr(FilenameGcda),
         Builder.getInt32(endian::read32be(Options.Version)),
         Builder.getInt32(CfgChecksum)});

    // One emit-function record and one emit-arcs record per counter array;
    // the record's position j doubles as the function ident.
    SmallVector<Constant *, 8> EmitFunctionCallArgsArray;
    SmallVector<Constant *, 8> EmitArcsCallArgsArray;
    for (int j : llvm::seq<int>(0, CountersBySP.size())) {
      // NOTE(review): assumes Funcs, when non-empty, is parallel to
      // CountersBySP - confirm with the caller.
      uint32_t FuncChecksum = Funcs.empty() ? 0 : Funcs[j]->getFuncChecksum();
      EmitFunctionCallArgsArray.push_back(ConstantStruct::get(
          EmitFunctionCallArgsTy,
          {Builder.getInt32(j),
           Builder.getInt32(FuncChecksum),
           Builder.getInt32(CfgChecksum)}));

      // The arc count is the element count of the counter array; the counter
      // pointer is the address of its first element.
      GlobalVariable *GV = CountersBySP[j].first;
      unsigned Arcs = cast<ArrayType>(GV->getValueType())->getNumElements();
      EmitArcsCallArgsArray.push_back(ConstantStruct::get(
          EmitArcsCallArgsTy,
          {Builder.getInt32(Arcs), ConstantExpr::getInBoundsGetElementPtr(
                                       GV->getValueType(), GV, TwoZero32s)}));
    }
    // Create global arrays for the two emit calls.
    int CountersSize = CountersBySP.size();
    assert(CountersSize == (int)EmitFunctionCallArgsArray.size() &&
           "Mismatched array size!");
    assert(CountersSize == (int)EmitArcsCallArgsArray.size() &&
           "Mismatched array size!");
    auto *EmitFunctionCallArgsArrayTy =
        ArrayType::get(EmitFunctionCallArgsTy, CountersSize);
    auto *EmitFunctionCallArgsArrayGV = new GlobalVariable(
        *M, EmitFunctionCallArgsArrayTy, /*isConstant*/ true,
        GlobalValue::InternalLinkage,
        ConstantArray::get(EmitFunctionCallArgsArrayTy,
                           EmitFunctionCallArgsArray),
        Twine("__llvm_internal_gcov_emit_function_args.") + Twine(i));
    auto *EmitArcsCallArgsArrayTy =
        ArrayType::get(EmitArcsCallArgsTy, CountersSize);
    EmitFunctionCallArgsArrayGV->setUnnamedAddr(
        GlobalValue::UnnamedAddr::Global);
    auto *EmitArcsCallArgsArrayGV = new GlobalVariable(
        *M, EmitArcsCallArgsArrayTy, /*isConstant*/ true,
        GlobalValue::InternalLinkage,
        ConstantArray::get(EmitArcsCallArgsArrayTy, EmitArcsCallArgsArray),
        Twine("__llvm_internal_gcov_emit_arcs_args.") + Twine(i));
    EmitArcsCallArgsArrayGV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);

    // Tie everything for this file together; the two array globals are
    // referenced through pointers to their first elements.
    FileInfos.push_back(ConstantStruct::get(
        FileInfoTy,
        {StartFileCallArgs, Builder.getInt32(CountersSize),
         ConstantExpr::getInBoundsGetElementPtr(EmitFunctionCallArgsArrayTy,
                                                EmitFunctionCallArgsArrayGV,
                                                TwoZero32s),
         ConstantExpr::getInBoundsGetElementPtr(
             EmitArcsCallArgsArrayTy, EmitArcsCallArgsArrayGV, TwoZero32s)}));
  }

  // If we didn't find anything to actually emit, bail on out.
  if (FileInfos.empty()) {
    Builder.CreateRetVoid();
    return WriteoutF;
  }

  // To simplify code, we cap the number of file infos we write out to fit
  // easily in a 32-bit signed integer. This gives consistent behavior between
  // 32-bit and 64-bit systems without requiring (potentially very slow) 64-bit
  // operations on 32-bit systems. It also seems unreasonable to try to handle
  // more than 2 billion files.
  if ((int64_t)FileInfos.size() > (int64_t)INT_MAX)
    FileInfos.resize(INT_MAX);

  // Create a global for the entire data structure so we can walk it more
  // easily.
  auto *FileInfoArrayTy = ArrayType::get(FileInfoTy, FileInfos.size());
  auto *FileInfoArrayGV = new GlobalVariable(
      *M, FileInfoArrayTy, /*isConstant*/ true, GlobalValue::InternalLinkage,
      ConstantArray::get(FileInfoArrayTy, FileInfos),
      "__llvm_internal_gcov_emit_file_info");
  FileInfoArrayGV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);

  // Create the CFG for walking this data structure.
  //   entry -> file.loop.header -> counter.loop.header (self loop)
  //         -> file.loop.latch -> file.loop.header | exit
  auto *FileLoopHeader =
      BasicBlock::Create(*Ctx, "file.loop.header", WriteoutF);
  auto *CounterLoopHeader =
      BasicBlock::Create(*Ctx, "counter.loop.header", WriteoutF);
  auto *FileLoopLatch = BasicBlock::Create(*Ctx, "file.loop.latch", WriteoutF);
  auto *ExitBB = BasicBlock::Create(*Ctx, "exit", WriteoutF);

  // We always have at least one file, so just branch to the header.
  Builder.CreateBr(FileLoopHeader);

  // The index into the files structure is our loop induction variable.
  Builder.SetInsertPoint(FileLoopHeader);
  PHINode *IV = Builder.CreatePHI(Builder.getInt32Ty(), /*NumReservedValues*/ 2,
                                  "file_idx");
  IV->addIncoming(Builder.getInt32(0), BB);
  auto *FileInfoPtr = Builder.CreateInBoundsGEP(
      FileInfoArrayTy, FileInfoArrayGV, {Builder.getInt32(0), IV});
  // Load the three start-file arguments out of the current record and call
  // the runtime with them.
  auto *StartFileCallArgsPtr =
      Builder.CreateStructGEP(FileInfoTy, FileInfoPtr, 0, "start_file_args");
  auto *StartFileCall = Builder.CreateCall(
      StartFile,
      {Builder.CreateLoad(StartFileCallArgsTy->getElementType(0),
                          Builder.CreateStructGEP(StartFileCallArgsTy,
                                                  StartFileCallArgsPtr, 0),
                          "filename"),
       Builder.CreateLoad(StartFileCallArgsTy->getElementType(1),
                          Builder.CreateStructGEP(StartFileCallArgsTy,
                                                  StartFileCallArgsPtr, 1),
                          "version"),
       Builder.CreateLoad(StartFileCallArgsTy->getElementType(2),
                          Builder.CreateStructGEP(StartFileCallArgsTy,
                                                  StartFileCallArgsPtr, 2),
                          "stamp")});
  // Mirror the extension attribute that the declaration puts on parameter 2.
  if (auto AK = TLI->getExtAttrForI32Param(false))
    StartFileCall->addParamAttr(2, AK);
  // Load the counter count and the base pointers of the two per-file
  // argument arrays.
  auto *NumCounters = Builder.CreateLoad(
      FileInfoTy->getElementType(1),
      Builder.CreateStructGEP(FileInfoTy, FileInfoPtr, 1), "num_ctrs");
  auto *EmitFunctionCallArgsArray =
      Builder.CreateLoad(FileInfoTy->getElementType(2),
                         Builder.CreateStructGEP(FileInfoTy, FileInfoPtr, 2),
                         "emit_function_args");
  auto *EmitArcsCallArgsArray = Builder.CreateLoad(
      FileInfoTy->getElementType(3),
      Builder.CreateStructGEP(FileInfoTy, FileInfoPtr, 3), "emit_arcs_args");
  // Skip the counter loop entirely when this file has no counters (0 < n is
  // false), going straight to the latch.
  auto *EnterCounterLoopCond =
      Builder.CreateICmpSLT(Builder.getInt32(0), NumCounters);
  Builder.CreateCondBr(EnterCounterLoopCond, CounterLoopHeader, FileLoopLatch);

  // Inner loop: one emit-function call and one emit-arcs call per counter
  // array, indexed by ctr_idx.
  Builder.SetInsertPoint(CounterLoopHeader);
  auto *JV = Builder.CreatePHI(Builder.getInt32Ty(), /*NumReservedValues*/ 2,
                               "ctr_idx");
  JV->addIncoming(Builder.getInt32(0), FileLoopHeader);
  auto *EmitFunctionCallArgsPtr = Builder.CreateInBoundsGEP(
      EmitFunctionCallArgsTy, EmitFunctionCallArgsArray, JV);
  auto *EmitFunctionCall = Builder.CreateCall(
      EmitFunction,
      {Builder.CreateLoad(EmitFunctionCallArgsTy->getElementType(0),
                          Builder.CreateStructGEP(EmitFunctionCallArgsTy,
                                                  EmitFunctionCallArgsPtr, 0),
                          "ident"),
       Builder.CreateLoad(EmitFunctionCallArgsTy->getElementType(1),
                          Builder.CreateStructGEP(EmitFunctionCallArgsTy,
                                                  EmitFunctionCallArgsPtr, 1),
                          "func_checkssum"),
       Builder.CreateLoad(EmitFunctionCallArgsTy->getElementType(2),
                          Builder.CreateStructGEP(EmitFunctionCallArgsTy,
                                                  EmitFunctionCallArgsPtr, 2),
                          "cfg_checksum")});
  // All three arguments are i32, so each gets the extension attribute.
  if (auto AK = TLI->getExtAttrForI32Param(false)) {
    EmitFunctionCall->addParamAttr(0, AK);
    EmitFunctionCall->addParamAttr(1, AK);
    EmitFunctionCall->addParamAttr(2, AK);
  }
  auto *EmitArcsCallArgsPtr =
      Builder.CreateInBoundsGEP(EmitArcsCallArgsTy, EmitArcsCallArgsArray, JV);
  auto *EmitArcsCall = Builder.CreateCall(
      EmitArcs,
      {Builder.CreateLoad(
           EmitArcsCallArgsTy->getElementType(0),
           Builder.CreateStructGEP(EmitArcsCallArgsTy, EmitArcsCallArgsPtr, 0),
           "num_counters"),
       Builder.CreateLoad(
           EmitArcsCallArgsTy->getElementType(1),
           Builder.CreateStructGEP(EmitArcsCallArgsTy, EmitArcsCallArgsPtr, 1),
           "counters")});
  if (auto AK = TLI->getExtAttrForI32Param(false))
    EmitArcsCall->addParamAttr(0, AK);
  // Advance ctr_idx; the header is its own latch.
  auto *NextJV = Builder.CreateAdd(JV, Builder.getInt32(1));
  auto *CounterLoopCond = Builder.CreateICmpSLT(NextJV, NumCounters);
  Builder.CreateCondBr(CounterLoopCond, CounterLoopHeader, FileLoopLatch);
  JV->addIncoming(NextJV, CounterLoopHeader);

  // Outer latch: finish the current file, then advance file_idx and either
  // loop back for the next record or leave.
  Builder.SetInsertPoint(FileLoopLatch);
  Builder.CreateCall(SummaryInfo, {});
  Builder.CreateCall(EndFile, {});
  auto *NextIV = Builder.CreateAdd(IV, Builder.getInt32(1), "next_file_idx");
  auto *FileLoopCond =
      Builder.CreateICmpSLT(NextIV, Builder.getInt32(FileInfos.size()));
  Builder.CreateCondBr(FileLoopCond, FileLoopHeader, ExitBB);
  IV->addIncoming(NextIV, FileLoopLatch);

  Builder.SetInsertPoint(ExitBB);
  Builder.CreateRetVoid();

  return WriteoutF;
}
1312 
1313 Function *GCOVProfiler::insertReset(
1314     ArrayRef<std::pair<GlobalVariable *, MDNode *>> CountersBySP) {
1315   FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), false);
1316   Function *ResetF = M->getFunction("__llvm_gcov_reset");
1317   if (!ResetF)
1318     ResetF = createInternalFunction(FTy, "__llvm_gcov_reset");
1319   ResetF->addFnAttr(Attribute::NoInline);
1320 
1321   BasicBlock *Entry = BasicBlock::Create(*Ctx, "entry", ResetF);
1322   IRBuilder<> Builder(Entry);
1323   LLVMContext &C = Entry->getContext();
1324 
1325   // Zero out the counters.
1326   for (const auto &I : CountersBySP) {
1327     GlobalVariable *GV = I.first;
1328     auto *GVTy = cast<ArrayType>(GV->getValueType());
1329     Builder.CreateMemSet(GV, Constant::getNullValue(Type::getInt8Ty(C)),
1330                          GVTy->getNumElements() *
1331                              GVTy->getElementType()->getScalarSizeInBits() / 8,
1332                          GV->getAlign());
1333   }
1334 
1335   Type *RetTy = ResetF->getReturnType();
1336   if (RetTy->isVoidTy())
1337     Builder.CreateRetVoid();
1338   else if (RetTy->isIntegerTy())
1339     // Used if __llvm_gcov_reset was implicitly declared.
1340     Builder.CreateRet(ConstantInt::get(RetTy, 0));
1341   else
1342     report_fatal_error("invalid return type for __llvm_gcov_reset");
1343 
1344   return ResetF;
1345 }
1346