1 //===- CoverageMapping.cpp - Code coverage mapping support ----------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains support for clang's and llvm's instrumentation based
10 // code coverage.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "llvm/ProfileData/Coverage/CoverageMapping.h"
15 #include "llvm/ADT/ArrayRef.h"
16 #include "llvm/ADT/DenseMap.h"
17 #include "llvm/ADT/SmallBitVector.h"
18 #include "llvm/ADT/SmallString.h"
19 #include "llvm/ADT/SmallVector.h"
20 #include "llvm/ADT/StringExtras.h"
21 #include "llvm/ADT/StringRef.h"
22 #include "llvm/Object/BuildID.h"
23 #include "llvm/ProfileData/Coverage/CoverageMappingReader.h"
24 #include "llvm/ProfileData/InstrProfReader.h"
25 #include "llvm/Support/Debug.h"
26 #include "llvm/Support/Errc.h"
27 #include "llvm/Support/Error.h"
28 #include "llvm/Support/ErrorHandling.h"
29 #include "llvm/Support/MemoryBuffer.h"
30 #include "llvm/Support/VirtualFileSystem.h"
31 #include "llvm/Support/raw_ostream.h"
32 #include <algorithm>
33 #include <cassert>
34 #include <cstdint>
35 #include <iterator>
36 #include <map>
37 #include <memory>
38 #include <optional>
39 #include <string>
40 #include <system_error>
41 #include <utility>
42 #include <vector>
43 
44 using namespace llvm;
45 using namespace coverage;
46 
47 #define DEBUG_TYPE "coverage-mapping"
48 
49 Counter CounterExpressionBuilder::get(const CounterExpression &E) {
50   auto It = ExpressionIndices.find(E);
51   if (It != ExpressionIndices.end())
52     return Counter::getExpression(It->second);
53   unsigned I = Expressions.size();
54   Expressions.push_back(E);
55   ExpressionIndices[E] = I;
56   return Counter::getExpression(I);
57 }
58 
59 void CounterExpressionBuilder::extractTerms(Counter C, int Factor,
60                                             SmallVectorImpl<Term> &Terms) {
61   switch (C.getKind()) {
62   case Counter::Zero:
63     break;
64   case Counter::CounterValueReference:
65     Terms.emplace_back(C.getCounterID(), Factor);
66     break;
67   case Counter::Expression:
68     const auto &E = Expressions[C.getExpressionID()];
69     extractTerms(E.LHS, Factor, Terms);
70     extractTerms(
71         E.RHS, E.Kind == CounterExpression::Subtract ? -Factor : Factor, Terms);
72     break;
73   }
74 }
75 
76 Counter CounterExpressionBuilder::simplify(Counter ExpressionTree) {
77   // Gather constant terms.
78   SmallVector<Term, 32> Terms;
79   extractTerms(ExpressionTree, +1, Terms);
80 
81   // If there are no terms, this is just a zero. The algorithm below assumes at
82   // least one term.
83   if (Terms.size() == 0)
84     return Counter::getZero();
85 
86   // Group the terms by counter ID.
87   llvm::sort(Terms, [](const Term &LHS, const Term &RHS) {
88     return LHS.CounterID < RHS.CounterID;
89   });
90 
91   // Combine terms by counter ID to eliminate counters that sum to zero.
92   auto Prev = Terms.begin();
93   for (auto I = Prev + 1, E = Terms.end(); I != E; ++I) {
94     if (I->CounterID == Prev->CounterID) {
95       Prev->Factor += I->Factor;
96       continue;
97     }
98     ++Prev;
99     *Prev = *I;
100   }
101   Terms.erase(++Prev, Terms.end());
102 
103   Counter C;
104   // Create additions. We do this before subtractions to avoid constructs like
105   // ((0 - X) + Y), as opposed to (Y - X).
106   for (auto T : Terms) {
107     if (T.Factor <= 0)
108       continue;
109     for (int I = 0; I < T.Factor; ++I)
110       if (C.isZero())
111         C = Counter::getCounter(T.CounterID);
112       else
113         C = get(CounterExpression(CounterExpression::Add, C,
114                                   Counter::getCounter(T.CounterID)));
115   }
116 
117   // Create subtractions.
118   for (auto T : Terms) {
119     if (T.Factor >= 0)
120       continue;
121     for (int I = 0; I < -T.Factor; ++I)
122       C = get(CounterExpression(CounterExpression::Subtract, C,
123                                 Counter::getCounter(T.CounterID)));
124   }
125   return C;
126 }
127 
128 Counter CounterExpressionBuilder::add(Counter LHS, Counter RHS, bool Simplify) {
129   auto Cnt = get(CounterExpression(CounterExpression::Add, LHS, RHS));
130   return Simplify ? simplify(Cnt) : Cnt;
131 }
132 
133 Counter CounterExpressionBuilder::subtract(Counter LHS, Counter RHS,
134                                            bool Simplify) {
135   auto Cnt = get(CounterExpression(CounterExpression::Subtract, LHS, RHS));
136   return Simplify ? simplify(Cnt) : Cnt;
137 }
138 
139 void CounterMappingContext::dump(const Counter &C, raw_ostream &OS) const {
140   switch (C.getKind()) {
141   case Counter::Zero:
142     OS << '0';
143     return;
144   case Counter::CounterValueReference:
145     OS << '#' << C.getCounterID();
146     break;
147   case Counter::Expression: {
148     if (C.getExpressionID() >= Expressions.size())
149       return;
150     const auto &E = Expressions[C.getExpressionID()];
151     OS << '(';
152     dump(E.LHS, OS);
153     OS << (E.Kind == CounterExpression::Subtract ? " - " : " + ");
154     dump(E.RHS, OS);
155     OS << ')';
156     break;
157   }
158   }
159   if (CounterValues.empty())
160     return;
161   Expected<int64_t> Value = evaluate(C);
162   if (auto E = Value.takeError()) {
163     consumeError(std::move(E));
164     return;
165   }
166   OS << '[' << *Value << ']';
167 }
168 
169 Expected<int64_t> CounterMappingContext::evaluate(const Counter &C) const {
170   switch (C.getKind()) {
171   case Counter::Zero:
172     return 0;
173   case Counter::CounterValueReference:
174     if (C.getCounterID() >= CounterValues.size())
175       return errorCodeToError(errc::argument_out_of_domain);
176     return CounterValues[C.getCounterID()];
177   case Counter::Expression: {
178     if (C.getExpressionID() >= Expressions.size())
179       return errorCodeToError(errc::argument_out_of_domain);
180     const auto &E = Expressions[C.getExpressionID()];
181     Expected<int64_t> LHS = evaluate(E.LHS);
182     if (!LHS)
183       return LHS;
184     Expected<int64_t> RHS = evaluate(E.RHS);
185     if (!RHS)
186       return RHS;
187     return E.Kind == CounterExpression::Subtract ? *LHS - *RHS : *LHS + *RHS;
188   }
189   }
190   llvm_unreachable("Unhandled CounterKind");
191 }
192 
193 unsigned CounterMappingContext::getMaxCounterID(const Counter &C) const {
194   switch (C.getKind()) {
195   case Counter::Zero:
196     return 0;
197   case Counter::CounterValueReference:
198     return C.getCounterID();
199   case Counter::Expression: {
200     if (C.getExpressionID() >= Expressions.size())
201       return 0;
202     const auto &E = Expressions[C.getExpressionID()];
203     return std::max(getMaxCounterID(E.LHS), getMaxCounterID(E.RHS));
204   }
205   }
206   llvm_unreachable("Unhandled CounterKind");
207 }
208 
209 void FunctionRecordIterator::skipOtherFiles() {
210   while (Current != Records.end() && !Filename.empty() &&
211          Filename != Current->Filenames[0])
212     ++Current;
213   if (Current == Records.end())
214     *this = FunctionRecordIterator();
215 }
216 
217 ArrayRef<unsigned> CoverageMapping::getImpreciseRecordIndicesForFilename(
218     StringRef Filename) const {
219   size_t FilenameHash = hash_value(Filename);
220   auto RecordIt = FilenameHash2RecordIndices.find(FilenameHash);
221   if (RecordIt == FilenameHash2RecordIndices.end())
222     return {};
223   return RecordIt->second;
224 }
225 
226 static unsigned getMaxCounterID(const CounterMappingContext &Ctx,
227                                 const CoverageMappingRecord &Record) {
228   unsigned MaxCounterID = 0;
229   for (const auto &Region : Record.MappingRegions) {
230     MaxCounterID = std::max(MaxCounterID, Ctx.getMaxCounterID(Region.Count));
231   }
232   return MaxCounterID;
233 }
234 
235 Error CoverageMapping::loadFunctionRecord(
236     const CoverageMappingRecord &Record,
237     IndexedInstrProfReader &ProfileReader) {
238   StringRef OrigFuncName = Record.FunctionName;
239   if (OrigFuncName.empty())
240     return make_error<CoverageMapError>(coveragemap_error::malformed);
241 
242   if (Record.Filenames.empty())
243     OrigFuncName = getFuncNameWithoutPrefix(OrigFuncName);
244   else
245     OrigFuncName = getFuncNameWithoutPrefix(OrigFuncName, Record.Filenames[0]);
246 
247   CounterMappingContext Ctx(Record.Expressions);
248 
249   std::vector<uint64_t> Counts;
250   if (Error E = ProfileReader.getFunctionCounts(Record.FunctionName,
251                                                 Record.FunctionHash, Counts)) {
252     instrprof_error IPE = std::get<0>(InstrProfError::take(std::move(E)));
253     if (IPE == instrprof_error::hash_mismatch) {
254       FuncHashMismatches.emplace_back(std::string(Record.FunctionName),
255                                       Record.FunctionHash);
256       return Error::success();
257     } else if (IPE != instrprof_error::unknown_function)
258       return make_error<InstrProfError>(IPE);
259     Counts.assign(getMaxCounterID(Ctx, Record) + 1, 0);
260   }
261   Ctx.setCounts(Counts);
262 
263   assert(!Record.MappingRegions.empty() && "Function has no regions");
264 
265   // This coverage record is a zero region for a function that's unused in
266   // some TU, but used in a different TU. Ignore it. The coverage maps from the
267   // the other TU will either be loaded (providing full region counts) or they
268   // won't (in which case we don't unintuitively report functions as uncovered
269   // when they have non-zero counts in the profile).
270   if (Record.MappingRegions.size() == 1 &&
271       Record.MappingRegions[0].Count.isZero() && Counts[0] > 0)
272     return Error::success();
273 
274   FunctionRecord Function(OrigFuncName, Record.Filenames);
275   for (const auto &Region : Record.MappingRegions) {
276     Expected<int64_t> ExecutionCount = Ctx.evaluate(Region.Count);
277     if (auto E = ExecutionCount.takeError()) {
278       consumeError(std::move(E));
279       return Error::success();
280     }
281     Expected<int64_t> AltExecutionCount = Ctx.evaluate(Region.FalseCount);
282     if (auto E = AltExecutionCount.takeError()) {
283       consumeError(std::move(E));
284       return Error::success();
285     }
286     Function.pushRegion(Region, *ExecutionCount, *AltExecutionCount);
287   }
288 
289   // Don't create records for (filenames, function) pairs we've already seen.
290   auto FilenamesHash = hash_combine_range(Record.Filenames.begin(),
291                                           Record.Filenames.end());
292   if (!RecordProvenance[FilenamesHash].insert(hash_value(OrigFuncName)).second)
293     return Error::success();
294 
295   Functions.push_back(std::move(Function));
296 
297   // Performance optimization: keep track of the indices of the function records
298   // which correspond to each filename. This can be used to substantially speed
299   // up queries for coverage info in a file.
300   unsigned RecordIndex = Functions.size() - 1;
301   for (StringRef Filename : Record.Filenames) {
302     auto &RecordIndices = FilenameHash2RecordIndices[hash_value(Filename)];
303     // Note that there may be duplicates in the filename set for a function
304     // record, because of e.g. macro expansions in the function in which both
305     // the macro and the function are defined in the same file.
306     if (RecordIndices.empty() || RecordIndices.back() != RecordIndex)
307       RecordIndices.push_back(RecordIndex);
308   }
309 
310   return Error::success();
311 }
312 
313 // This function is for memory optimization by shortening the lifetimes
314 // of CoverageMappingReader instances.
315 Error CoverageMapping::loadFromReaders(
316     ArrayRef<std::unique_ptr<CoverageMappingReader>> CoverageReaders,
317     IndexedInstrProfReader &ProfileReader, CoverageMapping &Coverage) {
318   for (const auto &CoverageReader : CoverageReaders) {
319     for (auto RecordOrErr : *CoverageReader) {
320       if (Error E = RecordOrErr.takeError())
321         return E;
322       const auto &Record = *RecordOrErr;
323       if (Error E = Coverage.loadFunctionRecord(Record, ProfileReader))
324         return E;
325     }
326   }
327   return Error::success();
328 }
329 
330 Expected<std::unique_ptr<CoverageMapping>> CoverageMapping::load(
331     ArrayRef<std::unique_ptr<CoverageMappingReader>> CoverageReaders,
332     IndexedInstrProfReader &ProfileReader) {
333   auto Coverage = std::unique_ptr<CoverageMapping>(new CoverageMapping());
334   if (Error E = loadFromReaders(CoverageReaders, ProfileReader, *Coverage))
335     return std::move(E);
336   return std::move(Coverage);
337 }
338 
339 // If E is a no_data_found error, returns success. Otherwise returns E.
340 static Error handleMaybeNoDataFoundError(Error E) {
341   return handleErrors(
342       std::move(E), [](const CoverageMapError &CME) {
343         if (CME.get() == coveragemap_error::no_data_found)
344           return static_cast<Error>(Error::success());
345         return make_error<CoverageMapError>(CME.get());
346       });
347 }
348 
349 Error CoverageMapping::loadFromFile(
350     StringRef Filename, StringRef Arch, StringRef CompilationDir,
351     IndexedInstrProfReader &ProfileReader, CoverageMapping &Coverage,
352     bool &DataFound, SmallVectorImpl<object::BuildID> *FoundBinaryIDs) {
353   auto CovMappingBufOrErr = MemoryBuffer::getFileOrSTDIN(
354       Filename, /*IsText=*/false, /*RequiresNullTerminator=*/false);
355   if (std::error_code EC = CovMappingBufOrErr.getError())
356     return createFileError(Filename, errorCodeToError(EC));
357   MemoryBufferRef CovMappingBufRef =
358       CovMappingBufOrErr.get()->getMemBufferRef();
359   SmallVector<std::unique_ptr<MemoryBuffer>, 4> Buffers;
360 
361   SmallVector<object::BuildIDRef> BinaryIDs;
362   auto CoverageReadersOrErr = BinaryCoverageReader::create(
363       CovMappingBufRef, Arch, Buffers, CompilationDir,
364       FoundBinaryIDs ? &BinaryIDs : nullptr);
365   if (Error E = CoverageReadersOrErr.takeError()) {
366     E = handleMaybeNoDataFoundError(std::move(E));
367     if (E)
368       return createFileError(Filename, std::move(E));
369     return E;
370   }
371 
372   SmallVector<std::unique_ptr<CoverageMappingReader>, 4> Readers;
373   for (auto &Reader : CoverageReadersOrErr.get())
374     Readers.push_back(std::move(Reader));
375   if (FoundBinaryIDs && !Readers.empty()) {
376     llvm::append_range(*FoundBinaryIDs,
377                        llvm::map_range(BinaryIDs, [](object::BuildIDRef BID) {
378                          return object::BuildID(BID);
379                        }));
380   }
381   DataFound |= !Readers.empty();
382   if (Error E = loadFromReaders(Readers, ProfileReader, Coverage))
383     return createFileError(Filename, std::move(E));
384   return Error::success();
385 }
386 
387 Expected<std::unique_ptr<CoverageMapping>> CoverageMapping::load(
388     ArrayRef<StringRef> ObjectFilenames, StringRef ProfileFilename,
389     vfs::FileSystem &FS, ArrayRef<StringRef> Arches, StringRef CompilationDir,
390     const object::BuildIDFetcher *BIDFetcher, bool CheckBinaryIDs) {
391   auto ProfileReaderOrErr = IndexedInstrProfReader::create(ProfileFilename, FS);
392   if (Error E = ProfileReaderOrErr.takeError())
393     return createFileError(ProfileFilename, std::move(E));
394   auto ProfileReader = std::move(ProfileReaderOrErr.get());
395   auto Coverage = std::unique_ptr<CoverageMapping>(new CoverageMapping());
396   bool DataFound = false;
397 
398   auto GetArch = [&](size_t Idx) {
399     if (Arches.empty())
400       return StringRef();
401     if (Arches.size() == 1)
402       return Arches.front();
403     return Arches[Idx];
404   };
405 
406   SmallVector<object::BuildID> FoundBinaryIDs;
407   for (const auto &File : llvm::enumerate(ObjectFilenames)) {
408     if (Error E =
409             loadFromFile(File.value(), GetArch(File.index()), CompilationDir,
410                          *ProfileReader, *Coverage, DataFound, &FoundBinaryIDs))
411       return std::move(E);
412   }
413 
414   if (BIDFetcher) {
415     std::vector<object::BuildID> ProfileBinaryIDs;
416     if (Error E = ProfileReader->readBinaryIds(ProfileBinaryIDs))
417       return createFileError(ProfileFilename, std::move(E));
418 
419     SmallVector<object::BuildIDRef> BinaryIDsToFetch;
420     if (!ProfileBinaryIDs.empty()) {
421       const auto &Compare = [](object::BuildIDRef A, object::BuildIDRef B) {
422         return std::lexicographical_compare(A.begin(), A.end(), B.begin(),
423                                             B.end());
424       };
425       llvm::sort(FoundBinaryIDs, Compare);
426       std::set_difference(
427           ProfileBinaryIDs.begin(), ProfileBinaryIDs.end(),
428           FoundBinaryIDs.begin(), FoundBinaryIDs.end(),
429           std::inserter(BinaryIDsToFetch, BinaryIDsToFetch.end()), Compare);
430     }
431 
432     for (object::BuildIDRef BinaryID : BinaryIDsToFetch) {
433       std::optional<std::string> PathOpt = BIDFetcher->fetch(BinaryID);
434       if (PathOpt) {
435         std::string Path = std::move(*PathOpt);
436         StringRef Arch = Arches.size() == 1 ? Arches.front() : StringRef();
437         if (Error E = loadFromFile(Path, Arch, CompilationDir, *ProfileReader,
438                                   *Coverage, DataFound))
439           return std::move(E);
440       } else if (CheckBinaryIDs) {
441         return createFileError(
442             ProfileFilename,
443             createStringError(errc::no_such_file_or_directory,
444                               "Missing binary ID: " +
445                                   llvm::toHex(BinaryID, /*LowerCase=*/true)));
446       }
447     }
448   }
449 
450   if (!DataFound)
451     return createFileError(
452         join(ObjectFilenames.begin(), ObjectFilenames.end(), ", "),
453         make_error<CoverageMapError>(coveragemap_error::no_data_found));
454   return std::move(Coverage);
455 }
456 
457 namespace {
458 
459 /// Distributes functions into instantiation sets.
460 ///
461 /// An instantiation set is a collection of functions that have the same source
462 /// code, ie, template functions specializations.
463 class FunctionInstantiationSetCollector {
464   using MapT = std::map<LineColPair, std::vector<const FunctionRecord *>>;
465   MapT InstantiatedFunctions;
466 
467 public:
468   void insert(const FunctionRecord &Function, unsigned FileID) {
469     auto I = Function.CountedRegions.begin(), E = Function.CountedRegions.end();
470     while (I != E && I->FileID != FileID)
471       ++I;
472     assert(I != E && "function does not cover the given file");
473     auto &Functions = InstantiatedFunctions[I->startLoc()];
474     Functions.push_back(&Function);
475   }
476 
477   MapT::iterator begin() { return InstantiatedFunctions.begin(); }
478   MapT::iterator end() { return InstantiatedFunctions.end(); }
479 };
480 
481 class SegmentBuilder {
482   std::vector<CoverageSegment> &Segments;
483   SmallVector<const CountedRegion *, 8> ActiveRegions;
484 
485   SegmentBuilder(std::vector<CoverageSegment> &Segments) : Segments(Segments) {}
486 
487   /// Emit a segment with the count from \p Region starting at \p StartLoc.
488   //
489   /// \p IsRegionEntry: The segment is at the start of a new non-gap region.
490   /// \p EmitSkippedRegion: The segment must be emitted as a skipped region.
491   void startSegment(const CountedRegion &Region, LineColPair StartLoc,
492                     bool IsRegionEntry, bool EmitSkippedRegion = false) {
493     bool HasCount = !EmitSkippedRegion &&
494                     (Region.Kind != CounterMappingRegion::SkippedRegion);
495 
496     // If the new segment wouldn't affect coverage rendering, skip it.
497     if (!Segments.empty() && !IsRegionEntry && !EmitSkippedRegion) {
498       const auto &Last = Segments.back();
499       if (Last.HasCount == HasCount && Last.Count == Region.ExecutionCount &&
500           !Last.IsRegionEntry)
501         return;
502     }
503 
504     if (HasCount)
505       Segments.emplace_back(StartLoc.first, StartLoc.second,
506                             Region.ExecutionCount, IsRegionEntry,
507                             Region.Kind == CounterMappingRegion::GapRegion);
508     else
509       Segments.emplace_back(StartLoc.first, StartLoc.second, IsRegionEntry);
510 
511     LLVM_DEBUG({
512       const auto &Last = Segments.back();
513       dbgs() << "Segment at " << Last.Line << ":" << Last.Col
514              << " (count = " << Last.Count << ")"
515              << (Last.IsRegionEntry ? ", RegionEntry" : "")
516              << (!Last.HasCount ? ", Skipped" : "")
517              << (Last.IsGapRegion ? ", Gap" : "") << "\n";
518     });
519   }
520 
521   /// Emit segments for active regions which end before \p Loc.
522   ///
523   /// \p Loc: The start location of the next region. If std::nullopt, all active
524   /// regions are completed.
525   /// \p FirstCompletedRegion: Index of the first completed region.
526   void completeRegionsUntil(std::optional<LineColPair> Loc,
527                             unsigned FirstCompletedRegion) {
528     // Sort the completed regions by end location. This makes it simple to
529     // emit closing segments in sorted order.
530     auto CompletedRegionsIt = ActiveRegions.begin() + FirstCompletedRegion;
531     std::stable_sort(CompletedRegionsIt, ActiveRegions.end(),
532                       [](const CountedRegion *L, const CountedRegion *R) {
533                         return L->endLoc() < R->endLoc();
534                       });
535 
536     // Emit segments for all completed regions.
537     for (unsigned I = FirstCompletedRegion + 1, E = ActiveRegions.size(); I < E;
538          ++I) {
539       const auto *CompletedRegion = ActiveRegions[I];
540       assert((!Loc || CompletedRegion->endLoc() <= *Loc) &&
541              "Completed region ends after start of new region");
542 
543       const auto *PrevCompletedRegion = ActiveRegions[I - 1];
544       auto CompletedSegmentLoc = PrevCompletedRegion->endLoc();
545 
546       // Don't emit any more segments if they start where the new region begins.
547       if (Loc && CompletedSegmentLoc == *Loc)
548         break;
549 
550       // Don't emit a segment if the next completed region ends at the same
551       // location as this one.
552       if (CompletedSegmentLoc == CompletedRegion->endLoc())
553         continue;
554 
555       // Use the count from the last completed region which ends at this loc.
556       for (unsigned J = I + 1; J < E; ++J)
557         if (CompletedRegion->endLoc() == ActiveRegions[J]->endLoc())
558           CompletedRegion = ActiveRegions[J];
559 
560       startSegment(*CompletedRegion, CompletedSegmentLoc, false);
561     }
562 
563     auto Last = ActiveRegions.back();
564     if (FirstCompletedRegion && Last->endLoc() != *Loc) {
565       // If there's a gap after the end of the last completed region and the
566       // start of the new region, use the last active region to fill the gap.
567       startSegment(*ActiveRegions[FirstCompletedRegion - 1], Last->endLoc(),
568                    false);
569     } else if (!FirstCompletedRegion && (!Loc || *Loc != Last->endLoc())) {
570       // Emit a skipped segment if there are no more active regions. This
571       // ensures that gaps between functions are marked correctly.
572       startSegment(*Last, Last->endLoc(), false, true);
573     }
574 
575     // Pop the completed regions.
576     ActiveRegions.erase(CompletedRegionsIt, ActiveRegions.end());
577   }
578 
579   void buildSegmentsImpl(ArrayRef<CountedRegion> Regions) {
580     for (const auto &CR : enumerate(Regions)) {
581       auto CurStartLoc = CR.value().startLoc();
582 
583       // Active regions which end before the current region need to be popped.
584       auto CompletedRegions =
585           std::stable_partition(ActiveRegions.begin(), ActiveRegions.end(),
586                                 [&](const CountedRegion *Region) {
587                                   return !(Region->endLoc() <= CurStartLoc);
588                                 });
589       if (CompletedRegions != ActiveRegions.end()) {
590         unsigned FirstCompletedRegion =
591             std::distance(ActiveRegions.begin(), CompletedRegions);
592         completeRegionsUntil(CurStartLoc, FirstCompletedRegion);
593       }
594 
595       bool GapRegion = CR.value().Kind == CounterMappingRegion::GapRegion;
596 
597       // Try to emit a segment for the current region.
598       if (CurStartLoc == CR.value().endLoc()) {
599         // Avoid making zero-length regions active. If it's the last region,
600         // emit a skipped segment. Otherwise use its predecessor's count.
601         const bool Skipped =
602             (CR.index() + 1) == Regions.size() ||
603             CR.value().Kind == CounterMappingRegion::SkippedRegion;
604         startSegment(ActiveRegions.empty() ? CR.value() : *ActiveRegions.back(),
605                      CurStartLoc, !GapRegion, Skipped);
606         // If it is skipped segment, create a segment with last pushed
607         // regions's count at CurStartLoc.
608         if (Skipped && !ActiveRegions.empty())
609           startSegment(*ActiveRegions.back(), CurStartLoc, false);
610         continue;
611       }
612       if (CR.index() + 1 == Regions.size() ||
613           CurStartLoc != Regions[CR.index() + 1].startLoc()) {
614         // Emit a segment if the next region doesn't start at the same location
615         // as this one.
616         startSegment(CR.value(), CurStartLoc, !GapRegion);
617       }
618 
619       // This region is active (i.e not completed).
620       ActiveRegions.push_back(&CR.value());
621     }
622 
623     // Complete any remaining active regions.
624     if (!ActiveRegions.empty())
625       completeRegionsUntil(std::nullopt, 0);
626   }
627 
628   /// Sort a nested sequence of regions from a single file.
629   static void sortNestedRegions(MutableArrayRef<CountedRegion> Regions) {
630     llvm::sort(Regions, [](const CountedRegion &LHS, const CountedRegion &RHS) {
631       if (LHS.startLoc() != RHS.startLoc())
632         return LHS.startLoc() < RHS.startLoc();
633       if (LHS.endLoc() != RHS.endLoc())
634         // When LHS completely contains RHS, we sort LHS first.
635         return RHS.endLoc() < LHS.endLoc();
636       // If LHS and RHS cover the same area, we need to sort them according
637       // to their kinds so that the most suitable region will become "active"
638       // in combineRegions(). Because we accumulate counter values only from
639       // regions of the same kind as the first region of the area, prefer
640       // CodeRegion to ExpansionRegion and ExpansionRegion to SkippedRegion.
641       static_assert(CounterMappingRegion::CodeRegion <
642                             CounterMappingRegion::ExpansionRegion &&
643                         CounterMappingRegion::ExpansionRegion <
644                             CounterMappingRegion::SkippedRegion,
645                     "Unexpected order of region kind values");
646       return LHS.Kind < RHS.Kind;
647     });
648   }
649 
650   /// Combine counts of regions which cover the same area.
651   static ArrayRef<CountedRegion>
652   combineRegions(MutableArrayRef<CountedRegion> Regions) {
653     if (Regions.empty())
654       return Regions;
655     auto Active = Regions.begin();
656     auto End = Regions.end();
657     for (auto I = Regions.begin() + 1; I != End; ++I) {
658       if (Active->startLoc() != I->startLoc() ||
659           Active->endLoc() != I->endLoc()) {
660         // Shift to the next region.
661         ++Active;
662         if (Active != I)
663           *Active = *I;
664         continue;
665       }
666       // Merge duplicate region.
667       // If CodeRegions and ExpansionRegions cover the same area, it's probably
668       // a macro which is fully expanded to another macro. In that case, we need
669       // to accumulate counts only from CodeRegions, or else the area will be
670       // counted twice.
671       // On the other hand, a macro may have a nested macro in its body. If the
672       // outer macro is used several times, the ExpansionRegion for the nested
673       // macro will also be added several times. These ExpansionRegions cover
674       // the same source locations and have to be combined to reach the correct
675       // value for that area.
676       // We add counts of the regions of the same kind as the active region
677       // to handle the both situations.
678       if (I->Kind == Active->Kind)
679         Active->ExecutionCount += I->ExecutionCount;
680     }
681     return Regions.drop_back(std::distance(++Active, End));
682   }
683 
684 public:
685   /// Build a sorted list of CoverageSegments from a list of Regions.
686   static std::vector<CoverageSegment>
687   buildSegments(MutableArrayRef<CountedRegion> Regions) {
688     std::vector<CoverageSegment> Segments;
689     SegmentBuilder Builder(Segments);
690 
691     sortNestedRegions(Regions);
692     ArrayRef<CountedRegion> CombinedRegions = combineRegions(Regions);
693 
694     LLVM_DEBUG({
695       dbgs() << "Combined regions:\n";
696       for (const auto &CR : CombinedRegions)
697         dbgs() << "  " << CR.LineStart << ":" << CR.ColumnStart << " -> "
698                << CR.LineEnd << ":" << CR.ColumnEnd
699                << " (count=" << CR.ExecutionCount << ")\n";
700     });
701 
702     Builder.buildSegmentsImpl(CombinedRegions);
703 
704 #ifndef NDEBUG
705     for (unsigned I = 1, E = Segments.size(); I < E; ++I) {
706       const auto &L = Segments[I - 1];
707       const auto &R = Segments[I];
708       if (!(L.Line < R.Line) && !(L.Line == R.Line && L.Col < R.Col)) {
709         if (L.Line == R.Line && L.Col == R.Col && !L.HasCount)
710           continue;
711         LLVM_DEBUG(dbgs() << " ! Segment " << L.Line << ":" << L.Col
712                           << " followed by " << R.Line << ":" << R.Col << "\n");
713         assert(false && "Coverage segments not unique or sorted");
714       }
715     }
716 #endif
717 
718     return Segments;
719   }
720 };
721 
722 } // end anonymous namespace
723 
724 std::vector<StringRef> CoverageMapping::getUniqueSourceFiles() const {
725   std::vector<StringRef> Filenames;
726   for (const auto &Function : getCoveredFunctions())
727     llvm::append_range(Filenames, Function.Filenames);
728   llvm::sort(Filenames);
729   auto Last = std::unique(Filenames.begin(), Filenames.end());
730   Filenames.erase(Last, Filenames.end());
731   return Filenames;
732 }
733 
734 static SmallBitVector gatherFileIDs(StringRef SourceFile,
735                                     const FunctionRecord &Function) {
736   SmallBitVector FilenameEquivalence(Function.Filenames.size(), false);
737   for (unsigned I = 0, E = Function.Filenames.size(); I < E; ++I)
738     if (SourceFile == Function.Filenames[I])
739       FilenameEquivalence[I] = true;
740   return FilenameEquivalence;
741 }
742 
743 /// Return the ID of the file where the definition of the function is located.
744 static std::optional<unsigned>
745 findMainViewFileID(const FunctionRecord &Function) {
746   SmallBitVector IsNotExpandedFile(Function.Filenames.size(), true);
747   for (const auto &CR : Function.CountedRegions)
748     if (CR.Kind == CounterMappingRegion::ExpansionRegion)
749       IsNotExpandedFile[CR.ExpandedFileID] = false;
750   int I = IsNotExpandedFile.find_first();
751   if (I == -1)
752     return std::nullopt;
753   return I;
754 }
755 
756 /// Check if SourceFile is the file that contains the definition of
757 /// the Function. Return the ID of the file in that case or std::nullopt
758 /// otherwise.
759 static std::optional<unsigned>
760 findMainViewFileID(StringRef SourceFile, const FunctionRecord &Function) {
761   std::optional<unsigned> I = findMainViewFileID(Function);
762   if (I && SourceFile == Function.Filenames[*I])
763     return I;
764   return std::nullopt;
765 }
766 
767 static bool isExpansion(const CountedRegion &R, unsigned FileID) {
768   return R.Kind == CounterMappingRegion::ExpansionRegion && R.FileID == FileID;
769 }
770 
771 CoverageData CoverageMapping::getCoverageForFile(StringRef Filename) const {
772   CoverageData FileCoverage(Filename);
773   std::vector<CountedRegion> Regions;
774 
775   // Look up the function records in the given file. Due to hash collisions on
776   // the filename, we may get back some records that are not in the file.
777   ArrayRef<unsigned> RecordIndices =
778       getImpreciseRecordIndicesForFilename(Filename);
779   for (unsigned RecordIndex : RecordIndices) {
780     const FunctionRecord &Function = Functions[RecordIndex];
781     auto MainFileID = findMainViewFileID(Filename, Function);
782     auto FileIDs = gatherFileIDs(Filename, Function);
783     for (const auto &CR : Function.CountedRegions)
784       if (FileIDs.test(CR.FileID)) {
785         Regions.push_back(CR);
786         if (MainFileID && isExpansion(CR, *MainFileID))
787           FileCoverage.Expansions.emplace_back(CR, Function);
788       }
789     // Capture branch regions specific to the function (excluding expansions).
790     for (const auto &CR : Function.CountedBranchRegions)
791       if (FileIDs.test(CR.FileID) && (CR.FileID == CR.ExpandedFileID))
792         FileCoverage.BranchRegions.push_back(CR);
793   }
794 
795   LLVM_DEBUG(dbgs() << "Emitting segments for file: " << Filename << "\n");
796   FileCoverage.Segments = SegmentBuilder::buildSegments(Regions);
797 
798   return FileCoverage;
799 }
800 
801 std::vector<InstantiationGroup>
802 CoverageMapping::getInstantiationGroups(StringRef Filename) const {
803   FunctionInstantiationSetCollector InstantiationSetCollector;
804   // Look up the function records in the given file. Due to hash collisions on
805   // the filename, we may get back some records that are not in the file.
806   ArrayRef<unsigned> RecordIndices =
807       getImpreciseRecordIndicesForFilename(Filename);
808   for (unsigned RecordIndex : RecordIndices) {
809     const FunctionRecord &Function = Functions[RecordIndex];
810     auto MainFileID = findMainViewFileID(Filename, Function);
811     if (!MainFileID)
812       continue;
813     InstantiationSetCollector.insert(Function, *MainFileID);
814   }
815 
816   std::vector<InstantiationGroup> Result;
817   for (auto &InstantiationSet : InstantiationSetCollector) {
818     InstantiationGroup IG{InstantiationSet.first.first,
819                           InstantiationSet.first.second,
820                           std::move(InstantiationSet.second)};
821     Result.emplace_back(std::move(IG));
822   }
823   return Result;
824 }
825 
826 CoverageData
827 CoverageMapping::getCoverageForFunction(const FunctionRecord &Function) const {
828   auto MainFileID = findMainViewFileID(Function);
829   if (!MainFileID)
830     return CoverageData();
831 
832   CoverageData FunctionCoverage(Function.Filenames[*MainFileID]);
833   std::vector<CountedRegion> Regions;
834   for (const auto &CR : Function.CountedRegions)
835     if (CR.FileID == *MainFileID) {
836       Regions.push_back(CR);
837       if (isExpansion(CR, *MainFileID))
838         FunctionCoverage.Expansions.emplace_back(CR, Function);
839     }
840   // Capture branch regions specific to the function (excluding expansions).
841   for (const auto &CR : Function.CountedBranchRegions)
842     if (CR.FileID == *MainFileID)
843       FunctionCoverage.BranchRegions.push_back(CR);
844 
845   LLVM_DEBUG(dbgs() << "Emitting segments for function: " << Function.Name
846                     << "\n");
847   FunctionCoverage.Segments = SegmentBuilder::buildSegments(Regions);
848 
849   return FunctionCoverage;
850 }
851 
852 CoverageData CoverageMapping::getCoverageForExpansion(
853     const ExpansionRecord &Expansion) const {
854   CoverageData ExpansionCoverage(
855       Expansion.Function.Filenames[Expansion.FileID]);
856   std::vector<CountedRegion> Regions;
857   for (const auto &CR : Expansion.Function.CountedRegions)
858     if (CR.FileID == Expansion.FileID) {
859       Regions.push_back(CR);
860       if (isExpansion(CR, Expansion.FileID))
861         ExpansionCoverage.Expansions.emplace_back(CR, Expansion.Function);
862     }
863   for (const auto &CR : Expansion.Function.CountedBranchRegions)
864     // Capture branch regions that only pertain to the corresponding expansion.
865     if (CR.FileID == Expansion.FileID)
866       ExpansionCoverage.BranchRegions.push_back(CR);
867 
868   LLVM_DEBUG(dbgs() << "Emitting segments for expansion of file "
869                     << Expansion.FileID << "\n");
870   ExpansionCoverage.Segments = SegmentBuilder::buildSegments(Regions);
871 
872   return ExpansionCoverage;
873 }
874 
875 LineCoverageStats::LineCoverageStats(
876     ArrayRef<const CoverageSegment *> LineSegments,
877     const CoverageSegment *WrappedSegment, unsigned Line)
878     : ExecutionCount(0), HasMultipleRegions(false), Mapped(false), Line(Line),
879       LineSegments(LineSegments), WrappedSegment(WrappedSegment) {
880   // Find the minimum number of regions which start in this line.
881   unsigned MinRegionCount = 0;
882   auto isStartOfRegion = [](const CoverageSegment *S) {
883     return !S->IsGapRegion && S->HasCount && S->IsRegionEntry;
884   };
885   for (unsigned I = 0; I < LineSegments.size() && MinRegionCount < 2; ++I)
886     if (isStartOfRegion(LineSegments[I]))
887       ++MinRegionCount;
888 
889   bool StartOfSkippedRegion = !LineSegments.empty() &&
890                               !LineSegments.front()->HasCount &&
891                               LineSegments.front()->IsRegionEntry;
892 
893   HasMultipleRegions = MinRegionCount > 1;
894   Mapped =
895       !StartOfSkippedRegion &&
896       ((WrappedSegment && WrappedSegment->HasCount) || (MinRegionCount > 0));
897 
898   if (!Mapped)
899     return;
900 
901   // Pick the max count from the non-gap, region entry segments and the
902   // wrapped count.
903   if (WrappedSegment)
904     ExecutionCount = WrappedSegment->Count;
905   if (!MinRegionCount)
906     return;
907   for (const auto *LS : LineSegments)
908     if (isStartOfRegion(LS))
909       ExecutionCount = std::max(ExecutionCount, LS->Count);
910 }
911 
912 LineCoverageIterator &LineCoverageIterator::operator++() {
913   if (Next == CD.end()) {
914     Stats = LineCoverageStats();
915     Ended = true;
916     return *this;
917   }
918   if (Segments.size())
919     WrappedSegment = Segments.back();
920   Segments.clear();
921   while (Next != CD.end() && Next->Line == Line)
922     Segments.push_back(&*Next++);
923   Stats = LineCoverageStats(Segments, WrappedSegment, Line);
924   ++Line;
925   return *this;
926 }
927 
928 static std::string getCoverageMapErrString(coveragemap_error Err) {
929   switch (Err) {
930   case coveragemap_error::success:
931     return "Success";
932   case coveragemap_error::eof:
933     return "End of File";
934   case coveragemap_error::no_data_found:
935     return "No coverage data found";
936   case coveragemap_error::unsupported_version:
937     return "Unsupported coverage format version";
938   case coveragemap_error::truncated:
939     return "Truncated coverage data";
940   case coveragemap_error::malformed:
941     return "Malformed coverage data";
942   case coveragemap_error::decompression_failed:
943     return "Failed to decompress coverage data (zlib)";
944   case coveragemap_error::invalid_or_missing_arch_specifier:
945     return "`-arch` specifier is invalid or missing for universal binary";
946   }
947   llvm_unreachable("A value of coveragemap_error has no message.");
948 }
949 
950 namespace {
951 
952 // FIXME: This class is only here to support the transition to llvm::Error. It
953 // will be removed once this transition is complete. Clients should prefer to
954 // deal with the Error value directly, rather than converting to error_code.
955 class CoverageMappingErrorCategoryType : public std::error_category {
956   const char *name() const noexcept override { return "llvm.coveragemap"; }
957   std::string message(int IE) const override {
958     return getCoverageMapErrString(static_cast<coveragemap_error>(IE));
959   }
960 };
961 
962 } // end anonymous namespace
963 
964 std::string CoverageMapError::message() const {
965   return getCoverageMapErrString(Err);
966 }
967 
968 const std::error_category &llvm::coverage::coveragemap_category() {
969   static CoverageMappingErrorCategoryType ErrorCategory;
970   return ErrorCategory;
971 }
972 
973 char CoverageMapError::ID = 0;
974