1 //===- FuzzerMerge.h - merging corpora --------------------------*- C++ -* ===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 // Merging Corpora.
9 //
10 // The task:
11 //   Take the existing corpus (possibly empty) and merge new inputs into
12 //   it so that only inputs with new coverage ('features') are added.
13 //   The process should tolerate crashes, OOMs, leaks, etc.
14 //
15 // Algorithm:
16 //   The outer process collects the set of files and writes their names
17 //   into a temporary "control" file, then repeatedly launches the inner
18 //   process until all inputs are processed.
19 //   The outer process does not actually execute the target code.
20 //
21 //   The inner process reads the control file and sees a) list of all the inputs
22 //   and b) the last processed input. Then it starts processing the inputs one
23 //   by one. Before processing every input it writes one line to control file:
24 //   STARTED INPUT_ID INPUT_SIZE
25 //   After processing an input it writes the following lines:
26 //   FT INPUT_ID Feature1 Feature2 Feature3 ...
27 //   COV INPUT_ID Coverage1 Coverage2 Coverage3 ...
28 //   If a crash happens while processing an input, the last line in the
29 //   control file will be "STARTED INPUT_ID INPUT_SIZE", so the next process
30 //   will know where to resume.
31 //
32 //   Once all inputs are processed by the inner process(es) the outer process
33 //   reads the control file and does the merge based entirely on its
34 //   contents.
35 //   It uses a single-pass greedy algorithm, choosing the smallest inputs
36 //   first and, among inputs of the same size, those that have more new features.
37 //
38 //===----------------------------------------------------------------------===//
39 
40 #ifndef LLVM_FUZZER_MERGE_H
41 #define LLVM_FUZZER_MERGE_H
42 
43 #include "FuzzerDefs.h"
44 
45 #include <istream>
46 #include <ostream>
47 #include <set>
48 #include <vector>
49 
50 namespace fuzzer {
51 
// Record kept for a single input taking part in a merge: a name, a size and
// two lists of 32-bit IDs. Size defaults to 0; both lists start empty.
52 struct MergeFileInfo {
  // NOTE(review): presumably the input's file name as written into the
  // control file by the outer process -- confirm in the implementation.
53   std::string Name;
  // NOTE(review): presumably the input size in bytes (cf. INPUT_SIZE on the
  // STARTED line) -- confirm in the implementation.
54   size_t Size = 0;
  // NOTE(review): presumably the IDs from this input's "FT" line (Features)
  // and "COV" line (Cov) -- confirm in the implementation.
55   Vector<uint32_t> Features, Cov;
56 };
57 
// Holds the per-input records used by a merge, plus the operations that parse
// them and choose which inputs to keep. The control-file format and the
// overall algorithm are described in the comment at the top of this header.
// Only declarations are visible here; notes marked NOTE(review) are
// assumptions to be checked against the implementation.
58 struct Merger {
  // One MergeFileInfo record per input.
59   Vector<MergeFileInfo> Files;
  // NOTE(review): presumably the number of leading entries of Files that
  // belong to the existing (first) corpus -- confirm.
60   size_t NumFilesInFirstCorpus = 0;
  // NOTE(review): presumably the index in Files of the first input that has
  // not been fully processed yet, i.e. where an inner process resumes --
  // confirm.
61   size_t FirstNotProcessedFile = 0;
  // NOTE(review): presumably identifies the input whose processing was
  // started but never completed (a crash) -- confirm.
62   std::string LastFailure;
63 
  // Parses merge data from the stream IS (presumably the control file).
  // NOTE(review): the bool result presumably reports whether parsing
  // succeeded, and ParseCoverage presumably selects whether "COV" lines are
  // recorded -- confirm.
64   bool Parse(std::istream &IS, bool ParseCoverage);
  // Same as above, but takes the text to parse as a string.
65   bool Parse(const std::string &Str, bool ParseCoverage);
  // NOTE(review): judging by the name, parses IS and terminates the process
  // on failure instead of returning a status -- confirm.
66   void ParseOrExit(std::istream &IS, bool ParseCoverage);
  // Performs the merge selection. InitialFeatures and InitialCov are read-only
  // inputs; NewFeatures, NewCov and NewFiles are output parameters.
  // NOTE(review): presumably NewFiles receives the names of the inputs chosen
  // for addition and the return value counts newly added features -- confirm.
67   size_t Merge(const Set<uint32_t> &InitialFeatures, Set<uint32_t> *NewFeatures,
68                const Set<uint32_t> &InitialCov, Set<uint32_t> *NewCov,
69                Vector<std::string> *NewFiles);
  // Returns an estimate (units not visible here; presumably bytes) of the
  // memory held by this object. Does not modify the object.
70   size_t ApproximateMemoryConsumption() const;
  // Returns a set of feature IDs; NOTE(review): presumably the union of
  // Features over every entry of Files -- confirm. Does not modify the object.
71   Set<uint32_t> AllFeatures() const;
72 };
73 
// Merges NewCorpus into OldCorpus in a way that tolerates failures of the
// target on individual inputs.
// NOTE(review): presumably this is the "outer process" driver described at the
// top of this header -- confirm in the implementation.
//
// Read-only inputs: Args, OldCorpus, NewCorpus, InitialFeatures, InitialCov,
// CFPath, Verbose. Output parameters: NewFiles, NewFeatures, NewCov.
// NOTE(review): presumably Args is the command line used to launch the inner
// process, CFPath is the path of the control file, and NewFiles receives the
// names of the inputs selected for addition -- confirm.
74 void CrashResistantMerge(const Vector<std::string> &Args,
75                          const Vector<SizedFile> &OldCorpus,
76                          const Vector<SizedFile> &NewCorpus,
77                          Vector<std::string> *NewFiles,
78                          const Set<uint32_t> &InitialFeatures,
79                          Set<uint32_t> *NewFeatures,
80                          const Set<uint32_t> &InitialCov,
81                          Set<uint32_t> *NewCov,
82                          const std::string &CFPath,
83                          bool Verbose);
84 
85 }  // namespace fuzzer
86 
87 #endif  // LLVM_FUZZER_MERGE_H
88