//===- FuzzerMerge.h - merging corpora --------------------------*- C++ -* ===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 // Merging Corpora.
9 //
10 // The task:
11 //   Take the existing corpus (possibly empty) and merge new inputs into
12 //   it so that only inputs with new coverage ('features') are added.
//   The process should tolerate crashes, OOMs, leaks, etc.
14 //
15 // Algorithm:
//   The outer process collects the set of files and writes their names
17 //   into a temporary "control" file, then repeatedly launches the inner
18 //   process until all inputs are processed.
19 //   The outer process does not actually execute the target code.
20 //
//   The inner process reads the control file and sees a) the list of all the
//   inputs and b) the last processed input. Then it starts processing the
//   inputs one by one. Before processing every input it writes one line to
//   the control file:
//   STARTED INPUT_ID INPUT_SIZE
//   After processing an input it writes another line:
26 //   DONE INPUT_ID Feature1 Feature2 Feature3 ...
27 //   If a crash happens while processing an input the last line in the control
28 //   file will be "STARTED INPUT_ID" and so the next process will know
29 //   where to resume.
30 //
//   Once all inputs are processed by the inner process(es) the outer process
//   reads the control file and does the merge based entirely on the contents
//   of the control file.
//   It uses a single-pass greedy algorithm, choosing the smallest inputs first
//   and, among inputs of the same size, those that have more new features.
36 //
37 //===----------------------------------------------------------------------===//
38 
39 #ifndef LLVM_FUZZER_MERGE_H
40 #define LLVM_FUZZER_MERGE_H
41 
42 #include "FuzzerDefs.h"
43 
44 #include <istream>
45 #include <ostream>
46 #include <set>
47 #include <vector>
48 
49 namespace fuzzer {
50 
51 struct MergeFileInfo {
52   std::string Name;
53   size_t Size = 0;
54   Vector<uint32_t> Features, Cov;
55 };
56 
57 struct Merger {
58   Vector<MergeFileInfo> Files;
59   size_t NumFilesInFirstCorpus = 0;
60   size_t FirstNotProcessedFile = 0;
61   std::string LastFailure;
62 
63   bool Parse(std::istream &IS, bool ParseCoverage);
64   bool Parse(const std::string &Str, bool ParseCoverage);
65   void ParseOrExit(std::istream &IS, bool ParseCoverage);
66   size_t Merge(const Set<uint32_t> &InitialFeatures, Set<uint32_t> *NewFeatures,
67                const Set<uint32_t> &InitialCov, Set<uint32_t> *NewCov,
68                Vector<std::string> *NewFiles);
69   size_t ApproximateMemoryConsumption() const;
70   Set<uint32_t> AllFeatures() const;
71 };
72 
73 void CrashResistantMerge(const Vector<std::string> &Args,
74                          const Vector<SizedFile> &OldCorpus,
75                          const Vector<SizedFile> &NewCorpus,
76                          Vector<std::string> *NewFiles,
77                          const Set<uint32_t> &InitialFeatures,
78                          Set<uint32_t> *NewFeatures,
79                          const Set<uint32_t> &InitialCov,
80                          Set<uint32_t> *NewCov,
81                          const std::string &CFPath,
82                          bool Verbose);
83 
84 }  // namespace fuzzer
85 
86 #endif  // LLVM_FUZZER_MERGE_H
87