1 //===- FuzzerFork.cpp - run fuzzing in separate subprocesses --------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 // Spawn and orchestrate separate fuzzing processes.
9 //===----------------------------------------------------------------------===//
10 
11 #include "FuzzerCommand.h"
12 #include "FuzzerFork.h"
13 #include "FuzzerIO.h"
14 #include "FuzzerInternal.h"
15 #include "FuzzerMerge.h"
16 #include "FuzzerSHA1.h"
17 #include "FuzzerTracePC.h"
18 #include "FuzzerUtil.h"
19 
20 #include <atomic>
21 #include <chrono>
22 #include <condition_variable>
23 #include <fstream>
24 #include <memory>
25 #include <mutex>
26 #include <queue>
27 #include <sstream>
28 #include <thread>
29 
30 namespace fuzzer {
31 
// Final statistics extracted from the log of a child fuzzing process.
// A field keeps its default of 0 when the corresponding "stat::" line is
// absent from the log.
struct Stats {
  size_t number_of_executed_units = 0;
  size_t peak_rss_mb = 0;
  size_t average_exec_per_sec = 0;
};

// Reads the file at LogPath line by line and collects the values of the known
// "stat::<name>: <value>" entries.
//
// Only lines that begin with "stat::" are considered. If a known entry occurs
// several times, the last well-formed occurrence wins. Lines whose value
// cannot be parsed as an unsigned integer are skipped, so they never clobber a
// value that was parsed earlier. An unreadable or missing file yields a
// default-constructed Stats.
static Stats ParseFinalStatsFromLog(const std::string &LogPath) {
  static const char StatPrefix[] = "stat::";
  const size_t StatPrefixLen = sizeof(StatPrefix) - 1;

  std::ifstream In(LogPath);
  std::string Line;
  Stats Res;
  const struct {
    const char *Name;
    size_t *Var;
  } NameVarPairs[] = {
      {"stat::number_of_executed_units:", &Res.number_of_executed_units},
      {"stat::peak_rss_mb:", &Res.peak_rss_mb},
      {"stat::average_exec_per_sec:", &Res.average_exec_per_sec},
  };
  while (std::getline(In, Line, '\n')) {
    // Prefix test only; no need to search the rest of the line.
    if (Line.compare(0, StatPrefixLen, StatPrefix) != 0)
      continue;
    std::istringstream ISS(Line);
    std::string Name;
    size_t Val = 0;
    // A failed extraction would otherwise store 0 into the matching field.
    if (!(ISS >> Name >> Val))
      continue;
    for (const auto &NV : NameVarPairs)
      if (Name == NV.Name)
        *NV.Var = Val;
  }
  return Res;
}
63 
64 struct FuzzJob {
65   // Inputs.
66   Command Cmd;
67   std::string CorpusDir;
68   std::string FeaturesDir;
69   std::string LogPath;
70   std::string SeedListPath;
71   std::string CFPath;
72   size_t      JobId;
73 
74   int         DftTimeInSeconds = 0;
75 
76   // Fuzzing Outputs.
77   int ExitCode;
78 
~FuzzJobfuzzer::FuzzJob79   ~FuzzJob() {
80     RemoveFile(CFPath);
81     RemoveFile(LogPath);
82     RemoveFile(SeedListPath);
83     RmDirRecursive(CorpusDir);
84     RmDirRecursive(FeaturesDir);
85   }
86 };
87 
88 struct GlobalEnv {
89   Vector<std::string> Args;
90   Vector<std::string> CorpusDirs;
91   std::string MainCorpusDir;
92   std::string TempDir;
93   std::string DFTDir;
94   std::string DataFlowBinary;
95   Set<uint32_t> Features, Cov;
96   Set<std::string> FilesWithDFT;
97   Vector<std::string> Files;
98   Random *Rand;
99   std::chrono::system_clock::time_point ProcessStartTime;
100   int Verbosity = 0;
101 
102   size_t NumTimeouts = 0;
103   size_t NumOOMs = 0;
104   size_t NumCrashes = 0;
105 
106 
107   size_t NumRuns = 0;
108 
StopFilefuzzer::GlobalEnv109   std::string StopFile() { return DirPlusFile(TempDir, "STOP"); }
110 
secondsSinceProcessStartUpfuzzer::GlobalEnv111   size_t secondsSinceProcessStartUp() const {
112     return std::chrono::duration_cast<std::chrono::seconds>(
113                std::chrono::system_clock::now() - ProcessStartTime)
114         .count();
115   }
116 
CreateNewJobfuzzer::GlobalEnv117   FuzzJob *CreateNewJob(size_t JobId) {
118     Command Cmd(Args);
119     Cmd.removeFlag("fork");
120     Cmd.removeFlag("runs");
121     Cmd.removeFlag("collect_data_flow");
122     for (auto &C : CorpusDirs) // Remove all corpora from the args.
123       Cmd.removeArgument(C);
124     Cmd.addFlag("reload", "0");  // working in an isolated dir, no reload.
125     Cmd.addFlag("print_final_stats", "1");
126     Cmd.addFlag("print_funcs", "0");  // no need to spend time symbolizing.
127     Cmd.addFlag("max_total_time", std::to_string(std::min((size_t)300, JobId)));
128     Cmd.addFlag("stop_file", StopFile());
129     if (!DataFlowBinary.empty()) {
130       Cmd.addFlag("data_flow_trace", DFTDir);
131       if (!Cmd.hasFlag("focus_function"))
132         Cmd.addFlag("focus_function", "auto");
133     }
134     auto Job = new FuzzJob;
135     std::string Seeds;
136     if (size_t CorpusSubsetSize =
137             std::min(Files.size(), (size_t)sqrt(Files.size() + 2))) {
138       auto Time1 = std::chrono::system_clock::now();
139       for (size_t i = 0; i < CorpusSubsetSize; i++) {
140         auto &SF = Files[Rand->SkewTowardsLast(Files.size())];
141         Seeds += (Seeds.empty() ? "" : ",") + SF;
142         CollectDFT(SF);
143       }
144       auto Time2 = std::chrono::system_clock::now();
145       Job->DftTimeInSeconds = duration_cast<seconds>(Time2 - Time1).count();
146     }
147     if (!Seeds.empty()) {
148       Job->SeedListPath =
149           DirPlusFile(TempDir, std::to_string(JobId) + ".seeds");
150       WriteToFile(Seeds, Job->SeedListPath);
151       Cmd.addFlag("seed_inputs", "@" + Job->SeedListPath);
152     }
153     Job->LogPath = DirPlusFile(TempDir, std::to_string(JobId) + ".log");
154     Job->CorpusDir = DirPlusFile(TempDir, "C" + std::to_string(JobId));
155     Job->FeaturesDir = DirPlusFile(TempDir, "F" + std::to_string(JobId));
156     Job->CFPath = DirPlusFile(TempDir, std::to_string(JobId) + ".merge");
157     Job->JobId = JobId;
158 
159 
160     Cmd.addArgument(Job->CorpusDir);
161     Cmd.addFlag("features_dir", Job->FeaturesDir);
162 
163     for (auto &D : {Job->CorpusDir, Job->FeaturesDir}) {
164       RmDirRecursive(D);
165       MkDir(D);
166     }
167 
168     Cmd.setOutputFile(Job->LogPath);
169     Cmd.combineOutAndErr();
170 
171     Job->Cmd = Cmd;
172 
173     if (Verbosity >= 2)
174       Printf("Job %zd/%p Created: %s\n", JobId, Job,
175              Job->Cmd.toString().c_str());
176     // Start from very short runs and gradually increase them.
177     return Job;
178   }
179 
RunOneMergeJobfuzzer::GlobalEnv180   int RunOneMergeJob(FuzzJob *Job) {
181     auto Stats = ParseFinalStatsFromLog(Job->LogPath);
182     NumRuns += Stats.number_of_executed_units;
183 
184     Vector<SizedFile> TempFiles, MergeCandidates;
185     // Read all newly created inputs and their feature sets.
186     // Choose only those inputs that have new features.
187     int Res = GetSizedFilesFromDir(Job->CorpusDir, &TempFiles);
188     if (Res != 0)
189       return Res;
190     std::sort(TempFiles.begin(), TempFiles.end());
191     for (auto &F : TempFiles) {
192       auto FeatureFile = F.File;
193       FeatureFile.replace(0, Job->CorpusDir.size(), Job->FeaturesDir);
194       auto FeatureBytes = FileToVector(FeatureFile, 0, false);
195       assert((FeatureBytes.size() % sizeof(uint32_t)) == 0);
196       Vector<uint32_t> NewFeatures(FeatureBytes.size() / sizeof(uint32_t));
197       memcpy(NewFeatures.data(), FeatureBytes.data(), FeatureBytes.size());
198       for (auto Ft : NewFeatures) {
199         if (!Features.count(Ft)) {
200           MergeCandidates.push_back(F);
201           break;
202         }
203       }
204     }
205     // if (!FilesToAdd.empty() || Job->ExitCode != 0)
206     Printf("#%zd: cov: %zd ft: %zd corp: %zd exec/s %zd "
207            "oom/timeout/crash: %zd/%zd/%zd time: %zds job: %zd dft_time: %d\n",
208            NumRuns, Cov.size(), Features.size(), Files.size(),
209            Stats.average_exec_per_sec, NumOOMs, NumTimeouts, NumCrashes,
210            secondsSinceProcessStartUp(), Job->JobId, Job->DftTimeInSeconds);
211 
212     if (MergeCandidates.empty()) return 0;
213 
214     Vector<std::string> FilesToAdd;
215     Set<uint32_t> NewFeatures, NewCov;
216     CrashResistantMerge(Args, {}, MergeCandidates, &FilesToAdd, Features,
217                         &NewFeatures, Cov, &NewCov, Job->CFPath, false);
218     if (Fuzzer::isGracefulExitRequested())
219       return 0;
220     for (auto &Path : FilesToAdd) {
221       auto U = FileToVector(Path);
222       auto NewPath = DirPlusFile(MainCorpusDir, Hash(U));
223       WriteToFile(U, NewPath);
224       Files.push_back(NewPath);
225     }
226     Features.insert(NewFeatures.begin(), NewFeatures.end());
227     Cov.insert(NewCov.begin(), NewCov.end());
228     for (auto Idx : NewCov)
229       if (auto *TE = TPC.PCTableEntryByIdx(Idx))
230         if (TPC.PcIsFuncEntry(TE))
231           PrintPC("  NEW_FUNC: %p %F %L\n", "",
232                   TPC.GetNextInstructionPc(TE->PC));
233     return 0;
234   }
235 
236 
CollectDFTfuzzer::GlobalEnv237   void CollectDFT(const std::string &InputPath) {
238     if (DataFlowBinary.empty()) return;
239     if (!FilesWithDFT.insert(InputPath).second) return;
240     Command Cmd(Args);
241     Cmd.removeFlag("fork");
242     Cmd.removeFlag("runs");
243     Cmd.addFlag("data_flow_trace", DFTDir);
244     Cmd.addArgument(InputPath);
245     for (auto &C : CorpusDirs) // Remove all corpora from the args.
246       Cmd.removeArgument(C);
247     Cmd.setOutputFile(DirPlusFile(TempDir, "dft.log"));
248     Cmd.combineOutAndErr();
249     // Printf("CollectDFT: %s\n", Cmd.toString().c_str());
250     ExecuteCommand(Cmd);
251   }
252 
253 };
254 
255 struct JobQueue {
256   std::queue<FuzzJob *> Qu;
257   std::mutex Mu;
258   std::condition_variable Cv;
259 
Pushfuzzer::JobQueue260   void Push(FuzzJob *Job) {
261     {
262       std::lock_guard<std::mutex> Lock(Mu);
263       Qu.push(Job);
264     }
265     Cv.notify_one();
266   }
Popfuzzer::JobQueue267   FuzzJob *Pop() {
268     std::unique_lock<std::mutex> Lk(Mu);
269     // std::lock_guard<std::mutex> Lock(Mu);
270     Cv.wait(Lk, [&]{return !Qu.empty();});
271     assert(!Qu.empty());
272     auto Job = Qu.front();
273     Qu.pop();
274     return Job;
275   }
276 };
277 
WorkerThread(JobQueue * FuzzQ,JobQueue * MergeQ)278 void WorkerThread(JobQueue *FuzzQ, JobQueue *MergeQ) {
279   while (auto Job = FuzzQ->Pop()) {
280     // Printf("WorkerThread: job %p\n", Job);
281     Job->ExitCode = ExecuteCommand(Job->Cmd);
282     MergeQ->Push(Job);
283   }
284 }
285 
286 // This is just a skeleton of an experimental -fork=1 feature.
FuzzWithFork(Random & Rand,const FuzzingOptions & Options,const Vector<std::string> & Args,const Vector<std::string> & CorpusDirs,int NumJobs)287 int FuzzWithFork(Random &Rand, const FuzzingOptions &Options,
288                   const Vector<std::string> &Args,
289                   const Vector<std::string> &CorpusDirs, int NumJobs) {
290   Printf("INFO: -fork=%d: fuzzing in separate process(s)\n", NumJobs);
291 
292   GlobalEnv Env;
293   Env.Args = Args;
294   Env.CorpusDirs = CorpusDirs;
295   Env.Rand = &Rand;
296   Env.Verbosity = Options.Verbosity;
297   Env.ProcessStartTime = std::chrono::system_clock::now();
298   Env.DataFlowBinary = Options.CollectDataFlow;
299 
300   Vector<SizedFile> SeedFiles;
301   int Res;
302   for (auto &Dir : CorpusDirs) {
303     Res = GetSizedFilesFromDir(Dir, &SeedFiles);
304     if (Res != 0)
305       return Res;
306   }
307   std::sort(SeedFiles.begin(), SeedFiles.end());
308   Env.TempDir = TempPath("FuzzWithFork", ".dir");
309   Env.DFTDir = DirPlusFile(Env.TempDir, "DFT");
310   RmDirRecursive(Env.TempDir);  // in case there is a leftover from old runs.
311   MkDir(Env.TempDir);
312   MkDir(Env.DFTDir);
313 
314 
315   if (CorpusDirs.empty())
316     MkDir(Env.MainCorpusDir = DirPlusFile(Env.TempDir, "C"));
317   else
318     Env.MainCorpusDir = CorpusDirs[0];
319 
320   auto CFPath = DirPlusFile(Env.TempDir, "merge.txt");
321   Res = CrashResistantMerge(Env.Args, {}, SeedFiles, &Env.Files, {}, &Env.Features,
322                       {}, &Env.Cov,
323                       CFPath, false);
324   if (Res != 0)
325     return Res;
326   if (Fuzzer::isGracefulExitRequested())
327     return 0;
328 
329   RemoveFile(CFPath);
330   Printf("INFO: -fork=%d: %zd seed inputs, starting to fuzz in %s\n", NumJobs,
331          Env.Files.size(), Env.TempDir.c_str());
332 
333   int ExitCode = 0;
334 
335   JobQueue FuzzQ, MergeQ;
336 
337   auto StopJobs = [&]() {
338     for (int i = 0; i < NumJobs; i++)
339       FuzzQ.Push(nullptr);
340     MergeQ.Push(nullptr);
341     WriteToFile(Unit({1}), Env.StopFile());
342   };
343 
344   size_t JobId = 1;
345   Vector<std::thread> Threads;
346   for (int t = 0; t < NumJobs; t++) {
347     Threads.push_back(std::thread(WorkerThread, &FuzzQ, &MergeQ));
348     FuzzQ.Push(Env.CreateNewJob(JobId++));
349   }
350 
351   while (true) {
352     std::unique_ptr<FuzzJob> Job(MergeQ.Pop());
353     if (!Job)
354       break;
355     ExitCode = Job->ExitCode;
356     if (ExitCode == Options.InterruptExitCode) {
357       Printf("==%lu== libFuzzer: a child was interrupted; exiting\n", GetPid());
358       StopJobs();
359       break;
360     }
361     if (Fuzzer::MaybeExitGracefully())
362       return 0;
363 
364     Res = Env.RunOneMergeJob(Job.get());
365     if (Res != 0)
366       return Res;
367     if (Fuzzer::isGracefulExitRequested())
368       return 0;
369 
370     // Continue if our crash is one of the ignorred ones.
371     if (Options.IgnoreTimeouts && ExitCode == Options.TimeoutExitCode)
372       Env.NumTimeouts++;
373     else if (Options.IgnoreOOMs && ExitCode == Options.OOMExitCode)
374       Env.NumOOMs++;
375     else if (ExitCode != 0) {
376       Env.NumCrashes++;
377       if (Options.IgnoreCrashes) {
378         std::ifstream In(Job->LogPath);
379         std::string Line;
380         while (std::getline(In, Line, '\n'))
381           if (Line.find("ERROR:") != Line.npos ||
382               Line.find("runtime error:") != Line.npos)
383             Printf("%s\n", Line.c_str());
384       } else {
385         // And exit if we don't ignore this crash.
386         Printf("INFO: log from the inner process:\n%s",
387                FileToString(Job->LogPath).c_str());
388         StopJobs();
389         break;
390       }
391     }
392 
393     // Stop if we are over the time budget.
394     // This is not precise, since other threads are still running
395     // and we will wait while joining them.
396     // We also don't stop instantly: other jobs need to finish.
397     if (Options.MaxTotalTimeSec > 0 &&
398         Env.secondsSinceProcessStartUp() >= (size_t)Options.MaxTotalTimeSec) {
399       Printf("INFO: fuzzed for %zd seconds, wrapping up soon\n",
400              Env.secondsSinceProcessStartUp());
401       StopJobs();
402       break;
403     }
404     if (Env.NumRuns >= Options.MaxNumberOfRuns) {
405       Printf("INFO: fuzzed for %zd iterations, wrapping up soon\n",
406              Env.NumRuns);
407       StopJobs();
408       break;
409     }
410 
411     FuzzQ.Push(Env.CreateNewJob(JobId++));
412   }
413 
414   for (auto &T : Threads)
415     T.join();
416 
417   // The workers have terminated. Don't try to remove the directory before they
418   // terminate to avoid a race condition preventing cleanup on Windows.
419   RmDirRecursive(Env.TempDir);
420 
421   // Use the exit code from the last child process.
422   Printf("INFO: exiting: %d time: %zds\n", ExitCode,
423          Env.secondsSinceProcessStartUp());
424   return ExitCode;
425 }
426 
427 } // namespace fuzzer
428