1 //===- FuzzerFork.cpp - run fuzzing in separate subprocesses --------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 // Spawn and orchestrate separate fuzzing processes.
9 //===----------------------------------------------------------------------===//
10 
11 #include "FuzzerCommand.h"
12 #include "FuzzerFork.h"
13 #include "FuzzerIO.h"
14 #include "FuzzerInternal.h"
15 #include "FuzzerMerge.h"
16 #include "FuzzerSHA1.h"
17 #include "FuzzerTracePC.h"
18 #include "FuzzerUtil.h"
19 
20 #include <atomic>
21 #include <chrono>
22 #include <condition_variable>
23 #include <fstream>
24 #include <memory>
25 #include <mutex>
26 #include <queue>
27 #include <sstream>
28 #include <thread>
29 
30 namespace fuzzer {
31 
// Final statistics that a child fuzzing process prints to its log as
// "stat::<name>: <value>" lines. Every field stays 0 unless the log provides
// a value for it.
struct Stats {
  size_t number_of_executed_units = 0;
  size_t peak_rss_mb = 0;
  size_t average_exec_per_sec = 0;
};

// Reads the log at LogPath and collects the known "stat::" entries.
//
// Only lines that begin with "stat::" are considered. Each such line is split
// into a whitespace-delimited name and an unsigned value; if the same name
// appears more than once the last well-formed line wins. Lines with a missing
// or unparsable value are skipped, so a truncated log can never inject an
// indeterminate number or wipe out an earlier good value.
//
// Returns a Stats with all fields 0 if the file cannot be opened or contains
// no recognized entries.
static Stats ParseFinalStatsFromLog(const std::string &LogPath) {
  static const char kStatPrefix[] = "stat::";
  const size_t kStatPrefixLen = sizeof(kStatPrefix) - 1;

  std::ifstream In(LogPath);
  std::string Line;
  Stats Res;
  const struct {
    const char *Name;
    size_t *Var;
  } NameVarPairs[] = {
      {"stat::number_of_executed_units:", &Res.number_of_executed_units},
      {"stat::peak_rss_mb:", &Res.peak_rss_mb},
      {"stat::average_exec_per_sec:", &Res.average_exec_per_sec},
  };
  while (std::getline(In, Line, '\n')) {
    // Prefix test only; no need to search the whole line.
    if (Line.compare(0, kStatPrefixLen, kStatPrefix) != 0) continue;
    std::istringstream ISS(Line);
    std::string Name;
    size_t Val = 0;
    // When the value is missing the extraction never writes to Val, so the
    // stream state must be checked before Val is used.
    if (!(ISS >> Name >> Val)) continue;
    for (const auto &Pair : NameVarPairs)
      if (Name == Pair.Name)
        *Pair.Var = Val;
  }
  return Res;
}
63 
64 struct FuzzJob {
65   // Inputs.
66   Command Cmd;
67   std::string CorpusDir;
68   std::string FeaturesDir;
69   std::string LogPath;
70   std::string SeedListPath;
71   std::string CFPath;
72   size_t      JobId;
73 
74   int         DftTimeInSeconds = 0;
75 
76   // Fuzzing Outputs.
77   int ExitCode;
78 
79   ~FuzzJob() {
80     RemoveFile(CFPath);
81     RemoveFile(LogPath);
82     RemoveFile(SeedListPath);
83     RmDirRecursive(CorpusDir);
84     RmDirRecursive(FeaturesDir);
85   }
86 };
87 
88 struct GlobalEnv {
89   std::vector<std::string> Args;
90   std::vector<std::string> CorpusDirs;
91   std::string MainCorpusDir;
92   std::string TempDir;
93   std::string DFTDir;
94   std::string DataFlowBinary;
95   std::set<uint32_t> Features, Cov;
96   std::set<std::string> FilesWithDFT;
97   std::vector<std::string> Files;
98   std::vector<std::size_t> FilesSizes;
99   Random *Rand;
100   std::chrono::system_clock::time_point ProcessStartTime;
101   int Verbosity = 0;
102   int Group = 0;
103   int NumCorpuses = 8;
104 
105   size_t NumTimeouts = 0;
106   size_t NumOOMs = 0;
107   size_t NumCrashes = 0;
108 
109 
110   size_t NumRuns = 0;
111 
112   std::string StopFile() { return DirPlusFile(TempDir, "STOP"); }
113 
114   size_t secondsSinceProcessStartUp() const {
115     return std::chrono::duration_cast<std::chrono::seconds>(
116                std::chrono::system_clock::now() - ProcessStartTime)
117         .count();
118   }
119 
120   FuzzJob *CreateNewJob(size_t JobId) {
121     Command Cmd(Args);
122     Cmd.removeFlag("fork");
123     Cmd.removeFlag("runs");
124     Cmd.removeFlag("collect_data_flow");
125     for (auto &C : CorpusDirs) // Remove all corpora from the args.
126       Cmd.removeArgument(C);
127     Cmd.addFlag("reload", "0");  // working in an isolated dir, no reload.
128     Cmd.addFlag("print_final_stats", "1");
129     Cmd.addFlag("print_funcs", "0");  // no need to spend time symbolizing.
130     Cmd.addFlag("max_total_time", std::to_string(std::min((size_t)300, JobId)));
131     Cmd.addFlag("stop_file", StopFile());
132     if (!DataFlowBinary.empty()) {
133       Cmd.addFlag("data_flow_trace", DFTDir);
134       if (!Cmd.hasFlag("focus_function"))
135         Cmd.addFlag("focus_function", "auto");
136     }
137     auto Job = new FuzzJob;
138     std::string Seeds;
139     if (size_t CorpusSubsetSize =
140             std::min(Files.size(), (size_t)sqrt(Files.size() + 2))) {
141       auto Time1 = std::chrono::system_clock::now();
142       if (Group) { // whether to group the corpus.
143         size_t AverageCorpusSize = Files.size() / NumCorpuses + 1;
144         size_t StartIndex = ((JobId - 1) % NumCorpuses) * AverageCorpusSize;
145         for (size_t i = 0; i < CorpusSubsetSize; i++) {
146           size_t RandNum = (*Rand)(AverageCorpusSize);
147           size_t Index = RandNum + StartIndex;
148           Index = Index < Files.size() ? Index
149                                        : Rand->SkewTowardsLast(Files.size());
150           auto &SF = Files[Index];
151           Seeds += (Seeds.empty() ? "" : ",") + SF;
152           CollectDFT(SF);
153         }
154       } else {
155         for (size_t i = 0; i < CorpusSubsetSize; i++) {
156           auto &SF = Files[Rand->SkewTowardsLast(Files.size())];
157           Seeds += (Seeds.empty() ? "" : ",") + SF;
158           CollectDFT(SF);
159         }
160       }
161       auto Time2 = std::chrono::system_clock::now();
162       auto DftTimeInSeconds = duration_cast<seconds>(Time2 - Time1).count();
163       assert(DftTimeInSeconds < std::numeric_limits<int>::max());
164       Job->DftTimeInSeconds = static_cast<int>(DftTimeInSeconds);
165     }
166     if (!Seeds.empty()) {
167       Job->SeedListPath =
168           DirPlusFile(TempDir, std::to_string(JobId) + ".seeds");
169       WriteToFile(Seeds, Job->SeedListPath);
170       Cmd.addFlag("seed_inputs", "@" + Job->SeedListPath);
171     }
172     Job->LogPath = DirPlusFile(TempDir, std::to_string(JobId) + ".log");
173     Job->CorpusDir = DirPlusFile(TempDir, "C" + std::to_string(JobId));
174     Job->FeaturesDir = DirPlusFile(TempDir, "F" + std::to_string(JobId));
175     Job->CFPath = DirPlusFile(TempDir, std::to_string(JobId) + ".merge");
176     Job->JobId = JobId;
177 
178 
179     Cmd.addArgument(Job->CorpusDir);
180     Cmd.addFlag("features_dir", Job->FeaturesDir);
181 
182     for (auto &D : {Job->CorpusDir, Job->FeaturesDir}) {
183       RmDirRecursive(D);
184       MkDir(D);
185     }
186 
187     Cmd.setOutputFile(Job->LogPath);
188     Cmd.combineOutAndErr();
189 
190     Job->Cmd = Cmd;
191 
192     if (Verbosity >= 2)
193       Printf("Job %zd/%p Created: %s\n", JobId, Job,
194              Job->Cmd.toString().c_str());
195     // Start from very short runs and gradually increase them.
196     return Job;
197   }
198 
199   void RunOneMergeJob(FuzzJob *Job) {
200     auto Stats = ParseFinalStatsFromLog(Job->LogPath);
201     NumRuns += Stats.number_of_executed_units;
202 
203     std::vector<SizedFile> TempFiles, MergeCandidates;
204     // Read all newly created inputs and their feature sets.
205     // Choose only those inputs that have new features.
206     GetSizedFilesFromDir(Job->CorpusDir, &TempFiles);
207     std::sort(TempFiles.begin(), TempFiles.end());
208     for (auto &F : TempFiles) {
209       auto FeatureFile = F.File;
210       FeatureFile.replace(0, Job->CorpusDir.size(), Job->FeaturesDir);
211       auto FeatureBytes = FileToVector(FeatureFile, 0, false);
212       assert((FeatureBytes.size() % sizeof(uint32_t)) == 0);
213       std::vector<uint32_t> NewFeatures(FeatureBytes.size() / sizeof(uint32_t));
214       memcpy(NewFeatures.data(), FeatureBytes.data(), FeatureBytes.size());
215       for (auto Ft : NewFeatures) {
216         if (!Features.count(Ft)) {
217           MergeCandidates.push_back(F);
218           break;
219         }
220       }
221     }
222     // if (!FilesToAdd.empty() || Job->ExitCode != 0)
223     Printf("#%zd: cov: %zd ft: %zd corp: %zd exec/s %zd "
224            "oom/timeout/crash: %zd/%zd/%zd time: %zds job: %zd dft_time: %d\n",
225            NumRuns, Cov.size(), Features.size(), Files.size(),
226            Stats.average_exec_per_sec, NumOOMs, NumTimeouts, NumCrashes,
227            secondsSinceProcessStartUp(), Job->JobId, Job->DftTimeInSeconds);
228 
229     if (MergeCandidates.empty()) return;
230 
231     std::vector<std::string> FilesToAdd;
232     std::set<uint32_t> NewFeatures, NewCov;
233     bool IsSetCoverMerge =
234         !Job->Cmd.getFlagValue("set_cover_merge").compare("1");
235     CrashResistantMerge(Args, {}, MergeCandidates, &FilesToAdd, Features,
236                         &NewFeatures, Cov, &NewCov, Job->CFPath, false,
237                         IsSetCoverMerge);
238     for (auto &Path : FilesToAdd) {
239       auto U = FileToVector(Path);
240       auto NewPath = DirPlusFile(MainCorpusDir, Hash(U));
241       WriteToFile(U, NewPath);
242       if (Group) { // Insert the queue according to the size of the seed.
243         size_t UnitSize = U.size();
244         auto Idx =
245             std::upper_bound(FilesSizes.begin(), FilesSizes.end(), UnitSize) -
246             FilesSizes.begin();
247         FilesSizes.insert(FilesSizes.begin() + Idx, UnitSize);
248         Files.insert(Files.begin() + Idx, NewPath);
249       } else {
250         Files.push_back(NewPath);
251       }
252     }
253     Features.insert(NewFeatures.begin(), NewFeatures.end());
254     Cov.insert(NewCov.begin(), NewCov.end());
255     for (auto Idx : NewCov)
256       if (auto *TE = TPC.PCTableEntryByIdx(Idx))
257         if (TPC.PcIsFuncEntry(TE))
258           PrintPC("  NEW_FUNC: %p %F %L\n", "",
259                   TPC.GetNextInstructionPc(TE->PC));
260   }
261 
262   void CollectDFT(const std::string &InputPath) {
263     if (DataFlowBinary.empty()) return;
264     if (!FilesWithDFT.insert(InputPath).second) return;
265     Command Cmd(Args);
266     Cmd.removeFlag("fork");
267     Cmd.removeFlag("runs");
268     Cmd.addFlag("data_flow_trace", DFTDir);
269     Cmd.addArgument(InputPath);
270     for (auto &C : CorpusDirs) // Remove all corpora from the args.
271       Cmd.removeArgument(C);
272     Cmd.setOutputFile(DirPlusFile(TempDir, "dft.log"));
273     Cmd.combineOutAndErr();
274     // Printf("CollectDFT: %s\n", Cmd.toString().c_str());
275     ExecuteCommand(Cmd);
276   }
277 
278 };
279 
280 struct JobQueue {
281   std::queue<FuzzJob *> Qu;
282   std::mutex Mu;
283   std::condition_variable Cv;
284 
285   void Push(FuzzJob *Job) {
286     {
287       std::lock_guard<std::mutex> Lock(Mu);
288       Qu.push(Job);
289     }
290     Cv.notify_one();
291   }
292   FuzzJob *Pop() {
293     std::unique_lock<std::mutex> Lk(Mu);
294     // std::lock_guard<std::mutex> Lock(Mu);
295     Cv.wait(Lk, [&]{return !Qu.empty();});
296     assert(!Qu.empty());
297     auto Job = Qu.front();
298     Qu.pop();
299     return Job;
300   }
301 };
302 
303 void WorkerThread(JobQueue *FuzzQ, JobQueue *MergeQ) {
304   while (auto Job = FuzzQ->Pop()) {
305     // Printf("WorkerThread: job %p\n", Job);
306     Job->ExitCode = ExecuteCommand(Job->Cmd);
307     MergeQ->Push(Job);
308   }
309 }
310 
311 // This is just a skeleton of an experimental -fork=1 feature.
312 void FuzzWithFork(Random &Rand, const FuzzingOptions &Options,
313                   const std::vector<std::string> &Args,
314                   const std::vector<std::string> &CorpusDirs, int NumJobs) {
315   Printf("INFO: -fork=%d: fuzzing in separate process(s)\n", NumJobs);
316 
317   GlobalEnv Env;
318   Env.Args = Args;
319   Env.CorpusDirs = CorpusDirs;
320   Env.Rand = &Rand;
321   Env.Verbosity = Options.Verbosity;
322   Env.ProcessStartTime = std::chrono::system_clock::now();
323   Env.DataFlowBinary = Options.CollectDataFlow;
324   Env.Group = Options.ForkCorpusGroups;
325 
326   std::vector<SizedFile> SeedFiles;
327   for (auto &Dir : CorpusDirs)
328     GetSizedFilesFromDir(Dir, &SeedFiles);
329   std::sort(SeedFiles.begin(), SeedFiles.end());
330   Env.TempDir = TempPath("FuzzWithFork", ".dir");
331   Env.DFTDir = DirPlusFile(Env.TempDir, "DFT");
332   RmDirRecursive(Env.TempDir);  // in case there is a leftover from old runs.
333   MkDir(Env.TempDir);
334   MkDir(Env.DFTDir);
335 
336 
337   if (CorpusDirs.empty())
338     MkDir(Env.MainCorpusDir = DirPlusFile(Env.TempDir, "C"));
339   else
340     Env.MainCorpusDir = CorpusDirs[0];
341 
342   if (Options.KeepSeed) {
343     for (auto &File : SeedFiles)
344       Env.Files.push_back(File.File);
345   } else {
346     auto CFPath = DirPlusFile(Env.TempDir, "merge.txt");
347     std::set<uint32_t> NewFeatures, NewCov;
348     CrashResistantMerge(Env.Args, {}, SeedFiles, &Env.Files, Env.Features,
349                         &NewFeatures, Env.Cov, &NewCov, CFPath,
350                         /*Verbose=*/false, /*IsSetCoverMerge=*/false);
351     Env.Features.insert(NewFeatures.begin(), NewFeatures.end());
352     Env.Cov.insert(NewFeatures.begin(), NewFeatures.end());
353     RemoveFile(CFPath);
354   }
355 
356   if (Env.Group) {
357     for (auto &path : Env.Files)
358       Env.FilesSizes.push_back(FileSize(path));
359   }
360 
361   Printf("INFO: -fork=%d: %zd seed inputs, starting to fuzz in %s\n", NumJobs,
362          Env.Files.size(), Env.TempDir.c_str());
363 
364   int ExitCode = 0;
365 
366   JobQueue FuzzQ, MergeQ;
367 
368   auto StopJobs = [&]() {
369     for (int i = 0; i < NumJobs; i++)
370       FuzzQ.Push(nullptr);
371     MergeQ.Push(nullptr);
372     WriteToFile(Unit({1}), Env.StopFile());
373   };
374 
375   size_t MergeCycle = 20;
376   size_t JobExecuted = 0;
377   size_t JobId = 1;
378   std::vector<std::thread> Threads;
379   for (int t = 0; t < NumJobs; t++) {
380     Threads.push_back(std::thread(WorkerThread, &FuzzQ, &MergeQ));
381     FuzzQ.Push(Env.CreateNewJob(JobId++));
382   }
383 
384   while (true) {
385     std::unique_ptr<FuzzJob> Job(MergeQ.Pop());
386     if (!Job)
387       break;
388     ExitCode = Job->ExitCode;
389     if (ExitCode == Options.InterruptExitCode) {
390       Printf("==%lu== libFuzzer: a child was interrupted; exiting\n", GetPid());
391       StopJobs();
392       break;
393     }
394     Fuzzer::MaybeExitGracefully();
395 
396     Env.RunOneMergeJob(Job.get());
397 
398     // merge the corpus .
399     JobExecuted++;
400     if (Env.Group && JobExecuted >= MergeCycle) {
401       std::vector<SizedFile> CurrentSeedFiles;
402       for (auto &Dir : CorpusDirs)
403         GetSizedFilesFromDir(Dir, &CurrentSeedFiles);
404       std::sort(CurrentSeedFiles.begin(), CurrentSeedFiles.end());
405 
406       auto CFPath = DirPlusFile(Env.TempDir, "merge.txt");
407       std::set<uint32_t> TmpNewFeatures, TmpNewCov;
408       std::set<uint32_t> TmpFeatures, TmpCov;
409       Env.Files.clear();
410       Env.FilesSizes.clear();
411       CrashResistantMerge(Env.Args, {}, CurrentSeedFiles, &Env.Files,
412                           TmpFeatures, &TmpNewFeatures, TmpCov, &TmpNewCov,
413                           CFPath, /*Verbose=*/false, /*IsSetCoverMerge=*/false);
414       for (auto &path : Env.Files)
415         Env.FilesSizes.push_back(FileSize(path));
416       RemoveFile(CFPath);
417       JobExecuted = 0;
418       MergeCycle += 5;
419     }
420 
421     // Since the number of corpus seeds will gradually increase, in order to
422     // control the number in each group to be about three times the number of
423     // seeds selected each time, the number of groups is dynamically adjusted.
424     if (Env.Files.size() < 2000)
425       Env.NumCorpuses = 12;
426     else if (Env.Files.size() < 6000)
427       Env.NumCorpuses = 20;
428     else if (Env.Files.size() < 12000)
429       Env.NumCorpuses = 32;
430     else if (Env.Files.size() < 16000)
431       Env.NumCorpuses = 40;
432     else if (Env.Files.size() < 24000)
433       Env.NumCorpuses = 60;
434     else
435       Env.NumCorpuses = 80;
436 
437     // Continue if our crash is one of the ignored ones.
438     if (Options.IgnoreTimeouts && ExitCode == Options.TimeoutExitCode)
439       Env.NumTimeouts++;
440     else if (Options.IgnoreOOMs && ExitCode == Options.OOMExitCode)
441       Env.NumOOMs++;
442     else if (ExitCode != 0) {
443       Env.NumCrashes++;
444       if (Options.IgnoreCrashes) {
445         std::ifstream In(Job->LogPath);
446         std::string Line;
447         while (std::getline(In, Line, '\n'))
448           if (Line.find("ERROR:") != Line.npos ||
449               Line.find("runtime error:") != Line.npos)
450             Printf("%s\n", Line.c_str());
451       } else {
452         // And exit if we don't ignore this crash.
453         Printf("INFO: log from the inner process:\n%s",
454                FileToString(Job->LogPath).c_str());
455         StopJobs();
456         break;
457       }
458     }
459 
460     // Stop if we are over the time budget.
461     // This is not precise, since other threads are still running
462     // and we will wait while joining them.
463     // We also don't stop instantly: other jobs need to finish.
464     if (Options.MaxTotalTimeSec > 0 &&
465         Env.secondsSinceProcessStartUp() >= (size_t)Options.MaxTotalTimeSec) {
466       Printf("INFO: fuzzed for %zd seconds, wrapping up soon\n",
467              Env.secondsSinceProcessStartUp());
468       StopJobs();
469       break;
470     }
471     if (Env.NumRuns >= Options.MaxNumberOfRuns) {
472       Printf("INFO: fuzzed for %zd iterations, wrapping up soon\n",
473              Env.NumRuns);
474       StopJobs();
475       break;
476     }
477 
478     FuzzQ.Push(Env.CreateNewJob(JobId++));
479   }
480 
481   for (auto &T : Threads)
482     T.join();
483 
484   // The workers have terminated. Don't try to remove the directory before they
485   // terminate to avoid a race condition preventing cleanup on Windows.
486   RmDirRecursive(Env.TempDir);
487 
488   // Use the exit code from the last child process.
489   Printf("INFO: exiting: %d time: %zds\n", ExitCode,
490          Env.secondsSinceProcessStartUp());
491   exit(ExitCode);
492 }
493 
494 } // namespace fuzzer
495