1 //===- FuzzerFork.cpp - run fuzzing in separate subprocesses --------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 // Spawn and orchestrate separate fuzzing processes.
9 //===----------------------------------------------------------------------===//
10
11 #include "FuzzerCommand.h"
12 #include "FuzzerFork.h"
13 #include "FuzzerIO.h"
14 #include "FuzzerInternal.h"
15 #include "FuzzerMerge.h"
16 #include "FuzzerSHA1.h"
17 #include "FuzzerTracePC.h"
18 #include "FuzzerUtil.h"
19
20 #include <atomic>
21 #include <chrono>
22 #include <condition_variable>
23 #include <fstream>
24 #include <memory>
25 #include <mutex>
26 #include <queue>
27 #include <sstream>
28 #include <thread>
29
30 namespace fuzzer {
31
// Final statistics of one fuzzing run, as parsed from the "stat::" lines of
// its log. Every field stays 0 when the corresponding line is absent.
struct Stats {
  size_t number_of_executed_units = 0;
  size_t peak_rss_mb = 0;
  size_t average_exec_per_sec = 0;
};

// Reads the log file at LogPath and collects the values of the lines of the
// form "stat::<name>: <value>" for the statistics stored in Stats.
//
// Lines that do not start with "stat::", that name a statistic not listed in
// the table below, or that lack a parsable numeric value are skipped. If a
// statistic appears more than once, the last occurrence wins. An unreadable
// file yields a zero-initialized Stats.
static Stats ParseFinalStatsFromLog(const std::string &LogPath) {
  std::ifstream In(LogPath);
  std::string Line;
  Stats Res;
  const struct {
    const char *Name;
    size_t *Var;
  } NameVarPairs[] = {
      {"stat::number_of_executed_units:", &Res.number_of_executed_units},
      {"stat::peak_rss_mb:", &Res.peak_rss_mb},
      {"stat::average_exec_per_sec:", &Res.average_exec_per_sec},
  };
  while (std::getline(In, Line, '\n')) {
    // Only the prefix matters; no need to scan the whole line.
    if (Line.compare(0, 6, "stat::") != 0)
      continue;
    std::istringstream ISS(Line);
    std::string Name;
    size_t Val = 0;
    // A truncated line ("stat::foo:" with no value) must not store a value:
    // the extraction of Val fails and would otherwise leave it unset.
    if (!(ISS >> Name >> Val))
      continue;
    for (const auto &Pair : NameVarPairs)
      if (Name == Pair.Name)
        *Pair.Var = Val;
  }
  return Res;
}
63
64 struct FuzzJob {
65 // Inputs.
66 Command Cmd;
67 std::string CorpusDir;
68 std::string FeaturesDir;
69 std::string LogPath;
70 std::string SeedListPath;
71 std::string CFPath;
72 size_t JobId;
73
74 int DftTimeInSeconds = 0;
75
76 // Fuzzing Outputs.
77 int ExitCode;
78
~FuzzJobfuzzer::FuzzJob79 ~FuzzJob() {
80 RemoveFile(CFPath);
81 RemoveFile(LogPath);
82 RemoveFile(SeedListPath);
83 RmDirRecursive(CorpusDir);
84 RmDirRecursive(FeaturesDir);
85 }
86 };
87
88 struct GlobalEnv {
89 Vector<std::string> Args;
90 Vector<std::string> CorpusDirs;
91 std::string MainCorpusDir;
92 std::string TempDir;
93 std::string DFTDir;
94 std::string DataFlowBinary;
95 Set<uint32_t> Features, Cov;
96 Set<std::string> FilesWithDFT;
97 Vector<std::string> Files;
98 Random *Rand;
99 std::chrono::system_clock::time_point ProcessStartTime;
100 int Verbosity = 0;
101
102 size_t NumTimeouts = 0;
103 size_t NumOOMs = 0;
104 size_t NumCrashes = 0;
105
106
107 size_t NumRuns = 0;
108
StopFilefuzzer::GlobalEnv109 std::string StopFile() { return DirPlusFile(TempDir, "STOP"); }
110
secondsSinceProcessStartUpfuzzer::GlobalEnv111 size_t secondsSinceProcessStartUp() const {
112 return std::chrono::duration_cast<std::chrono::seconds>(
113 std::chrono::system_clock::now() - ProcessStartTime)
114 .count();
115 }
116
CreateNewJobfuzzer::GlobalEnv117 FuzzJob *CreateNewJob(size_t JobId) {
118 Command Cmd(Args);
119 Cmd.removeFlag("fork");
120 Cmd.removeFlag("runs");
121 Cmd.removeFlag("collect_data_flow");
122 for (auto &C : CorpusDirs) // Remove all corpora from the args.
123 Cmd.removeArgument(C);
124 Cmd.addFlag("reload", "0"); // working in an isolated dir, no reload.
125 Cmd.addFlag("print_final_stats", "1");
126 Cmd.addFlag("print_funcs", "0"); // no need to spend time symbolizing.
127 Cmd.addFlag("max_total_time", std::to_string(std::min((size_t)300, JobId)));
128 Cmd.addFlag("stop_file", StopFile());
129 if (!DataFlowBinary.empty()) {
130 Cmd.addFlag("data_flow_trace", DFTDir);
131 if (!Cmd.hasFlag("focus_function"))
132 Cmd.addFlag("focus_function", "auto");
133 }
134 auto Job = new FuzzJob;
135 std::string Seeds;
136 if (size_t CorpusSubsetSize =
137 std::min(Files.size(), (size_t)sqrt(Files.size() + 2))) {
138 auto Time1 = std::chrono::system_clock::now();
139 for (size_t i = 0; i < CorpusSubsetSize; i++) {
140 auto &SF = Files[Rand->SkewTowardsLast(Files.size())];
141 Seeds += (Seeds.empty() ? "" : ",") + SF;
142 CollectDFT(SF);
143 }
144 auto Time2 = std::chrono::system_clock::now();
145 Job->DftTimeInSeconds = duration_cast<seconds>(Time2 - Time1).count();
146 }
147 if (!Seeds.empty()) {
148 Job->SeedListPath =
149 DirPlusFile(TempDir, std::to_string(JobId) + ".seeds");
150 WriteToFile(Seeds, Job->SeedListPath);
151 Cmd.addFlag("seed_inputs", "@" + Job->SeedListPath);
152 }
153 Job->LogPath = DirPlusFile(TempDir, std::to_string(JobId) + ".log");
154 Job->CorpusDir = DirPlusFile(TempDir, "C" + std::to_string(JobId));
155 Job->FeaturesDir = DirPlusFile(TempDir, "F" + std::to_string(JobId));
156 Job->CFPath = DirPlusFile(TempDir, std::to_string(JobId) + ".merge");
157 Job->JobId = JobId;
158
159
160 Cmd.addArgument(Job->CorpusDir);
161 Cmd.addFlag("features_dir", Job->FeaturesDir);
162
163 for (auto &D : {Job->CorpusDir, Job->FeaturesDir}) {
164 RmDirRecursive(D);
165 MkDir(D);
166 }
167
168 Cmd.setOutputFile(Job->LogPath);
169 Cmd.combineOutAndErr();
170
171 Job->Cmd = Cmd;
172
173 if (Verbosity >= 2)
174 Printf("Job %zd/%p Created: %s\n", JobId, Job,
175 Job->Cmd.toString().c_str());
176 // Start from very short runs and gradually increase them.
177 return Job;
178 }
179
RunOneMergeJobfuzzer::GlobalEnv180 int RunOneMergeJob(FuzzJob *Job) {
181 auto Stats = ParseFinalStatsFromLog(Job->LogPath);
182 NumRuns += Stats.number_of_executed_units;
183
184 Vector<SizedFile> TempFiles, MergeCandidates;
185 // Read all newly created inputs and their feature sets.
186 // Choose only those inputs that have new features.
187 int Res = GetSizedFilesFromDir(Job->CorpusDir, &TempFiles);
188 if (Res != 0)
189 return Res;
190 std::sort(TempFiles.begin(), TempFiles.end());
191 for (auto &F : TempFiles) {
192 auto FeatureFile = F.File;
193 FeatureFile.replace(0, Job->CorpusDir.size(), Job->FeaturesDir);
194 auto FeatureBytes = FileToVector(FeatureFile, 0, false);
195 assert((FeatureBytes.size() % sizeof(uint32_t)) == 0);
196 Vector<uint32_t> NewFeatures(FeatureBytes.size() / sizeof(uint32_t));
197 memcpy(NewFeatures.data(), FeatureBytes.data(), FeatureBytes.size());
198 for (auto Ft : NewFeatures) {
199 if (!Features.count(Ft)) {
200 MergeCandidates.push_back(F);
201 break;
202 }
203 }
204 }
205 // if (!FilesToAdd.empty() || Job->ExitCode != 0)
206 Printf("#%zd: cov: %zd ft: %zd corp: %zd exec/s %zd "
207 "oom/timeout/crash: %zd/%zd/%zd time: %zds job: %zd dft_time: %d\n",
208 NumRuns, Cov.size(), Features.size(), Files.size(),
209 Stats.average_exec_per_sec, NumOOMs, NumTimeouts, NumCrashes,
210 secondsSinceProcessStartUp(), Job->JobId, Job->DftTimeInSeconds);
211
212 if (MergeCandidates.empty()) return 0;
213
214 Vector<std::string> FilesToAdd;
215 Set<uint32_t> NewFeatures, NewCov;
216 CrashResistantMerge(Args, {}, MergeCandidates, &FilesToAdd, Features,
217 &NewFeatures, Cov, &NewCov, Job->CFPath, false);
218 if (Fuzzer::isGracefulExitRequested())
219 return 0;
220 for (auto &Path : FilesToAdd) {
221 auto U = FileToVector(Path);
222 auto NewPath = DirPlusFile(MainCorpusDir, Hash(U));
223 WriteToFile(U, NewPath);
224 Files.push_back(NewPath);
225 }
226 Features.insert(NewFeatures.begin(), NewFeatures.end());
227 Cov.insert(NewCov.begin(), NewCov.end());
228 for (auto Idx : NewCov)
229 if (auto *TE = TPC.PCTableEntryByIdx(Idx))
230 if (TPC.PcIsFuncEntry(TE))
231 PrintPC(" NEW_FUNC: %p %F %L\n", "",
232 TPC.GetNextInstructionPc(TE->PC));
233 return 0;
234 }
235
236
CollectDFTfuzzer::GlobalEnv237 void CollectDFT(const std::string &InputPath) {
238 if (DataFlowBinary.empty()) return;
239 if (!FilesWithDFT.insert(InputPath).second) return;
240 Command Cmd(Args);
241 Cmd.removeFlag("fork");
242 Cmd.removeFlag("runs");
243 Cmd.addFlag("data_flow_trace", DFTDir);
244 Cmd.addArgument(InputPath);
245 for (auto &C : CorpusDirs) // Remove all corpora from the args.
246 Cmd.removeArgument(C);
247 Cmd.setOutputFile(DirPlusFile(TempDir, "dft.log"));
248 Cmd.combineOutAndErr();
249 // Printf("CollectDFT: %s\n", Cmd.toString().c_str());
250 ExecuteCommand(Cmd);
251 }
252
253 };
254
255 struct JobQueue {
256 std::queue<FuzzJob *> Qu;
257 std::mutex Mu;
258 std::condition_variable Cv;
259
Pushfuzzer::JobQueue260 void Push(FuzzJob *Job) {
261 {
262 std::lock_guard<std::mutex> Lock(Mu);
263 Qu.push(Job);
264 }
265 Cv.notify_one();
266 }
Popfuzzer::JobQueue267 FuzzJob *Pop() {
268 std::unique_lock<std::mutex> Lk(Mu);
269 // std::lock_guard<std::mutex> Lock(Mu);
270 Cv.wait(Lk, [&]{return !Qu.empty();});
271 assert(!Qu.empty());
272 auto Job = Qu.front();
273 Qu.pop();
274 return Job;
275 }
276 };
277
WorkerThread(JobQueue * FuzzQ,JobQueue * MergeQ)278 void WorkerThread(JobQueue *FuzzQ, JobQueue *MergeQ) {
279 while (auto Job = FuzzQ->Pop()) {
280 // Printf("WorkerThread: job %p\n", Job);
281 Job->ExitCode = ExecuteCommand(Job->Cmd);
282 MergeQ->Push(Job);
283 }
284 }
285
286 // This is just a skeleton of an experimental -fork=1 feature.
FuzzWithFork(Random & Rand,const FuzzingOptions & Options,const Vector<std::string> & Args,const Vector<std::string> & CorpusDirs,int NumJobs)287 int FuzzWithFork(Random &Rand, const FuzzingOptions &Options,
288 const Vector<std::string> &Args,
289 const Vector<std::string> &CorpusDirs, int NumJobs) {
290 Printf("INFO: -fork=%d: fuzzing in separate process(s)\n", NumJobs);
291
292 GlobalEnv Env;
293 Env.Args = Args;
294 Env.CorpusDirs = CorpusDirs;
295 Env.Rand = &Rand;
296 Env.Verbosity = Options.Verbosity;
297 Env.ProcessStartTime = std::chrono::system_clock::now();
298 Env.DataFlowBinary = Options.CollectDataFlow;
299
300 Vector<SizedFile> SeedFiles;
301 int Res;
302 for (auto &Dir : CorpusDirs) {
303 Res = GetSizedFilesFromDir(Dir, &SeedFiles);
304 if (Res != 0)
305 return Res;
306 }
307 std::sort(SeedFiles.begin(), SeedFiles.end());
308 Env.TempDir = TempPath("FuzzWithFork", ".dir");
309 Env.DFTDir = DirPlusFile(Env.TempDir, "DFT");
310 RmDirRecursive(Env.TempDir); // in case there is a leftover from old runs.
311 MkDir(Env.TempDir);
312 MkDir(Env.DFTDir);
313
314
315 if (CorpusDirs.empty())
316 MkDir(Env.MainCorpusDir = DirPlusFile(Env.TempDir, "C"));
317 else
318 Env.MainCorpusDir = CorpusDirs[0];
319
320 auto CFPath = DirPlusFile(Env.TempDir, "merge.txt");
321 Res = CrashResistantMerge(Env.Args, {}, SeedFiles, &Env.Files, {}, &Env.Features,
322 {}, &Env.Cov,
323 CFPath, false);
324 if (Res != 0)
325 return Res;
326 if (Fuzzer::isGracefulExitRequested())
327 return 0;
328
329 RemoveFile(CFPath);
330 Printf("INFO: -fork=%d: %zd seed inputs, starting to fuzz in %s\n", NumJobs,
331 Env.Files.size(), Env.TempDir.c_str());
332
333 int ExitCode = 0;
334
335 JobQueue FuzzQ, MergeQ;
336
337 auto StopJobs = [&]() {
338 for (int i = 0; i < NumJobs; i++)
339 FuzzQ.Push(nullptr);
340 MergeQ.Push(nullptr);
341 WriteToFile(Unit({1}), Env.StopFile());
342 };
343
344 size_t JobId = 1;
345 Vector<std::thread> Threads;
346 for (int t = 0; t < NumJobs; t++) {
347 Threads.push_back(std::thread(WorkerThread, &FuzzQ, &MergeQ));
348 FuzzQ.Push(Env.CreateNewJob(JobId++));
349 }
350
351 while (true) {
352 std::unique_ptr<FuzzJob> Job(MergeQ.Pop());
353 if (!Job)
354 break;
355 ExitCode = Job->ExitCode;
356 if (ExitCode == Options.InterruptExitCode) {
357 Printf("==%lu== libFuzzer: a child was interrupted; exiting\n", GetPid());
358 StopJobs();
359 break;
360 }
361 if (Fuzzer::MaybeExitGracefully())
362 return 0;
363
364 Res = Env.RunOneMergeJob(Job.get());
365 if (Res != 0)
366 return Res;
367 if (Fuzzer::isGracefulExitRequested())
368 return 0;
369
370 // Continue if our crash is one of the ignorred ones.
371 if (Options.IgnoreTimeouts && ExitCode == Options.TimeoutExitCode)
372 Env.NumTimeouts++;
373 else if (Options.IgnoreOOMs && ExitCode == Options.OOMExitCode)
374 Env.NumOOMs++;
375 else if (ExitCode != 0) {
376 Env.NumCrashes++;
377 if (Options.IgnoreCrashes) {
378 std::ifstream In(Job->LogPath);
379 std::string Line;
380 while (std::getline(In, Line, '\n'))
381 if (Line.find("ERROR:") != Line.npos ||
382 Line.find("runtime error:") != Line.npos)
383 Printf("%s\n", Line.c_str());
384 } else {
385 // And exit if we don't ignore this crash.
386 Printf("INFO: log from the inner process:\n%s",
387 FileToString(Job->LogPath).c_str());
388 StopJobs();
389 break;
390 }
391 }
392
393 // Stop if we are over the time budget.
394 // This is not precise, since other threads are still running
395 // and we will wait while joining them.
396 // We also don't stop instantly: other jobs need to finish.
397 if (Options.MaxTotalTimeSec > 0 &&
398 Env.secondsSinceProcessStartUp() >= (size_t)Options.MaxTotalTimeSec) {
399 Printf("INFO: fuzzed for %zd seconds, wrapping up soon\n",
400 Env.secondsSinceProcessStartUp());
401 StopJobs();
402 break;
403 }
404 if (Env.NumRuns >= Options.MaxNumberOfRuns) {
405 Printf("INFO: fuzzed for %zd iterations, wrapping up soon\n",
406 Env.NumRuns);
407 StopJobs();
408 break;
409 }
410
411 FuzzQ.Push(Env.CreateNewJob(JobId++));
412 }
413
414 for (auto &T : Threads)
415 T.join();
416
417 // The workers have terminated. Don't try to remove the directory before they
418 // terminate to avoid a race condition preventing cleanup on Windows.
419 RmDirRecursive(Env.TempDir);
420
421 // Use the exit code from the last child process.
422 Printf("INFO: exiting: %d time: %zds\n", ExitCode,
423 Env.secondsSinceProcessStartUp());
424 return ExitCode;
425 }
426
427 } // namespace fuzzer
428