// forked from OSchip/llvm-project
//===- FuzzerFork.cpp - run fuzzing in separate subprocesses --------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// Spawn and orchestrate separate fuzzing processes.
//===----------------------------------------------------------------------===//

#include "FuzzerCommand.h"
#include "FuzzerFork.h"
#include "FuzzerIO.h"
#include "FuzzerInternal.h"
#include "FuzzerMerge.h"
#include "FuzzerSHA1.h"
#include "FuzzerTracePC.h"
#include "FuzzerUtil.h"

#include <atomic>
#include <chrono>
#include <condition_variable>
#include <fstream>
#include <memory>
#include <mutex>
#include <queue>
#include <sstream>
#include <thread>

namespace fuzzer {
// Final statistics scraped from a child process's log by
// ParseFinalStatsFromLog(); each field mirrors one "stat::" key that a child
// prints when run with -print_final_stats=1.
struct Stats {
  size_t number_of_executed_units = 0;  // stat::number_of_executed_units
  size_t peak_rss_mb = 0;               // stat::peak_rss_mb
  size_t average_exec_per_sec = 0;      // stat::average_exec_per_sec
};
| static Stats ParseFinalStatsFromLog(const std::string &LogPath) {
 | |
|   std::ifstream In(LogPath);
 | |
|   std::string Line;
 | |
|   Stats Res;
 | |
|   struct {
 | |
|     const char *Name;
 | |
|     size_t *Var;
 | |
|   } NameVarPairs[] = {
 | |
|       {"stat::number_of_executed_units:", &Res.number_of_executed_units},
 | |
|       {"stat::peak_rss_mb:", &Res.peak_rss_mb},
 | |
|       {"stat::average_exec_per_sec:", &Res.average_exec_per_sec},
 | |
|       {nullptr, nullptr},
 | |
|   };
 | |
|   while (std::getline(In, Line, '\n')) {
 | |
|     if (Line.find("stat::") != 0) continue;
 | |
|     std::istringstream ISS(Line);
 | |
|     std::string Name;
 | |
|     size_t Val;
 | |
|     ISS >> Name >> Val;
 | |
|     for (size_t i = 0; NameVarPairs[i].Name; i++)
 | |
|       if (Name == NameVarPairs[i].Name)
 | |
|         *NameVarPairs[i].Var = Val;
 | |
|   }
 | |
|   return Res;
 | |
| }
// One fuzzing subprocess run: the command to execute plus the per-job temp
// paths it works in. The destructor removes all of them, so a FuzzJob cleans
// up after itself when the main loop releases it.
struct FuzzJob {
  // Inputs.
  Command Cmd;
  std::string CorpusDir;     // isolated corpus dir for this job ("C<JobId>")
  std::string FeaturesDir;   // per-input feature dumps ("F<JobId>")
  std::string LogPath;       // combined stdout+stderr of the child
  std::string SeedListPath;  // file holding the comma-separated seed list
  std::string CFPath;        // control file for the post-run merge
  size_t      JobId;

  int         DftTimeInSeconds = 0;  // time spent collecting data-flow traces

  // Fuzzing Outputs.
  int ExitCode;

  // Remove every temp file/dir owned by this job.
  ~FuzzJob() {
    RemoveFile(CFPath);
    RemoveFile(LogPath);
    RemoveFile(SeedListPath);
    RmDirRecursive(CorpusDir);
    RmDirRecursive(FeaturesDir);
  }
};
// Shared state of the whole fork-mode session: the merged corpus, the
// accumulated feature/coverage sets, counters, and the job factory/merge
// logic. Mutated only by the main thread; worker threads just execute
// FuzzJob::Cmd and hand the job back.
struct GlobalEnv {
  Vector<std::string> Args;        // original command line, template for jobs
  Vector<std::string> CorpusDirs;  // user-supplied corpus dirs
  std::string MainCorpusDir;       // where merged inputs are written
  std::string TempDir;             // scratch dir for this session
  std::string DFTDir;              // data-flow-trace output dir
  std::string DataFlowBinary;      // empty => DFT collection disabled
  Set<uint32_t> Features, Cov;     // features / coverage seen so far
  Set<std::string> FilesWithDFT;   // inputs we already collected a DFT for
  Vector<std::string> Files;       // current merged corpus files
  Random *Rand;
  std::chrono::system_clock::time_point ProcessStartTime;
  int Verbosity = 0;

  size_t NumTimeouts = 0;
  size_t NumOOMs = 0;
  size_t NumCrashes = 0;

  size_t NumRuns = 0;  // total units executed across all child jobs

  // Children are given this path via -stop_file; creating it asks them to
  // wind down.
  std::string StopFile() { return DirPlusFile(TempDir, "STOP"); }

  size_t secondsSinceProcessStartUp() const {
    return std::chrono::duration_cast<std::chrono::seconds>(
               std::chrono::system_clock::now() - ProcessStartTime)
        .count();
  }

  // Builds the command and temp paths for the JobId-th child process.
  // The child fuzzes into its own fresh corpus/features dirs and is seeded
  // with a random subset (skewed towards the most recent files) of the
  // merged corpus.
  FuzzJob *CreateNewJob(size_t JobId) {
    Command Cmd(Args);
    Cmd.removeFlag("fork");
    Cmd.removeFlag("runs");
    Cmd.removeFlag("collect_data_flow");
    for (auto &C : CorpusDirs) // Remove all corpora from the args.
      Cmd.removeArgument(C);
    Cmd.addFlag("reload", "0");  // working in an isolated dir, no reload.
    Cmd.addFlag("print_final_stats", "1");
    Cmd.addFlag("print_funcs", "0");  // no need to spend time symbolizing.
    // Start from very short runs and gradually increase them, capped at 300s.
    Cmd.addFlag("max_total_time", std::to_string(std::min((size_t)300, JobId)));
    Cmd.addFlag("stop_file", StopFile());
    if (!DataFlowBinary.empty()) {
      Cmd.addFlag("data_flow_trace", DFTDir);
      if (!Cmd.hasFlag("focus_function"))
        Cmd.addFlag("focus_function", "auto");
    }
    auto Job = new FuzzJob;
    std::string Seeds;
    // Seed with ~sqrt(|corpus|) inputs, collecting DFTs for them as we go.
    if (size_t CorpusSubsetSize =
            std::min(Files.size(), (size_t)sqrt(Files.size() + 2))) {
      auto Time1 = std::chrono::system_clock::now();
      for (size_t i = 0; i < CorpusSubsetSize; i++) {
        auto &SF = Files[Rand->SkewTowardsLast(Files.size())];
        Seeds += (Seeds.empty() ? "" : ",") + SF;
        CollectDFT(SF);
      }
      auto Time2 = std::chrono::system_clock::now();
      Job->DftTimeInSeconds = duration_cast<seconds>(Time2 - Time1).count();
    }
    if (!Seeds.empty()) {
      Job->SeedListPath =
          DirPlusFile(TempDir, std::to_string(JobId) + ".seeds");
      WriteToFile(Seeds, Job->SeedListPath);
      Cmd.addFlag("seed_inputs", "@" + Job->SeedListPath);
    }
    Job->LogPath = DirPlusFile(TempDir, std::to_string(JobId) + ".log");
    Job->CorpusDir = DirPlusFile(TempDir, "C" + std::to_string(JobId));
    Job->FeaturesDir = DirPlusFile(TempDir, "F" + std::to_string(JobId));
    Job->CFPath = DirPlusFile(TempDir, std::to_string(JobId) + ".merge");
    Job->JobId = JobId;

    Cmd.addArgument(Job->CorpusDir);
    Cmd.addFlag("features_dir", Job->FeaturesDir);

    // Recreate the per-job dirs from scratch.
    for (auto &D : {Job->CorpusDir, Job->FeaturesDir}) {
      RmDirRecursive(D);
      MkDir(D);
    }

    Cmd.setOutputFile(Job->LogPath);
    Cmd.combineOutAndErr();

    Job->Cmd = Cmd;

    if (Verbosity >= 2)
      Printf("Job %zd/%p Created: %s\n", JobId, Job,
             Job->Cmd.toString().c_str());
    return Job;
  }

  // Folds a finished job's results back into the global state: accounts its
  // stats, selects the new inputs whose feature sets contain something not
  // yet in Features, and runs a crash-resistant merge to decide which of
  // them join the main corpus.
  void RunOneMergeJob(FuzzJob *Job) {
    auto Stats = ParseFinalStatsFromLog(Job->LogPath);
    NumRuns += Stats.number_of_executed_units;

    Vector<SizedFile> TempFiles, MergeCandidates;
    // Read all newly created inputs and their feature sets.
    // Choose only those inputs that have new features.
    GetSizedFilesFromDir(Job->CorpusDir, &TempFiles);
    std::sort(TempFiles.begin(), TempFiles.end());
    for (auto &F : TempFiles) {
      // The feature file lives at the same relative path under FeaturesDir.
      auto FeatureFile = F.File;
      FeatureFile.replace(0, Job->CorpusDir.size(), Job->FeaturesDir);
      auto FeatureBytes = FileToVector(FeatureFile, 0, false);
      assert((FeatureBytes.size() % sizeof(uint32_t)) == 0);
      Vector<uint32_t> NewFeatures(FeatureBytes.size() / sizeof(uint32_t));
      memcpy(NewFeatures.data(), FeatureBytes.data(), FeatureBytes.size());
      for (auto Ft : NewFeatures) {
        if (!Features.count(Ft)) {
          MergeCandidates.push_back(F);
          break;
        }
      }
    }
    // if (!FilesToAdd.empty() || Job->ExitCode != 0)
    Printf("#%zd: cov: %zd ft: %zd corp: %zd exec/s %zd "
           "oom/timeout/crash: %zd/%zd/%zd time: %zds job: %zd dft_time: %d\n",
           NumRuns, Cov.size(), Features.size(), Files.size(),
           Stats.average_exec_per_sec, NumOOMs, NumTimeouts, NumCrashes,
           secondsSinceProcessStartUp(), Job->JobId, Job->DftTimeInSeconds);

    if (MergeCandidates.empty()) return;

    Vector<std::string> FilesToAdd;
    Set<uint32_t> NewFeatures, NewCov;
    CrashResistantMerge(Args, {}, MergeCandidates, &FilesToAdd, Features,
                        &NewFeatures, Cov, &NewCov, Job->CFPath, false);
    for (auto &Path : FilesToAdd) {
      // Kept inputs are stored under their content hash in the main corpus.
      auto U = FileToVector(Path);
      auto NewPath = DirPlusFile(MainCorpusDir, Hash(U));
      WriteToFile(U, NewPath);
      Files.push_back(NewPath);
    }
    Features.insert(NewFeatures.begin(), NewFeatures.end());
    Cov.insert(NewCov.begin(), NewCov.end());
    // Report functions whose entry PC just became covered.
    for (auto Idx : NewCov)
      if (auto *TE = TPC.PCTableEntryByIdx(Idx))
        if (TPC.PcIsFuncEntry(TE))
          PrintPC("  NEW_FUNC: %p %F %L\n", "",
                  TPC.GetNextInstructionPc(TE->PC));
  }

  // Runs the target once with -data_flow_trace to collect a data-flow trace
  // for InputPath. No-op when DFT collection is disabled or a trace for this
  // input was already collected (tracked via FilesWithDFT).
  void CollectDFT(const std::string &InputPath) {
    if (DataFlowBinary.empty()) return;
    if (!FilesWithDFT.insert(InputPath).second) return;
    Command Cmd(Args);
    Cmd.removeFlag("fork");
    Cmd.removeFlag("runs");
    Cmd.addFlag("data_flow_trace", DFTDir);
    Cmd.addArgument(InputPath);
    for (auto &C : CorpusDirs) // Remove all corpora from the args.
      Cmd.removeArgument(C);
    Cmd.setOutputFile(DirPlusFile(TempDir, "dft.log"));
    Cmd.combineOutAndErr();
    // Printf("CollectDFT: %s\n", Cmd.toString().c_str());
    ExecuteCommand(Cmd);
  }
};
| struct JobQueue {
 | |
|   std::queue<FuzzJob *> Qu;
 | |
|   std::mutex Mu;
 | |
|   std::condition_variable Cv;
 | |
| 
 | |
|   void Push(FuzzJob *Job) {
 | |
|     {
 | |
|       std::lock_guard<std::mutex> Lock(Mu);
 | |
|       Qu.push(Job);
 | |
|     }
 | |
|     Cv.notify_one();
 | |
|   }
 | |
|   FuzzJob *Pop() {
 | |
|     std::unique_lock<std::mutex> Lk(Mu);
 | |
|     // std::lock_guard<std::mutex> Lock(Mu);
 | |
|     Cv.wait(Lk, [&]{return !Qu.empty();});
 | |
|     assert(!Qu.empty());
 | |
|     auto Job = Qu.front();
 | |
|     Qu.pop();
 | |
|     return Job;
 | |
|   }
 | |
| };
| void WorkerThread(JobQueue *FuzzQ, JobQueue *MergeQ) {
 | |
|   while (auto Job = FuzzQ->Pop()) {
 | |
|     // Printf("WorkerThread: job %p\n", Job);
 | |
|     Job->ExitCode = ExecuteCommand(Job->Cmd);
 | |
|     MergeQ->Push(Job);
 | |
|   }
 | |
| }

// This is just a skeleton of an experimental -fork=1 feature.
// Top-level driver for fork mode. Sets up the temp dirs and the initial
// merged corpus, spawns NumJobs worker threads that each execute child
// fuzzing processes, and loops merging every finished job's results back in
// until a stop condition is hit (interrupt, non-ignored crash, time budget,
// or run budget). Exits the process with the last child's exit code.
void FuzzWithFork(Random &Rand, const FuzzingOptions &Options,
                  const Vector<std::string> &Args,
                  const Vector<std::string> &CorpusDirs, int NumJobs) {
  Printf("INFO: -fork=%d: fuzzing in separate process(s)\n", NumJobs);

  GlobalEnv Env;
  Env.Args = Args;
  Env.CorpusDirs = CorpusDirs;
  Env.Rand = &Rand;
  Env.Verbosity = Options.Verbosity;
  Env.ProcessStartTime = std::chrono::system_clock::now();
  Env.DataFlowBinary = Options.CollectDataFlow;

  Vector<SizedFile> SeedFiles;
  for (auto &Dir : CorpusDirs)
    GetSizedFilesFromDir(Dir, &SeedFiles);
  std::sort(SeedFiles.begin(), SeedFiles.end());
  Env.TempDir = TempPath("FuzzWithFork", ".dir");
  Env.DFTDir = DirPlusFile(Env.TempDir, "DFT");
  RmDirRecursive(Env.TempDir);  // in case there is a leftover from old runs.
  MkDir(Env.TempDir);
  MkDir(Env.DFTDir);

  // With no user-supplied corpus dirs, keep the merged corpus in the temp
  // dir; otherwise the first user dir is the main corpus.
  if (CorpusDirs.empty())
    MkDir(Env.MainCorpusDir = DirPlusFile(Env.TempDir, "C"));
  else
    Env.MainCorpusDir = CorpusDirs[0];

  // Initial merge of the seed inputs populates Env.Files/Features/Cov.
  auto CFPath = DirPlusFile(Env.TempDir, "merge.txt");
  CrashResistantMerge(Env.Args, {}, SeedFiles, &Env.Files, {}, &Env.Features,
                      {}, &Env.Cov,
                      CFPath, false);
  RemoveFile(CFPath);
  Printf("INFO: -fork=%d: %zd seed inputs, starting to fuzz in %s\n", NumJobs,
         Env.Files.size(), Env.TempDir.c_str());

  int ExitCode = 0;

  JobQueue FuzzQ, MergeQ;

  // Wind everything down: one nullptr sentinel per worker on FuzzQ, one for
  // this loop on MergeQ, and the stop file so running children exit early.
  auto StopJobs = [&]() {
    for (int i = 0; i < NumJobs; i++)
      FuzzQ.Push(nullptr);
    MergeQ.Push(nullptr);
    WriteToFile(Unit({1}), Env.StopFile());
  };

  size_t JobId = 1;
  Vector<std::thread> Threads;
  for (int t = 0; t < NumJobs; t++) {
    Threads.push_back(std::thread(WorkerThread, &FuzzQ, &MergeQ));
    FuzzQ.Push(Env.CreateNewJob(JobId++));
  }

  while (true) {
    // unique_ptr: FuzzJob's destructor removes the job's temp files/dirs.
    std::unique_ptr<FuzzJob> Job(MergeQ.Pop());
    if (!Job)
      break;
    ExitCode = Job->ExitCode;
    if (ExitCode == Options.InterruptExitCode) {
      Printf("==%lu== libFuzzer: a child was interrupted; exiting\n", GetPid());
      StopJobs();
      break;
    }
    Fuzzer::MaybeExitGracefully();

    Env.RunOneMergeJob(Job.get());

    // Continue if our crash is one of the ignored ones.
    if (Options.IgnoreTimeouts && ExitCode == Options.TimeoutExitCode)
      Env.NumTimeouts++;
    else if (Options.IgnoreOOMs && ExitCode == Options.OOMExitCode)
      Env.NumOOMs++;
    else if (ExitCode != 0) {
      Env.NumCrashes++;
      if (Options.IgnoreCrashes) {
        // Surface only the interesting lines from the child's log.
        std::ifstream In(Job->LogPath);
        std::string Line;
        while (std::getline(In, Line, '\n'))
          if (Line.find("ERROR:") != Line.npos ||
              Line.find("runtime error:") != Line.npos)
            Printf("%s\n", Line.c_str());
      } else {
        // And exit if we don't ignore this crash.
        Printf("INFO: log from the inner process:\n%s",
               FileToString(Job->LogPath).c_str());
        StopJobs();
        break;
      }
    }

    // Stop if we are over the time budget.
    // This is not precise, since other threads are still running
    // and we will wait while joining them.
    // We also don't stop instantly: other jobs need to finish.
    if (Options.MaxTotalTimeSec > 0 &&
        Env.secondsSinceProcessStartUp() >= (size_t)Options.MaxTotalTimeSec) {
      Printf("INFO: fuzzed for %zd seconds, wrapping up soon\n",
             Env.secondsSinceProcessStartUp());
      StopJobs();
      break;
    }
    if (Env.NumRuns >= Options.MaxNumberOfRuns) {
      Printf("INFO: fuzzed for %zd iterations, wrapping up soon\n",
             Env.NumRuns);
      StopJobs();
      break;
    }

    // Keep the pipeline full: schedule a replacement job.
    FuzzQ.Push(Env.CreateNewJob(JobId++));
  }

  for (auto &T : Threads)
    T.join();

  // The workers have terminated. Don't try to remove the directory before they
  // terminate to avoid a race condition preventing cleanup on Windows.
  RmDirRecursive(Env.TempDir);

  // Use the exit code from the last child process.
  Printf("INFO: exiting: %d time: %zds\n", ExitCode,
         Env.secondsSinceProcessStartUp());
  exit(ExitCode);
}

} // namespace fuzzer