FuzzerFork.cpp 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494
  1. //===- FuzzerFork.cpp - run fuzzing in separate subprocesses --------------===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. // Spawn and orchestrate separate fuzzing processes.
  9. //===----------------------------------------------------------------------===//
  10. #include "FuzzerCommand.h"
  11. #include "FuzzerFork.h"
  12. #include "FuzzerIO.h"
  13. #include "FuzzerInternal.h"
  14. #include "FuzzerMerge.h"
  15. #include "FuzzerSHA1.h"
  16. #include "FuzzerTracePC.h"
  17. #include "FuzzerUtil.h"
  18. #include <atomic>
  19. #include <chrono>
  20. #include <condition_variable>
  21. #include <fstream>
  22. #include <memory>
  23. #include <mutex>
  24. #include <queue>
  25. #include <sstream>
  26. #include <thread>
  27. namespace fuzzer {
  // Values of the "stat::" lines found in a log file. Every field stays 0
  // unless a matching, well-formed line is seen.
  struct Stats {
    size_t number_of_executed_units = 0;
    size_t peak_rss_mb = 0;
    size_t average_exec_per_sec = 0;
  };

  // Scans the file at LogPath for lines of the form "stat::<name>: <value>"
  // and returns the values of the recognized statistics.
  //
  // Lines that do not start with "stat::", lines with an unknown name, and
  // lines whose value is missing or is not a number are ignored. If the file
  // cannot be opened the returned Stats is all zeros. When a statistic occurs
  // more than once, the last well-formed occurrence wins.
  static Stats ParseFinalStatsFromLog(const std::string &LogPath) {
    std::ifstream In(LogPath);
    std::string Line;
    Stats Res;
    const struct {
      const char *Name;
      size_t *Var;
    } NameVarPairs[] = {
        {"stat::number_of_executed_units:", &Res.number_of_executed_units},
        {"stat::peak_rss_mb:", &Res.peak_rss_mb},
        {"stat::average_exec_per_sec:", &Res.average_exec_per_sec},
    };
    while (std::getline(In, Line, '\n')) {
      // rfind(..., 0) only tests the prefix instead of searching the whole
      // line.
      if (Line.rfind("stat::", 0) != 0)
        continue;
      std::istringstream ISS(Line);
      std::string Name;
      size_t Val = 0;
      // A line without a parsable value must not be recorded: when the stream
      // is already at EOF the extraction fails without ever writing to Val.
      if (!(ISS >> Name >> Val))
        continue;
      for (const auto &Pair : NameVarPairs)
        if (Name == Pair.Name)
          *Pair.Var = Val;
    }
    return Res;
  }
  58. struct FuzzJob {
  59. // Inputs.
  60. Command Cmd;
  61. std::string CorpusDir;
  62. std::string FeaturesDir;
  63. std::string LogPath;
  64. std::string SeedListPath;
  65. std::string CFPath;
  66. size_t JobId;
  67. int DftTimeInSeconds = 0;
  68. // Fuzzing Outputs.
  69. int ExitCode;
  70. ~FuzzJob() {
  71. RemoveFile(CFPath);
  72. RemoveFile(LogPath);
  73. RemoveFile(SeedListPath);
  74. RmDirRecursive(CorpusDir);
  75. RmDirRecursive(FeaturesDir);
  76. }
  77. };
  78. struct GlobalEnv {
  79. std::vector<std::string> Args;
  80. std::vector<std::string> CorpusDirs;
  81. std::string MainCorpusDir;
  82. std::string TempDir;
  83. std::string DFTDir;
  84. std::string DataFlowBinary;
  85. std::set<uint32_t> Features, Cov;
  86. std::set<std::string> FilesWithDFT;
  87. std::vector<std::string> Files;
  88. std::vector<std::size_t> FilesSizes;
  89. Random *Rand;
  90. std::chrono::system_clock::time_point ProcessStartTime;
  91. int Verbosity = 0;
  92. int Group = 0;
  93. int NumCorpuses = 8;
  94. size_t NumTimeouts = 0;
  95. size_t NumOOMs = 0;
  96. size_t NumCrashes = 0;
  97. size_t NumRuns = 0;
  98. std::string StopFile() { return DirPlusFile(TempDir, "STOP"); }
  99. size_t secondsSinceProcessStartUp() const {
  100. return std::chrono::duration_cast<std::chrono::seconds>(
  101. std::chrono::system_clock::now() - ProcessStartTime)
  102. .count();
  103. }
  104. FuzzJob *CreateNewJob(size_t JobId) {
  105. Command Cmd(Args);
  106. Cmd.removeFlag("fork");
  107. Cmd.removeFlag("runs");
  108. Cmd.removeFlag("collect_data_flow");
  109. for (auto &C : CorpusDirs) // Remove all corpora from the args.
  110. Cmd.removeArgument(C);
  111. Cmd.addFlag("reload", "0"); // working in an isolated dir, no reload.
  112. Cmd.addFlag("print_final_stats", "1");
  113. Cmd.addFlag("print_funcs", "0"); // no need to spend time symbolizing.
  114. Cmd.addFlag("max_total_time", std::to_string(std::min((size_t)300, JobId)));
  115. Cmd.addFlag("stop_file", StopFile());
  116. if (!DataFlowBinary.empty()) {
  117. Cmd.addFlag("data_flow_trace", DFTDir);
  118. if (!Cmd.hasFlag("focus_function"))
  119. Cmd.addFlag("focus_function", "auto");
  120. }
  121. auto Job = new FuzzJob;
  122. std::string Seeds;
  123. if (size_t CorpusSubsetSize =
  124. std::min(Files.size(), (size_t)sqrt(Files.size() + 2))) {
  125. auto Time1 = std::chrono::system_clock::now();
  126. if (Group) { // whether to group the corpus.
  127. size_t AverageCorpusSize = Files.size() / NumCorpuses + 1;
  128. size_t StartIndex = ((JobId - 1) % NumCorpuses) * AverageCorpusSize;
  129. for (size_t i = 0; i < CorpusSubsetSize; i++) {
  130. size_t RandNum = (*Rand)(AverageCorpusSize);
  131. size_t Index = RandNum + StartIndex;
  132. Index = Index < Files.size() ? Index
  133. : Rand->SkewTowardsLast(Files.size());
  134. auto &SF = Files[Index];
  135. Seeds += (Seeds.empty() ? "" : ",") + SF;
  136. CollectDFT(SF);
  137. }
  138. } else {
  139. for (size_t i = 0; i < CorpusSubsetSize; i++) {
  140. auto &SF = Files[Rand->SkewTowardsLast(Files.size())];
  141. Seeds += (Seeds.empty() ? "" : ",") + SF;
  142. CollectDFT(SF);
  143. }
  144. }
  145. auto Time2 = std::chrono::system_clock::now();
  146. auto DftTimeInSeconds = duration_cast<seconds>(Time2 - Time1).count();
  147. assert(DftTimeInSeconds < std::numeric_limits<int>::max());
  148. Job->DftTimeInSeconds = static_cast<int>(DftTimeInSeconds);
  149. }
  150. if (!Seeds.empty()) {
  151. Job->SeedListPath =
  152. DirPlusFile(TempDir, std::to_string(JobId) + ".seeds");
  153. WriteToFile(Seeds, Job->SeedListPath);
  154. Cmd.addFlag("seed_inputs", "@" + Job->SeedListPath);
  155. }
  156. Job->LogPath = DirPlusFile(TempDir, std::to_string(JobId) + ".log");
  157. Job->CorpusDir = DirPlusFile(TempDir, "C" + std::to_string(JobId));
  158. Job->FeaturesDir = DirPlusFile(TempDir, "F" + std::to_string(JobId));
  159. Job->CFPath = DirPlusFile(TempDir, std::to_string(JobId) + ".merge");
  160. Job->JobId = JobId;
  161. Cmd.addArgument(Job->CorpusDir);
  162. Cmd.addFlag("features_dir", Job->FeaturesDir);
  163. for (auto &D : {Job->CorpusDir, Job->FeaturesDir}) {
  164. RmDirRecursive(D);
  165. MkDir(D);
  166. }
  167. Cmd.setOutputFile(Job->LogPath);
  168. Cmd.combineOutAndErr();
  169. Job->Cmd = Cmd;
  170. if (Verbosity >= 2)
  171. Printf("Job %zd/%p Created: %s\n", JobId, Job,
  172. Job->Cmd.toString().c_str());
  173. // Start from very short runs and gradually increase them.
  174. return Job;
  175. }
  176. void RunOneMergeJob(FuzzJob *Job) {
  177. auto Stats = ParseFinalStatsFromLog(Job->LogPath);
  178. NumRuns += Stats.number_of_executed_units;
  179. std::vector<SizedFile> TempFiles, MergeCandidates;
  180. // Read all newly created inputs and their feature sets.
  181. // Choose only those inputs that have new features.
  182. GetSizedFilesFromDir(Job->CorpusDir, &TempFiles);
  183. std::sort(TempFiles.begin(), TempFiles.end());
  184. for (auto &F : TempFiles) {
  185. auto FeatureFile = F.File;
  186. FeatureFile.replace(0, Job->CorpusDir.size(), Job->FeaturesDir);
  187. auto FeatureBytes = FileToVector(FeatureFile, 0, false);
  188. assert((FeatureBytes.size() % sizeof(uint32_t)) == 0);
  189. std::vector<uint32_t> NewFeatures(FeatureBytes.size() / sizeof(uint32_t));
  190. memcpy(NewFeatures.data(), FeatureBytes.data(), FeatureBytes.size());
  191. for (auto Ft : NewFeatures) {
  192. if (!Features.count(Ft)) {
  193. MergeCandidates.push_back(F);
  194. break;
  195. }
  196. }
  197. }
  198. // if (!FilesToAdd.empty() || Job->ExitCode != 0)
  199. Printf("#%zd: cov: %zd ft: %zd corp: %zd exec/s %zd "
  200. "oom/timeout/crash: %zd/%zd/%zd time: %zds job: %zd dft_time: %d\n",
  201. NumRuns, Cov.size(), Features.size(), Files.size(),
  202. Stats.average_exec_per_sec, NumOOMs, NumTimeouts, NumCrashes,
  203. secondsSinceProcessStartUp(), Job->JobId, Job->DftTimeInSeconds);
  204. if (MergeCandidates.empty()) return;
  205. std::vector<std::string> FilesToAdd;
  206. std::set<uint32_t> NewFeatures, NewCov;
  207. bool IsSetCoverMerge =
  208. !Job->Cmd.getFlagValue("set_cover_merge").compare("1");
  209. CrashResistantMerge(Args, {}, MergeCandidates, &FilesToAdd, Features,
  210. &NewFeatures, Cov, &NewCov, Job->CFPath, false,
  211. IsSetCoverMerge);
  212. for (auto &Path : FilesToAdd) {
  213. auto U = FileToVector(Path);
  214. auto NewPath = DirPlusFile(MainCorpusDir, Hash(U));
  215. WriteToFile(U, NewPath);
  216. if (Group) { // Insert the queue according to the size of the seed.
  217. size_t UnitSize = U.size();
  218. auto Idx =
  219. std::upper_bound(FilesSizes.begin(), FilesSizes.end(), UnitSize) -
  220. FilesSizes.begin();
  221. FilesSizes.insert(FilesSizes.begin() + Idx, UnitSize);
  222. Files.insert(Files.begin() + Idx, NewPath);
  223. } else {
  224. Files.push_back(NewPath);
  225. }
  226. }
  227. Features.insert(NewFeatures.begin(), NewFeatures.end());
  228. Cov.insert(NewCov.begin(), NewCov.end());
  229. for (auto Idx : NewCov)
  230. if (auto *TE = TPC.PCTableEntryByIdx(Idx))
  231. if (TPC.PcIsFuncEntry(TE))
  232. PrintPC(" NEW_FUNC: %p %F %L\n", "",
  233. TPC.GetNextInstructionPc(TE->PC));
  234. }
  235. void CollectDFT(const std::string &InputPath) {
  236. if (DataFlowBinary.empty()) return;
  237. if (!FilesWithDFT.insert(InputPath).second) return;
  238. Command Cmd(Args);
  239. Cmd.removeFlag("fork");
  240. Cmd.removeFlag("runs");
  241. Cmd.addFlag("data_flow_trace", DFTDir);
  242. Cmd.addArgument(InputPath);
  243. for (auto &C : CorpusDirs) // Remove all corpora from the args.
  244. Cmd.removeArgument(C);
  245. Cmd.setOutputFile(DirPlusFile(TempDir, "dft.log"));
  246. Cmd.combineOutAndErr();
  247. // Printf("CollectDFT: %s\n", Cmd.toString().c_str());
  248. ExecuteCommand(Cmd);
  249. }
  250. };
  251. struct JobQueue {
  252. std::queue<FuzzJob *> Qu;
  253. std::mutex Mu;
  254. std::condition_variable Cv;
  255. void Push(FuzzJob *Job) {
  256. {
  257. std::lock_guard<std::mutex> Lock(Mu);
  258. Qu.push(Job);
  259. }
  260. Cv.notify_one();
  261. }
  262. FuzzJob *Pop() {
  263. std::unique_lock<std::mutex> Lk(Mu);
  264. // std::lock_guard<std::mutex> Lock(Mu);
  265. Cv.wait(Lk, [&]{return !Qu.empty();});
  266. assert(!Qu.empty());
  267. auto Job = Qu.front();
  268. Qu.pop();
  269. return Job;
  270. }
  271. };
  272. void WorkerThread(JobQueue *FuzzQ, JobQueue *MergeQ) {
  273. while (auto Job = FuzzQ->Pop()) {
  274. // Printf("WorkerThread: job %p\n", Job);
  275. Job->ExitCode = ExecuteCommand(Job->Cmd);
  276. MergeQ->Push(Job);
  277. }
  278. }
  279. // This is just a skeleton of an experimental -fork=1 feature.
  280. void FuzzWithFork(Random &Rand, const FuzzingOptions &Options,
  281. const std::vector<std::string> &Args,
  282. const std::vector<std::string> &CorpusDirs, int NumJobs) {
  283. Printf("INFO: -fork=%d: fuzzing in separate process(s)\n", NumJobs);
  284. GlobalEnv Env;
  285. Env.Args = Args;
  286. Env.CorpusDirs = CorpusDirs;
  287. Env.Rand = &Rand;
  288. Env.Verbosity = Options.Verbosity;
  289. Env.ProcessStartTime = std::chrono::system_clock::now();
  290. Env.DataFlowBinary = Options.CollectDataFlow;
  291. Env.Group = Options.ForkCorpusGroups;
  292. std::vector<SizedFile> SeedFiles;
  293. for (auto &Dir : CorpusDirs)
  294. GetSizedFilesFromDir(Dir, &SeedFiles);
  295. std::sort(SeedFiles.begin(), SeedFiles.end());
  296. Env.TempDir = TempPath("FuzzWithFork", ".dir");
  297. Env.DFTDir = DirPlusFile(Env.TempDir, "DFT");
  298. RmDirRecursive(Env.TempDir); // in case there is a leftover from old runs.
  299. MkDir(Env.TempDir);
  300. MkDir(Env.DFTDir);
  301. if (CorpusDirs.empty())
  302. MkDir(Env.MainCorpusDir = DirPlusFile(Env.TempDir, "C"));
  303. else
  304. Env.MainCorpusDir = CorpusDirs[0];
  305. if (Options.KeepSeed) {
  306. for (auto &File : SeedFiles)
  307. Env.Files.push_back(File.File);
  308. } else {
  309. auto CFPath = DirPlusFile(Env.TempDir, "merge.txt");
  310. std::set<uint32_t> NewFeatures, NewCov;
  311. CrashResistantMerge(Env.Args, {}, SeedFiles, &Env.Files, Env.Features,
  312. &NewFeatures, Env.Cov, &NewCov, CFPath,
  313. /*Verbose=*/false, /*IsSetCoverMerge=*/false);
  314. Env.Features.insert(NewFeatures.begin(), NewFeatures.end());
  315. Env.Cov.insert(NewFeatures.begin(), NewFeatures.end());
  316. RemoveFile(CFPath);
  317. }
  318. if (Env.Group) {
  319. for (auto &path : Env.Files)
  320. Env.FilesSizes.push_back(FileSize(path));
  321. }
  322. Printf("INFO: -fork=%d: %zd seed inputs, starting to fuzz in %s\n", NumJobs,
  323. Env.Files.size(), Env.TempDir.c_str());
  324. int ExitCode = 0;
  325. JobQueue FuzzQ, MergeQ;
  326. auto StopJobs = [&]() {
  327. for (int i = 0; i < NumJobs; i++)
  328. FuzzQ.Push(nullptr);
  329. MergeQ.Push(nullptr);
  330. WriteToFile(Unit({1}), Env.StopFile());
  331. };
  332. size_t MergeCycle = 20;
  333. size_t JobExecuted = 0;
  334. size_t JobId = 1;
  335. std::vector<std::thread> Threads;
  336. for (int t = 0; t < NumJobs; t++) {
  337. Threads.push_back(std::thread(WorkerThread, &FuzzQ, &MergeQ));
  338. FuzzQ.Push(Env.CreateNewJob(JobId++));
  339. }
  340. while (true) {
  341. std::unique_ptr<FuzzJob> Job(MergeQ.Pop());
  342. if (!Job)
  343. break;
  344. ExitCode = Job->ExitCode;
  345. if (ExitCode == Options.InterruptExitCode) {
  346. Printf("==%lu== libFuzzer: a child was interrupted; exiting\n", GetPid());
  347. StopJobs();
  348. break;
  349. }
  350. Fuzzer::MaybeExitGracefully();
  351. Env.RunOneMergeJob(Job.get());
  352. // merge the corpus .
  353. JobExecuted++;
  354. if (Env.Group && JobExecuted >= MergeCycle) {
  355. std::vector<SizedFile> CurrentSeedFiles;
  356. for (auto &Dir : CorpusDirs)
  357. GetSizedFilesFromDir(Dir, &CurrentSeedFiles);
  358. std::sort(CurrentSeedFiles.begin(), CurrentSeedFiles.end());
  359. auto CFPath = DirPlusFile(Env.TempDir, "merge.txt");
  360. std::set<uint32_t> TmpNewFeatures, TmpNewCov;
  361. std::set<uint32_t> TmpFeatures, TmpCov;
  362. Env.Files.clear();
  363. Env.FilesSizes.clear();
  364. CrashResistantMerge(Env.Args, {}, CurrentSeedFiles, &Env.Files,
  365. TmpFeatures, &TmpNewFeatures, TmpCov, &TmpNewCov,
  366. CFPath, /*Verbose=*/false, /*IsSetCoverMerge=*/false);
  367. for (auto &path : Env.Files)
  368. Env.FilesSizes.push_back(FileSize(path));
  369. RemoveFile(CFPath);
  370. JobExecuted = 0;
  371. MergeCycle += 5;
  372. }
  373. // Since the number of corpus seeds will gradually increase, in order to
  374. // control the number in each group to be about three times the number of
  375. // seeds selected each time, the number of groups is dynamically adjusted.
  376. if (Env.Files.size() < 2000)
  377. Env.NumCorpuses = 12;
  378. else if (Env.Files.size() < 6000)
  379. Env.NumCorpuses = 20;
  380. else if (Env.Files.size() < 12000)
  381. Env.NumCorpuses = 32;
  382. else if (Env.Files.size() < 16000)
  383. Env.NumCorpuses = 40;
  384. else if (Env.Files.size() < 24000)
  385. Env.NumCorpuses = 60;
  386. else
  387. Env.NumCorpuses = 80;
  388. // Continue if our crash is one of the ignored ones.
  389. if (Options.IgnoreTimeouts && ExitCode == Options.TimeoutExitCode)
  390. Env.NumTimeouts++;
  391. else if (Options.IgnoreOOMs && ExitCode == Options.OOMExitCode)
  392. Env.NumOOMs++;
  393. else if (ExitCode != 0) {
  394. Env.NumCrashes++;
  395. if (Options.IgnoreCrashes) {
  396. std::ifstream In(Job->LogPath);
  397. std::string Line;
  398. while (std::getline(In, Line, '\n'))
  399. if (Line.find("ERROR:") != Line.npos ||
  400. Line.find("runtime error:") != Line.npos)
  401. Printf("%s\n", Line.c_str());
  402. } else {
  403. // And exit if we don't ignore this crash.
  404. Printf("INFO: log from the inner process:\n%s",
  405. FileToString(Job->LogPath).c_str());
  406. StopJobs();
  407. break;
  408. }
  409. }
  410. // Stop if we are over the time budget.
  411. // This is not precise, since other threads are still running
  412. // and we will wait while joining them.
  413. // We also don't stop instantly: other jobs need to finish.
  414. if (Options.MaxTotalTimeSec > 0 &&
  415. Env.secondsSinceProcessStartUp() >= (size_t)Options.MaxTotalTimeSec) {
  416. Printf("INFO: fuzzed for %zd seconds, wrapping up soon\n",
  417. Env.secondsSinceProcessStartUp());
  418. StopJobs();
  419. break;
  420. }
  421. if (Env.NumRuns >= Options.MaxNumberOfRuns) {
  422. Printf("INFO: fuzzed for %zd iterations, wrapping up soon\n",
  423. Env.NumRuns);
  424. StopJobs();
  425. break;
  426. }
  427. FuzzQ.Push(Env.CreateNewJob(JobId++));
  428. }
  429. for (auto &T : Threads)
  430. T.join();
  431. // The workers have terminated. Don't try to remove the directory before they
  432. // terminate to avoid a race condition preventing cleanup on Windows.
  433. RmDirRecursive(Env.TempDir);
  434. // Use the exit code from the last child process.
  435. Printf("INFO: exiting: %d time: %zds\n", ExitCode,
  436. Env.secondsSinceProcessStartUp());
  437. exit(ExitCode);
  438. }
  439. } // namespace fuzzer