FuzzerMerge.h 3.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293
  //===- FuzzerMerge.h - merging corpora --------------------------*- C++ -* ===//
  //
  // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  // See https://llvm.org/LICENSE.txt for license information.
  // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  //
  //===----------------------------------------------------------------------===//
  // Merging Corpora.
  //
  // The task:
  // Take the existing corpus (possibly empty) and merge new inputs into
  // it so that only inputs with new coverage ('features') are added.
  // The process should tolerate crashes, OOMs, leaks, etc.
  //
  // Algorithm:
  // The outer process collects the set of files and writes their names
  // into a temporary "control" file, then repeatedly launches the inner
  // process until all inputs are processed.
  // The outer process does not actually execute the target code.
  //
  // The inner process reads the control file and sees a) the list of all the
  // inputs and b) the last processed input. Then it starts processing the
  // inputs one by one. Before processing every input it writes one line to
  // the control file:
  //   STARTED INPUT_ID INPUT_SIZE
  // After processing an input it writes the following lines:
  //   FT INPUT_ID Feature1 Feature2 Feature3 ...
  //   COV INPUT_ID Coverage1 Coverage2 Coverage3 ...
  // If a crash happens while processing an input, the last line in the control
  // file will be the "STARTED INPUT_ID INPUT_SIZE" line for that input, and so
  // the next process will know where to resume.
  //
  // Once all inputs are processed by the inner process(es) the outer process
  // reads the control file and does the merge based entirely on the contents
  // of the control file.
  // It uses a single-pass greedy algorithm, choosing the smallest inputs first
  // and, among inputs of the same size, those that have more new features.
  //
  //===----------------------------------------------------------------------===//
  39. #ifndef LLVM_FUZZER_MERGE_H
  40. #define LLVM_FUZZER_MERGE_H
  41. #include "FuzzerDefs.h"
  42. #include "FuzzerIO.h"
  43. #include <istream>
  44. #include <ostream>
  45. #include <set>
  46. #include <vector>
  47. namespace fuzzer {
  // Plain record describing one input file that takes part in a merge.
  // All members are public and default-initialized, so a freshly constructed
  // object has an empty name, a size of zero and no features or coverage.
  struct MergeFileInfo {
    // Name of the input file.
    std::string Name;
    // Size of the input. NOTE(review): presumably the INPUT_SIZE value from
    // the "STARTED" control-file line -- confirm against the parser.
    size_t Size = 0;
    // Feature IDs attributed to this input. NOTE(review): presumably the
    // values of the "FT" control-file line -- confirm against the parser.
    std::vector<uint32_t> Features;
    // Coverage IDs attributed to this input. NOTE(review): presumably the
    // values of the "COV" control-file line -- confirm against the parser.
    std::vector<uint32_t> Cov;
  };
  53. struct Merger {
  54. std::vector<MergeFileInfo> Files;
  55. size_t NumFilesInFirstCorpus = 0;
  56. size_t FirstNotProcessedFile = 0;
  57. std::string LastFailure;
  58. bool Parse(std::istream &IS, bool ParseCoverage);
  59. bool Parse(const std::string &Str, bool ParseCoverage);
  60. void ParseOrExit(std::istream &IS, bool ParseCoverage);
  61. size_t Merge(const std::set<uint32_t> &InitialFeatures,
  62. std::set<uint32_t> *NewFeatures,
  63. const std::set<uint32_t> &InitialCov, std::set<uint32_t> *NewCov,
  64. std::vector<std::string> *NewFiles);
  65. size_t SetCoverMerge(const std::set<uint32_t> &InitialFeatures,
  66. std::set<uint32_t> *NewFeatures,
  67. const std::set<uint32_t> &InitialCov,
  68. std::set<uint32_t> *NewCov,
  69. std::vector<std::string> *NewFiles);
  70. size_t ApproximateMemoryConsumption() const;
  71. std::set<uint32_t> AllFeatures() const;
  72. };
  73. void CrashResistantMerge(const std::vector<std::string> &Args,
  74. const std::vector<SizedFile> &OldCorpus,
  75. const std::vector<SizedFile> &NewCorpus,
  76. std::vector<std::string> *NewFiles,
  77. const std::set<uint32_t> &InitialFeatures,
  78. std::set<uint32_t> *NewFeatures,
  79. const std::set<uint32_t> &InitialCov,
  80. std::set<uint32_t> *NewCov, const std::string &CFPath,
  81. bool Verbose, bool IsSetCoverMerge);
  82. } // namespace fuzzer
  83. #endif // LLVM_FUZZER_MERGE_H