Delta.cpp 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404
  1. //===- Delta.cpp - Delta Debugging Algorithm Implementation ---------------===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. // This file contains the implementation for the Delta Debugging Algorithm:
  10. // it splits a given set of Targets (i.e. Functions, Instructions, BBs, etc.)
  11. // into chunks and tries to reduce the number chunks that are interesting.
  12. //
  13. //===----------------------------------------------------------------------===//
  14. #include "Delta.h"
  15. #include "ReducerWorkItem.h"
  16. #include "llvm/ADT/STLExtras.h"
  17. #include "llvm/Bitcode/BitcodeReader.h"
  18. #include "llvm/Bitcode/BitcodeWriter.h"
  19. #include "llvm/IR/Verifier.h"
  20. #include "llvm/Support/CommandLine.h"
  21. #include "llvm/Support/ThreadPool.h"
  22. #include "llvm/Support/ToolOutputFile.h"
  23. #include <fstream>
  24. #include <set>
  25. using namespace llvm;
  26. static cl::opt<bool> AbortOnInvalidReduction(
  27. "abort-on-invalid-reduction",
  28. cl::desc("Abort if any reduction results in invalid IR"));
  29. static cl::opt<unsigned int> StartingGranularityLevel(
  30. "starting-granularity-level",
  31. cl::desc("Number of times to divide chunks prior to first test"));
  32. static cl::opt<bool> TmpFilesAsBitcode(
  33. "write-tmp-files-as-bitcode",
  34. cl::desc("Write temporary files as bitcode, instead of textual IR"),
  35. cl::init(false));
  36. #ifdef LLVM_ENABLE_THREADS
  37. static cl::opt<unsigned> NumJobs(
  38. "j",
  39. cl::desc("Maximum number of threads to use to process chunks. Set to 1 to "
  40. "disables parallelism."),
  41. cl::init(1));
  42. #else
  43. unsigned NumJobs = 1;
  44. #endif
  45. void writeOutput(ReducerWorkItem &M, llvm::StringRef Message);
  46. bool isReduced(ReducerWorkItem &M, TestRunner &Test,
  47. SmallString<128> &CurrentFilepath) {
  48. // Write ReducerWorkItem to tmp file
  49. int FD;
  50. std::error_code EC = sys::fs::createTemporaryFile(
  51. "llvm-reduce", M.isMIR() ? "mir" : (TmpFilesAsBitcode ? "bc" : "ll"), FD,
  52. CurrentFilepath);
  53. if (EC) {
  54. errs() << "Error making unique filename: " << EC.message() << "!\n";
  55. exit(1);
  56. }
  57. if (TmpFilesAsBitcode) {
  58. llvm::raw_fd_ostream OutStream(FD, true);
  59. WriteBitcodeToFile(M, OutStream);
  60. OutStream.close();
  61. if (OutStream.has_error()) {
  62. errs() << "Error emitting bitcode to file '" << CurrentFilepath << "'!\n";
  63. sys::fs::remove(CurrentFilepath);
  64. exit(1);
  65. }
  66. bool Res = Test.run(CurrentFilepath);
  67. sys::fs::remove(CurrentFilepath);
  68. return Res;
  69. }
  70. ToolOutputFile Out(CurrentFilepath, FD);
  71. M.print(Out.os(), /*AnnotationWriter=*/nullptr);
  72. Out.os().close();
  73. if (Out.os().has_error()) {
  74. errs() << "Error emitting bitcode to file '" << CurrentFilepath << "'!\n";
  75. exit(1);
  76. }
  77. // Current Chunks aren't interesting
  78. return Test.run(CurrentFilepath);
  79. }
  80. /// Counts the amount of lines for a given file
  81. static int getLines(StringRef Filepath) {
  82. int Lines = 0;
  83. std::string CurrLine;
  84. std::ifstream FileStream{std::string(Filepath)};
  85. while (std::getline(FileStream, CurrLine))
  86. ++Lines;
  87. return Lines;
  88. }
  89. /// Splits Chunks in half and prints them.
  90. /// If unable to split (when chunk size is 1) returns false.
  91. static bool increaseGranularity(std::vector<Chunk> &Chunks) {
  92. errs() << "Increasing granularity...";
  93. std::vector<Chunk> NewChunks;
  94. bool SplitOne = false;
  95. for (auto &C : Chunks) {
  96. if (C.End - C.Begin == 0)
  97. NewChunks.push_back(C);
  98. else {
  99. int Half = (C.Begin + C.End) / 2;
  100. NewChunks.push_back({C.Begin, Half});
  101. NewChunks.push_back({Half + 1, C.End});
  102. SplitOne = true;
  103. }
  104. }
  105. if (SplitOne) {
  106. Chunks = NewChunks;
  107. errs() << "Success! New Chunks:\n";
  108. for (auto C : Chunks) {
  109. errs() << '\t';
  110. C.print();
  111. errs() << '\n';
  112. }
  113. }
  114. return SplitOne;
  115. }
  116. // Check if \p ChunkToCheckForUninterestingness is interesting. Returns the
  117. // modified module if the chunk resulted in a reduction.
  118. template <typename T>
  119. static std::unique_ptr<ReducerWorkItem>
  120. CheckChunk(Chunk &ChunkToCheckForUninterestingness,
  121. std::unique_ptr<ReducerWorkItem> Clone, TestRunner &Test,
  122. function_ref<void(Oracle &, T &)> ExtractChunksFromModule,
  123. std::set<Chunk> &UninterestingChunks,
  124. std::vector<Chunk> &ChunksStillConsideredInteresting) {
  125. // Take all of ChunksStillConsideredInteresting chunks, except those we've
  126. // already deemed uninteresting (UninterestingChunks) but didn't remove
  127. // from ChunksStillConsideredInteresting yet, and additionally ignore
  128. // ChunkToCheckForUninterestingness chunk.
  129. std::vector<Chunk> CurrentChunks;
  130. CurrentChunks.reserve(ChunksStillConsideredInteresting.size() -
  131. UninterestingChunks.size() - 1);
  132. copy_if(ChunksStillConsideredInteresting, std::back_inserter(CurrentChunks),
  133. [&](const Chunk &C) {
  134. return !UninterestingChunks.count(C) &&
  135. C != ChunkToCheckForUninterestingness;
  136. });
  137. // Generate Module with only Targets inside Current Chunks
  138. Oracle O(CurrentChunks);
  139. ExtractChunksFromModule(O, *Clone);
  140. // Some reductions may result in invalid IR. Skip such reductions.
  141. if (verifyReducerWorkItem(*Clone, &errs())) {
  142. if (AbortOnInvalidReduction) {
  143. errs() << "Invalid reduction\n";
  144. exit(1);
  145. }
  146. errs() << " **** WARNING | reduction resulted in invalid module, "
  147. "skipping\n";
  148. return nullptr;
  149. }
  150. errs() << "Ignoring: ";
  151. ChunkToCheckForUninterestingness.print();
  152. for (const Chunk &C : UninterestingChunks)
  153. C.print();
  154. SmallString<128> CurrentFilepath;
  155. if (!isReduced(*Clone, Test, CurrentFilepath)) {
  156. // Program became non-reduced, so this chunk appears to be interesting.
  157. errs() << "\n";
  158. return nullptr;
  159. }
  160. return Clone;
  161. }
  162. template <typename T>
  163. SmallString<0> ProcessChunkFromSerializedBitcode(
  164. Chunk &ChunkToCheckForUninterestingness, TestRunner &Test,
  165. function_ref<void(Oracle &, T &)> ExtractChunksFromModule,
  166. std::set<Chunk> &UninterestingChunks,
  167. std::vector<Chunk> &ChunksStillConsideredInteresting,
  168. SmallString<0> &OriginalBC, std::atomic<bool> &AnyReduced) {
  169. LLVMContext Ctx;
  170. Expected<std::unique_ptr<Module>> MOrErr = parseBitcodeFile(
  171. MemoryBufferRef(StringRef(OriginalBC.data(), OriginalBC.size()),
  172. "<llvm-reduce tmp module>"),
  173. Ctx);
  174. if (!MOrErr)
  175. report_fatal_error("Failed to read bitcode");
  176. auto CloneMMM = std::make_unique<ReducerWorkItem>();
  177. CloneMMM->M = std::move(MOrErr.get());
  178. SmallString<0> Result;
  179. if (std::unique_ptr<ReducerWorkItem> ChunkResult =
  180. CheckChunk(ChunkToCheckForUninterestingness, std::move(CloneMMM),
  181. Test, ExtractChunksFromModule, UninterestingChunks,
  182. ChunksStillConsideredInteresting)) {
  183. raw_svector_ostream BCOS(Result);
  184. WriteBitcodeToFile(*ChunkResult->M, BCOS);
  185. // Communicate that the task reduced a chunk.
  186. AnyReduced = true;
  187. }
  188. return Result;
  189. }
  190. /// Runs the Delta Debugging algorithm, splits the code into chunks and
  191. /// reduces the amount of chunks that are considered interesting by the
  192. /// given test. The number of chunks is determined by a preliminary run of the
  193. /// reduction pass where no change must be made to the module.
  194. template <typename T>
  195. void runDeltaPassInt(
  196. TestRunner &Test,
  197. function_ref<void(Oracle &, T &)> ExtractChunksFromModule) {
  198. assert(!verifyReducerWorkItem(Test.getProgram(), &errs()) &&
  199. "input module is broken before making changes");
  200. SmallString<128> CurrentFilepath;
  201. if (!isReduced(Test.getProgram(), Test, CurrentFilepath)) {
  202. errs() << "\nInput isn't interesting! Verify interesting-ness test\n";
  203. exit(1);
  204. }
  205. int Targets;
  206. {
  207. // Count the number of chunks by counting the number of calls to
  208. // Oracle::shouldKeep() but always returning true so no changes are
  209. // made.
  210. std::vector<Chunk> AllChunks = {{0, INT_MAX}};
  211. Oracle Counter(AllChunks);
  212. ExtractChunksFromModule(Counter, Test.getProgram());
  213. Targets = Counter.count();
  214. assert(!verifyReducerWorkItem(Test.getProgram(), &errs()) &&
  215. "input module is broken after counting chunks");
  216. assert(isReduced(Test.getProgram(), Test, CurrentFilepath) &&
  217. "input module no longer interesting after counting chunks");
  218. #ifndef NDEBUG
  219. // Make sure that the number of chunks does not change as we reduce.
  220. std::vector<Chunk> NoChunks;
  221. Oracle NoChunksCounter(NoChunks);
  222. std::unique_ptr<ReducerWorkItem> Clone =
  223. cloneReducerWorkItem(Test.getProgram());
  224. ExtractChunksFromModule(NoChunksCounter, *Clone);
  225. assert(Targets == NoChunksCounter.count() &&
  226. "number of chunks changes when reducing");
  227. #endif
  228. }
  229. if (!Targets) {
  230. errs() << "\nNothing to reduce\n";
  231. return;
  232. }
  233. std::vector<Chunk> ChunksStillConsideredInteresting = {{0, Targets - 1}};
  234. std::unique_ptr<ReducerWorkItem> ReducedProgram;
  235. for (unsigned int Level = 0; Level < StartingGranularityLevel; Level++) {
  236. increaseGranularity(ChunksStillConsideredInteresting);
  237. }
  238. std::atomic<bool> AnyReduced;
  239. std::unique_ptr<ThreadPool> ChunkThreadPoolPtr;
  240. if (NumJobs > 1)
  241. ChunkThreadPoolPtr =
  242. std::make_unique<ThreadPool>(hardware_concurrency(NumJobs));
  243. bool FoundAtLeastOneNewUninterestingChunkWithCurrentGranularity;
  244. do {
  245. FoundAtLeastOneNewUninterestingChunkWithCurrentGranularity = false;
  246. std::set<Chunk> UninterestingChunks;
  247. // When running with more than one thread, serialize the original bitcode
  248. // to OriginalBC.
  249. SmallString<0> OriginalBC;
  250. if (NumJobs > 1) {
  251. raw_svector_ostream BCOS(OriginalBC);
  252. WriteBitcodeToFile(*Test.getProgram().M, BCOS);
  253. }
  254. std::deque<std::shared_future<SmallString<0>>> TaskQueue;
  255. for (auto I = ChunksStillConsideredInteresting.rbegin(),
  256. E = ChunksStillConsideredInteresting.rend();
  257. I != E; ++I) {
  258. std::unique_ptr<ReducerWorkItem> Result = nullptr;
  259. unsigned WorkLeft = std::distance(I, E);
  260. // Run in parallel mode, if the user requested more than one thread and
  261. // there are at least a few chunks to process.
  262. if (NumJobs > 1 && WorkLeft > 1) {
  263. unsigned NumInitialTasks = std::min(WorkLeft, unsigned(NumJobs));
  264. unsigned NumChunksProcessed = 0;
  265. ThreadPool &ChunkThreadPool = *ChunkThreadPoolPtr;
  266. TaskQueue.clear();
  267. AnyReduced = false;
  268. // Queue jobs to process NumInitialTasks chunks in parallel using
  269. // ChunkThreadPool. When the tasks are added to the pool, parse the
  270. // original module from OriginalBC with a fresh LLVMContext object. This
  271. // ensures that the cloned module of each task uses an independent
  272. // LLVMContext object. If a task reduces the input, serialize the result
  273. // back in the corresponding Result element.
  274. for (unsigned J = 0; J < NumInitialTasks; ++J) {
  275. TaskQueue.emplace_back(ChunkThreadPool.async(
  276. [J, I, &Test, &ExtractChunksFromModule, &UninterestingChunks,
  277. &ChunksStillConsideredInteresting, &OriginalBC, &AnyReduced]() {
  278. return ProcessChunkFromSerializedBitcode(
  279. *(I + J), Test, ExtractChunksFromModule,
  280. UninterestingChunks, ChunksStillConsideredInteresting,
  281. OriginalBC, AnyReduced);
  282. }));
  283. }
  284. // Start processing results of the queued tasks. We wait for the first
  285. // task in the queue to finish. If it reduced a chunk, we parse the
  286. // result and exit the loop.
  287. // Otherwise we will try to schedule a new task, if
  288. // * no other pending job reduced a chunk and
  289. // * we have not reached the end of the chunk.
  290. while (!TaskQueue.empty()) {
  291. auto &Future = TaskQueue.front();
  292. Future.wait();
  293. NumChunksProcessed++;
  294. SmallString<0> Res = Future.get();
  295. TaskQueue.pop_front();
  296. if (Res.empty()) {
  297. unsigned NumScheduledTasks = NumChunksProcessed + TaskQueue.size();
  298. if (!AnyReduced && I + NumScheduledTasks != E) {
  299. Chunk &ChunkToCheck = *(I + NumScheduledTasks);
  300. TaskQueue.emplace_back(ChunkThreadPool.async(
  301. [&Test, &ExtractChunksFromModule, &UninterestingChunks,
  302. &ChunksStillConsideredInteresting, &OriginalBC,
  303. &ChunkToCheck, &AnyReduced]() {
  304. return ProcessChunkFromSerializedBitcode(
  305. ChunkToCheck, Test, ExtractChunksFromModule,
  306. UninterestingChunks, ChunksStillConsideredInteresting,
  307. OriginalBC, AnyReduced);
  308. }));
  309. }
  310. continue;
  311. }
  312. Expected<std::unique_ptr<Module>> MOrErr = parseBitcodeFile(
  313. MemoryBufferRef(StringRef(Res.data(), Res.size()),
  314. "<llvm-reduce tmp module>"),
  315. Test.getProgram().M->getContext());
  316. if (!MOrErr)
  317. report_fatal_error("Failed to read bitcode");
  318. Result = std::make_unique<ReducerWorkItem>();
  319. Result->M = std::move(MOrErr.get());
  320. break;
  321. }
  322. // Forward I to the last chunk processed in parallel.
  323. I += NumChunksProcessed - 1;
  324. } else {
  325. Result = CheckChunk(*I, cloneReducerWorkItem(Test.getProgram()), Test,
  326. ExtractChunksFromModule, UninterestingChunks,
  327. ChunksStillConsideredInteresting);
  328. }
  329. if (!Result)
  330. continue;
  331. Chunk &ChunkToCheckForUninterestingness = *I;
  332. FoundAtLeastOneNewUninterestingChunkWithCurrentGranularity = true;
  333. UninterestingChunks.insert(ChunkToCheckForUninterestingness);
  334. ReducedProgram = std::move(Result);
  335. errs() << " **** SUCCESS | lines: " << getLines(CurrentFilepath) << "\n";
  336. writeOutput(*ReducedProgram, "Saved new best reduction to ");
  337. }
  338. // Delete uninteresting chunks
  339. erase_if(ChunksStillConsideredInteresting,
  340. [&UninterestingChunks](const Chunk &C) {
  341. return UninterestingChunks.count(C);
  342. });
  343. } while (!ChunksStillConsideredInteresting.empty() &&
  344. (FoundAtLeastOneNewUninterestingChunkWithCurrentGranularity ||
  345. increaseGranularity(ChunksStillConsideredInteresting)));
  346. // If we reduced the testcase replace it
  347. if (ReducedProgram)
  348. Test.setProgram(std::move(ReducedProgram));
  349. errs() << "Couldn't increase anymore.\n";
  350. }
  351. void llvm::runDeltaPass(
  352. TestRunner &Test,
  353. function_ref<void(Oracle &, Module &)> ExtractChunksFromModule) {
  354. runDeltaPassInt<Module>(Test, ExtractChunksFromModule);
  355. }
  356. void llvm::runDeltaPass(
  357. TestRunner &Test,
  358. function_ref<void(Oracle &, MachineFunction &)> ExtractChunksFromModule) {
  359. runDeltaPassInt<MachineFunction>(Test, ExtractChunksFromModule);
  360. }