split-file.cpp 6.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178
  1. //===- split-file.cpp - Input splitting utility ---------------------------===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. // Split input into multiple parts separated by regex '^(.|//)--- ' and extract
  10. // the specified part.
  11. //
  12. //===----------------------------------------------------------------------===//
  13. #include "llvm/ADT/DenseMap.h"
  14. #include "llvm/ADT/StringExtras.h"
  15. #include "llvm/ADT/StringRef.h"
  16. #include "llvm/Support/CommandLine.h"
  17. #include "llvm/Support/FileOutputBuffer.h"
  18. #include "llvm/Support/FileSystem.h"
  19. #include "llvm/Support/LineIterator.h"
  20. #include "llvm/Support/MemoryBuffer.h"
  21. #include "llvm/Support/Path.h"
  22. #include "llvm/Support/ToolOutputFile.h"
  23. #include "llvm/Support/WithColor.h"
  24. #include <string>
  25. #include <system_error>
  26. using namespace llvm;
  27. static cl::OptionCategory cat("split-file Options");
  28. static cl::opt<std::string> input(cl::Positional, cl::desc("filename"),
  29. cl::cat(cat));
  30. static cl::opt<std::string> output(cl::Positional, cl::desc("directory"),
  31. cl::value_desc("directory"), cl::cat(cat));
  32. static cl::opt<bool> leadingLines("leading-lines",
  33. cl::desc("Preserve line numbers"),
  34. cl::cat(cat));
  35. static cl::opt<bool> noLeadingLines("no-leading-lines",
  36. cl::desc("Don't preserve line numbers (default)"),
  37. cl::cat(cat));
  38. static StringRef toolName;
  39. static int errorCount;
  40. [[noreturn]] static void fatal(StringRef filename, const Twine &message) {
  41. if (filename.empty())
  42. WithColor::error(errs(), toolName) << message << '\n';
  43. else
  44. WithColor::error(errs(), toolName) << filename << ": " << message << '\n';
  45. exit(1);
  46. }
  47. static void error(StringRef filename, int64_t line, const Twine &message) {
  48. ++errorCount;
  49. errs() << filename << ':' << line << ": ";
  50. WithColor::error(errs()) << message << '\n';
  51. }
  namespace {
  /// Byte range of one named part inside the input buffer, as filled in by
  /// handle().
  struct Part {
    /// First byte of the part's content (start of the line following its
    /// separator); stays null when the separator is the last line of the input.
    const char *begin{nullptr};
    /// One past the last byte of the part: the start of the next separator line,
    /// or the end of the input buffer for the final part.
    const char *end{nullptr};
    /// Number of end-of-line sequences to write before the content.
    int64_t leadingLines{0};
  };
  } // namespace
  59. static int handle(MemoryBuffer &inputBuf, StringRef input) {
  60. DenseMap<StringRef, Part> partToBegin;
  61. StringRef lastPart, separator;
  62. StringRef EOL = inputBuf.getBuffer().detectEOL();
  63. for (line_iterator i(inputBuf, /*SkipBlanks=*/false, '\0'); !i.is_at_eof();) {
  64. const int64_t lineNo = i.line_number();
  65. const StringRef line = *i++;
  66. const size_t markerLen = line.startswith("//") ? 6 : 5;
  67. if (!(line.size() >= markerLen &&
  68. line.substr(markerLen - 4).startswith("--- ")))
  69. continue;
  70. separator = line.substr(0, markerLen);
  71. const StringRef partName = line.substr(markerLen);
  72. if (partName.empty()) {
  73. error(input, lineNo, "empty part name");
  74. continue;
  75. }
  76. if (isSpace(partName.front()) || isSpace(partName.back())) {
  77. error(input, lineNo, "part name cannot have leading or trailing space");
  78. continue;
  79. }
  80. auto res = partToBegin.try_emplace(partName);
  81. if (!res.second) {
  82. error(input, lineNo,
  83. "'" + separator + partName + "' occurs more than once");
  84. continue;
  85. }
  86. if (!lastPart.empty())
  87. partToBegin[lastPart].end = line.data();
  88. Part &cur = res.first->second;
  89. if (!i.is_at_eof())
  90. cur.begin = i->data();
  91. // If --leading-lines is specified, numEmptyLines is 0. Append newlines so
  92. // that the extracted part preserves line numbers.
  93. cur.leadingLines = leadingLines ? i.line_number() - 1 : 0;
  94. lastPart = partName;
  95. }
  96. if (lastPart.empty())
  97. fatal(input, "no part separator was found");
  98. if (errorCount)
  99. return 1;
  100. partToBegin[lastPart].end = inputBuf.getBufferEnd();
  101. std::vector<std::unique_ptr<ToolOutputFile>> outputFiles;
  102. SmallString<256> partPath;
  103. for (auto &keyValue : partToBegin) {
  104. partPath.clear();
  105. sys::path::append(partPath, output, keyValue.first);
  106. std::error_code ec =
  107. sys::fs::create_directories(sys::path::parent_path(partPath));
  108. if (ec)
  109. fatal(input, ec.message());
  110. auto f = std::make_unique<ToolOutputFile>(partPath.str(), ec,
  111. llvm::sys::fs::OF_None);
  112. if (!f)
  113. fatal(input, ec.message());
  114. Part &part = keyValue.second;
  115. for (int64_t i = 0; i != part.leadingLines; ++i)
  116. (*f).os() << EOL;
  117. if (part.begin)
  118. (*f).os().write(part.begin, part.end - part.begin);
  119. outputFiles.push_back(std::move(f));
  120. }
  121. for (std::unique_ptr<ToolOutputFile> &outputFile : outputFiles)
  122. outputFile->keep();
  123. return 0;
  124. }
  125. int main(int argc, const char **argv) {
  126. toolName = sys::path::stem(argv[0]);
  127. cl::HideUnrelatedOptions({&cat});
  128. cl::ParseCommandLineOptions(
  129. argc, argv,
  130. "Split input into multiple parts separated by regex '^(.|//)--- ' and "
  131. "extract the part specified by '^(.|//)--- <part>'\n",
  132. nullptr,
  133. /*EnvVar=*/nullptr,
  134. /*LongOptionsUseDoubleDash=*/true);
  135. if (input.empty())
  136. fatal("", "input filename is not specified");
  137. if (output.empty())
  138. fatal("", "output directory is not specified");
  139. ErrorOr<std::unique_ptr<MemoryBuffer>> bufferOrErr =
  140. MemoryBuffer::getFileOrSTDIN(input);
  141. if (std::error_code ec = bufferOrErr.getError())
  142. fatal(input, ec.message());
  143. // Delete output if it is a file or an empty directory, so that we can create
  144. // a directory.
  145. sys::fs::file_status status;
  146. if (std::error_code ec = sys::fs::status(output, status))
  147. if (ec.value() != static_cast<int>(std::errc::no_such_file_or_directory))
  148. fatal(output, ec.message());
  149. if (status.type() != sys::fs::file_type::file_not_found &&
  150. status.type() != sys::fs::file_type::directory_file &&
  151. status.type() != sys::fs::file_type::regular_file)
  152. fatal(output, "output cannot be a special file");
  153. if (std::error_code ec = sys::fs::remove(output, /*IgnoreNonExisting=*/true))
  154. if (ec.value() != static_cast<int>(std::errc::directory_not_empty) &&
  155. ec.value() != static_cast<int>(std::errc::file_exists))
  156. fatal(output, ec.message());
  157. return handle(**bufferOrErr, input);
  158. }