123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173 |
- //===- split-file.cpp - Input splitting utility ---------------------------===//
- //
- // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
- // See https://llvm.org/LICENSE.txt for license information.
- // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
- //
- //===----------------------------------------------------------------------===//
- //
- // Split input into multipe parts separated by regex '^(.|//)--- ' and extract
- // the specified part.
- //
- //===----------------------------------------------------------------------===//
- #include "llvm/ADT/DenseMap.h"
- #include "llvm/ADT/StringExtras.h"
- #include "llvm/ADT/StringRef.h"
- #include "llvm/Support/CommandLine.h"
- #include "llvm/Support/FileOutputBuffer.h"
- #include "llvm/Support/LineIterator.h"
- #include "llvm/Support/MemoryBuffer.h"
- #include "llvm/Support/Path.h"
- #include "llvm/Support/ToolOutputFile.h"
- #include "llvm/Support/WithColor.h"
- #include <string>
- #include <system_error>
- using namespace llvm;
- static cl::OptionCategory cat("split-file Options");
- static cl::opt<std::string> input(cl::Positional, cl::desc("filename"),
- cl::cat(cat));
- static cl::opt<std::string> output(cl::Positional, cl::desc("directory"),
- cl::value_desc("directory"), cl::cat(cat));
- static cl::opt<bool> noLeadingLines("no-leading-lines",
- cl::desc("Don't preserve line numbers"),
- cl::cat(cat));
- static StringRef toolName;
- static int errorCount;
- LLVM_ATTRIBUTE_NORETURN static void fatal(StringRef filename,
- const Twine &message) {
- if (filename.empty())
- WithColor::error(errs(), toolName) << message << '\n';
- else
- WithColor::error(errs(), toolName) << filename << ": " << message << '\n';
- exit(1);
- }
- static void error(StringRef filename, int64_t line, const Twine &message) {
- ++errorCount;
- errs() << filename << ':' << line << ": ";
- WithColor::error(errs()) << message << '\n';
- }
- namespace {
- struct Part {
- const char *begin = nullptr;
- const char *end = nullptr;
- int64_t leadingLines = 0;
- };
- } // namespace
- static int handle(MemoryBuffer &inputBuf, StringRef input) {
- DenseMap<StringRef, Part> partToBegin;
- StringRef lastPart, separator;
- for (line_iterator i(inputBuf, /*SkipBlanks=*/false, '\0'); !i.is_at_eof();) {
- const int64_t lineNo = i.line_number();
- const StringRef line = *i++;
- const size_t markerLen = line.startswith("//") ? 6 : 5;
- if (!(line.size() >= markerLen &&
- line.substr(markerLen - 4).startswith("--- ")))
- continue;
- separator = line.substr(0, markerLen);
- const StringRef partName = line.substr(markerLen);
- if (partName.empty()) {
- error(input, lineNo, "empty part name");
- continue;
- }
- if (isSpace(partName.front()) || isSpace(partName.back())) {
- error(input, lineNo, "part name cannot have leading or trailing space");
- continue;
- }
- auto res = partToBegin.try_emplace(partName);
- if (!res.second) {
- error(input, lineNo,
- "'" + separator + partName + "' occurs more than once");
- continue;
- }
- if (!lastPart.empty())
- partToBegin[lastPart].end = line.data();
- Part &cur = res.first->second;
- if (!i.is_at_eof())
- cur.begin = i->data();
- // If --no-leading-lines is not specified, numEmptyLines is 0. Append
- // newlines so that the extracted part preserves line numbers.
- cur.leadingLines = noLeadingLines ? 0 : i.line_number() - 1;
- lastPart = partName;
- }
- if (lastPart.empty())
- fatal(input, "no part separator was found");
- if (errorCount)
- return 1;
- partToBegin[lastPart].end = inputBuf.getBufferEnd();
- std::vector<std::unique_ptr<ToolOutputFile>> outputFiles;
- SmallString<256> partPath;
- for (auto &keyValue : partToBegin) {
- partPath.clear();
- sys::path::append(partPath, output, keyValue.first);
- std::error_code ec =
- sys::fs::create_directories(sys::path::parent_path(partPath));
- if (ec)
- fatal(input, ec.message());
- auto f = std::make_unique<ToolOutputFile>(partPath.str(), ec,
- llvm::sys::fs::OF_None);
- if (!f)
- fatal(input, ec.message());
- Part &part = keyValue.second;
- for (int64_t i = 0; i != part.leadingLines; ++i)
- (*f).os().write('\n');
- if (part.begin)
- (*f).os().write(part.begin, part.end - part.begin);
- outputFiles.push_back(std::move(f));
- }
- for (std::unique_ptr<ToolOutputFile> &outputFile : outputFiles)
- outputFile->keep();
- return 0;
- }
- int main(int argc, const char **argv) {
- toolName = sys::path::stem(argv[0]);
- cl::HideUnrelatedOptions({&cat});
- cl::ParseCommandLineOptions(
- argc, argv,
- "Split input into multiple parts separated by regex '^(.|//)--- ' and "
- "extract the part specified by '^(.|//)--- <part>'\n",
- nullptr,
- /*EnvVar=*/nullptr,
- /*LongOptionsUseDoubleDash=*/true);
- if (input.empty())
- fatal("", "input filename is not specified");
- if (output.empty())
- fatal("", "output directory is not specified");
- ErrorOr<std::unique_ptr<MemoryBuffer>> bufferOrErr =
- MemoryBuffer::getFileOrSTDIN(input);
- if (std::error_code ec = bufferOrErr.getError())
- fatal(input, ec.message());
- // Delete output if it is a file or an empty directory, so that we can create
- // a directory.
- sys::fs::file_status status;
- if (std::error_code ec = sys::fs::status(output, status))
- if (ec.value() != static_cast<int>(std::errc::no_such_file_or_directory))
- fatal(output, ec.message());
- if (status.type() != sys::fs::file_type::file_not_found &&
- status.type() != sys::fs::file_type::directory_file &&
- status.type() != sys::fs::file_type::regular_file)
- fatal(output, "output cannot be a special file");
- if (std::error_code ec = sys::fs::remove(output, /*IgnoreNonExisting=*/true))
- if (ec.value() != static_cast<int>(std::errc::directory_not_empty) &&
- ec.value() != static_cast<int>(std::errc::file_exists))
- fatal(output, ec.message());
- return handle(**bufferOrErr, input);
- }
|