123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415 |
- //===--- HeaderIncludes.cpp - Insert/Delete #includes --*- C++ -*----------===//
- //
- // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
- // See https://llvm.org/LICENSE.txt for license information.
- // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
- //
- //===----------------------------------------------------------------------===//
- #include "clang/Tooling/Inclusions/HeaderIncludes.h"
- #include "clang/Basic/FileManager.h"
- #include "clang/Basic/SourceManager.h"
- #include "clang/Lex/Lexer.h"
- #include "llvm/Support/FormatVariadic.h"
- #include "llvm/Support/Path.h"
- #include <optional>
- namespace clang {
- namespace tooling {
- namespace {
- LangOptions createLangOpts() {
- LangOptions LangOpts;
- LangOpts.CPlusPlus = 1;
- LangOpts.CPlusPlus11 = 1;
- LangOpts.CPlusPlus14 = 1;
- LangOpts.LineComment = 1;
- LangOpts.CXXOperatorNames = 1;
- LangOpts.Bool = 1;
- LangOpts.ObjC = 1;
- LangOpts.MicrosoftExt = 1; // To get kw___try, kw___finally.
- LangOpts.DeclSpecKeyword = 1; // To get __declspec.
- LangOpts.WChar = 1; // To get wchar_t
- return LangOpts;
- }
- // Returns the offset after skipping a sequence of tokens, matched by \p
- // GetOffsetAfterSequence, from the start of the code.
- // \p GetOffsetAfterSequence should be a function that matches a sequence of
- // tokens and returns an offset after the sequence.
- unsigned getOffsetAfterTokenSequence(
- StringRef FileName, StringRef Code, const IncludeStyle &Style,
- llvm::function_ref<unsigned(const SourceManager &, Lexer &, Token &)>
- GetOffsetAfterSequence) {
- SourceManagerForFile VirtualSM(FileName, Code);
- SourceManager &SM = VirtualSM.get();
- LangOptions LangOpts = createLangOpts();
- Lexer Lex(SM.getMainFileID(), SM.getBufferOrFake(SM.getMainFileID()), SM,
- LangOpts);
- Token Tok;
- // Get the first token.
- Lex.LexFromRawLexer(Tok);
- return GetOffsetAfterSequence(SM, Lex, Tok);
- }
- // Check if a sequence of tokens is like "#<Name> <raw_identifier>". If it is,
- // \p Tok will be the token after this directive; otherwise, it can be any token
- // after the given \p Tok (including \p Tok). If \p RawIDName is provided, the
- // (second) raw_identifier name is checked.
- bool checkAndConsumeDirectiveWithName(
- Lexer &Lex, StringRef Name, Token &Tok,
- std::optional<StringRef> RawIDName = std::nullopt) {
- bool Matched = Tok.is(tok::hash) && !Lex.LexFromRawLexer(Tok) &&
- Tok.is(tok::raw_identifier) &&
- Tok.getRawIdentifier() == Name && !Lex.LexFromRawLexer(Tok) &&
- Tok.is(tok::raw_identifier) &&
- (!RawIDName || Tok.getRawIdentifier() == *RawIDName);
- if (Matched)
- Lex.LexFromRawLexer(Tok);
- return Matched;
- }
- void skipComments(Lexer &Lex, Token &Tok) {
- while (Tok.is(tok::comment))
- if (Lex.LexFromRawLexer(Tok))
- return;
- }
- // Returns the offset after header guard directives and any comments
- // before/after header guards (e.g. #ifndef/#define pair, #pragma once). If no
- // header guard is present in the code, this will return the offset after
- // skipping all comments from the start of the code.
- unsigned getOffsetAfterHeaderGuardsAndComments(StringRef FileName,
- StringRef Code,
- const IncludeStyle &Style) {
- // \p Consume returns location after header guard or 0 if no header guard is
- // found.
- auto ConsumeHeaderGuardAndComment =
- [&](std::function<unsigned(const SourceManager &SM, Lexer &Lex,
- Token Tok)>
- Consume) {
- return getOffsetAfterTokenSequence(
- FileName, Code, Style,
- [&Consume](const SourceManager &SM, Lexer &Lex, Token Tok) {
- skipComments(Lex, Tok);
- unsigned InitialOffset = SM.getFileOffset(Tok.getLocation());
- return std::max(InitialOffset, Consume(SM, Lex, Tok));
- });
- };
- return std::max(
- // #ifndef/#define
- ConsumeHeaderGuardAndComment(
- [](const SourceManager &SM, Lexer &Lex, Token Tok) -> unsigned {
- if (checkAndConsumeDirectiveWithName(Lex, "ifndef", Tok)) {
- skipComments(Lex, Tok);
- if (checkAndConsumeDirectiveWithName(Lex, "define", Tok) &&
- Tok.isAtStartOfLine())
- return SM.getFileOffset(Tok.getLocation());
- }
- return 0;
- }),
- // #pragma once
- ConsumeHeaderGuardAndComment(
- [](const SourceManager &SM, Lexer &Lex, Token Tok) -> unsigned {
- if (checkAndConsumeDirectiveWithName(Lex, "pragma", Tok,
- StringRef("once")))
- return SM.getFileOffset(Tok.getLocation());
- return 0;
- }));
- }
- // Check if a sequence of tokens is like
- // "#include ("header.h" | <header.h>)".
- // If it is, \p Tok will be the token after this directive; otherwise, it can be
- // any token after the given \p Tok (including \p Tok).
- bool checkAndConsumeInclusiveDirective(Lexer &Lex, Token &Tok) {
- auto Matched = [&]() {
- Lex.LexFromRawLexer(Tok);
- return true;
- };
- if (Tok.is(tok::hash) && !Lex.LexFromRawLexer(Tok) &&
- Tok.is(tok::raw_identifier) && Tok.getRawIdentifier() == "include") {
- if (Lex.LexFromRawLexer(Tok))
- return false;
- if (Tok.is(tok::string_literal))
- return Matched();
- if (Tok.is(tok::less)) {
- while (!Lex.LexFromRawLexer(Tok) && Tok.isNot(tok::greater)) {
- }
- if (Tok.is(tok::greater))
- return Matched();
- }
- }
- return false;
- }
- // Returns the offset of the last #include directive after which a new
- // #include can be inserted. This ignores #include's after the #include block(s)
- // in the beginning of a file to avoid inserting headers into code sections
- // where new #include's should not be added by default.
- // These code sections include:
- // - raw string literals (containing #include).
- // - #if blocks.
- // - Special #include's among declarations (e.g. functions).
- //
- // If no #include after which a new #include can be inserted, this returns the
- // offset after skipping all comments from the start of the code.
- // Inserting after an #include is not allowed if it comes after code that is not
- // #include (e.g. pre-processing directive that is not #include, declarations).
- unsigned getMaxHeaderInsertionOffset(StringRef FileName, StringRef Code,
- const IncludeStyle &Style) {
- return getOffsetAfterTokenSequence(
- FileName, Code, Style,
- [](const SourceManager &SM, Lexer &Lex, Token Tok) {
- skipComments(Lex, Tok);
- unsigned MaxOffset = SM.getFileOffset(Tok.getLocation());
- while (checkAndConsumeInclusiveDirective(Lex, Tok))
- MaxOffset = SM.getFileOffset(Tok.getLocation());
- return MaxOffset;
- });
- }
- inline StringRef trimInclude(StringRef IncludeName) {
- return IncludeName.trim("\"<>");
- }
- const char IncludeRegexPattern[] =
- R"(^[\t\ ]*#[\t\ ]*(import|include)[^"<]*(["<][^">]*[">]))";
- // The filename of Path excluding extension.
- // Used to match implementation with headers, this differs from sys::path::stem:
- // - in names with multiple dots (foo.cu.cc) it terminates at the *first*
- // - an empty stem is never returned: /foo/.bar.x => .bar
- // - we don't bother to handle . and .. specially
- StringRef matchingStem(llvm::StringRef Path) {
- StringRef Name = llvm::sys::path::filename(Path);
- return Name.substr(0, Name.find('.', 1));
- }
- } // anonymous namespace
- IncludeCategoryManager::IncludeCategoryManager(const IncludeStyle &Style,
- StringRef FileName)
- : Style(Style), FileName(FileName) {
- for (const auto &Category : Style.IncludeCategories) {
- CategoryRegexs.emplace_back(Category.Regex, Category.RegexIsCaseSensitive
- ? llvm::Regex::NoFlags
- : llvm::Regex::IgnoreCase);
- }
- IsMainFile = FileName.endswith(".c") || FileName.endswith(".cc") ||
- FileName.endswith(".cpp") || FileName.endswith(".c++") ||
- FileName.endswith(".cxx") || FileName.endswith(".m") ||
- FileName.endswith(".mm");
- if (!Style.IncludeIsMainSourceRegex.empty()) {
- llvm::Regex MainFileRegex(Style.IncludeIsMainSourceRegex);
- IsMainFile |= MainFileRegex.match(FileName);
- }
- }
- int IncludeCategoryManager::getIncludePriority(StringRef IncludeName,
- bool CheckMainHeader) const {
- int Ret = INT_MAX;
- for (unsigned i = 0, e = CategoryRegexs.size(); i != e; ++i)
- if (CategoryRegexs[i].match(IncludeName)) {
- Ret = Style.IncludeCategories[i].Priority;
- break;
- }
- if (CheckMainHeader && IsMainFile && Ret > 0 && isMainHeader(IncludeName))
- Ret = 0;
- return Ret;
- }
- int IncludeCategoryManager::getSortIncludePriority(StringRef IncludeName,
- bool CheckMainHeader) const {
- int Ret = INT_MAX;
- for (unsigned i = 0, e = CategoryRegexs.size(); i != e; ++i)
- if (CategoryRegexs[i].match(IncludeName)) {
- Ret = Style.IncludeCategories[i].SortPriority;
- if (Ret == 0)
- Ret = Style.IncludeCategories[i].Priority;
- break;
- }
- if (CheckMainHeader && IsMainFile && Ret > 0 && isMainHeader(IncludeName))
- Ret = 0;
- return Ret;
- }
- bool IncludeCategoryManager::isMainHeader(StringRef IncludeName) const {
- if (!IncludeName.startswith("\""))
- return false;
- IncludeName =
- IncludeName.drop_front(1).drop_back(1); // remove the surrounding "" or <>
- // Not matchingStem: implementation files may have compound extensions but
- // headers may not.
- StringRef HeaderStem = llvm::sys::path::stem(IncludeName);
- StringRef FileStem = llvm::sys::path::stem(FileName); // foo.cu for foo.cu.cc
- StringRef MatchingFileStem = matchingStem(FileName); // foo for foo.cu.cc
- // main-header examples:
- // 1) foo.h => foo.cc
- // 2) foo.h => foo.cu.cc
- // 3) foo.proto.h => foo.proto.cc
- //
- // non-main-header examples:
- // 1) foo.h => bar.cc
- // 2) foo.proto.h => foo.cc
- StringRef Matching;
- if (MatchingFileStem.startswith_insensitive(HeaderStem))
- Matching = MatchingFileStem; // example 1), 2)
- else if (FileStem.equals_insensitive(HeaderStem))
- Matching = FileStem; // example 3)
- if (!Matching.empty()) {
- llvm::Regex MainIncludeRegex(HeaderStem.str() + Style.IncludeIsMainRegex,
- llvm::Regex::IgnoreCase);
- if (MainIncludeRegex.match(Matching))
- return true;
- }
- return false;
- }
- const llvm::Regex HeaderIncludes::IncludeRegex(IncludeRegexPattern);
- HeaderIncludes::HeaderIncludes(StringRef FileName, StringRef Code,
- const IncludeStyle &Style)
- : FileName(FileName), Code(Code), FirstIncludeOffset(-1),
- MinInsertOffset(
- getOffsetAfterHeaderGuardsAndComments(FileName, Code, Style)),
- MaxInsertOffset(MinInsertOffset +
- getMaxHeaderInsertionOffset(
- FileName, Code.drop_front(MinInsertOffset), Style)),
- Categories(Style, FileName) {
- // Add 0 for main header and INT_MAX for headers that are not in any
- // category.
- Priorities = {0, INT_MAX};
- for (const auto &Category : Style.IncludeCategories)
- Priorities.insert(Category.Priority);
- SmallVector<StringRef, 32> Lines;
- Code.drop_front(MinInsertOffset).split(Lines, "\n");
- unsigned Offset = MinInsertOffset;
- unsigned NextLineOffset;
- SmallVector<StringRef, 4> Matches;
- for (auto Line : Lines) {
- NextLineOffset = std::min(Code.size(), Offset + Line.size() + 1);
- if (IncludeRegex.match(Line, &Matches)) {
- // If this is the last line without trailing newline, we need to make
- // sure we don't delete across the file boundary.
- addExistingInclude(
- Include(Matches[2],
- tooling::Range(
- Offset, std::min(Line.size() + 1, Code.size() - Offset)),
- Matches[1] == "import" ? tooling::IncludeDirective::Import
- : tooling::IncludeDirective::Include),
- NextLineOffset);
- }
- Offset = NextLineOffset;
- }
- // Populate CategoryEndOfssets:
- // - Ensure that CategoryEndOffset[Highest] is always populated.
- // - If CategoryEndOffset[Priority] isn't set, use the next higher value
- // that is set, up to CategoryEndOffset[Highest].
- auto Highest = Priorities.begin();
- if (CategoryEndOffsets.find(*Highest) == CategoryEndOffsets.end()) {
- if (FirstIncludeOffset >= 0)
- CategoryEndOffsets[*Highest] = FirstIncludeOffset;
- else
- CategoryEndOffsets[*Highest] = MinInsertOffset;
- }
- // By this point, CategoryEndOffset[Highest] is always set appropriately:
- // - to an appropriate location before/after existing #includes, or
- // - to right after the header guard, or
- // - to the beginning of the file.
- for (auto I = ++Priorities.begin(), E = Priorities.end(); I != E; ++I)
- if (CategoryEndOffsets.find(*I) == CategoryEndOffsets.end())
- CategoryEndOffsets[*I] = CategoryEndOffsets[*std::prev(I)];
- }
- // \p Offset: the start of the line following this include directive.
- void HeaderIncludes::addExistingInclude(Include IncludeToAdd,
- unsigned NextLineOffset) {
- auto Iter =
- ExistingIncludes.try_emplace(trimInclude(IncludeToAdd.Name)).first;
- Iter->second.push_back(std::move(IncludeToAdd));
- auto &CurInclude = Iter->second.back();
- // The header name with quotes or angle brackets.
- // Only record the offset of current #include if we can insert after it.
- if (CurInclude.R.getOffset() <= MaxInsertOffset) {
- int Priority = Categories.getIncludePriority(
- CurInclude.Name, /*CheckMainHeader=*/FirstIncludeOffset < 0);
- CategoryEndOffsets[Priority] = NextLineOffset;
- IncludesByPriority[Priority].push_back(&CurInclude);
- if (FirstIncludeOffset < 0)
- FirstIncludeOffset = CurInclude.R.getOffset();
- }
- }
- std::optional<tooling::Replacement>
- HeaderIncludes::insert(llvm::StringRef IncludeName, bool IsAngled,
- IncludeDirective Directive) const {
- assert(IncludeName == trimInclude(IncludeName));
- // If a <header> ("header") already exists in code, "header" (<header>) with
- // different quotation and/or directive will still be inserted.
- // FIXME: figure out if this is the best behavior.
- auto It = ExistingIncludes.find(IncludeName);
- if (It != ExistingIncludes.end()) {
- for (const auto &Inc : It->second)
- if (Inc.Directive == Directive &&
- ((IsAngled && StringRef(Inc.Name).startswith("<")) ||
- (!IsAngled && StringRef(Inc.Name).startswith("\""))))
- return std::nullopt;
- }
- std::string Quoted =
- std::string(llvm::formatv(IsAngled ? "<{0}>" : "\"{0}\"", IncludeName));
- StringRef QuotedName = Quoted;
- int Priority = Categories.getIncludePriority(
- QuotedName, /*CheckMainHeader=*/FirstIncludeOffset < 0);
- auto CatOffset = CategoryEndOffsets.find(Priority);
- assert(CatOffset != CategoryEndOffsets.end());
- unsigned InsertOffset = CatOffset->second; // Fall back offset
- auto Iter = IncludesByPriority.find(Priority);
- if (Iter != IncludesByPriority.end()) {
- for (const auto *Inc : Iter->second) {
- if (QuotedName < Inc->Name) {
- InsertOffset = Inc->R.getOffset();
- break;
- }
- }
- }
- assert(InsertOffset <= Code.size());
- llvm::StringRef DirectiveSpelling =
- Directive == IncludeDirective::Include ? "include" : "import";
- std::string NewInclude =
- llvm::formatv("#{0} {1}\n", DirectiveSpelling, QuotedName);
- // When inserting headers at end of the code, also append '\n' to the code
- // if it does not end with '\n'.
- // FIXME: when inserting multiple #includes at the end of code, only one
- // newline should be added.
- if (InsertOffset == Code.size() && (!Code.empty() && Code.back() != '\n'))
- NewInclude = "\n" + NewInclude;
- return tooling::Replacement(FileName, InsertOffset, 0, NewInclude);
- }
- tooling::Replacements HeaderIncludes::remove(llvm::StringRef IncludeName,
- bool IsAngled) const {
- assert(IncludeName == trimInclude(IncludeName));
- tooling::Replacements Result;
- auto Iter = ExistingIncludes.find(IncludeName);
- if (Iter == ExistingIncludes.end())
- return Result;
- for (const auto &Inc : Iter->second) {
- if ((IsAngled && StringRef(Inc.Name).startswith("\"")) ||
- (!IsAngled && StringRef(Inc.Name).startswith("<")))
- continue;
- llvm::Error Err = Result.add(tooling::Replacement(
- FileName, Inc.R.getOffset(), Inc.R.getLength(), ""));
- if (Err) {
- auto ErrMsg = "Unexpected conflicts in #include deletions: " +
- llvm::toString(std::move(Err));
- llvm_unreachable(ErrMsg.c_str());
- }
- }
- return Result;
- }
- } // namespace tooling
- } // namespace clang
|