HeaderIncludes.cpp 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415
  1. //===--- HeaderIncludes.cpp - Insert/Delete #includes --*- C++ -*----------===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. #include "clang/Tooling/Inclusions/HeaderIncludes.h"
  9. #include "clang/Basic/FileManager.h"
  10. #include "clang/Basic/SourceManager.h"
  11. #include "clang/Lex/Lexer.h"
  12. #include "llvm/Support/FormatVariadic.h"
  13. #include "llvm/Support/Path.h"
  14. #include <optional>
  15. namespace clang {
  16. namespace tooling {
  17. namespace {
  18. LangOptions createLangOpts() {
  19. LangOptions LangOpts;
  20. LangOpts.CPlusPlus = 1;
  21. LangOpts.CPlusPlus11 = 1;
  22. LangOpts.CPlusPlus14 = 1;
  23. LangOpts.LineComment = 1;
  24. LangOpts.CXXOperatorNames = 1;
  25. LangOpts.Bool = 1;
  26. LangOpts.ObjC = 1;
  27. LangOpts.MicrosoftExt = 1; // To get kw___try, kw___finally.
  28. LangOpts.DeclSpecKeyword = 1; // To get __declspec.
  29. LangOpts.WChar = 1; // To get wchar_t
  30. return LangOpts;
  31. }
  32. // Returns the offset after skipping a sequence of tokens, matched by \p
  33. // GetOffsetAfterSequence, from the start of the code.
  34. // \p GetOffsetAfterSequence should be a function that matches a sequence of
  35. // tokens and returns an offset after the sequence.
  36. unsigned getOffsetAfterTokenSequence(
  37. StringRef FileName, StringRef Code, const IncludeStyle &Style,
  38. llvm::function_ref<unsigned(const SourceManager &, Lexer &, Token &)>
  39. GetOffsetAfterSequence) {
  40. SourceManagerForFile VirtualSM(FileName, Code);
  41. SourceManager &SM = VirtualSM.get();
  42. LangOptions LangOpts = createLangOpts();
  43. Lexer Lex(SM.getMainFileID(), SM.getBufferOrFake(SM.getMainFileID()), SM,
  44. LangOpts);
  45. Token Tok;
  46. // Get the first token.
  47. Lex.LexFromRawLexer(Tok);
  48. return GetOffsetAfterSequence(SM, Lex, Tok);
  49. }
  50. // Check if a sequence of tokens is like "#<Name> <raw_identifier>". If it is,
  51. // \p Tok will be the token after this directive; otherwise, it can be any token
  52. // after the given \p Tok (including \p Tok). If \p RawIDName is provided, the
  53. // (second) raw_identifier name is checked.
  54. bool checkAndConsumeDirectiveWithName(
  55. Lexer &Lex, StringRef Name, Token &Tok,
  56. std::optional<StringRef> RawIDName = std::nullopt) {
  57. bool Matched = Tok.is(tok::hash) && !Lex.LexFromRawLexer(Tok) &&
  58. Tok.is(tok::raw_identifier) &&
  59. Tok.getRawIdentifier() == Name && !Lex.LexFromRawLexer(Tok) &&
  60. Tok.is(tok::raw_identifier) &&
  61. (!RawIDName || Tok.getRawIdentifier() == *RawIDName);
  62. if (Matched)
  63. Lex.LexFromRawLexer(Tok);
  64. return Matched;
  65. }
  66. void skipComments(Lexer &Lex, Token &Tok) {
  67. while (Tok.is(tok::comment))
  68. if (Lex.LexFromRawLexer(Tok))
  69. return;
  70. }
  71. // Returns the offset after header guard directives and any comments
  72. // before/after header guards (e.g. #ifndef/#define pair, #pragma once). If no
  73. // header guard is present in the code, this will return the offset after
  74. // skipping all comments from the start of the code.
  75. unsigned getOffsetAfterHeaderGuardsAndComments(StringRef FileName,
  76. StringRef Code,
  77. const IncludeStyle &Style) {
  78. // \p Consume returns location after header guard or 0 if no header guard is
  79. // found.
  80. auto ConsumeHeaderGuardAndComment =
  81. [&](std::function<unsigned(const SourceManager &SM, Lexer &Lex,
  82. Token Tok)>
  83. Consume) {
  84. return getOffsetAfterTokenSequence(
  85. FileName, Code, Style,
  86. [&Consume](const SourceManager &SM, Lexer &Lex, Token Tok) {
  87. skipComments(Lex, Tok);
  88. unsigned InitialOffset = SM.getFileOffset(Tok.getLocation());
  89. return std::max(InitialOffset, Consume(SM, Lex, Tok));
  90. });
  91. };
  92. return std::max(
  93. // #ifndef/#define
  94. ConsumeHeaderGuardAndComment(
  95. [](const SourceManager &SM, Lexer &Lex, Token Tok) -> unsigned {
  96. if (checkAndConsumeDirectiveWithName(Lex, "ifndef", Tok)) {
  97. skipComments(Lex, Tok);
  98. if (checkAndConsumeDirectiveWithName(Lex, "define", Tok) &&
  99. Tok.isAtStartOfLine())
  100. return SM.getFileOffset(Tok.getLocation());
  101. }
  102. return 0;
  103. }),
  104. // #pragma once
  105. ConsumeHeaderGuardAndComment(
  106. [](const SourceManager &SM, Lexer &Lex, Token Tok) -> unsigned {
  107. if (checkAndConsumeDirectiveWithName(Lex, "pragma", Tok,
  108. StringRef("once")))
  109. return SM.getFileOffset(Tok.getLocation());
  110. return 0;
  111. }));
  112. }
  113. // Check if a sequence of tokens is like
  114. // "#include ("header.h" | <header.h>)".
  115. // If it is, \p Tok will be the token after this directive; otherwise, it can be
  116. // any token after the given \p Tok (including \p Tok).
  117. bool checkAndConsumeInclusiveDirective(Lexer &Lex, Token &Tok) {
  118. auto Matched = [&]() {
  119. Lex.LexFromRawLexer(Tok);
  120. return true;
  121. };
  122. if (Tok.is(tok::hash) && !Lex.LexFromRawLexer(Tok) &&
  123. Tok.is(tok::raw_identifier) && Tok.getRawIdentifier() == "include") {
  124. if (Lex.LexFromRawLexer(Tok))
  125. return false;
  126. if (Tok.is(tok::string_literal))
  127. return Matched();
  128. if (Tok.is(tok::less)) {
  129. while (!Lex.LexFromRawLexer(Tok) && Tok.isNot(tok::greater)) {
  130. }
  131. if (Tok.is(tok::greater))
  132. return Matched();
  133. }
  134. }
  135. return false;
  136. }
  137. // Returns the offset of the last #include directive after which a new
  138. // #include can be inserted. This ignores #include's after the #include block(s)
  139. // in the beginning of a file to avoid inserting headers into code sections
  140. // where new #include's should not be added by default.
  141. // These code sections include:
  142. // - raw string literals (containing #include).
  143. // - #if blocks.
  144. // - Special #include's among declarations (e.g. functions).
  145. //
  146. // If no #include after which a new #include can be inserted, this returns the
  147. // offset after skipping all comments from the start of the code.
  148. // Inserting after an #include is not allowed if it comes after code that is not
  149. // #include (e.g. pre-processing directive that is not #include, declarations).
  150. unsigned getMaxHeaderInsertionOffset(StringRef FileName, StringRef Code,
  151. const IncludeStyle &Style) {
  152. return getOffsetAfterTokenSequence(
  153. FileName, Code, Style,
  154. [](const SourceManager &SM, Lexer &Lex, Token Tok) {
  155. skipComments(Lex, Tok);
  156. unsigned MaxOffset = SM.getFileOffset(Tok.getLocation());
  157. while (checkAndConsumeInclusiveDirective(Lex, Tok))
  158. MaxOffset = SM.getFileOffset(Tok.getLocation());
  159. return MaxOffset;
  160. });
  161. }
  162. inline StringRef trimInclude(StringRef IncludeName) {
  163. return IncludeName.trim("\"<>");
  164. }
  165. const char IncludeRegexPattern[] =
  166. R"(^[\t\ ]*#[\t\ ]*(import|include)[^"<]*(["<][^">]*[">]))";
  167. // The filename of Path excluding extension.
  168. // Used to match implementation with headers, this differs from sys::path::stem:
  169. // - in names with multiple dots (foo.cu.cc) it terminates at the *first*
  170. // - an empty stem is never returned: /foo/.bar.x => .bar
  171. // - we don't bother to handle . and .. specially
  172. StringRef matchingStem(llvm::StringRef Path) {
  173. StringRef Name = llvm::sys::path::filename(Path);
  174. return Name.substr(0, Name.find('.', 1));
  175. }
  176. } // anonymous namespace
  177. IncludeCategoryManager::IncludeCategoryManager(const IncludeStyle &Style,
  178. StringRef FileName)
  179. : Style(Style), FileName(FileName) {
  180. for (const auto &Category : Style.IncludeCategories) {
  181. CategoryRegexs.emplace_back(Category.Regex, Category.RegexIsCaseSensitive
  182. ? llvm::Regex::NoFlags
  183. : llvm::Regex::IgnoreCase);
  184. }
  185. IsMainFile = FileName.endswith(".c") || FileName.endswith(".cc") ||
  186. FileName.endswith(".cpp") || FileName.endswith(".c++") ||
  187. FileName.endswith(".cxx") || FileName.endswith(".m") ||
  188. FileName.endswith(".mm");
  189. if (!Style.IncludeIsMainSourceRegex.empty()) {
  190. llvm::Regex MainFileRegex(Style.IncludeIsMainSourceRegex);
  191. IsMainFile |= MainFileRegex.match(FileName);
  192. }
  193. }
  194. int IncludeCategoryManager::getIncludePriority(StringRef IncludeName,
  195. bool CheckMainHeader) const {
  196. int Ret = INT_MAX;
  197. for (unsigned i = 0, e = CategoryRegexs.size(); i != e; ++i)
  198. if (CategoryRegexs[i].match(IncludeName)) {
  199. Ret = Style.IncludeCategories[i].Priority;
  200. break;
  201. }
  202. if (CheckMainHeader && IsMainFile && Ret > 0 && isMainHeader(IncludeName))
  203. Ret = 0;
  204. return Ret;
  205. }
  206. int IncludeCategoryManager::getSortIncludePriority(StringRef IncludeName,
  207. bool CheckMainHeader) const {
  208. int Ret = INT_MAX;
  209. for (unsigned i = 0, e = CategoryRegexs.size(); i != e; ++i)
  210. if (CategoryRegexs[i].match(IncludeName)) {
  211. Ret = Style.IncludeCategories[i].SortPriority;
  212. if (Ret == 0)
  213. Ret = Style.IncludeCategories[i].Priority;
  214. break;
  215. }
  216. if (CheckMainHeader && IsMainFile && Ret > 0 && isMainHeader(IncludeName))
  217. Ret = 0;
  218. return Ret;
  219. }
  220. bool IncludeCategoryManager::isMainHeader(StringRef IncludeName) const {
  221. if (!IncludeName.startswith("\""))
  222. return false;
  223. IncludeName =
  224. IncludeName.drop_front(1).drop_back(1); // remove the surrounding "" or <>
  225. // Not matchingStem: implementation files may have compound extensions but
  226. // headers may not.
  227. StringRef HeaderStem = llvm::sys::path::stem(IncludeName);
  228. StringRef FileStem = llvm::sys::path::stem(FileName); // foo.cu for foo.cu.cc
  229. StringRef MatchingFileStem = matchingStem(FileName); // foo for foo.cu.cc
  230. // main-header examples:
  231. // 1) foo.h => foo.cc
  232. // 2) foo.h => foo.cu.cc
  233. // 3) foo.proto.h => foo.proto.cc
  234. //
  235. // non-main-header examples:
  236. // 1) foo.h => bar.cc
  237. // 2) foo.proto.h => foo.cc
  238. StringRef Matching;
  239. if (MatchingFileStem.startswith_insensitive(HeaderStem))
  240. Matching = MatchingFileStem; // example 1), 2)
  241. else if (FileStem.equals_insensitive(HeaderStem))
  242. Matching = FileStem; // example 3)
  243. if (!Matching.empty()) {
  244. llvm::Regex MainIncludeRegex(HeaderStem.str() + Style.IncludeIsMainRegex,
  245. llvm::Regex::IgnoreCase);
  246. if (MainIncludeRegex.match(Matching))
  247. return true;
  248. }
  249. return false;
  250. }
  251. const llvm::Regex HeaderIncludes::IncludeRegex(IncludeRegexPattern);
  252. HeaderIncludes::HeaderIncludes(StringRef FileName, StringRef Code,
  253. const IncludeStyle &Style)
  254. : FileName(FileName), Code(Code), FirstIncludeOffset(-1),
  255. MinInsertOffset(
  256. getOffsetAfterHeaderGuardsAndComments(FileName, Code, Style)),
  257. MaxInsertOffset(MinInsertOffset +
  258. getMaxHeaderInsertionOffset(
  259. FileName, Code.drop_front(MinInsertOffset), Style)),
  260. Categories(Style, FileName) {
  261. // Add 0 for main header and INT_MAX for headers that are not in any
  262. // category.
  263. Priorities = {0, INT_MAX};
  264. for (const auto &Category : Style.IncludeCategories)
  265. Priorities.insert(Category.Priority);
  266. SmallVector<StringRef, 32> Lines;
  267. Code.drop_front(MinInsertOffset).split(Lines, "\n");
  268. unsigned Offset = MinInsertOffset;
  269. unsigned NextLineOffset;
  270. SmallVector<StringRef, 4> Matches;
  271. for (auto Line : Lines) {
  272. NextLineOffset = std::min(Code.size(), Offset + Line.size() + 1);
  273. if (IncludeRegex.match(Line, &Matches)) {
  274. // If this is the last line without trailing newline, we need to make
  275. // sure we don't delete across the file boundary.
  276. addExistingInclude(
  277. Include(Matches[2],
  278. tooling::Range(
  279. Offset, std::min(Line.size() + 1, Code.size() - Offset)),
  280. Matches[1] == "import" ? tooling::IncludeDirective::Import
  281. : tooling::IncludeDirective::Include),
  282. NextLineOffset);
  283. }
  284. Offset = NextLineOffset;
  285. }
  286. // Populate CategoryEndOfssets:
  287. // - Ensure that CategoryEndOffset[Highest] is always populated.
  288. // - If CategoryEndOffset[Priority] isn't set, use the next higher value
  289. // that is set, up to CategoryEndOffset[Highest].
  290. auto Highest = Priorities.begin();
  291. if (CategoryEndOffsets.find(*Highest) == CategoryEndOffsets.end()) {
  292. if (FirstIncludeOffset >= 0)
  293. CategoryEndOffsets[*Highest] = FirstIncludeOffset;
  294. else
  295. CategoryEndOffsets[*Highest] = MinInsertOffset;
  296. }
  297. // By this point, CategoryEndOffset[Highest] is always set appropriately:
  298. // - to an appropriate location before/after existing #includes, or
  299. // - to right after the header guard, or
  300. // - to the beginning of the file.
  301. for (auto I = ++Priorities.begin(), E = Priorities.end(); I != E; ++I)
  302. if (CategoryEndOffsets.find(*I) == CategoryEndOffsets.end())
  303. CategoryEndOffsets[*I] = CategoryEndOffsets[*std::prev(I)];
  304. }
  305. // \p Offset: the start of the line following this include directive.
  306. void HeaderIncludes::addExistingInclude(Include IncludeToAdd,
  307. unsigned NextLineOffset) {
  308. auto Iter =
  309. ExistingIncludes.try_emplace(trimInclude(IncludeToAdd.Name)).first;
  310. Iter->second.push_back(std::move(IncludeToAdd));
  311. auto &CurInclude = Iter->second.back();
  312. // The header name with quotes or angle brackets.
  313. // Only record the offset of current #include if we can insert after it.
  314. if (CurInclude.R.getOffset() <= MaxInsertOffset) {
  315. int Priority = Categories.getIncludePriority(
  316. CurInclude.Name, /*CheckMainHeader=*/FirstIncludeOffset < 0);
  317. CategoryEndOffsets[Priority] = NextLineOffset;
  318. IncludesByPriority[Priority].push_back(&CurInclude);
  319. if (FirstIncludeOffset < 0)
  320. FirstIncludeOffset = CurInclude.R.getOffset();
  321. }
  322. }
  323. std::optional<tooling::Replacement>
  324. HeaderIncludes::insert(llvm::StringRef IncludeName, bool IsAngled,
  325. IncludeDirective Directive) const {
  326. assert(IncludeName == trimInclude(IncludeName));
  327. // If a <header> ("header") already exists in code, "header" (<header>) with
  328. // different quotation and/or directive will still be inserted.
  329. // FIXME: figure out if this is the best behavior.
  330. auto It = ExistingIncludes.find(IncludeName);
  331. if (It != ExistingIncludes.end()) {
  332. for (const auto &Inc : It->second)
  333. if (Inc.Directive == Directive &&
  334. ((IsAngled && StringRef(Inc.Name).startswith("<")) ||
  335. (!IsAngled && StringRef(Inc.Name).startswith("\""))))
  336. return std::nullopt;
  337. }
  338. std::string Quoted =
  339. std::string(llvm::formatv(IsAngled ? "<{0}>" : "\"{0}\"", IncludeName));
  340. StringRef QuotedName = Quoted;
  341. int Priority = Categories.getIncludePriority(
  342. QuotedName, /*CheckMainHeader=*/FirstIncludeOffset < 0);
  343. auto CatOffset = CategoryEndOffsets.find(Priority);
  344. assert(CatOffset != CategoryEndOffsets.end());
  345. unsigned InsertOffset = CatOffset->second; // Fall back offset
  346. auto Iter = IncludesByPriority.find(Priority);
  347. if (Iter != IncludesByPriority.end()) {
  348. for (const auto *Inc : Iter->second) {
  349. if (QuotedName < Inc->Name) {
  350. InsertOffset = Inc->R.getOffset();
  351. break;
  352. }
  353. }
  354. }
  355. assert(InsertOffset <= Code.size());
  356. llvm::StringRef DirectiveSpelling =
  357. Directive == IncludeDirective::Include ? "include" : "import";
  358. std::string NewInclude =
  359. llvm::formatv("#{0} {1}\n", DirectiveSpelling, QuotedName);
  360. // When inserting headers at end of the code, also append '\n' to the code
  361. // if it does not end with '\n'.
  362. // FIXME: when inserting multiple #includes at the end of code, only one
  363. // newline should be added.
  364. if (InsertOffset == Code.size() && (!Code.empty() && Code.back() != '\n'))
  365. NewInclude = "\n" + NewInclude;
  366. return tooling::Replacement(FileName, InsertOffset, 0, NewInclude);
  367. }
  368. tooling::Replacements HeaderIncludes::remove(llvm::StringRef IncludeName,
  369. bool IsAngled) const {
  370. assert(IncludeName == trimInclude(IncludeName));
  371. tooling::Replacements Result;
  372. auto Iter = ExistingIncludes.find(IncludeName);
  373. if (Iter == ExistingIncludes.end())
  374. return Result;
  375. for (const auto &Inc : Iter->second) {
  376. if ((IsAngled && StringRef(Inc.Name).startswith("\"")) ||
  377. (!IsAngled && StringRef(Inc.Name).startswith("<")))
  378. continue;
  379. llvm::Error Err = Result.add(tooling::Replacement(
  380. FileName, Inc.R.getOffset(), Inc.R.getLength(), ""));
  381. if (Err) {
  382. auto ErrMsg = "Unexpected conflicts in #include deletions: " +
  383. llvm::toString(std::move(Err));
  384. llvm_unreachable(ErrMsg.c_str());
  385. }
  386. }
  387. return Result;
  388. }
  389. } // namespace tooling
  390. } // namespace clang