HeaderIncludes.cpp 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406
  1. //===--- HeaderIncludes.cpp - Insert/Delete #includes --*- C++ -*----------===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. #include "clang/Tooling/Inclusions/HeaderIncludes.h"
  9. #include "clang/Basic/FileManager.h"
  10. #include "clang/Basic/SourceManager.h"
  11. #include "clang/Lex/Lexer.h"
  12. #include "llvm/ADT/Optional.h"
  13. #include "llvm/Support/FormatVariadic.h"
  14. #include "llvm/Support/Path.h"
  15. namespace clang {
  16. namespace tooling {
  17. namespace {
  18. LangOptions createLangOpts() {
  19. LangOptions LangOpts;
  20. LangOpts.CPlusPlus = 1;
  21. LangOpts.CPlusPlus11 = 1;
  22. LangOpts.CPlusPlus14 = 1;
  23. LangOpts.LineComment = 1;
  24. LangOpts.CXXOperatorNames = 1;
  25. LangOpts.Bool = 1;
  26. LangOpts.ObjC = 1;
  27. LangOpts.MicrosoftExt = 1; // To get kw___try, kw___finally.
  28. LangOpts.DeclSpecKeyword = 1; // To get __declspec.
  29. LangOpts.WChar = 1; // To get wchar_t
  30. return LangOpts;
  31. }
  32. // Returns the offset after skipping a sequence of tokens, matched by \p
  33. // GetOffsetAfterSequence, from the start of the code.
  34. // \p GetOffsetAfterSequence should be a function that matches a sequence of
  35. // tokens and returns an offset after the sequence.
  36. unsigned getOffsetAfterTokenSequence(
  37. StringRef FileName, StringRef Code, const IncludeStyle &Style,
  38. llvm::function_ref<unsigned(const SourceManager &, Lexer &, Token &)>
  39. GetOffsetAfterSequence) {
  40. SourceManagerForFile VirtualSM(FileName, Code);
  41. SourceManager &SM = VirtualSM.get();
  42. Lexer Lex(SM.getMainFileID(), SM.getBufferOrFake(SM.getMainFileID()), SM,
  43. createLangOpts());
  44. Token Tok;
  45. // Get the first token.
  46. Lex.LexFromRawLexer(Tok);
  47. return GetOffsetAfterSequence(SM, Lex, Tok);
  48. }
  49. // Check if a sequence of tokens is like "#<Name> <raw_identifier>". If it is,
  50. // \p Tok will be the token after this directive; otherwise, it can be any token
  51. // after the given \p Tok (including \p Tok). If \p RawIDName is provided, the
  52. // (second) raw_identifier name is checked.
  53. bool checkAndConsumeDirectiveWithName(
  54. Lexer &Lex, StringRef Name, Token &Tok,
  55. llvm::Optional<StringRef> RawIDName = llvm::None) {
  56. bool Matched = Tok.is(tok::hash) && !Lex.LexFromRawLexer(Tok) &&
  57. Tok.is(tok::raw_identifier) &&
  58. Tok.getRawIdentifier() == Name && !Lex.LexFromRawLexer(Tok) &&
  59. Tok.is(tok::raw_identifier) &&
  60. (!RawIDName || Tok.getRawIdentifier() == *RawIDName);
  61. if (Matched)
  62. Lex.LexFromRawLexer(Tok);
  63. return Matched;
  64. }
  65. void skipComments(Lexer &Lex, Token &Tok) {
  66. while (Tok.is(tok::comment))
  67. if (Lex.LexFromRawLexer(Tok))
  68. return;
  69. }
  70. // Returns the offset after header guard directives and any comments
  71. // before/after header guards (e.g. #ifndef/#define pair, #pragma once). If no
  72. // header guard is present in the code, this will return the offset after
  73. // skipping all comments from the start of the code.
  74. unsigned getOffsetAfterHeaderGuardsAndComments(StringRef FileName,
  75. StringRef Code,
  76. const IncludeStyle &Style) {
  77. // \p Consume returns location after header guard or 0 if no header guard is
  78. // found.
  79. auto ConsumeHeaderGuardAndComment =
  80. [&](std::function<unsigned(const SourceManager &SM, Lexer &Lex,
  81. Token Tok)>
  82. Consume) {
  83. return getOffsetAfterTokenSequence(
  84. FileName, Code, Style,
  85. [&Consume](const SourceManager &SM, Lexer &Lex, Token Tok) {
  86. skipComments(Lex, Tok);
  87. unsigned InitialOffset = SM.getFileOffset(Tok.getLocation());
  88. return std::max(InitialOffset, Consume(SM, Lex, Tok));
  89. });
  90. };
  91. return std::max(
  92. // #ifndef/#define
  93. ConsumeHeaderGuardAndComment(
  94. [](const SourceManager &SM, Lexer &Lex, Token Tok) -> unsigned {
  95. if (checkAndConsumeDirectiveWithName(Lex, "ifndef", Tok)) {
  96. skipComments(Lex, Tok);
  97. if (checkAndConsumeDirectiveWithName(Lex, "define", Tok) &&
  98. Tok.isAtStartOfLine())
  99. return SM.getFileOffset(Tok.getLocation());
  100. }
  101. return 0;
  102. }),
  103. // #pragma once
  104. ConsumeHeaderGuardAndComment(
  105. [](const SourceManager &SM, Lexer &Lex, Token Tok) -> unsigned {
  106. if (checkAndConsumeDirectiveWithName(Lex, "pragma", Tok,
  107. StringRef("once")))
  108. return SM.getFileOffset(Tok.getLocation());
  109. return 0;
  110. }));
  111. }
  112. // Check if a sequence of tokens is like
  113. // "#include ("header.h" | <header.h>)".
  114. // If it is, \p Tok will be the token after this directive; otherwise, it can be
  115. // any token after the given \p Tok (including \p Tok).
  116. bool checkAndConsumeInclusiveDirective(Lexer &Lex, Token &Tok) {
  117. auto Matched = [&]() {
  118. Lex.LexFromRawLexer(Tok);
  119. return true;
  120. };
  121. if (Tok.is(tok::hash) && !Lex.LexFromRawLexer(Tok) &&
  122. Tok.is(tok::raw_identifier) && Tok.getRawIdentifier() == "include") {
  123. if (Lex.LexFromRawLexer(Tok))
  124. return false;
  125. if (Tok.is(tok::string_literal))
  126. return Matched();
  127. if (Tok.is(tok::less)) {
  128. while (!Lex.LexFromRawLexer(Tok) && Tok.isNot(tok::greater)) {
  129. }
  130. if (Tok.is(tok::greater))
  131. return Matched();
  132. }
  133. }
  134. return false;
  135. }
  136. // Returns the offset of the last #include directive after which a new
  137. // #include can be inserted. This ignores #include's after the #include block(s)
  138. // in the beginning of a file to avoid inserting headers into code sections
  139. // where new #include's should not be added by default.
  140. // These code sections include:
  141. // - raw string literals (containing #include).
  142. // - #if blocks.
  143. // - Special #include's among declarations (e.g. functions).
  144. //
  145. // If no #include after which a new #include can be inserted, this returns the
  146. // offset after skipping all comments from the start of the code.
  147. // Inserting after an #include is not allowed if it comes after code that is not
  148. // #include (e.g. pre-processing directive that is not #include, declarations).
  149. unsigned getMaxHeaderInsertionOffset(StringRef FileName, StringRef Code,
  150. const IncludeStyle &Style) {
  151. return getOffsetAfterTokenSequence(
  152. FileName, Code, Style,
  153. [](const SourceManager &SM, Lexer &Lex, Token Tok) {
  154. skipComments(Lex, Tok);
  155. unsigned MaxOffset = SM.getFileOffset(Tok.getLocation());
  156. while (checkAndConsumeInclusiveDirective(Lex, Tok))
  157. MaxOffset = SM.getFileOffset(Tok.getLocation());
  158. return MaxOffset;
  159. });
  160. }
  161. inline StringRef trimInclude(StringRef IncludeName) {
  162. return IncludeName.trim("\"<>");
  163. }
  164. const char IncludeRegexPattern[] =
  165. R"(^[\t\ ]*#[\t\ ]*(import|include)[^"<]*(["<][^">]*[">]))";
  166. // The filename of Path excluding extension.
  167. // Used to match implementation with headers, this differs from sys::path::stem:
  168. // - in names with multiple dots (foo.cu.cc) it terminates at the *first*
  169. // - an empty stem is never returned: /foo/.bar.x => .bar
  170. // - we don't bother to handle . and .. specially
  171. StringRef matchingStem(llvm::StringRef Path) {
  172. StringRef Name = llvm::sys::path::filename(Path);
  173. return Name.substr(0, Name.find('.', 1));
  174. }
  175. } // anonymous namespace
  176. IncludeCategoryManager::IncludeCategoryManager(const IncludeStyle &Style,
  177. StringRef FileName)
  178. : Style(Style), FileName(FileName) {
  179. for (const auto &Category : Style.IncludeCategories) {
  180. CategoryRegexs.emplace_back(Category.Regex, Category.RegexIsCaseSensitive
  181. ? llvm::Regex::NoFlags
  182. : llvm::Regex::IgnoreCase);
  183. }
  184. IsMainFile = FileName.endswith(".c") || FileName.endswith(".cc") ||
  185. FileName.endswith(".cpp") || FileName.endswith(".c++") ||
  186. FileName.endswith(".cxx") || FileName.endswith(".m") ||
  187. FileName.endswith(".mm");
  188. if (!Style.IncludeIsMainSourceRegex.empty()) {
  189. llvm::Regex MainFileRegex(Style.IncludeIsMainSourceRegex);
  190. IsMainFile |= MainFileRegex.match(FileName);
  191. }
  192. }
  193. int IncludeCategoryManager::getIncludePriority(StringRef IncludeName,
  194. bool CheckMainHeader) const {
  195. int Ret = INT_MAX;
  196. for (unsigned i = 0, e = CategoryRegexs.size(); i != e; ++i)
  197. if (CategoryRegexs[i].match(IncludeName)) {
  198. Ret = Style.IncludeCategories[i].Priority;
  199. break;
  200. }
  201. if (CheckMainHeader && IsMainFile && Ret > 0 && isMainHeader(IncludeName))
  202. Ret = 0;
  203. return Ret;
  204. }
  205. int IncludeCategoryManager::getSortIncludePriority(StringRef IncludeName,
  206. bool CheckMainHeader) const {
  207. int Ret = INT_MAX;
  208. for (unsigned i = 0, e = CategoryRegexs.size(); i != e; ++i)
  209. if (CategoryRegexs[i].match(IncludeName)) {
  210. Ret = Style.IncludeCategories[i].SortPriority;
  211. if (Ret == 0)
  212. Ret = Style.IncludeCategories[i].Priority;
  213. break;
  214. }
  215. if (CheckMainHeader && IsMainFile && Ret > 0 && isMainHeader(IncludeName))
  216. Ret = 0;
  217. return Ret;
  218. }
  219. bool IncludeCategoryManager::isMainHeader(StringRef IncludeName) const {
  220. if (!IncludeName.startswith("\""))
  221. return false;
  222. IncludeName =
  223. IncludeName.drop_front(1).drop_back(1); // remove the surrounding "" or <>
  224. // Not matchingStem: implementation files may have compound extensions but
  225. // headers may not.
  226. StringRef HeaderStem = llvm::sys::path::stem(IncludeName);
  227. StringRef FileStem = llvm::sys::path::stem(FileName); // foo.cu for foo.cu.cc
  228. StringRef MatchingFileStem = matchingStem(FileName); // foo for foo.cu.cc
  229. // main-header examples:
  230. // 1) foo.h => foo.cc
  231. // 2) foo.h => foo.cu.cc
  232. // 3) foo.proto.h => foo.proto.cc
  233. //
  234. // non-main-header examples:
  235. // 1) foo.h => bar.cc
  236. // 2) foo.proto.h => foo.cc
  237. StringRef Matching;
  238. if (MatchingFileStem.startswith_insensitive(HeaderStem))
  239. Matching = MatchingFileStem; // example 1), 2)
  240. else if (FileStem.equals_insensitive(HeaderStem))
  241. Matching = FileStem; // example 3)
  242. if (!Matching.empty()) {
  243. llvm::Regex MainIncludeRegex(HeaderStem.str() + Style.IncludeIsMainRegex,
  244. llvm::Regex::IgnoreCase);
  245. if (MainIncludeRegex.match(Matching))
  246. return true;
  247. }
  248. return false;
  249. }
  250. HeaderIncludes::HeaderIncludes(StringRef FileName, StringRef Code,
  251. const IncludeStyle &Style)
  252. : FileName(FileName), Code(Code), FirstIncludeOffset(-1),
  253. MinInsertOffset(
  254. getOffsetAfterHeaderGuardsAndComments(FileName, Code, Style)),
  255. MaxInsertOffset(MinInsertOffset +
  256. getMaxHeaderInsertionOffset(
  257. FileName, Code.drop_front(MinInsertOffset), Style)),
  258. Categories(Style, FileName),
  259. IncludeRegex(llvm::Regex(IncludeRegexPattern)) {
  260. // Add 0 for main header and INT_MAX for headers that are not in any
  261. // category.
  262. Priorities = {0, INT_MAX};
  263. for (const auto &Category : Style.IncludeCategories)
  264. Priorities.insert(Category.Priority);
  265. SmallVector<StringRef, 32> Lines;
  266. Code.drop_front(MinInsertOffset).split(Lines, "\n");
  267. unsigned Offset = MinInsertOffset;
  268. unsigned NextLineOffset;
  269. SmallVector<StringRef, 4> Matches;
  270. for (auto Line : Lines) {
  271. NextLineOffset = std::min(Code.size(), Offset + Line.size() + 1);
  272. if (IncludeRegex.match(Line, &Matches)) {
  273. // If this is the last line without trailing newline, we need to make
  274. // sure we don't delete across the file boundary.
  275. addExistingInclude(
  276. Include(Matches[2],
  277. tooling::Range(
  278. Offset, std::min(Line.size() + 1, Code.size() - Offset))),
  279. NextLineOffset);
  280. }
  281. Offset = NextLineOffset;
  282. }
  283. // Populate CategoryEndOfssets:
  284. // - Ensure that CategoryEndOffset[Highest] is always populated.
  285. // - If CategoryEndOffset[Priority] isn't set, use the next higher value
  286. // that is set, up to CategoryEndOffset[Highest].
  287. auto Highest = Priorities.begin();
  288. if (CategoryEndOffsets.find(*Highest) == CategoryEndOffsets.end()) {
  289. if (FirstIncludeOffset >= 0)
  290. CategoryEndOffsets[*Highest] = FirstIncludeOffset;
  291. else
  292. CategoryEndOffsets[*Highest] = MinInsertOffset;
  293. }
  294. // By this point, CategoryEndOffset[Highest] is always set appropriately:
  295. // - to an appropriate location before/after existing #includes, or
  296. // - to right after the header guard, or
  297. // - to the beginning of the file.
  298. for (auto I = ++Priorities.begin(), E = Priorities.end(); I != E; ++I)
  299. if (CategoryEndOffsets.find(*I) == CategoryEndOffsets.end())
  300. CategoryEndOffsets[*I] = CategoryEndOffsets[*std::prev(I)];
  301. }
  302. // \p Offset: the start of the line following this include directive.
  303. void HeaderIncludes::addExistingInclude(Include IncludeToAdd,
  304. unsigned NextLineOffset) {
  305. auto Iter =
  306. ExistingIncludes.try_emplace(trimInclude(IncludeToAdd.Name)).first;
  307. Iter->second.push_back(std::move(IncludeToAdd));
  308. auto &CurInclude = Iter->second.back();
  309. // The header name with quotes or angle brackets.
  310. // Only record the offset of current #include if we can insert after it.
  311. if (CurInclude.R.getOffset() <= MaxInsertOffset) {
  312. int Priority = Categories.getIncludePriority(
  313. CurInclude.Name, /*CheckMainHeader=*/FirstIncludeOffset < 0);
  314. CategoryEndOffsets[Priority] = NextLineOffset;
  315. IncludesByPriority[Priority].push_back(&CurInclude);
  316. if (FirstIncludeOffset < 0)
  317. FirstIncludeOffset = CurInclude.R.getOffset();
  318. }
  319. }
  320. llvm::Optional<tooling::Replacement>
  321. HeaderIncludes::insert(llvm::StringRef IncludeName, bool IsAngled) const {
  322. assert(IncludeName == trimInclude(IncludeName));
  323. // If a <header> ("header") already exists in code, "header" (<header>) with
  324. // different quotation will still be inserted.
  325. // FIXME: figure out if this is the best behavior.
  326. auto It = ExistingIncludes.find(IncludeName);
  327. if (It != ExistingIncludes.end())
  328. for (const auto &Inc : It->second)
  329. if ((IsAngled && StringRef(Inc.Name).startswith("<")) ||
  330. (!IsAngled && StringRef(Inc.Name).startswith("\"")))
  331. return llvm::None;
  332. std::string Quoted =
  333. std::string(llvm::formatv(IsAngled ? "<{0}>" : "\"{0}\"", IncludeName));
  334. StringRef QuotedName = Quoted;
  335. int Priority = Categories.getIncludePriority(
  336. QuotedName, /*CheckMainHeader=*/FirstIncludeOffset < 0);
  337. auto CatOffset = CategoryEndOffsets.find(Priority);
  338. assert(CatOffset != CategoryEndOffsets.end());
  339. unsigned InsertOffset = CatOffset->second; // Fall back offset
  340. auto Iter = IncludesByPriority.find(Priority);
  341. if (Iter != IncludesByPriority.end()) {
  342. for (const auto *Inc : Iter->second) {
  343. if (QuotedName < Inc->Name) {
  344. InsertOffset = Inc->R.getOffset();
  345. break;
  346. }
  347. }
  348. }
  349. assert(InsertOffset <= Code.size());
  350. std::string NewInclude =
  351. std::string(llvm::formatv("#include {0}\n", QuotedName));
  352. // When inserting headers at end of the code, also append '\n' to the code
  353. // if it does not end with '\n'.
  354. // FIXME: when inserting multiple #includes at the end of code, only one
  355. // newline should be added.
  356. if (InsertOffset == Code.size() && (!Code.empty() && Code.back() != '\n'))
  357. NewInclude = "\n" + NewInclude;
  358. return tooling::Replacement(FileName, InsertOffset, 0, NewInclude);
  359. }
  360. tooling::Replacements HeaderIncludes::remove(llvm::StringRef IncludeName,
  361. bool IsAngled) const {
  362. assert(IncludeName == trimInclude(IncludeName));
  363. tooling::Replacements Result;
  364. auto Iter = ExistingIncludes.find(IncludeName);
  365. if (Iter == ExistingIncludes.end())
  366. return Result;
  367. for (const auto &Inc : Iter->second) {
  368. if ((IsAngled && StringRef(Inc.Name).startswith("\"")) ||
  369. (!IsAngled && StringRef(Inc.Name).startswith("<")))
  370. continue;
  371. llvm::Error Err = Result.add(tooling::Replacement(
  372. FileName, Inc.R.getOffset(), Inc.R.getLength(), ""));
  373. if (Err) {
  374. auto ErrMsg = "Unexpected conflicts in #include deletions: " +
  375. llvm::toString(std::move(Err));
  376. llvm_unreachable(ErrMsg.c_str());
  377. }
  378. }
  379. return Result;
  380. }
  381. } // namespace tooling
  382. } // namespace clang