LexerUtils.cpp 7.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215
  1. //===--- LexerUtils.cpp - clang-tidy---------------------------------------===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. #include "LexerUtils.h"
  9. #include "clang/AST/AST.h"
  10. #include "clang/Basic/SourceManager.h"
  11. #include <optional>
  12. namespace clang::tidy::utils::lexer {
  13. Token getPreviousToken(SourceLocation Location, const SourceManager &SM,
  14. const LangOptions &LangOpts, bool SkipComments) {
  15. Token Token;
  16. Token.setKind(tok::unknown);
  17. Location = Location.getLocWithOffset(-1);
  18. if (Location.isInvalid())
  19. return Token;
  20. auto StartOfFile = SM.getLocForStartOfFile(SM.getFileID(Location));
  21. while (Location != StartOfFile) {
  22. Location = Lexer::GetBeginningOfToken(Location, SM, LangOpts);
  23. if (!Lexer::getRawToken(Location, Token, SM, LangOpts) &&
  24. (!SkipComments || !Token.is(tok::comment))) {
  25. break;
  26. }
  27. Location = Location.getLocWithOffset(-1);
  28. }
  29. return Token;
  30. }
  31. SourceLocation findPreviousTokenStart(SourceLocation Start,
  32. const SourceManager &SM,
  33. const LangOptions &LangOpts) {
  34. if (Start.isInvalid() || Start.isMacroID())
  35. return SourceLocation();
  36. SourceLocation BeforeStart = Start.getLocWithOffset(-1);
  37. if (BeforeStart.isInvalid() || BeforeStart.isMacroID())
  38. return SourceLocation();
  39. return Lexer::GetBeginningOfToken(BeforeStart, SM, LangOpts);
  40. }
  41. SourceLocation findPreviousTokenKind(SourceLocation Start,
  42. const SourceManager &SM,
  43. const LangOptions &LangOpts,
  44. tok::TokenKind TK) {
  45. if (Start.isInvalid() || Start.isMacroID())
  46. return SourceLocation();
  47. while (true) {
  48. SourceLocation L = findPreviousTokenStart(Start, SM, LangOpts);
  49. if (L.isInvalid() || L.isMacroID())
  50. return SourceLocation();
  51. Token T;
  52. if (Lexer::getRawToken(L, T, SM, LangOpts, /*IgnoreWhiteSpace=*/true))
  53. return SourceLocation();
  54. if (T.is(TK))
  55. return T.getLocation();
  56. Start = L;
  57. }
  58. }
  59. SourceLocation findNextTerminator(SourceLocation Start, const SourceManager &SM,
  60. const LangOptions &LangOpts) {
  61. return findNextAnyTokenKind(Start, SM, LangOpts, tok::comma, tok::semi);
  62. }
  63. std::optional<Token>
  64. findNextTokenSkippingComments(SourceLocation Start, const SourceManager &SM,
  65. const LangOptions &LangOpts) {
  66. std::optional<Token> CurrentToken;
  67. do {
  68. CurrentToken = Lexer::findNextToken(Start, SM, LangOpts);
  69. } while (CurrentToken && CurrentToken->is(tok::comment));
  70. return CurrentToken;
  71. }
  72. bool rangeContainsExpansionsOrDirectives(SourceRange Range,
  73. const SourceManager &SM,
  74. const LangOptions &LangOpts) {
  75. assert(Range.isValid() && "Invalid Range for relexing provided");
  76. SourceLocation Loc = Range.getBegin();
  77. while (Loc < Range.getEnd()) {
  78. if (Loc.isMacroID())
  79. return true;
  80. std::optional<Token> Tok = Lexer::findNextToken(Loc, SM, LangOpts);
  81. if (!Tok)
  82. return true;
  83. if (Tok->is(tok::hash))
  84. return true;
  85. Loc = Lexer::getLocForEndOfToken(Loc, 0, SM, LangOpts).getLocWithOffset(1);
  86. }
  87. return false;
  88. }
  89. std::optional<Token> getQualifyingToken(tok::TokenKind TK,
  90. CharSourceRange Range,
  91. const ASTContext &Context,
  92. const SourceManager &SM) {
  93. assert((TK == tok::kw_const || TK == tok::kw_volatile ||
  94. TK == tok::kw_restrict) &&
  95. "TK is not a qualifier keyword");
  96. std::pair<FileID, unsigned> LocInfo = SM.getDecomposedLoc(Range.getBegin());
  97. StringRef File = SM.getBufferData(LocInfo.first);
  98. Lexer RawLexer(SM.getLocForStartOfFile(LocInfo.first), Context.getLangOpts(),
  99. File.begin(), File.data() + LocInfo.second, File.end());
  100. std::optional<Token> LastMatchBeforeTemplate;
  101. std::optional<Token> LastMatchAfterTemplate;
  102. bool SawTemplate = false;
  103. Token Tok;
  104. while (!RawLexer.LexFromRawLexer(Tok) &&
  105. Range.getEnd() != Tok.getLocation() &&
  106. !SM.isBeforeInTranslationUnit(Range.getEnd(), Tok.getLocation())) {
  107. if (Tok.is(tok::raw_identifier)) {
  108. IdentifierInfo &Info = Context.Idents.get(
  109. StringRef(SM.getCharacterData(Tok.getLocation()), Tok.getLength()));
  110. Tok.setIdentifierInfo(&Info);
  111. Tok.setKind(Info.getTokenID());
  112. }
  113. if (Tok.is(tok::less))
  114. SawTemplate = true;
  115. else if (Tok.isOneOf(tok::greater, tok::greatergreater))
  116. LastMatchAfterTemplate = std::nullopt;
  117. else if (Tok.is(TK)) {
  118. if (SawTemplate)
  119. LastMatchAfterTemplate = Tok;
  120. else
  121. LastMatchBeforeTemplate = Tok;
  122. }
  123. }
  124. return LastMatchAfterTemplate != std::nullopt ? LastMatchAfterTemplate
  125. : LastMatchBeforeTemplate;
  126. }
  127. static bool breakAndReturnEnd(const Stmt &S) {
  128. return isa<CompoundStmt, DeclStmt, NullStmt>(S);
  129. }
  130. static bool breakAndReturnEndPlus1Token(const Stmt &S) {
  131. return isa<Expr, DoStmt, ReturnStmt, BreakStmt, ContinueStmt, GotoStmt, SEHLeaveStmt>(S);
  132. }
  133. // Given a Stmt which does not include it's semicolon this method returns the
  134. // SourceLocation of the semicolon.
  135. static SourceLocation getSemicolonAfterStmtEndLoc(const SourceLocation &EndLoc,
  136. const SourceManager &SM,
  137. const LangOptions &LangOpts) {
  138. if (EndLoc.isMacroID()) {
  139. // Assuming EndLoc points to a function call foo within macro F.
  140. // This method is supposed to return location of the semicolon within
  141. // those macro arguments:
  142. // F ( foo() ; )
  143. // ^ EndLoc ^ SpellingLoc ^ next token of SpellingLoc
  144. const SourceLocation SpellingLoc = SM.getSpellingLoc(EndLoc);
  145. std::optional<Token> NextTok =
  146. findNextTokenSkippingComments(SpellingLoc, SM, LangOpts);
  147. // Was the next token found successfully?
  148. // All macro issues are simply resolved by ensuring it's a semicolon.
  149. if (NextTok && NextTok->is(tok::TokenKind::semi)) {
  150. // Ideally this would return `F` with spelling location `;` (NextTok)
  151. // following the example above. For now simply return NextTok location.
  152. return NextTok->getLocation();
  153. }
  154. // Fallthrough to 'normal handling'.
  155. // F ( foo() ) ;
  156. // ^ EndLoc ^ SpellingLoc ) ^ next token of EndLoc
  157. }
  158. std::optional<Token> NextTok =
  159. findNextTokenSkippingComments(EndLoc, SM, LangOpts);
  160. // Testing for semicolon again avoids some issues with macros.
  161. if (NextTok && NextTok->is(tok::TokenKind::semi))
  162. return NextTok->getLocation();
  163. return SourceLocation();
  164. }
  165. SourceLocation getUnifiedEndLoc(const Stmt &S, const SourceManager &SM,
  166. const LangOptions &LangOpts) {
  167. const Stmt *LastChild = &S;
  168. while (!LastChild->children().empty() && !breakAndReturnEnd(*LastChild) &&
  169. !breakAndReturnEndPlus1Token(*LastChild)) {
  170. for (const Stmt *Child : LastChild->children())
  171. LastChild = Child;
  172. }
  173. if (!breakAndReturnEnd(*LastChild) &&
  174. breakAndReturnEndPlus1Token(*LastChild))
  175. return getSemicolonAfterStmtEndLoc(S.getEndLoc(), SM, LangOpts);
  176. return S.getEndLoc();
  177. }
  178. } // namespace clang::tidy::utils::lexer