FormatTokenLexer.h 4.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133
//===--- FormatTokenLexer.h - Format C++ code ----------------*- C++ ----*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file contains FormatTokenLexer, which tokenizes a source file
/// into a token stream suitable for ClangFormat.
///
//===----------------------------------------------------------------------===//
  14. #ifndef LLVM_CLANG_LIB_FORMAT_FORMATTOKENLEXER_H
  15. #define LLVM_CLANG_LIB_FORMAT_FORMATTOKENLEXER_H
  16. #include "Encoding.h"
  17. #include "FormatToken.h"
  18. #include "clang/Basic/SourceLocation.h"
  19. #include "clang/Basic/SourceManager.h"
  20. #include "clang/Format/Format.h"
  21. #include "llvm/ADT/MapVector.h"
  22. #include "llvm/ADT/StringSet.h"
  23. #include "llvm/Support/Regex.h"
  24. #include <stack>
  25. namespace clang {
  26. namespace format {
  /// Lexing modes used by FormatTokenLexer, which keeps a
  /// std::stack<LexerState> (its StateStack member).
  ///
  /// This is deliberately left as an unscoped enum so that existing unqualified
  /// uses of the enumerators keep compiling.
  enum LexerState {
    /// Default mode.
    NORMAL,
    /// NOTE(review): presumably active while inside a JavaScript template
    /// string (see FormatTokenLexer::handleTemplateStrings) - confirm in the
    /// implementation file.
    TEMPLATE_STRING,
    /// NOTE(review): presumably signals that a token has been set aside for
    /// FormatTokenLexer::getStashedToken - confirm in the implementation file.
    TOKEN_STASHED,
  };
  32. class FormatTokenLexer {
  33. public:
  34. FormatTokenLexer(const SourceManager &SourceMgr, FileID ID, unsigned Column,
  35. const FormatStyle &Style, encoding::Encoding Encoding,
  36. llvm::SpecificBumpPtrAllocator<FormatToken> &Allocator,
  37. IdentifierTable &IdentTable);
  38. ArrayRef<FormatToken *> lex();
  39. const AdditionalKeywords &getKeywords() { return Keywords; }
  40. private:
  41. void tryMergePreviousTokens();
  42. bool tryMergeLessLess();
  43. bool tryMergeNSStringLiteral();
  44. bool tryMergeJSPrivateIdentifier();
  45. bool tryMergeCSharpStringLiteral();
  46. bool tryMergeCSharpKeywordVariables();
  47. bool tryMergeNullishCoalescingEqual();
  48. bool tryTransformCSharpForEach();
  49. bool tryMergeForEach();
  50. bool tryTransformTryUsageForC();
  51. bool tryMergeTokens(ArrayRef<tok::TokenKind> Kinds, TokenType NewType);
  52. // Returns \c true if \p Tok can only be followed by an operand in JavaScript.
  53. bool precedesOperand(FormatToken *Tok);
  54. bool canPrecedeRegexLiteral(FormatToken *Prev);
  55. // Tries to parse a JavaScript Regex literal starting at the current token,
  56. // if that begins with a slash and is in a location where JavaScript allows
  57. // regex literals. Changes the current token to a regex literal and updates
  58. // its text if successful.
  59. void tryParseJSRegexLiteral();
  60. // Handles JavaScript template strings.
  61. //
  62. // JavaScript template strings use backticks ('`') as delimiters, and allow
  63. // embedding expressions nested in ${expr-here}. Template strings can be
  64. // nested recursively, i.e. expressions can contain template strings in turn.
  65. //
  66. // The code below parses starting from a backtick, up to a closing backtick or
  67. // an opening ${. It also maintains a stack of lexing contexts to handle
  68. // nested template parts by balancing curly braces.
  69. void handleTemplateStrings();
  70. void handleCSharpVerbatimAndInterpolatedStrings();
  71. void tryParsePythonComment();
  72. bool tryMerge_TMacro();
  73. bool tryMergeConflictMarkers();
  74. FormatToken *getStashedToken();
  75. FormatToken *getNextToken();
  76. FormatToken *FormatTok;
  77. bool IsFirstToken;
  78. std::stack<LexerState> StateStack;
  79. unsigned Column;
  80. unsigned TrailingWhitespace;
  81. std::unique_ptr<Lexer> Lex;
  82. const SourceManager &SourceMgr;
  83. FileID ID;
  84. const FormatStyle &Style;
  85. IdentifierTable &IdentTable;
  86. AdditionalKeywords Keywords;
  87. encoding::Encoding Encoding;
  88. llvm::SpecificBumpPtrAllocator<FormatToken> &Allocator;
  89. // Index (in 'Tokens') of the last token that starts a new line.
  90. unsigned FirstInLineIndex;
  91. SmallVector<FormatToken *, 16> Tokens;
  92. llvm::SmallMapVector<IdentifierInfo *, TokenType, 8> Macros;
  93. bool FormattingDisabled;
  94. llvm::Regex MacroBlockBeginRegex;
  95. llvm::Regex MacroBlockEndRegex;
  96. // Targets that may appear inside a C# attribute.
  97. static const llvm::StringSet<> CSharpAttributeTargets;
  98. void readRawToken(FormatToken &Tok);
  99. void resetLexer(unsigned Offset);
  100. };
  101. } // namespace format
  102. } // namespace clang
  103. #endif