FormatToken.h 46 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234
  1. //===--- FormatToken.h - Format C++ code ------------------------*- C++ -*-===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. ///
  9. /// \file
  10. /// This file contains the declaration of the FormatToken, a wrapper
  11. /// around Token with additional information related to formatting.
  12. ///
  13. //===----------------------------------------------------------------------===//
  14. #ifndef LLVM_CLANG_LIB_FORMAT_FORMATTOKEN_H
  15. #define LLVM_CLANG_LIB_FORMAT_FORMATTOKEN_H
  16. #include "clang/Basic/IdentifierTable.h"
  17. #include "clang/Basic/OperatorPrecedence.h"
  18. #include "clang/Format/Format.h"
  19. #include "clang/Lex/Lexer.h"
  20. #include <memory>
  21. #include <unordered_set>
  22. namespace clang {
  23. namespace format {
  24. #define LIST_TOKEN_TYPES \
  25. TYPE(ArrayInitializerLSquare) \
  26. TYPE(ArraySubscriptLSquare) \
  27. TYPE(AttributeColon) \
  28. TYPE(AttributeMacro) \
  29. TYPE(AttributeParen) \
  30. TYPE(AttributeSquare) \
  31. TYPE(BinaryOperator) \
  32. TYPE(BitFieldColon) \
  33. TYPE(BlockComment) \
  34. TYPE(CastRParen) \
  35. TYPE(ConditionalExpr) \
  36. TYPE(ConflictAlternative) \
  37. TYPE(ConflictEnd) \
  38. TYPE(ConflictStart) \
  39. TYPE(ConstraintJunctions) \
  40. TYPE(CtorInitializerColon) \
  41. TYPE(CtorInitializerComma) \
  42. TYPE(DesignatedInitializerLSquare) \
  43. TYPE(DesignatedInitializerPeriod) \
  44. TYPE(DictLiteral) \
  45. TYPE(FatArrow) \
  46. TYPE(ForEachMacro) \
  47. TYPE(FunctionAnnotationRParen) \
  48. TYPE(FunctionDeclarationName) \
  49. TYPE(FunctionLBrace) \
  50. TYPE(FunctionLikeOrFreestandingMacro) \
  51. TYPE(FunctionTypeLParen) \
  52. TYPE(IfMacro) \
  53. TYPE(ImplicitStringLiteral) \
  54. TYPE(InheritanceColon) \
  55. TYPE(InheritanceComma) \
  56. TYPE(InlineASMBrace) \
  57. TYPE(InlineASMColon) \
  58. TYPE(InlineASMSymbolicNameLSquare) \
  59. TYPE(JavaAnnotation) \
  60. TYPE(JsComputedPropertyName) \
  61. TYPE(JsExponentiation) \
  62. TYPE(JsExponentiationEqual) \
  63. TYPE(JsPipePipeEqual) \
  64. TYPE(JsPrivateIdentifier) \
  65. TYPE(JsTypeColon) \
  66. TYPE(JsTypeOperator) \
  67. TYPE(JsTypeOptionalQuestion) \
  68. TYPE(JsAndAndEqual) \
  69. TYPE(LambdaArrow) \
  70. TYPE(LambdaLBrace) \
  71. TYPE(LambdaLSquare) \
  72. TYPE(LeadingJavaAnnotation) \
  73. TYPE(LineComment) \
  74. TYPE(MacroBlockBegin) \
  75. TYPE(MacroBlockEnd) \
  76. TYPE(ModulePartitionColon) \
  77. TYPE(NamespaceMacro) \
  78. TYPE(NonNullAssertion) \
  79. TYPE(NullCoalescingEqual) \
  80. TYPE(NullCoalescingOperator) \
  81. TYPE(NullPropagatingOperator) \
  82. TYPE(ObjCBlockLBrace) \
  83. TYPE(ObjCBlockLParen) \
  84. TYPE(ObjCDecl) \
  85. TYPE(ObjCForIn) \
  86. TYPE(ObjCMethodExpr) \
  87. TYPE(ObjCMethodSpecifier) \
  88. TYPE(ObjCProperty) \
  89. TYPE(ObjCStringLiteral) \
  90. TYPE(OverloadedOperator) \
  91. TYPE(OverloadedOperatorLParen) \
  92. TYPE(PointerOrReference) \
  93. TYPE(PureVirtualSpecifier) \
  94. TYPE(RangeBasedForLoopColon) \
  95. TYPE(RecordLBrace) \
  96. TYPE(RegexLiteral) \
  97. TYPE(SelectorName) \
  98. TYPE(StartOfName) \
  99. TYPE(StatementAttributeLikeMacro) \
  100. TYPE(StatementMacro) \
  101. TYPE(StructuredBindingLSquare) \
  102. TYPE(TemplateCloser) \
  103. TYPE(TemplateOpener) \
  104. TYPE(TemplateString) \
  105. TYPE(ProtoExtensionLSquare) \
  106. TYPE(TrailingAnnotation) \
  107. TYPE(TrailingReturnArrow) \
  108. TYPE(TrailingUnaryOperator) \
  109. TYPE(TypeDeclarationParen) \
  110. TYPE(TypenameMacro) \
  111. TYPE(UnaryOperator) \
  112. TYPE(UntouchableMacroFunc) \
  113. TYPE(CSharpStringLiteral) \
  114. TYPE(CSharpNamedArgumentColon) \
  115. TYPE(CSharpNullable) \
  116. TYPE(CSharpNullConditionalLSquare) \
  117. TYPE(CSharpGenericTypeConstraint) \
  118. TYPE(CSharpGenericTypeConstraintColon) \
  119. TYPE(CSharpGenericTypeConstraintComma) \
  120. TYPE(Unknown)
  121. /// Sorted operators that can follow a C variable.
  122. static const std::vector<clang::tok::TokenKind> COperatorsFollowingVar = [] {
  123. std::vector<clang::tok::TokenKind> ReturnVal = {
  124. tok::l_square, tok::r_square,
  125. tok::l_paren, tok::r_paren,
  126. tok::r_brace, tok::period,
  127. tok::ellipsis, tok::ampamp,
  128. tok::ampequal, tok::star,
  129. tok::starequal, tok::plus,
  130. tok::plusplus, tok::plusequal,
  131. tok::minus, tok::arrow,
  132. tok::minusminus, tok::minusequal,
  133. tok::exclaim, tok::exclaimequal,
  134. tok::slash, tok::slashequal,
  135. tok::percent, tok::percentequal,
  136. tok::less, tok::lessless,
  137. tok::lessequal, tok::lesslessequal,
  138. tok::greater, tok::greatergreater,
  139. tok::greaterequal, tok::greatergreaterequal,
  140. tok::caret, tok::caretequal,
  141. tok::pipe, tok::pipepipe,
  142. tok::pipeequal, tok::question,
  143. tok::semi, tok::equal,
  144. tok::equalequal, tok::comma};
  145. assert(std::is_sorted(ReturnVal.begin(), ReturnVal.end()));
  146. return ReturnVal;
  147. }();
  148. /// Determines the semantic type of a syntactic token, e.g. whether "<" is a
  149. /// template opener or binary operator.
  150. enum TokenType : uint8_t {
  151. #define TYPE(X) TT_##X,
  152. LIST_TOKEN_TYPES
  153. #undef TYPE
  154. NUM_TOKEN_TYPES
  155. };
  156. /// Determines the name of a token type.
  157. const char *getTokenTypeName(TokenType Type);
  158. // Represents what type of block a set of braces open.
  159. enum BraceBlockKind { BK_Unknown, BK_Block, BK_BracedInit };
  160. // The packing kind of a function's parameters.
  161. enum ParameterPackingKind { PPK_BinPacked, PPK_OnePerLine, PPK_Inconclusive };
  162. enum FormatDecision { FD_Unformatted, FD_Continue, FD_Break };
  163. /// Roles a token can take in a configured macro expansion.
  164. enum MacroRole {
  165. /// The token was expanded from a macro argument when formatting the expanded
  166. /// token sequence.
  167. MR_ExpandedArg,
  168. /// The token is part of a macro argument that was previously formatted as
  169. /// expansion when formatting the unexpanded macro call.
  170. MR_UnexpandedArg,
  171. /// The token was expanded from a macro definition, and is not visible as part
  172. /// of the macro call.
  173. MR_Hidden,
  174. };
  175. struct FormatToken;
  176. /// Contains information on the token's role in a macro expansion.
  177. ///
  178. /// Given the following definitions:
  179. /// A(X) = [ X ]
  180. /// B(X) = < X >
  181. /// C(X) = X
  182. ///
  183. /// Consider the macro call:
  184. /// A({B(C(C(x)))}) -> [{<x>}]
  185. ///
  186. /// In this case, the tokens of the unexpanded macro call will have the
  187. /// following relevant entries in their macro context (note that formatting
  188. /// the unexpanded macro call happens *after* formatting the expanded macro
  189. /// call):
  190. /// A( { B( C( C(x) ) ) } )
  191. /// Role: NN U NN NN NNUN N N U N (N=None, U=UnexpandedArg)
  192. ///
  193. /// [ { < x > } ]
  194. /// Role: H E H E H E H (H=Hidden, E=ExpandedArg)
  195. /// ExpandedFrom[0]: A A A A A A A
  196. /// ExpandedFrom[1]: B B B
  197. /// ExpandedFrom[2]: C
  198. /// ExpandedFrom[3]: C
  199. /// StartOfExpansion: 1 0 1 2 0 0 0
  200. /// EndOfExpansion: 0 0 0 2 1 0 1
  201. struct MacroExpansion {
  202. MacroExpansion(MacroRole Role) : Role(Role) {}
  203. /// The token's role in the macro expansion.
  204. /// When formatting an expanded macro, all tokens that are part of macro
  205. /// arguments will be MR_ExpandedArg, while all tokens that are not visible in
  206. /// the macro call will be MR_Hidden.
  207. /// When formatting an unexpanded macro call, all tokens that are part of
  208. /// macro arguments will be MR_UnexpandedArg.
  209. MacroRole Role;
  210. /// The stack of macro call identifier tokens this token was expanded from.
  211. llvm::SmallVector<FormatToken *, 1> ExpandedFrom;
  212. /// The number of expansions of which this macro is the first entry.
  213. unsigned StartOfExpansion = 0;
  214. /// The number of currently open expansions in \c ExpandedFrom this macro is
  215. /// the last token in.
  216. unsigned EndOfExpansion = 0;
  217. };
  218. class TokenRole;
  219. class AnnotatedLine;
  220. /// A wrapper around a \c Token storing information about the
  221. /// whitespace characters preceding it.
  222. struct FormatToken {
  223. FormatToken()
  224. : HasUnescapedNewline(false), IsMultiline(false), IsFirst(false),
  225. MustBreakBefore(false), IsUnterminatedLiteral(false),
  226. CanBreakBefore(false), ClosesTemplateDeclaration(false),
  227. StartsBinaryExpression(false), EndsBinaryExpression(false),
  228. PartOfMultiVariableDeclStmt(false), ContinuesLineCommentSection(false),
  229. Finalized(false), BlockKind(BK_Unknown), Decision(FD_Unformatted),
  230. PackingKind(PPK_Inconclusive), Type(TT_Unknown) {}
  231. /// The \c Token.
  232. Token Tok;
  233. /// The raw text of the token.
  234. ///
  235. /// Contains the raw token text without leading whitespace and without leading
  236. /// escaped newlines.
  237. StringRef TokenText;
  238. /// A token can have a special role that can carry extra information
  239. /// about the token's formatting.
  240. /// FIXME: Make FormatToken for parsing and AnnotatedToken two different
  241. /// classes and make this a unique_ptr in the AnnotatedToken class.
  242. std::shared_ptr<TokenRole> Role;
  243. /// The range of the whitespace immediately preceding the \c Token.
  244. SourceRange WhitespaceRange;
  245. /// Whether there is at least one unescaped newline before the \c
  246. /// Token.
  247. unsigned HasUnescapedNewline : 1;
  248. /// Whether the token text contains newlines (escaped or not).
  249. unsigned IsMultiline : 1;
  250. /// Indicates that this is the first token of the file.
  251. unsigned IsFirst : 1;
  252. /// Whether there must be a line break before this token.
  253. ///
  254. /// This happens for example when a preprocessor directive ended directly
  255. /// before the token.
  256. unsigned MustBreakBefore : 1;
  257. /// Set to \c true if this token is an unterminated literal.
  258. unsigned IsUnterminatedLiteral : 1;
  259. /// \c true if it is allowed to break before this token.
  260. unsigned CanBreakBefore : 1;
  261. /// \c true if this is the ">" of "template<..>".
  262. unsigned ClosesTemplateDeclaration : 1;
  263. /// \c true if this token starts a binary expression, i.e. has at least
  264. /// one fake l_paren with a precedence greater than prec::Unknown.
  265. unsigned StartsBinaryExpression : 1;
  266. /// \c true if this token ends a binary expression.
  267. unsigned EndsBinaryExpression : 1;
  268. /// Is this token part of a \c DeclStmt defining multiple variables?
  269. ///
  270. /// Only set if \c Type == \c TT_StartOfName.
  271. unsigned PartOfMultiVariableDeclStmt : 1;
  272. /// Does this line comment continue a line comment section?
  273. ///
  274. /// Only set to true if \c Type == \c TT_LineComment.
  275. unsigned ContinuesLineCommentSection : 1;
  276. /// If \c true, this token has been fully formatted (indented and
  277. /// potentially re-formatted inside), and we do not allow further formatting
  278. /// changes.
  279. unsigned Finalized : 1;
  280. private:
  281. /// Contains the kind of block if this token is a brace.
  282. unsigned BlockKind : 2;
  283. public:
  284. BraceBlockKind getBlockKind() const {
  285. return static_cast<BraceBlockKind>(BlockKind);
  286. }
  287. void setBlockKind(BraceBlockKind BBK) {
  288. BlockKind = BBK;
  289. assert(getBlockKind() == BBK && "BraceBlockKind overflow!");
  290. }
  291. private:
  292. /// Stores the formatting decision for the token once it was made.
  293. unsigned Decision : 2;
  294. public:
  295. FormatDecision getDecision() const {
  296. return static_cast<FormatDecision>(Decision);
  297. }
  298. void setDecision(FormatDecision D) {
  299. Decision = D;
  300. assert(getDecision() == D && "FormatDecision overflow!");
  301. }
  302. private:
  303. /// If this is an opening parenthesis, how are the parameters packed?
  304. unsigned PackingKind : 2;
  305. public:
  306. ParameterPackingKind getPackingKind() const {
  307. return static_cast<ParameterPackingKind>(PackingKind);
  308. }
  309. void setPackingKind(ParameterPackingKind K) {
  310. PackingKind = K;
  311. assert(getPackingKind() == K && "ParameterPackingKind overflow!");
  312. }
  313. private:
  314. TokenType Type;
  315. public:
  316. /// Returns the token's type, e.g. whether "<" is a template opener or
  317. /// binary operator.
  318. TokenType getType() const { return Type; }
  319. void setType(TokenType T) { Type = T; }
  320. /// The number of newlines immediately before the \c Token.
  321. ///
  322. /// This can be used to determine what the user wrote in the original code
  323. /// and thereby e.g. leave an empty line between two function definitions.
  324. unsigned NewlinesBefore = 0;
  325. /// The offset just past the last '\n' in this token's leading
  326. /// whitespace (relative to \c WhiteSpaceStart). 0 if there is no '\n'.
  327. unsigned LastNewlineOffset = 0;
  328. /// The width of the non-whitespace parts of the token (or its first
  329. /// line for multi-line tokens) in columns.
  330. /// We need this to correctly measure number of columns a token spans.
  331. unsigned ColumnWidth = 0;
  332. /// Contains the width in columns of the last line of a multi-line
  333. /// token.
  334. unsigned LastLineColumnWidth = 0;
  335. /// The number of spaces that should be inserted before this token.
  336. unsigned SpacesRequiredBefore = 0;
  337. /// Number of parameters, if this is "(", "[" or "<".
  338. unsigned ParameterCount = 0;
  339. /// Number of parameters that are nested blocks,
  340. /// if this is "(", "[" or "<".
  341. unsigned BlockParameterCount = 0;
  342. /// If this is a bracket ("<", "(", "[" or "{"), contains the kind of
  343. /// the surrounding bracket.
  344. tok::TokenKind ParentBracket = tok::unknown;
  345. /// The total length of the unwrapped line up to and including this
  346. /// token.
  347. unsigned TotalLength = 0;
  348. /// The original 0-based column of this token, including expanded tabs.
  349. /// The configured TabWidth is used as tab width.
  350. unsigned OriginalColumn = 0;
  351. /// The length of following tokens until the next natural split point,
  352. /// or the next token that can be broken.
  353. unsigned UnbreakableTailLength = 0;
  354. // FIXME: Come up with a 'cleaner' concept.
  355. /// The binding strength of a token. This is a combined value of
  356. /// operator precedence, parenthesis nesting, etc.
  357. unsigned BindingStrength = 0;
  358. /// The nesting level of this token, i.e. the number of surrounding (),
  359. /// [], {} or <>.
  360. unsigned NestingLevel = 0;
  361. /// The indent level of this token. Copied from the surrounding line.
  362. unsigned IndentLevel = 0;
  363. /// Penalty for inserting a line break before this token.
  364. unsigned SplitPenalty = 0;
  365. /// If this is the first ObjC selector name in an ObjC method
  366. /// definition or call, this contains the length of the longest name.
  367. ///
  368. /// This being set to 0 means that the selectors should not be colon-aligned,
  369. /// e.g. because several of them are block-type.
  370. unsigned LongestObjCSelectorName = 0;
  371. /// If this is the first ObjC selector name in an ObjC method
  372. /// definition or call, this contains the number of parts that the whole
  373. /// selector consist of.
  374. unsigned ObjCSelectorNameParts = 0;
  375. /// The 0-based index of the parameter/argument. For ObjC it is set
  376. /// for the selector name token.
  377. /// For now calculated only for ObjC.
  378. unsigned ParameterIndex = 0;
  379. /// Stores the number of required fake parentheses and the
  380. /// corresponding operator precedence.
  381. ///
  382. /// If multiple fake parentheses start at a token, this vector stores them in
  383. /// reverse order, i.e. inner fake parenthesis first.
  384. SmallVector<prec::Level, 4> FakeLParens;
  385. /// Insert this many fake ) after this token for correct indentation.
  386. unsigned FakeRParens = 0;
  387. /// If this is an operator (or "."/"->") in a sequence of operators
  388. /// with the same precedence, contains the 0-based operator index.
  389. unsigned OperatorIndex = 0;
  390. /// If this is an operator (or "."/"->") in a sequence of operators
  391. /// with the same precedence, points to the next operator.
  392. FormatToken *NextOperator = nullptr;
  393. /// If this is a bracket, this points to the matching one.
  394. FormatToken *MatchingParen = nullptr;
  395. /// The previous token in the unwrapped line.
  396. FormatToken *Previous = nullptr;
  397. /// The next token in the unwrapped line.
  398. FormatToken *Next = nullptr;
  399. /// The first token in set of column elements.
  400. bool StartsColumn = false;
  401. /// This notes the start of the line of an array initializer.
  402. bool ArrayInitializerLineStart = false;
  403. /// This starts an array initializer.
  404. bool IsArrayInitializer = false;
  405. /// Is optional and can be removed.
  406. bool Optional = false;
  407. /// If this token starts a block, this contains all the unwrapped lines
  408. /// in it.
  409. SmallVector<AnnotatedLine *, 1> Children;
  410. // Contains all attributes related to how this token takes part
  411. // in a configured macro expansion.
  412. llvm::Optional<MacroExpansion> MacroCtx;
  413. bool is(tok::TokenKind Kind) const { return Tok.is(Kind); }
  414. bool is(TokenType TT) const { return getType() == TT; }
  415. bool is(const IdentifierInfo *II) const {
  416. return II && II == Tok.getIdentifierInfo();
  417. }
  418. bool is(tok::PPKeywordKind Kind) const {
  419. return Tok.getIdentifierInfo() &&
  420. Tok.getIdentifierInfo()->getPPKeywordID() == Kind;
  421. }
  422. bool is(BraceBlockKind BBK) const { return getBlockKind() == BBK; }
  423. bool is(ParameterPackingKind PPK) const { return getPackingKind() == PPK; }
  424. template <typename A, typename B> bool isOneOf(A K1, B K2) const {
  425. return is(K1) || is(K2);
  426. }
  427. template <typename A, typename B, typename... Ts>
  428. bool isOneOf(A K1, B K2, Ts... Ks) const {
  429. return is(K1) || isOneOf(K2, Ks...);
  430. }
  431. template <typename T> bool isNot(T Kind) const { return !is(Kind); }
  432. bool isIf(bool AllowConstexprMacro = true) const {
  433. return is(tok::kw_if) || endsSequence(tok::kw_constexpr, tok::kw_if) ||
  434. (endsSequence(tok::identifier, tok::kw_if) && AllowConstexprMacro);
  435. }
  436. bool closesScopeAfterBlock() const {
  437. if (getBlockKind() == BK_Block)
  438. return true;
  439. if (closesScope())
  440. return Previous->closesScopeAfterBlock();
  441. return false;
  442. }
  443. /// \c true if this token starts a sequence with the given tokens in order,
  444. /// following the ``Next`` pointers, ignoring comments.
  445. template <typename A, typename... Ts>
  446. bool startsSequence(A K1, Ts... Tokens) const {
  447. return startsSequenceInternal(K1, Tokens...);
  448. }
  449. /// \c true if this token ends a sequence with the given tokens in order,
  450. /// following the ``Previous`` pointers, ignoring comments.
  451. /// For example, given tokens [T1, T2, T3], the function returns true if
  452. /// 3 tokens ending at this (ignoring comments) are [T3, T2, T1]. In other
  453. /// words, the tokens passed to this function need to the reverse of the
  454. /// order the tokens appear in code.
  455. template <typename A, typename... Ts>
  456. bool endsSequence(A K1, Ts... Tokens) const {
  457. return endsSequenceInternal(K1, Tokens...);
  458. }
  459. bool isStringLiteral() const { return tok::isStringLiteral(Tok.getKind()); }
  460. bool isObjCAtKeyword(tok::ObjCKeywordKind Kind) const {
  461. return Tok.isObjCAtKeyword(Kind);
  462. }
  463. bool isAccessSpecifier(bool ColonRequired = true) const {
  464. return isOneOf(tok::kw_public, tok::kw_protected, tok::kw_private) &&
  465. (!ColonRequired || (Next && Next->is(tok::colon)));
  466. }
  467. bool canBePointerOrReferenceQualifier() const {
  468. return isOneOf(tok::kw_const, tok::kw_restrict, tok::kw_volatile,
  469. tok::kw___attribute, tok::kw__Nonnull, tok::kw__Nullable,
  470. tok::kw__Null_unspecified, tok::kw___ptr32, tok::kw___ptr64,
  471. TT_AttributeMacro);
  472. }
  473. /// Determine whether the token is a simple-type-specifier.
  474. LLVM_NODISCARD bool isSimpleTypeSpecifier() const;
  475. LLVM_NODISCARD bool isTypeOrIdentifier() const;
  476. bool isObjCAccessSpecifier() const {
  477. return is(tok::at) && Next &&
  478. (Next->isObjCAtKeyword(tok::objc_public) ||
  479. Next->isObjCAtKeyword(tok::objc_protected) ||
  480. Next->isObjCAtKeyword(tok::objc_package) ||
  481. Next->isObjCAtKeyword(tok::objc_private));
  482. }
  483. /// Returns whether \p Tok is ([{ or an opening < of a template or in
  484. /// protos.
  485. bool opensScope() const {
  486. if (is(TT_TemplateString) && TokenText.endswith("${"))
  487. return true;
  488. if (is(TT_DictLiteral) && is(tok::less))
  489. return true;
  490. return isOneOf(tok::l_paren, tok::l_brace, tok::l_square,
  491. TT_TemplateOpener);
  492. }
  493. /// Returns whether \p Tok is )]} or a closing > of a template or in
  494. /// protos.
  495. bool closesScope() const {
  496. if (is(TT_TemplateString) && TokenText.startswith("}"))
  497. return true;
  498. if (is(TT_DictLiteral) && is(tok::greater))
  499. return true;
  500. return isOneOf(tok::r_paren, tok::r_brace, tok::r_square,
  501. TT_TemplateCloser);
  502. }
  503. /// Returns \c true if this is a "." or "->" accessing a member.
  504. bool isMemberAccess() const {
  505. return isOneOf(tok::arrow, tok::period, tok::arrowstar) &&
  506. !isOneOf(TT_DesignatedInitializerPeriod, TT_TrailingReturnArrow,
  507. TT_LambdaArrow, TT_LeadingJavaAnnotation);
  508. }
  509. bool isUnaryOperator() const {
  510. switch (Tok.getKind()) {
  511. case tok::plus:
  512. case tok::plusplus:
  513. case tok::minus:
  514. case tok::minusminus:
  515. case tok::exclaim:
  516. case tok::tilde:
  517. case tok::kw_sizeof:
  518. case tok::kw_alignof:
  519. return true;
  520. default:
  521. return false;
  522. }
  523. }
  524. bool isBinaryOperator() const {
  525. // Comma is a binary operator, but does not behave as such wrt. formatting.
  526. return getPrecedence() > prec::Comma;
  527. }
  528. bool isTrailingComment() const {
  529. return is(tok::comment) &&
  530. (is(TT_LineComment) || !Next || Next->NewlinesBefore > 0);
  531. }
  532. /// Returns \c true if this is a keyword that can be used
  533. /// like a function call (e.g. sizeof, typeid, ...).
  534. bool isFunctionLikeKeyword() const {
  535. switch (Tok.getKind()) {
  536. case tok::kw_throw:
  537. case tok::kw_typeid:
  538. case tok::kw_return:
  539. case tok::kw_sizeof:
  540. case tok::kw_alignof:
  541. case tok::kw_alignas:
  542. case tok::kw_decltype:
  543. case tok::kw_noexcept:
  544. case tok::kw_static_assert:
  545. case tok::kw__Atomic:
  546. case tok::kw___attribute:
  547. case tok::kw___underlying_type:
  548. case tok::kw_requires:
  549. return true;
  550. default:
  551. return false;
  552. }
  553. }
  554. /// Returns \c true if this is a string literal that's like a label,
  555. /// e.g. ends with "=" or ":".
  556. bool isLabelString() const {
  557. if (!is(tok::string_literal))
  558. return false;
  559. StringRef Content = TokenText;
  560. if (Content.startswith("\"") || Content.startswith("'"))
  561. Content = Content.drop_front(1);
  562. if (Content.endswith("\"") || Content.endswith("'"))
  563. Content = Content.drop_back(1);
  564. Content = Content.trim();
  565. return Content.size() > 1 &&
  566. (Content.back() == ':' || Content.back() == '=');
  567. }
  568. /// Returns actual token start location without leading escaped
  569. /// newlines and whitespace.
  570. ///
  571. /// This can be different to Tok.getLocation(), which includes leading escaped
  572. /// newlines.
  573. SourceLocation getStartOfNonWhitespace() const {
  574. return WhitespaceRange.getEnd();
  575. }
  576. /// Returns \c true if the range of whitespace immediately preceding the \c
  577. /// Token is not empty.
  578. bool hasWhitespaceBefore() const {
  579. return WhitespaceRange.getBegin() != WhitespaceRange.getEnd();
  580. }
  581. prec::Level getPrecedence() const {
  582. return getBinOpPrecedence(Tok.getKind(), /*GreaterThanIsOperator=*/true,
  583. /*CPlusPlus11=*/true);
  584. }
  585. /// Returns the previous token ignoring comments.
  586. FormatToken *getPreviousNonComment() const {
  587. FormatToken *Tok = Previous;
  588. while (Tok && Tok->is(tok::comment))
  589. Tok = Tok->Previous;
  590. return Tok;
  591. }
  592. /// Returns the next token ignoring comments.
  593. const FormatToken *getNextNonComment() const {
  594. const FormatToken *Tok = Next;
  595. while (Tok && Tok->is(tok::comment))
  596. Tok = Tok->Next;
  597. return Tok;
  598. }
  599. /// Returns \c true if this tokens starts a block-type list, i.e. a
  600. /// list that should be indented with a block indent.
  601. bool opensBlockOrBlockTypeList(const FormatStyle &Style) const {
  602. // C# Does not indent object initialisers as continuations.
  603. if (is(tok::l_brace) && getBlockKind() == BK_BracedInit && Style.isCSharp())
  604. return true;
  605. if (is(TT_TemplateString) && opensScope())
  606. return true;
  607. return is(TT_ArrayInitializerLSquare) || is(TT_ProtoExtensionLSquare) ||
  608. (is(tok::l_brace) &&
  609. (getBlockKind() == BK_Block || is(TT_DictLiteral) ||
  610. (!Style.Cpp11BracedListStyle && NestingLevel == 0))) ||
  611. (is(tok::less) && (Style.Language == FormatStyle::LK_Proto ||
  612. Style.Language == FormatStyle::LK_TextProto));
  613. }
  614. /// Returns whether the token is the left square bracket of a C++
  615. /// structured binding declaration.
  616. bool isCppStructuredBinding(const FormatStyle &Style) const {
  617. if (!Style.isCpp() || isNot(tok::l_square))
  618. return false;
  619. const FormatToken *T = this;
  620. do {
  621. T = T->getPreviousNonComment();
  622. } while (T && T->isOneOf(tok::kw_const, tok::kw_volatile, tok::amp,
  623. tok::ampamp));
  624. return T && T->is(tok::kw_auto);
  625. }
  626. /// Same as opensBlockOrBlockTypeList, but for the closing token.
  627. bool closesBlockOrBlockTypeList(const FormatStyle &Style) const {
  628. if (is(TT_TemplateString) && closesScope())
  629. return true;
  630. return MatchingParen && MatchingParen->opensBlockOrBlockTypeList(Style);
  631. }
  632. /// Return the actual namespace token, if this token starts a namespace
  633. /// block.
  634. const FormatToken *getNamespaceToken() const {
  635. const FormatToken *NamespaceTok = this;
  636. if (is(tok::comment))
  637. NamespaceTok = NamespaceTok->getNextNonComment();
  638. // Detect "(inline|export)? namespace" in the beginning of a line.
  639. if (NamespaceTok && NamespaceTok->isOneOf(tok::kw_inline, tok::kw_export))
  640. NamespaceTok = NamespaceTok->getNextNonComment();
  641. return NamespaceTok &&
  642. NamespaceTok->isOneOf(tok::kw_namespace, TT_NamespaceMacro)
  643. ? NamespaceTok
  644. : nullptr;
  645. }
  646. void copyFrom(const FormatToken &Tok) { *this = Tok; }
  647. private:
  648. // Only allow copying via the explicit copyFrom method.
  649. FormatToken(const FormatToken &) = delete;
  650. FormatToken &operator=(const FormatToken &) = default;
  651. template <typename A, typename... Ts>
  652. bool startsSequenceInternal(A K1, Ts... Tokens) const {
  653. if (is(tok::comment) && Next)
  654. return Next->startsSequenceInternal(K1, Tokens...);
  655. return is(K1) && Next && Next->startsSequenceInternal(Tokens...);
  656. }
  657. template <typename A> bool startsSequenceInternal(A K1) const {
  658. if (is(tok::comment) && Next)
  659. return Next->startsSequenceInternal(K1);
  660. return is(K1);
  661. }
  662. template <typename A, typename... Ts> bool endsSequenceInternal(A K1) const {
  663. if (is(tok::comment) && Previous)
  664. return Previous->endsSequenceInternal(K1);
  665. return is(K1);
  666. }
  667. template <typename A, typename... Ts>
  668. bool endsSequenceInternal(A K1, Ts... Tokens) const {
  669. if (is(tok::comment) && Previous)
  670. return Previous->endsSequenceInternal(K1, Tokens...);
  671. return is(K1) && Previous && Previous->endsSequenceInternal(Tokens...);
  672. }
  673. };
  674. class ContinuationIndenter;
  675. struct LineState;
  676. class TokenRole {
  677. public:
  678. TokenRole(const FormatStyle &Style) : Style(Style) {}
  679. virtual ~TokenRole();
  680. /// After the \c TokenAnnotator has finished annotating all the tokens,
  681. /// this function precomputes required information for formatting.
  682. virtual void precomputeFormattingInfos(const FormatToken *Token);
  683. /// Apply the special formatting that the given role demands.
  684. ///
  685. /// Assumes that the token having this role is already formatted.
  686. ///
  687. /// Continues formatting from \p State leaving indentation to \p Indenter and
  688. /// returns the total penalty that this formatting incurs.
  689. virtual unsigned formatFromToken(LineState &State,
  690. ContinuationIndenter *Indenter,
  691. bool DryRun) {
  692. return 0;
  693. }
  694. /// Same as \c formatFromToken, but assumes that the first token has
  695. /// already been set thereby deciding on the first line break.
  696. virtual unsigned formatAfterToken(LineState &State,
  697. ContinuationIndenter *Indenter,
  698. bool DryRun) {
  699. return 0;
  700. }
  701. /// Notifies the \c Role that a comma was found.
  702. virtual void CommaFound(const FormatToken *Token) {}
  703. virtual const FormatToken *lastComma() { return nullptr; }
  704. protected:
  705. const FormatStyle &Style;
  706. };
  707. class CommaSeparatedList : public TokenRole {
  708. public:
  709. CommaSeparatedList(const FormatStyle &Style)
  710. : TokenRole(Style), HasNestedBracedList(false) {}
  711. void precomputeFormattingInfos(const FormatToken *Token) override;
  712. unsigned formatAfterToken(LineState &State, ContinuationIndenter *Indenter,
  713. bool DryRun) override;
  714. unsigned formatFromToken(LineState &State, ContinuationIndenter *Indenter,
  715. bool DryRun) override;
  716. /// Adds \p Token as the next comma to the \c CommaSeparated list.
  717. void CommaFound(const FormatToken *Token) override {
  718. Commas.push_back(Token);
  719. }
  720. const FormatToken *lastComma() override {
  721. if (Commas.empty())
  722. return nullptr;
  723. return Commas.back();
  724. }
  725. private:
  726. /// A struct that holds information on how to format a given list with
  727. /// a specific number of columns.
  728. struct ColumnFormat {
  729. /// The number of columns to use.
  730. unsigned Columns;
  731. /// The total width in characters.
  732. unsigned TotalWidth;
  733. /// The number of lines required for this format.
  734. unsigned LineCount;
  735. /// The size of each column in characters.
  736. SmallVector<unsigned, 8> ColumnSizes;
  737. };
  738. /// Calculate which \c ColumnFormat fits best into
  739. /// \p RemainingCharacters.
  740. const ColumnFormat *getColumnFormat(unsigned RemainingCharacters) const;
  741. /// The ordered \c FormatTokens making up the commas of this list.
  742. SmallVector<const FormatToken *, 8> Commas;
  743. /// The length of each of the list's items in characters including the
  744. /// trailing comma.
  745. SmallVector<unsigned, 8> ItemLengths;
  746. /// Precomputed formats that can be used for this list.
  747. SmallVector<ColumnFormat, 4> Formats;
  748. bool HasNestedBracedList;
  749. };
  750. /// Encapsulates keywords that are context sensitive or for languages not
  751. /// properly supported by Clang's lexer.
  752. struct AdditionalKeywords {
  753. AdditionalKeywords(IdentifierTable &IdentTable) {
  754. kw_final = &IdentTable.get("final");
  755. kw_override = &IdentTable.get("override");
  756. kw_in = &IdentTable.get("in");
  757. kw_of = &IdentTable.get("of");
  758. kw_CF_CLOSED_ENUM = &IdentTable.get("CF_CLOSED_ENUM");
  759. kw_CF_ENUM = &IdentTable.get("CF_ENUM");
  760. kw_CF_OPTIONS = &IdentTable.get("CF_OPTIONS");
  761. kw_NS_CLOSED_ENUM = &IdentTable.get("NS_CLOSED_ENUM");
  762. kw_NS_ENUM = &IdentTable.get("NS_ENUM");
  763. kw_NS_OPTIONS = &IdentTable.get("NS_OPTIONS");
  764. kw_as = &IdentTable.get("as");
  765. kw_async = &IdentTable.get("async");
  766. kw_await = &IdentTable.get("await");
  767. kw_declare = &IdentTable.get("declare");
  768. kw_finally = &IdentTable.get("finally");
  769. kw_from = &IdentTable.get("from");
  770. kw_function = &IdentTable.get("function");
  771. kw_get = &IdentTable.get("get");
  772. kw_import = &IdentTable.get("import");
  773. kw_infer = &IdentTable.get("infer");
  774. kw_is = &IdentTable.get("is");
  775. kw_let = &IdentTable.get("let");
  776. kw_module = &IdentTable.get("module");
  777. kw_readonly = &IdentTable.get("readonly");
  778. kw_set = &IdentTable.get("set");
  779. kw_type = &IdentTable.get("type");
  780. kw_typeof = &IdentTable.get("typeof");
  781. kw_var = &IdentTable.get("var");
  782. kw_yield = &IdentTable.get("yield");
  783. kw_abstract = &IdentTable.get("abstract");
  784. kw_assert = &IdentTable.get("assert");
  785. kw_extends = &IdentTable.get("extends");
  786. kw_implements = &IdentTable.get("implements");
  787. kw_instanceof = &IdentTable.get("instanceof");
  788. kw_interface = &IdentTable.get("interface");
  789. kw_native = &IdentTable.get("native");
  790. kw_package = &IdentTable.get("package");
  791. kw_synchronized = &IdentTable.get("synchronized");
  792. kw_throws = &IdentTable.get("throws");
  793. kw___except = &IdentTable.get("__except");
  794. kw___has_include = &IdentTable.get("__has_include");
  795. kw___has_include_next = &IdentTable.get("__has_include_next");
  796. kw_mark = &IdentTable.get("mark");
  797. kw_extend = &IdentTable.get("extend");
  798. kw_option = &IdentTable.get("option");
  799. kw_optional = &IdentTable.get("optional");
  800. kw_repeated = &IdentTable.get("repeated");
  801. kw_required = &IdentTable.get("required");
  802. kw_returns = &IdentTable.get("returns");
  803. kw_signals = &IdentTable.get("signals");
  804. kw_qsignals = &IdentTable.get("Q_SIGNALS");
  805. kw_slots = &IdentTable.get("slots");
  806. kw_qslots = &IdentTable.get("Q_SLOTS");
  807. // C# keywords
  808. kw_dollar = &IdentTable.get("dollar");
  809. kw_base = &IdentTable.get("base");
  810. kw_byte = &IdentTable.get("byte");
  811. kw_checked = &IdentTable.get("checked");
  812. kw_decimal = &IdentTable.get("decimal");
  813. kw_delegate = &IdentTable.get("delegate");
  814. kw_event = &IdentTable.get("event");
  815. kw_fixed = &IdentTable.get("fixed");
  816. kw_foreach = &IdentTable.get("foreach");
  817. kw_implicit = &IdentTable.get("implicit");
  818. kw_internal = &IdentTable.get("internal");
  819. kw_lock = &IdentTable.get("lock");
  820. kw_null = &IdentTable.get("null");
  821. kw_object = &IdentTable.get("object");
  822. kw_out = &IdentTable.get("out");
  823. kw_params = &IdentTable.get("params");
  824. kw_ref = &IdentTable.get("ref");
  825. kw_string = &IdentTable.get("string");
  826. kw_stackalloc = &IdentTable.get("stackalloc");
  827. kw_sbyte = &IdentTable.get("sbyte");
  828. kw_sealed = &IdentTable.get("sealed");
  829. kw_uint = &IdentTable.get("uint");
  830. kw_ulong = &IdentTable.get("ulong");
  831. kw_unchecked = &IdentTable.get("unchecked");
  832. kw_unsafe = &IdentTable.get("unsafe");
  833. kw_ushort = &IdentTable.get("ushort");
  834. kw_when = &IdentTable.get("when");
  835. kw_where = &IdentTable.get("where");
  836. // Keep this at the end of the constructor to make sure everything here
  837. // is
  838. // already initialized.
  839. JsExtraKeywords = std::unordered_set<IdentifierInfo *>(
  840. {kw_as, kw_async, kw_await, kw_declare, kw_finally, kw_from,
  841. kw_function, kw_get, kw_import, kw_is, kw_let, kw_module, kw_override,
  842. kw_readonly, kw_set, kw_type, kw_typeof, kw_var, kw_yield,
  843. // Keywords from the Java section.
  844. kw_abstract, kw_extends, kw_implements, kw_instanceof, kw_interface});
  845. CSharpExtraKeywords = std::unordered_set<IdentifierInfo *>(
  846. {kw_base, kw_byte, kw_checked, kw_decimal, kw_delegate, kw_event,
  847. kw_fixed, kw_foreach, kw_implicit, kw_in, kw_interface, kw_internal,
  848. kw_is, kw_lock, kw_null, kw_object, kw_out, kw_override, kw_params,
  849. kw_readonly, kw_ref, kw_string, kw_stackalloc, kw_sbyte, kw_sealed,
  850. kw_uint, kw_ulong, kw_unchecked, kw_unsafe, kw_ushort, kw_when,
  851. kw_where,
  852. // Keywords from the JavaScript section.
  853. kw_as, kw_async, kw_await, kw_declare, kw_finally, kw_from,
  854. kw_function, kw_get, kw_import, kw_is, kw_let, kw_module, kw_readonly,
  855. kw_set, kw_type, kw_typeof, kw_var, kw_yield,
  856. // Keywords from the Java section.
  857. kw_abstract, kw_extends, kw_implements, kw_instanceof, kw_interface});
  858. }
  859. // Context sensitive keywords.
  860. IdentifierInfo *kw_final;
  861. IdentifierInfo *kw_override;
  862. IdentifierInfo *kw_in;
  863. IdentifierInfo *kw_of;
  864. IdentifierInfo *kw_CF_CLOSED_ENUM;
  865. IdentifierInfo *kw_CF_ENUM;
  866. IdentifierInfo *kw_CF_OPTIONS;
  867. IdentifierInfo *kw_NS_CLOSED_ENUM;
  868. IdentifierInfo *kw_NS_ENUM;
  869. IdentifierInfo *kw_NS_OPTIONS;
  870. IdentifierInfo *kw___except;
  871. IdentifierInfo *kw___has_include;
  872. IdentifierInfo *kw___has_include_next;
  873. // JavaScript keywords.
  874. IdentifierInfo *kw_as;
  875. IdentifierInfo *kw_async;
  876. IdentifierInfo *kw_await;
  877. IdentifierInfo *kw_declare;
  878. IdentifierInfo *kw_finally;
  879. IdentifierInfo *kw_from;
  880. IdentifierInfo *kw_function;
  881. IdentifierInfo *kw_get;
  882. IdentifierInfo *kw_import;
  883. IdentifierInfo *kw_infer;
  884. IdentifierInfo *kw_is;
  885. IdentifierInfo *kw_let;
  886. IdentifierInfo *kw_module;
  887. IdentifierInfo *kw_readonly;
  888. IdentifierInfo *kw_set;
  889. IdentifierInfo *kw_type;
  890. IdentifierInfo *kw_typeof;
  891. IdentifierInfo *kw_var;
  892. IdentifierInfo *kw_yield;
  893. // Java keywords.
  894. IdentifierInfo *kw_abstract;
  895. IdentifierInfo *kw_assert;
  896. IdentifierInfo *kw_extends;
  897. IdentifierInfo *kw_implements;
  898. IdentifierInfo *kw_instanceof;
  899. IdentifierInfo *kw_interface;
  900. IdentifierInfo *kw_native;
  901. IdentifierInfo *kw_package;
  902. IdentifierInfo *kw_synchronized;
  903. IdentifierInfo *kw_throws;
  904. // Pragma keywords.
  905. IdentifierInfo *kw_mark;
  906. // Proto keywords.
  907. IdentifierInfo *kw_extend;
  908. IdentifierInfo *kw_option;
  909. IdentifierInfo *kw_optional;
  910. IdentifierInfo *kw_repeated;
  911. IdentifierInfo *kw_required;
  912. IdentifierInfo *kw_returns;
  913. // QT keywords.
  914. IdentifierInfo *kw_signals;
  915. IdentifierInfo *kw_qsignals;
  916. IdentifierInfo *kw_slots;
  917. IdentifierInfo *kw_qslots;
  918. // C# keywords
  919. IdentifierInfo *kw_dollar;
  920. IdentifierInfo *kw_base;
  921. IdentifierInfo *kw_byte;
  922. IdentifierInfo *kw_checked;
  923. IdentifierInfo *kw_decimal;
  924. IdentifierInfo *kw_delegate;
  925. IdentifierInfo *kw_event;
  926. IdentifierInfo *kw_fixed;
  927. IdentifierInfo *kw_foreach;
  928. IdentifierInfo *kw_implicit;
  929. IdentifierInfo *kw_internal;
  930. IdentifierInfo *kw_lock;
  931. IdentifierInfo *kw_null;
  932. IdentifierInfo *kw_object;
  933. IdentifierInfo *kw_out;
  934. IdentifierInfo *kw_params;
  935. IdentifierInfo *kw_ref;
  936. IdentifierInfo *kw_string;
  937. IdentifierInfo *kw_stackalloc;
  938. IdentifierInfo *kw_sbyte;
  939. IdentifierInfo *kw_sealed;
  940. IdentifierInfo *kw_uint;
  941. IdentifierInfo *kw_ulong;
  942. IdentifierInfo *kw_unchecked;
  943. IdentifierInfo *kw_unsafe;
  944. IdentifierInfo *kw_ushort;
  945. IdentifierInfo *kw_when;
  946. IdentifierInfo *kw_where;
  947. /// Returns \c true if \p Tok is a true JavaScript identifier, returns
  948. /// \c false if it is a keyword or a pseudo keyword.
  949. /// If \c AcceptIdentifierName is true, returns true not only for keywords,
  950. // but also for IdentifierName tokens (aka pseudo-keywords), such as
  951. // ``yield``.
  952. bool IsJavaScriptIdentifier(const FormatToken &Tok,
  953. bool AcceptIdentifierName = true) const {
  954. // Based on the list of JavaScript & TypeScript keywords here:
  955. // https://github.com/microsoft/TypeScript/blob/main/src/compiler/scanner.ts#L74
  956. switch (Tok.Tok.getKind()) {
  957. case tok::kw_break:
  958. case tok::kw_case:
  959. case tok::kw_catch:
  960. case tok::kw_class:
  961. case tok::kw_continue:
  962. case tok::kw_const:
  963. case tok::kw_default:
  964. case tok::kw_delete:
  965. case tok::kw_do:
  966. case tok::kw_else:
  967. case tok::kw_enum:
  968. case tok::kw_export:
  969. case tok::kw_false:
  970. case tok::kw_for:
  971. case tok::kw_if:
  972. case tok::kw_import:
  973. case tok::kw_module:
  974. case tok::kw_new:
  975. case tok::kw_private:
  976. case tok::kw_protected:
  977. case tok::kw_public:
  978. case tok::kw_return:
  979. case tok::kw_static:
  980. case tok::kw_switch:
  981. case tok::kw_this:
  982. case tok::kw_throw:
  983. case tok::kw_true:
  984. case tok::kw_try:
  985. case tok::kw_typeof:
  986. case tok::kw_void:
  987. case tok::kw_while:
  988. // These are JS keywords that are lexed by LLVM/clang as keywords.
  989. return false;
  990. case tok::identifier: {
  991. // For identifiers, make sure they are true identifiers, excluding the
  992. // JavaScript pseudo-keywords (not lexed by LLVM/clang as keywords).
  993. bool IsPseudoKeyword =
  994. JsExtraKeywords.find(Tok.Tok.getIdentifierInfo()) !=
  995. JsExtraKeywords.end();
  996. return AcceptIdentifierName || !IsPseudoKeyword;
  997. }
  998. default:
  999. // Other keywords are handled in the switch below, to avoid problems due
  1000. // to duplicate case labels when using the #include trick.
  1001. break;
  1002. }
  1003. switch (Tok.Tok.getKind()) {
  1004. // Handle C++ keywords not included above: these are all JS identifiers.
  1005. #define KEYWORD(X, Y) case tok::kw_##X:
  1006. #include "clang/Basic/TokenKinds.def"
  1007. // #undef KEYWORD is not needed -- it's #undef-ed at the end of
  1008. // TokenKinds.def
  1009. return true;
  1010. default:
  1011. // All other tokens (punctuation etc) are not JS identifiers.
  1012. return false;
  1013. }
  1014. }
  1015. /// Returns \c true if \p Tok is a C# keyword, returns
  1016. /// \c false if it is a anything else.
  1017. bool isCSharpKeyword(const FormatToken &Tok) const {
  1018. switch (Tok.Tok.getKind()) {
  1019. case tok::kw_bool:
  1020. case tok::kw_break:
  1021. case tok::kw_case:
  1022. case tok::kw_catch:
  1023. case tok::kw_char:
  1024. case tok::kw_class:
  1025. case tok::kw_const:
  1026. case tok::kw_continue:
  1027. case tok::kw_default:
  1028. case tok::kw_do:
  1029. case tok::kw_double:
  1030. case tok::kw_else:
  1031. case tok::kw_enum:
  1032. case tok::kw_explicit:
  1033. case tok::kw_extern:
  1034. case tok::kw_false:
  1035. case tok::kw_float:
  1036. case tok::kw_for:
  1037. case tok::kw_goto:
  1038. case tok::kw_if:
  1039. case tok::kw_int:
  1040. case tok::kw_long:
  1041. case tok::kw_namespace:
  1042. case tok::kw_new:
  1043. case tok::kw_operator:
  1044. case tok::kw_private:
  1045. case tok::kw_protected:
  1046. case tok::kw_public:
  1047. case tok::kw_return:
  1048. case tok::kw_short:
  1049. case tok::kw_sizeof:
  1050. case tok::kw_static:
  1051. case tok::kw_struct:
  1052. case tok::kw_switch:
  1053. case tok::kw_this:
  1054. case tok::kw_throw:
  1055. case tok::kw_true:
  1056. case tok::kw_try:
  1057. case tok::kw_typeof:
  1058. case tok::kw_using:
  1059. case tok::kw_virtual:
  1060. case tok::kw_void:
  1061. case tok::kw_volatile:
  1062. case tok::kw_while:
  1063. return true;
  1064. default:
  1065. return Tok.is(tok::identifier) &&
  1066. CSharpExtraKeywords.find(Tok.Tok.getIdentifierInfo()) ==
  1067. CSharpExtraKeywords.end();
  1068. }
  1069. }
  1070. private:
  1071. /// The JavaScript keywords beyond the C++ keyword set.
  1072. std::unordered_set<IdentifierInfo *> JsExtraKeywords;
  1073. /// The C# keywords beyond the C++ keyword set
  1074. std::unordered_set<IdentifierInfo *> CSharpExtraKeywords;
  1075. };
  1076. } // namespace format
  1077. } // namespace clang
  1078. #endif