UnwrappedLineParser.cpp 113 KB


  1. //===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. ///
  9. /// \file
  10. /// This file contains the implementation of the UnwrappedLineParser,
  11. /// which turns a stream of tokens into UnwrappedLines.
  12. ///
  13. //===----------------------------------------------------------------------===//
  14. #include "UnwrappedLineParser.h"
  15. #include "FormatToken.h"
  16. #include "TokenAnnotator.h"
  17. #include "llvm/ADT/STLExtras.h"
  18. #include "llvm/Support/Debug.h"
  19. #include "llvm/Support/raw_ostream.h"
  20. #include <algorithm>
  21. #define DEBUG_TYPE "format-parser"
  22. namespace clang {
  23. namespace format {
  24. class FormatTokenSource {
  25. public:
  26. virtual ~FormatTokenSource() {}
  27. // Returns the next token in the token stream.
  28. virtual FormatToken *getNextToken() = 0;
  29. // Returns the token preceding the token returned by the last call to
  30. // getNextToken() in the token stream, or nullptr if no such token exists.
  31. virtual FormatToken *getPreviousToken() = 0;
  32. // Returns the token that would be returned by the next call to
  33. // getNextToken().
  34. virtual FormatToken *peekNextToken() = 0;
  35. // Returns whether we are at the end of the file.
  36. // This can be different from whether getNextToken() returned an eof token
  37. // when the FormatTokenSource is a view on a part of the token stream.
  38. virtual bool isEOF() = 0;
  39. // Gets the current position in the token stream, to be used by setPosition().
  40. virtual unsigned getPosition() = 0;
  41. // Resets the token stream to the state it was in when getPosition() returned
  42. // Position, and return the token at that position in the stream.
  43. virtual FormatToken *setPosition(unsigned Position) = 0;
  44. };
  45. namespace {
  46. class ScopedDeclarationState {
  47. public:
  48. ScopedDeclarationState(UnwrappedLine &Line, llvm::BitVector &Stack,
  49. bool MustBeDeclaration)
  50. : Line(Line), Stack(Stack) {
  51. Line.MustBeDeclaration = MustBeDeclaration;
  52. Stack.push_back(MustBeDeclaration);
  53. }
  54. ~ScopedDeclarationState() {
  55. Stack.pop_back();
  56. if (!Stack.empty())
  57. Line.MustBeDeclaration = Stack.back();
  58. else
  59. Line.MustBeDeclaration = true;
  60. }
  61. private:
  62. UnwrappedLine &Line;
  63. llvm::BitVector &Stack;
  64. };
  65. static bool isLineComment(const FormatToken &FormatTok) {
  66. return FormatTok.is(tok::comment) && !FormatTok.TokenText.startswith("/*");
  67. }
  68. // Checks if \p FormatTok is a line comment that continues the line comment
  69. // \p Previous. The original column of \p MinColumnToken is used to determine
  70. // whether \p FormatTok is indented enough to the right to continue \p Previous.
  71. static bool continuesLineComment(const FormatToken &FormatTok,
  72. const FormatToken *Previous,
  73. const FormatToken *MinColumnToken) {
  74. if (!Previous || !MinColumnToken)
  75. return false;
  76. unsigned MinContinueColumn =
  77. MinColumnToken->OriginalColumn + (isLineComment(*MinColumnToken) ? 0 : 1);
  78. return isLineComment(FormatTok) && FormatTok.NewlinesBefore == 1 &&
  79. isLineComment(*Previous) &&
  80. FormatTok.OriginalColumn >= MinContinueColumn;
  81. }
  82. class ScopedMacroState : public FormatTokenSource {
  83. public:
  84. ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource,
  85. FormatToken *&ResetToken)
  86. : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken),
  87. PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource),
  88. Token(nullptr), PreviousToken(nullptr) {
  89. FakeEOF.Tok.startToken();
  90. FakeEOF.Tok.setKind(tok::eof);
  91. TokenSource = this;
  92. Line.Level = 0;
  93. Line.InPPDirective = true;
  94. }
  95. ~ScopedMacroState() override {
  96. TokenSource = PreviousTokenSource;
  97. ResetToken = Token;
  98. Line.InPPDirective = false;
  99. Line.Level = PreviousLineLevel;
  100. }
  101. FormatToken *getNextToken() override {
  102. // The \c UnwrappedLineParser guards against this by never calling
  103. // \c getNextToken() after it has encountered the first eof token.
  104. assert(!eof());
  105. PreviousToken = Token;
  106. Token = PreviousTokenSource->getNextToken();
  107. if (eof())
  108. return &FakeEOF;
  109. return Token;
  110. }
  111. FormatToken *getPreviousToken() override {
  112. return PreviousTokenSource->getPreviousToken();
  113. }
  114. FormatToken *peekNextToken() override {
  115. if (eof())
  116. return &FakeEOF;
  117. return PreviousTokenSource->peekNextToken();
  118. }
  119. bool isEOF() override { return PreviousTokenSource->isEOF(); }
  120. unsigned getPosition() override { return PreviousTokenSource->getPosition(); }
  121. FormatToken *setPosition(unsigned Position) override {
  122. PreviousToken = nullptr;
  123. Token = PreviousTokenSource->setPosition(Position);
  124. return Token;
  125. }
  126. private:
  127. bool eof() {
  128. return Token && Token->HasUnescapedNewline &&
  129. !continuesLineComment(*Token, PreviousToken,
  130. /*MinColumnToken=*/PreviousToken);
  131. }
  132. FormatToken FakeEOF;
  133. UnwrappedLine &Line;
  134. FormatTokenSource *&TokenSource;
  135. FormatToken *&ResetToken;
  136. unsigned PreviousLineLevel;
  137. FormatTokenSource *PreviousTokenSource;
  138. FormatToken *Token;
  139. FormatToken *PreviousToken;
  140. };
  141. } // end anonymous namespace
  142. class ScopedLineState {
  143. public:
  144. ScopedLineState(UnwrappedLineParser &Parser,
  145. bool SwitchToPreprocessorLines = false)
  146. : Parser(Parser), OriginalLines(Parser.CurrentLines) {
  147. if (SwitchToPreprocessorLines)
  148. Parser.CurrentLines = &Parser.PreprocessorDirectives;
  149. else if (!Parser.Line->Tokens.empty())
  150. Parser.CurrentLines = &Parser.Line->Tokens.back().Children;
  151. PreBlockLine = std::move(Parser.Line);
  152. Parser.Line = std::make_unique<UnwrappedLine>();
  153. Parser.Line->Level = PreBlockLine->Level;
  154. Parser.Line->InPPDirective = PreBlockLine->InPPDirective;
  155. }
  156. ~ScopedLineState() {
  157. if (!Parser.Line->Tokens.empty()) {
  158. Parser.addUnwrappedLine();
  159. }
  160. assert(Parser.Line->Tokens.empty());
  161. Parser.Line = std::move(PreBlockLine);
  162. if (Parser.CurrentLines == &Parser.PreprocessorDirectives)
  163. Parser.MustBreakBeforeNextToken = true;
  164. Parser.CurrentLines = OriginalLines;
  165. }
  166. private:
  167. UnwrappedLineParser &Parser;
  168. std::unique_ptr<UnwrappedLine> PreBlockLine;
  169. SmallVectorImpl<UnwrappedLine> *OriginalLines;
  170. };
  171. class CompoundStatementIndenter {
  172. public:
  173. CompoundStatementIndenter(UnwrappedLineParser *Parser,
  174. const FormatStyle &Style, unsigned &LineLevel)
  175. : CompoundStatementIndenter(Parser, LineLevel,
  176. Style.BraceWrapping.AfterControlStatement,
  177. Style.BraceWrapping.IndentBraces) {}
  178. CompoundStatementIndenter(UnwrappedLineParser *Parser, unsigned &LineLevel,
  179. bool WrapBrace, bool IndentBrace)
  180. : LineLevel(LineLevel), OldLineLevel(LineLevel) {
  181. if (WrapBrace)
  182. Parser->addUnwrappedLine();
  183. if (IndentBrace)
  184. ++LineLevel;
  185. }
  186. ~CompoundStatementIndenter() { LineLevel = OldLineLevel; }
  187. private:
  188. unsigned &LineLevel;
  189. unsigned OldLineLevel;
  190. };
  191. namespace {
  192. class IndexedTokenSource : public FormatTokenSource {
  193. public:
  194. IndexedTokenSource(ArrayRef<FormatToken *> Tokens)
  195. : Tokens(Tokens), Position(-1) {}
  196. FormatToken *getNextToken() override {
  197. if (Position >= 0 && Tokens[Position]->is(tok::eof)) {
  198. LLVM_DEBUG({
  199. llvm::dbgs() << "Next ";
  200. dbgToken(Position);
  201. });
  202. return Tokens[Position];
  203. }
  204. ++Position;
  205. LLVM_DEBUG({
  206. llvm::dbgs() << "Next ";
  207. dbgToken(Position);
  208. });
  209. return Tokens[Position];
  210. }
  211. FormatToken *getPreviousToken() override {
  212. return Position > 0 ? Tokens[Position - 1] : nullptr;
  213. }
  214. FormatToken *peekNextToken() override {
  215. int Next = Position + 1;
  216. LLVM_DEBUG({
  217. llvm::dbgs() << "Peeking ";
  218. dbgToken(Next);
  219. });
  220. return Tokens[Next];
  221. }
  222. bool isEOF() override { return Tokens[Position]->is(tok::eof); }
  223. unsigned getPosition() override {
  224. LLVM_DEBUG(llvm::dbgs() << "Getting Position: " << Position << "\n");
  225. assert(Position >= 0);
  226. return Position;
  227. }
  228. FormatToken *setPosition(unsigned P) override {
  229. LLVM_DEBUG(llvm::dbgs() << "Setting Position: " << P << "\n");
  230. Position = P;
  231. return Tokens[Position];
  232. }
  233. void reset() { Position = -1; }
  234. private:
  235. void dbgToken(int Position, llvm::StringRef Indent = "") {
  236. FormatToken *Tok = Tokens[Position];
  237. llvm::dbgs() << Indent << "[" << Position
  238. << "] Token: " << Tok->Tok.getName() << " / " << Tok->TokenText
  239. << ", Macro: " << !!Tok->MacroCtx << "\n";
  240. }
  241. ArrayRef<FormatToken *> Tokens;
  242. int Position;
  243. };
  244. } // end anonymous namespace
  245. UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style,
  246. const AdditionalKeywords &Keywords,
  247. unsigned FirstStartColumn,
  248. ArrayRef<FormatToken *> Tokens,
  249. UnwrappedLineConsumer &Callback)
  250. : Line(new UnwrappedLine), MustBreakBeforeNextToken(false),
  251. CurrentLines(&Lines), Style(Style), Keywords(Keywords),
  252. CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr),
  253. Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1),
  254. IncludeGuard(Style.IndentPPDirectives == FormatStyle::PPDIS_None
  255. ? IG_Rejected
  256. : IG_Inited),
  257. IncludeGuardToken(nullptr), FirstStartColumn(FirstStartColumn) {}
  258. void UnwrappedLineParser::reset() {
  259. PPBranchLevel = -1;
  260. IncludeGuard = Style.IndentPPDirectives == FormatStyle::PPDIS_None
  261. ? IG_Rejected
  262. : IG_Inited;
  263. IncludeGuardToken = nullptr;
  264. Line.reset(new UnwrappedLine);
  265. CommentsBeforeNextToken.clear();
  266. FormatTok = nullptr;
  267. MustBreakBeforeNextToken = false;
  268. PreprocessorDirectives.clear();
  269. CurrentLines = &Lines;
  270. DeclarationScopeStack.clear();
  271. NestedTooDeep.clear();
  272. PPStack.clear();
  273. Line->FirstStartColumn = FirstStartColumn;
  274. }
  275. void UnwrappedLineParser::parse() {
  276. IndexedTokenSource TokenSource(AllTokens);
  277. Line->FirstStartColumn = FirstStartColumn;
  278. do {
  279. LLVM_DEBUG(llvm::dbgs() << "----\n");
  280. reset();
  281. Tokens = &TokenSource;
  282. TokenSource.reset();
  283. readToken();
  284. parseFile();
  285. // If we found an include guard then all preprocessor directives (other than
  286. // the guard) are over-indented by one.
  287. if (IncludeGuard == IG_Found)
  288. for (auto &Line : Lines)
  289. if (Line.InPPDirective && Line.Level > 0)
  290. --Line.Level;
  291. // Create line with eof token.
  292. pushToken(FormatTok);
  293. addUnwrappedLine();
  294. for (const UnwrappedLine &Line : Lines)
  295. Callback.consumeUnwrappedLine(Line);
  296. Callback.finishRun();
  297. Lines.clear();
  298. while (!PPLevelBranchIndex.empty() &&
  299. PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) {
  300. PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1);
  301. PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1);
  302. }
  303. if (!PPLevelBranchIndex.empty()) {
  304. ++PPLevelBranchIndex.back();
  305. assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size());
  306. assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back());
  307. }
  308. } while (!PPLevelBranchIndex.empty());
  309. }
  310. void UnwrappedLineParser::parseFile() {
  311. // The top-level context in a file always has declarations, except for pre-
  312. // processor directives and JavaScript files.
  313. bool MustBeDeclaration = !Line->InPPDirective && !Style.isJavaScript();
  314. ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
  315. MustBeDeclaration);
  316. if (Style.Language == FormatStyle::LK_TextProto)
  317. parseBracedList();
  318. else
  319. parseLevel(/*HasOpeningBrace=*/false);
  320. // Make sure to format the remaining tokens.
  321. //
  322. // LK_TextProto is special since its top-level is parsed as the body of a
  323. // braced list, which does not necessarily have natural line separators such
  324. // as a semicolon. Comments after the last entry that have been determined to
  325. // not belong to that line, as in:
  326. // key: value
  327. // // endfile comment
  328. // do not have a chance to be put on a line of their own until this point.
  329. // Here we add this newline before end-of-file comments.
  330. if (Style.Language == FormatStyle::LK_TextProto &&
  331. !CommentsBeforeNextToken.empty())
  332. addUnwrappedLine();
  333. flushComments(true);
  334. addUnwrappedLine();
  335. }
  336. void UnwrappedLineParser::parseCSharpGenericTypeConstraint() {
  337. do {
  338. switch (FormatTok->Tok.getKind()) {
  339. case tok::l_brace:
  340. return;
  341. default:
  342. if (FormatTok->is(Keywords.kw_where)) {
  343. addUnwrappedLine();
  344. nextToken();
  345. parseCSharpGenericTypeConstraint();
  346. break;
  347. }
  348. nextToken();
  349. break;
  350. }
  351. } while (!eof());
  352. }
  353. void UnwrappedLineParser::parseCSharpAttribute() {
  354. int UnpairedSquareBrackets = 1;
  355. do {
  356. switch (FormatTok->Tok.getKind()) {
  357. case tok::r_square:
  358. nextToken();
  359. --UnpairedSquareBrackets;
  360. if (UnpairedSquareBrackets == 0) {
  361. addUnwrappedLine();
  362. return;
  363. }
  364. break;
  365. case tok::l_square:
  366. ++UnpairedSquareBrackets;
  367. nextToken();
  368. break;
  369. default:
  370. nextToken();
  371. break;
  372. }
  373. } while (!eof());
  374. }
  375. bool UnwrappedLineParser::precededByCommentOrPPDirective() const {
  376. if (!Lines.empty() && Lines.back().InPPDirective)
  377. return true;
  378. const FormatToken *Previous = Tokens->getPreviousToken();
  379. return Previous && Previous->is(tok::comment) &&
  380. (Previous->IsMultiline || Previous->NewlinesBefore > 0);
  381. }
  382. bool UnwrappedLineParser::mightFitOnOneLine() const {
  383. const auto ColumnLimit = Style.ColumnLimit;
  384. if (ColumnLimit == 0)
  385. return true;
  386. if (Lines.empty())
  387. return true;
  388. const auto &PreviousLine = Lines.back();
  389. const auto &Tokens = PreviousLine.Tokens;
  390. assert(!Tokens.empty());
  391. const auto *LastToken = Tokens.back().Tok;
  392. assert(LastToken);
  393. if (!LastToken->isOneOf(tok::semi, tok::comment))
  394. return true;
  395. AnnotatedLine Line(PreviousLine);
  396. assert(Line.Last == LastToken);
  397. TokenAnnotator Annotator(Style, Keywords);
  398. Annotator.annotate(Line);
  399. Annotator.calculateFormattingInformation(Line);
  400. return Line.Level * Style.IndentWidth + LastToken->TotalLength <= ColumnLimit;
  401. }
  402. // Returns true if a simple block, or false otherwise. (A simple block has a
  403. // single statement that fits on a single line.)
  404. bool UnwrappedLineParser::parseLevel(bool HasOpeningBrace, IfStmtKind *IfKind) {
  405. const bool IsPrecededByCommentOrPPDirective =
  406. !Style.RemoveBracesLLVM || precededByCommentOrPPDirective();
  407. unsigned StatementCount = 0;
  408. bool SwitchLabelEncountered = false;
  409. do {
  410. tok::TokenKind kind = FormatTok->Tok.getKind();
  411. if (FormatTok->getType() == TT_MacroBlockBegin) {
  412. kind = tok::l_brace;
  413. } else if (FormatTok->getType() == TT_MacroBlockEnd) {
  414. kind = tok::r_brace;
  415. }
  416. switch (kind) {
  417. case tok::comment:
  418. nextToken();
  419. addUnwrappedLine();
  420. break;
  421. case tok::l_brace:
  422. // FIXME: Add parameter whether this can happen - if this happens, we must
  423. // be in a non-declaration context.
  424. if (!FormatTok->is(TT_MacroBlockBegin) && tryToParseBracedList())
  425. continue;
  426. parseBlock();
  427. ++StatementCount;
  428. assert(StatementCount > 0 && "StatementCount overflow!");
  429. addUnwrappedLine();
  430. break;
  431. case tok::r_brace:
  432. if (HasOpeningBrace) {
  433. if (!Style.RemoveBracesLLVM)
  434. return false;
  435. if (FormatTok->isNot(tok::r_brace) || StatementCount != 1 ||
  436. IsPrecededByCommentOrPPDirective ||
  437. precededByCommentOrPPDirective()) {
  438. return false;
  439. }
  440. const FormatToken *Next = Tokens->peekNextToken();
  441. if (Next->is(tok::comment) && Next->NewlinesBefore == 0)
  442. return false;
  443. return mightFitOnOneLine();
  444. }
  445. nextToken();
  446. addUnwrappedLine();
  447. break;
  448. case tok::kw_default: {
  449. unsigned StoredPosition = Tokens->getPosition();
  450. FormatToken *Next;
  451. do {
  452. Next = Tokens->getNextToken();
  453. } while (Next->is(tok::comment));
  454. FormatTok = Tokens->setPosition(StoredPosition);
  455. if (Next && Next->isNot(tok::colon)) {
  456. // default not followed by ':' is not a case label; treat it like
  457. // an identifier.
  458. parseStructuralElement();
  459. break;
  460. }
  461. // Else, if it is 'default:', fall through to the case handling.
  462. LLVM_FALLTHROUGH;
  463. }
  464. case tok::kw_case:
  465. if (Style.isJavaScript() && Line->MustBeDeclaration) {
  466. // A 'case: string' style field declaration.
  467. parseStructuralElement();
  468. break;
  469. }
  470. if (!SwitchLabelEncountered &&
  471. (Style.IndentCaseLabels || (Line->InPPDirective && Line->Level == 1)))
  472. ++Line->Level;
  473. SwitchLabelEncountered = true;
  474. parseStructuralElement();
  475. break;
  476. case tok::l_square:
  477. if (Style.isCSharp()) {
  478. nextToken();
  479. parseCSharpAttribute();
  480. break;
  481. }
  482. LLVM_FALLTHROUGH;
  483. default:
  484. parseStructuralElement(IfKind, !HasOpeningBrace);
  485. ++StatementCount;
  486. assert(StatementCount > 0 && "StatementCount overflow!");
  487. break;
  488. }
  489. } while (!eof());
  490. return false;
  491. }
  492. void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) {
  493. // We'll parse forward through the tokens until we hit
  494. // a closing brace or eof - note that getNextToken() will
  495. // parse macros, so this will magically work inside macro
  496. // definitions, too.
  497. unsigned StoredPosition = Tokens->getPosition();
  498. FormatToken *Tok = FormatTok;
  499. const FormatToken *PrevTok = Tok->Previous;
  500. // Keep a stack of positions of lbrace tokens. We will
  501. // update information about whether an lbrace starts a
  502. // braced init list or a different block during the loop.
  503. SmallVector<FormatToken *, 8> LBraceStack;
  504. assert(Tok->Tok.is(tok::l_brace));
  505. do {
  506. // Get next non-comment token.
  507. FormatToken *NextTok;
  508. unsigned ReadTokens = 0;
  509. do {
  510. NextTok = Tokens->getNextToken();
  511. ++ReadTokens;
  512. } while (NextTok->is(tok::comment));
  513. switch (Tok->Tok.getKind()) {
  514. case tok::l_brace:
  515. if (Style.isJavaScript() && PrevTok) {
  516. if (PrevTok->isOneOf(tok::colon, tok::less))
  517. // A ':' indicates this code is in a type, or a braced list
  518. // following a label in an object literal ({a: {b: 1}}).
  519. // A '<' could be an object used in a comparison, but that is nonsense
  520. // code (can never return true), so more likely it is a generic type
  521. // argument (`X<{a: string; b: number}>`).
  522. // The code below could be confused by semicolons between the
  523. // individual members in a type member list, which would normally
  524. // trigger BK_Block. In both cases, this must be parsed as an inline
  525. // braced init.
  526. Tok->setBlockKind(BK_BracedInit);
  527. else if (PrevTok->is(tok::r_paren))
  528. // `) { }` can only occur in function or method declarations in JS.
  529. Tok->setBlockKind(BK_Block);
  530. } else {
  531. Tok->setBlockKind(BK_Unknown);
  532. }
  533. LBraceStack.push_back(Tok);
  534. break;
  535. case tok::r_brace:
  536. if (LBraceStack.empty())
  537. break;
  538. if (LBraceStack.back()->is(BK_Unknown)) {
  539. bool ProbablyBracedList = false;
  540. if (Style.Language == FormatStyle::LK_Proto) {
  541. ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square);
  542. } else {
  543. // Skip NextTok over preprocessor lines, otherwise we may not
  544. // properly diagnose the block as a braced intializer
  545. // if the comma separator appears after the pp directive.
  546. while (NextTok->is(tok::hash)) {
  547. ScopedMacroState MacroState(*Line, Tokens, NextTok);
  548. do {
  549. NextTok = Tokens->getNextToken();
  550. ++ReadTokens;
  551. } while (NextTok->isNot(tok::eof));
  552. }
  553. // Using OriginalColumn to distinguish between ObjC methods and
  554. // binary operators is a bit hacky.
  555. bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) &&
  556. NextTok->OriginalColumn == 0;
  557. // If there is a comma, semicolon or right paren after the closing
  558. // brace, we assume this is a braced initializer list. Note that
  559. // regardless how we mark inner braces here, we will overwrite the
  560. // BlockKind later if we parse a braced list (where all blocks
  561. // inside are by default braced lists), or when we explicitly detect
  562. // blocks (for example while parsing lambdas).
  563. // FIXME: Some of these do not apply to JS, e.g. "} {" can never be a
  564. // braced list in JS.
  565. ProbablyBracedList =
  566. (Style.isJavaScript() &&
  567. NextTok->isOneOf(Keywords.kw_of, Keywords.kw_in,
  568. Keywords.kw_as)) ||
  569. (Style.isCpp() && NextTok->is(tok::l_paren)) ||
  570. NextTok->isOneOf(tok::comma, tok::period, tok::colon,
  571. tok::r_paren, tok::r_square, tok::l_brace,
  572. tok::ellipsis) ||
  573. (NextTok->is(tok::identifier) &&
  574. !PrevTok->isOneOf(tok::semi, tok::r_brace, tok::l_brace)) ||
  575. (NextTok->is(tok::semi) &&
  576. (!ExpectClassBody || LBraceStack.size() != 1)) ||
  577. (NextTok->isBinaryOperator() && !NextIsObjCMethod);
  578. if (!Style.isCSharp() && NextTok->is(tok::l_square)) {
  579. // We can have an array subscript after a braced init
  580. // list, but C++11 attributes are expected after blocks.
  581. NextTok = Tokens->getNextToken();
  582. ++ReadTokens;
  583. ProbablyBracedList = NextTok->isNot(tok::l_square);
  584. }
  585. }
  586. if (ProbablyBracedList) {
  587. Tok->setBlockKind(BK_BracedInit);
  588. LBraceStack.back()->setBlockKind(BK_BracedInit);
  589. } else {
  590. Tok->setBlockKind(BK_Block);
  591. LBraceStack.back()->setBlockKind(BK_Block);
  592. }
  593. }
  594. LBraceStack.pop_back();
  595. break;
  596. case tok::identifier:
  597. if (!Tok->is(TT_StatementMacro))
  598. break;
  599. LLVM_FALLTHROUGH;
  600. case tok::at:
  601. case tok::semi:
  602. case tok::kw_if:
  603. case tok::kw_while:
  604. case tok::kw_for:
  605. case tok::kw_switch:
  606. case tok::kw_try:
  607. case tok::kw___try:
  608. if (!LBraceStack.empty() && LBraceStack.back()->is(BK_Unknown))
  609. LBraceStack.back()->setBlockKind(BK_Block);
  610. break;
  611. default:
  612. break;
  613. }
  614. PrevTok = Tok;
  615. Tok = NextTok;
  616. } while (Tok->Tok.isNot(tok::eof) && !LBraceStack.empty());
  617. // Assume other blocks for all unclosed opening braces.
  618. for (FormatToken *LBrace : LBraceStack) {
  619. if (LBrace->is(BK_Unknown))
  620. LBrace->setBlockKind(BK_Block);
  621. }
  622. FormatTok = Tokens->setPosition(StoredPosition);
  623. }
  // Mixes std::hash<T> of v into seed. The value added to the hash is built
  // from the constant 0x9e3779b9 and two shifted copies of the old seed, and
  // the sum is XOR-ed into seed.
  template <class T>
  static inline void hash_combine(std::size_t &seed, const T &v) {
    const std::size_t hashed = std::hash<T>{}(v);
    const std::size_t mixed = hashed + 0x9e3779b9 + (seed << 6) + (seed >> 2);
    seed ^= mixed;
  }
  629. size_t UnwrappedLineParser::computePPHash() const {
  630. size_t h = 0;
  631. for (const auto &i : PPStack) {
  632. hash_combine(h, size_t(i.Kind));
  633. hash_combine(h, i.Line);
  634. }
  635. return h;
  636. }
  637. UnwrappedLineParser::IfStmtKind
  638. UnwrappedLineParser::parseBlock(bool MustBeDeclaration, unsigned AddLevels,
  639. bool MunchSemi,
  640. bool UnindentWhitesmithsBraces) {
  641. assert(FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) &&
  642. "'{' or macro block token expected");
  643. FormatToken *Tok = FormatTok;
  644. const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin);
  645. FormatTok->setBlockKind(BK_Block);
  646. // For Whitesmiths mode, jump to the next level prior to skipping over the
  647. // braces.
  648. if (AddLevels > 0 && Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths)
  649. ++Line->Level;
  650. size_t PPStartHash = computePPHash();
  651. unsigned InitialLevel = Line->Level;
  652. nextToken(/*LevelDifference=*/AddLevels);
  653. if (MacroBlock && FormatTok->is(tok::l_paren))
  654. parseParens();
  655. size_t NbPreprocessorDirectives =
  656. CurrentLines == &Lines ? PreprocessorDirectives.size() : 0;
  657. addUnwrappedLine();
  658. size_t OpeningLineIndex =
  659. CurrentLines->empty()
  660. ? (UnwrappedLine::kInvalidIndex)
  661. : (CurrentLines->size() - 1 - NbPreprocessorDirectives);
  662. // Whitesmiths is weird here. The brace needs to be indented for the namespace
  663. // block, but the block itself may not be indented depending on the style
  664. // settings. This allows the format to back up one level in those cases.
  665. if (UnindentWhitesmithsBraces)
  666. --Line->Level;
  667. ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
  668. MustBeDeclaration);
  669. if (AddLevels > 0u && Style.BreakBeforeBraces != FormatStyle::BS_Whitesmiths)
  670. Line->Level += AddLevels;
  671. IfStmtKind IfKind = IfStmtKind::NotIf;
  672. const bool SimpleBlock = parseLevel(/*HasOpeningBrace=*/true, &IfKind);
  673. if (eof())
  674. return IfKind;
  675. if (MacroBlock ? !FormatTok->is(TT_MacroBlockEnd)
  676. : !FormatTok->is(tok::r_brace)) {
  677. Line->Level = InitialLevel;
  678. FormatTok->setBlockKind(BK_Block);
  679. return IfKind;
  680. }
  681. if (SimpleBlock && Tok->is(tok::l_brace)) {
  682. assert(FormatTok->is(tok::r_brace));
  683. const FormatToken *Previous = Tokens->getPreviousToken();
  684. assert(Previous);
  685. if (Previous->isNot(tok::r_brace) || Previous->Optional) {
  686. Tok->MatchingParen = FormatTok;
  687. FormatTok->MatchingParen = Tok;
  688. }
  689. }
  690. size_t PPEndHash = computePPHash();
  691. // Munch the closing brace.
  692. nextToken(/*LevelDifference=*/-AddLevels);
  693. if (MacroBlock && FormatTok->is(tok::l_paren))
  694. parseParens();
  695. if (FormatTok->is(tok::arrow)) {
  696. // Following the } we can find a trailing return type arrow
  697. // as part of an implicit conversion constraint.
  698. nextToken();
  699. parseStructuralElement();
  700. }
  701. if (MunchSemi && FormatTok->Tok.is(tok::semi))
  702. nextToken();
  703. Line->Level = InitialLevel;
  704. if (PPStartHash == PPEndHash) {
  705. Line->MatchingOpeningBlockLineIndex = OpeningLineIndex;
  706. if (OpeningLineIndex != UnwrappedLine::kInvalidIndex) {
  707. // Update the opening line to add the forward reference as well
  708. (*CurrentLines)[OpeningLineIndex].MatchingClosingBlockLineIndex =
  709. CurrentLines->size() - 1;
  710. }
  711. }
  712. return IfKind;
  713. }
  714. static bool isGoogScope(const UnwrappedLine &Line) {
  715. // FIXME: Closure-library specific stuff should not be hard-coded but be
  716. // configurable.
  717. if (Line.Tokens.size() < 4)
  718. return false;
  719. auto I = Line.Tokens.begin();
  720. if (I->Tok->TokenText != "goog")
  721. return false;
  722. ++I;
  723. if (I->Tok->isNot(tok::period))
  724. return false;
  725. ++I;
  726. if (I->Tok->TokenText != "scope")
  727. return false;
  728. ++I;
  729. return I->Tok->is(tok::l_paren);
  730. }
  731. static bool isIIFE(const UnwrappedLine &Line,
  732. const AdditionalKeywords &Keywords) {
  733. // Look for the start of an immediately invoked anonymous function.
  734. // https://en.wikipedia.org/wiki/Immediately-invoked_function_expression
  735. // This is commonly done in JavaScript to create a new, anonymous scope.
  736. // Example: (function() { ... })()
  737. if (Line.Tokens.size() < 3)
  738. return false;
  739. auto I = Line.Tokens.begin();
  740. if (I->Tok->isNot(tok::l_paren))
  741. return false;
  742. ++I;
  743. if (I->Tok->isNot(Keywords.kw_function))
  744. return false;
  745. ++I;
  746. return I->Tok->is(tok::l_paren);
  747. }
  748. static bool ShouldBreakBeforeBrace(const FormatStyle &Style,
  749. const FormatToken &InitialToken) {
  750. if (InitialToken.isOneOf(tok::kw_namespace, TT_NamespaceMacro))
  751. return Style.BraceWrapping.AfterNamespace;
  752. if (InitialToken.is(tok::kw_class))
  753. return Style.BraceWrapping.AfterClass;
  754. if (InitialToken.is(tok::kw_union))
  755. return Style.BraceWrapping.AfterUnion;
  756. if (InitialToken.is(tok::kw_struct))
  757. return Style.BraceWrapping.AfterStruct;
  758. if (InitialToken.is(tok::kw_enum))
  759. return Style.BraceWrapping.AfterEnum;
  760. return false;
  761. }
  762. void UnwrappedLineParser::parseChildBlock() {
  763. FormatTok->setBlockKind(BK_Block);
  764. nextToken();
  765. {
  766. bool SkipIndent = (Style.isJavaScript() &&
  767. (isGoogScope(*Line) || isIIFE(*Line, Keywords)));
  768. ScopedLineState LineState(*this);
  769. ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
  770. /*MustBeDeclaration=*/false);
  771. Line->Level += SkipIndent ? 0 : 1;
  772. parseLevel(/*HasOpeningBrace=*/true);
  773. flushComments(isOnNewLine(*FormatTok));
  774. Line->Level -= SkipIndent ? 0 : 1;
  775. }
  776. nextToken();
  777. }
  778. void UnwrappedLineParser::parsePPDirective() {
  779. assert(FormatTok->Tok.is(tok::hash) && "'#' expected");
  780. ScopedMacroState MacroState(*Line, Tokens, FormatTok);
  781. nextToken();
  782. if (!FormatTok->Tok.getIdentifierInfo()) {
  783. parsePPUnknown();
  784. return;
  785. }
  786. switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) {
  787. case tok::pp_define:
  788. parsePPDefine();
  789. return;
  790. case tok::pp_if:
  791. parsePPIf(/*IfDef=*/false);
  792. break;
  793. case tok::pp_ifdef:
  794. case tok::pp_ifndef:
  795. parsePPIf(/*IfDef=*/true);
  796. break;
  797. case tok::pp_else:
  798. parsePPElse();
  799. break;
  800. case tok::pp_elifdef:
  801. case tok::pp_elifndef:
  802. case tok::pp_elif:
  803. parsePPElIf();
  804. break;
  805. case tok::pp_endif:
  806. parsePPEndIf();
  807. break;
  808. default:
  809. parsePPUnknown();
  810. break;
  811. }
  812. }
  813. void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) {
  814. size_t Line = CurrentLines->size();
  815. if (CurrentLines == &PreprocessorDirectives)
  816. Line += Lines.size();
  817. if (Unreachable ||
  818. (!PPStack.empty() && PPStack.back().Kind == PP_Unreachable))
  819. PPStack.push_back({PP_Unreachable, Line});
  820. else
  821. PPStack.push_back({PP_Conditional, Line});
  822. }
  823. void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) {
  824. ++PPBranchLevel;
  825. assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size());
  826. if (PPBranchLevel == (int)PPLevelBranchIndex.size()) {
  827. PPLevelBranchIndex.push_back(0);
  828. PPLevelBranchCount.push_back(0);
  829. }
  830. PPChainBranchIndex.push(0);
  831. bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0;
  832. conditionalCompilationCondition(Unreachable || Skip);
  833. }
  834. void UnwrappedLineParser::conditionalCompilationAlternative() {
  835. if (!PPStack.empty())
  836. PPStack.pop_back();
  837. assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
  838. if (!PPChainBranchIndex.empty())
  839. ++PPChainBranchIndex.top();
  840. conditionalCompilationCondition(
  841. PPBranchLevel >= 0 && !PPChainBranchIndex.empty() &&
  842. PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top());
  843. }
  844. void UnwrappedLineParser::conditionalCompilationEnd() {
  845. assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
  846. if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) {
  847. if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel]) {
  848. PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1;
  849. }
  850. }
  851. // Guard against #endif's without #if.
  852. if (PPBranchLevel > -1)
  853. --PPBranchLevel;
  854. if (!PPChainBranchIndex.empty())
  855. PPChainBranchIndex.pop();
  856. if (!PPStack.empty())
  857. PPStack.pop_back();
  858. }
  859. void UnwrappedLineParser::parsePPIf(bool IfDef) {
  860. bool IfNDef = FormatTok->is(tok::pp_ifndef);
  861. nextToken();
  862. bool Unreachable = false;
  863. if (!IfDef && (FormatTok->is(tok::kw_false) || FormatTok->TokenText == "0"))
  864. Unreachable = true;
  865. if (IfDef && !IfNDef && FormatTok->TokenText == "SWIG")
  866. Unreachable = true;
  867. conditionalCompilationStart(Unreachable);
  868. FormatToken *IfCondition = FormatTok;
  869. // If there's a #ifndef on the first line, and the only lines before it are
  870. // comments, it could be an include guard.
  871. bool MaybeIncludeGuard = IfNDef;
  872. if (IncludeGuard == IG_Inited && MaybeIncludeGuard)
  873. for (auto &Line : Lines) {
  874. if (!Line.Tokens.front().Tok->is(tok::comment)) {
  875. MaybeIncludeGuard = false;
  876. IncludeGuard = IG_Rejected;
  877. break;
  878. }
  879. }
  880. --PPBranchLevel;
  881. parsePPUnknown();
  882. ++PPBranchLevel;
  883. if (IncludeGuard == IG_Inited && MaybeIncludeGuard) {
  884. IncludeGuard = IG_IfNdefed;
  885. IncludeGuardToken = IfCondition;
  886. }
  887. }
  888. void UnwrappedLineParser::parsePPElse() {
  889. // If a potential include guard has an #else, it's not an include guard.
  890. if (IncludeGuard == IG_Defined && PPBranchLevel == 0)
  891. IncludeGuard = IG_Rejected;
  892. conditionalCompilationAlternative();
  893. if (PPBranchLevel > -1)
  894. --PPBranchLevel;
  895. parsePPUnknown();
  896. ++PPBranchLevel;
  897. }
  898. void UnwrappedLineParser::parsePPElIf() { parsePPElse(); }
  899. void UnwrappedLineParser::parsePPEndIf() {
  900. conditionalCompilationEnd();
  901. parsePPUnknown();
  902. // If the #endif of a potential include guard is the last thing in the file,
  903. // then we found an include guard.
  904. if (IncludeGuard == IG_Defined && PPBranchLevel == -1 && Tokens->isEOF() &&
  905. Style.IndentPPDirectives != FormatStyle::PPDIS_None)
  906. IncludeGuard = IG_Found;
  907. }
  908. void UnwrappedLineParser::parsePPDefine() {
  909. nextToken();
  910. if (!FormatTok->Tok.getIdentifierInfo()) {
  911. IncludeGuard = IG_Rejected;
  912. IncludeGuardToken = nullptr;
  913. parsePPUnknown();
  914. return;
  915. }
  916. if (IncludeGuard == IG_IfNdefed &&
  917. IncludeGuardToken->TokenText == FormatTok->TokenText) {
  918. IncludeGuard = IG_Defined;
  919. IncludeGuardToken = nullptr;
  920. for (auto &Line : Lines) {
  921. if (!Line.Tokens.front().Tok->isOneOf(tok::comment, tok::hash)) {
  922. IncludeGuard = IG_Rejected;
  923. break;
  924. }
  925. }
  926. }
  927. nextToken();
  928. if (FormatTok->Tok.getKind() == tok::l_paren &&
  929. !FormatTok->hasWhitespaceBefore()) {
  930. parseParens();
  931. }
  932. if (Style.IndentPPDirectives != FormatStyle::PPDIS_None)
  933. Line->Level += PPBranchLevel + 1;
  934. addUnwrappedLine();
  935. ++Line->Level;
  936. // Errors during a preprocessor directive can only affect the layout of the
  937. // preprocessor directive, and thus we ignore them. An alternative approach
  938. // would be to use the same approach we use on the file level (no
  939. // re-indentation if there was a structural error) within the macro
  940. // definition.
  941. parseFile();
  942. }
  943. void UnwrappedLineParser::parsePPUnknown() {
  944. do {
  945. nextToken();
  946. } while (!eof());
  947. if (Style.IndentPPDirectives != FormatStyle::PPDIS_None)
  948. Line->Level += PPBranchLevel + 1;
  949. addUnwrappedLine();
  950. }
  951. // Here we exclude certain tokens that are not usually the first token in an
  952. // unwrapped line. This is used in attempt to distinguish macro calls without
  953. // trailing semicolons from other constructs split to several lines.
  954. static bool tokenCanStartNewLine(const FormatToken &Tok) {
  955. // Semicolon can be a null-statement, l_square can be a start of a macro or
  956. // a C++11 attribute, but this doesn't seem to be common.
  957. return Tok.isNot(tok::semi) && Tok.isNot(tok::l_brace) &&
  958. Tok.isNot(TT_AttributeSquare) &&
  959. // Tokens that can only be used as binary operators and a part of
  960. // overloaded operator names.
  961. Tok.isNot(tok::period) && Tok.isNot(tok::periodstar) &&
  962. Tok.isNot(tok::arrow) && Tok.isNot(tok::arrowstar) &&
  963. Tok.isNot(tok::less) && Tok.isNot(tok::greater) &&
  964. Tok.isNot(tok::slash) && Tok.isNot(tok::percent) &&
  965. Tok.isNot(tok::lessless) && Tok.isNot(tok::greatergreater) &&
  966. Tok.isNot(tok::equal) && Tok.isNot(tok::plusequal) &&
  967. Tok.isNot(tok::minusequal) && Tok.isNot(tok::starequal) &&
  968. Tok.isNot(tok::slashequal) && Tok.isNot(tok::percentequal) &&
  969. Tok.isNot(tok::ampequal) && Tok.isNot(tok::pipeequal) &&
  970. Tok.isNot(tok::caretequal) && Tok.isNot(tok::greatergreaterequal) &&
  971. Tok.isNot(tok::lesslessequal) &&
  972. // Colon is used in labels, base class lists, initializer lists,
  973. // range-based for loops, ternary operator, but should never be the
  974. // first token in an unwrapped line.
  975. Tok.isNot(tok::colon) &&
  976. // 'noexcept' is a trailing annotation.
  977. Tok.isNot(tok::kw_noexcept);
  978. }
  979. static bool mustBeJSIdent(const AdditionalKeywords &Keywords,
  980. const FormatToken *FormatTok) {
  981. // FIXME: This returns true for C/C++ keywords like 'struct'.
  982. return FormatTok->is(tok::identifier) &&
  983. (FormatTok->Tok.getIdentifierInfo() == nullptr ||
  984. !FormatTok->isOneOf(
  985. Keywords.kw_in, Keywords.kw_of, Keywords.kw_as, Keywords.kw_async,
  986. Keywords.kw_await, Keywords.kw_yield, Keywords.kw_finally,
  987. Keywords.kw_function, Keywords.kw_import, Keywords.kw_is,
  988. Keywords.kw_let, Keywords.kw_var, tok::kw_const,
  989. Keywords.kw_abstract, Keywords.kw_extends, Keywords.kw_implements,
  990. Keywords.kw_instanceof, Keywords.kw_interface,
  991. Keywords.kw_override, Keywords.kw_throws, Keywords.kw_from));
  992. }
  993. static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords,
  994. const FormatToken *FormatTok) {
  995. return FormatTok->Tok.isLiteral() ||
  996. FormatTok->isOneOf(tok::kw_true, tok::kw_false) ||
  997. mustBeJSIdent(Keywords, FormatTok);
  998. }
  999. // isJSDeclOrStmt returns true if |FormatTok| starts a declaration or statement
  1000. // when encountered after a value (see mustBeJSIdentOrValue).
  1001. static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords,
  1002. const FormatToken *FormatTok) {
  1003. return FormatTok->isOneOf(
  1004. tok::kw_return, Keywords.kw_yield,
  1005. // conditionals
  1006. tok::kw_if, tok::kw_else,
  1007. // loops
  1008. tok::kw_for, tok::kw_while, tok::kw_do, tok::kw_continue, tok::kw_break,
  1009. // switch/case
  1010. tok::kw_switch, tok::kw_case,
  1011. // exceptions
  1012. tok::kw_throw, tok::kw_try, tok::kw_catch, Keywords.kw_finally,
  1013. // declaration
  1014. tok::kw_const, tok::kw_class, Keywords.kw_var, Keywords.kw_let,
  1015. Keywords.kw_async, Keywords.kw_function,
  1016. // import/export
  1017. Keywords.kw_import, tok::kw_export);
  1018. }
  1019. // Checks whether a token is a type in K&R C (aka C78).
  1020. static bool isC78Type(const FormatToken &Tok) {
  1021. return Tok.isOneOf(tok::kw_char, tok::kw_short, tok::kw_int, tok::kw_long,
  1022. tok::kw_unsigned, tok::kw_float, tok::kw_double,
  1023. tok::identifier);
  1024. }
  1025. // This function checks whether a token starts the first parameter declaration
  1026. // in a K&R C (aka C78) function definition, e.g.:
  1027. // int f(a, b)
  1028. // short a, b;
  1029. // {
  1030. // return a + b;
  1031. // }
  1032. static bool isC78ParameterDecl(const FormatToken *Tok, const FormatToken *Next,
  1033. const FormatToken *FuncName) {
  1034. assert(Tok);
  1035. assert(Next);
  1036. assert(FuncName);
  1037. if (FuncName->isNot(tok::identifier))
  1038. return false;
  1039. const FormatToken *Prev = FuncName->Previous;
  1040. if (!Prev || (Prev->isNot(tok::star) && !isC78Type(*Prev)))
  1041. return false;
  1042. if (!isC78Type(*Tok) &&
  1043. !Tok->isOneOf(tok::kw_register, tok::kw_struct, tok::kw_union))
  1044. return false;
  1045. if (Next->isNot(tok::star) && !Next->Tok.getIdentifierInfo())
  1046. return false;
  1047. Tok = Tok->Previous;
  1048. if (!Tok || Tok->isNot(tok::r_paren))
  1049. return false;
  1050. Tok = Tok->Previous;
  1051. if (!Tok || Tok->isNot(tok::identifier))
  1052. return false;
  1053. return Tok->Previous && Tok->Previous->isOneOf(tok::l_paren, tok::comma);
  1054. }
  1055. void UnwrappedLineParser::parseModuleImport() {
  1056. nextToken();
  1057. while (!eof()) {
  1058. if (FormatTok->is(tok::colon)) {
  1059. FormatTok->setType(TT_ModulePartitionColon);
  1060. }
  1061. // Handle import <foo/bar.h> as we would an include statement.
  1062. else if (FormatTok->is(tok::less)) {
  1063. nextToken();
  1064. while (!FormatTok->isOneOf(tok::semi, tok::greater, tok::eof)) {
  1065. // Mark tokens up to the trailing line comments as implicit string
  1066. // literals.
  1067. if (FormatTok->isNot(tok::comment) &&
  1068. !FormatTok->TokenText.startswith("//"))
  1069. FormatTok->setType(TT_ImplicitStringLiteral);
  1070. nextToken();
  1071. }
  1072. }
  1073. if (FormatTok->is(tok::semi)) {
  1074. nextToken();
  1075. break;
  1076. }
  1077. nextToken();
  1078. }
  1079. addUnwrappedLine();
  1080. }
  1081. // readTokenWithJavaScriptASI reads the next token and terminates the current
  1082. // line if JavaScript Automatic Semicolon Insertion must
  1083. // happen between the current token and the next token.
  1084. //
  1085. // This method is conservative - it cannot cover all edge cases of JavaScript,
  1086. // but only aims to correctly handle certain well known cases. It *must not*
  1087. // return true in speculative cases.
  1088. void UnwrappedLineParser::readTokenWithJavaScriptASI() {
  1089. FormatToken *Previous = FormatTok;
  1090. readToken();
  1091. FormatToken *Next = FormatTok;
  1092. bool IsOnSameLine =
  1093. CommentsBeforeNextToken.empty()
  1094. ? Next->NewlinesBefore == 0
  1095. : CommentsBeforeNextToken.front()->NewlinesBefore == 0;
  1096. if (IsOnSameLine)
  1097. return;
  1098. bool PreviousMustBeValue = mustBeJSIdentOrValue(Keywords, Previous);
  1099. bool PreviousStartsTemplateExpr =
  1100. Previous->is(TT_TemplateString) && Previous->TokenText.endswith("${");
  1101. if (PreviousMustBeValue || Previous->is(tok::r_paren)) {
  1102. // If the line contains an '@' sign, the previous token might be an
  1103. // annotation, which can precede another identifier/value.
  1104. bool HasAt = llvm::any_of(Line->Tokens, [](UnwrappedLineNode &LineNode) {
  1105. return LineNode.Tok->is(tok::at);
  1106. });
  1107. if (HasAt)
  1108. return;
  1109. }
  1110. if (Next->is(tok::exclaim) && PreviousMustBeValue)
  1111. return addUnwrappedLine();
  1112. bool NextMustBeValue = mustBeJSIdentOrValue(Keywords, Next);
  1113. bool NextEndsTemplateExpr =
  1114. Next->is(TT_TemplateString) && Next->TokenText.startswith("}");
  1115. if (NextMustBeValue && !NextEndsTemplateExpr && !PreviousStartsTemplateExpr &&
  1116. (PreviousMustBeValue ||
  1117. Previous->isOneOf(tok::r_square, tok::r_paren, tok::plusplus,
  1118. tok::minusminus)))
  1119. return addUnwrappedLine();
  1120. if ((PreviousMustBeValue || Previous->is(tok::r_paren)) &&
  1121. isJSDeclOrStmt(Keywords, Next))
  1122. return addUnwrappedLine();
  1123. }
  // Parses one structural element starting at the current token.
  //
  // The function works in two phases:
  //  1. A switch on the kind of the first token hands constructs that are
  //     recognisable from their first token to dedicated parsers and returns.
  //     Cases that `break` instead continue with phase 2.
  //  2. A loop consumes tokens one construct at a time until a branch decides
  //     the element is complete (it then usually calls addUnwrappedLine() and
  //     returns) or eof() is reached.
  //
  // IfKind is only forwarded to parseIfThenElse(). IsTopLevel is only
  // consulted by the K&R C parameter-declaration check in the tok::l_paren
  // case of phase 2.
  1124. void UnwrappedLineParser::parseStructuralElement(IfStmtKind *IfKind,
  1125. bool IsTopLevel) {
  // TableGen include: consume the keyword and an optional string literal as a
  // line of their own.
  1126. if (Style.Language == FormatStyle::LK_TableGen &&
  1127. FormatTok->is(tok::pp_include)) {
  1128. nextToken();
  1129. if (FormatTok->is(tok::string_literal))
  1130. nextToken();
  1131. addUnwrappedLine();
  1132. return;
  1133. }
  // Phase 1: dispatch on the first token of the element.
  1134. switch (FormatTok->Tok.getKind()) {
  1135. case tok::kw_asm:
  1136. nextToken();
  1137. if (FormatTok->is(tok::l_brace)) {
  1138. FormatTok->setType(TT_InlineASMBrace);
  1139. nextToken();
  // Every token up to the matching '}' is marked Finalized; the closing brace
  // ends the line.
  1140. while (FormatTok && FormatTok->isNot(tok::eof)) {
  1141. if (FormatTok->is(tok::r_brace)) {
  1142. FormatTok->setType(TT_InlineASMBrace);
  1143. nextToken();
  1144. addUnwrappedLine();
  1145. break;
  1146. }
  1147. FormatTok->Finalized = true;
  1148. nextToken();
  1149. }
  1150. }
  1151. break;
  1152. case tok::kw_namespace:
  1153. parseNamespace();
  1154. return;
  1155. case tok::kw_public:
  1156. case tok::kw_protected:
  1157. case tok::kw_private:
  // In Java, JavaScript and C# these keywords are simply consumed; otherwise
  // they are parsed as an access specifier.
  1158. if (Style.Language == FormatStyle::LK_Java || Style.isJavaScript() ||
  1159. Style.isCSharp())
  1160. nextToken();
  1161. else
  1162. parseAccessSpecifier();
  1163. return;
  1164. case tok::kw_if:
  1165. if (Style.isJavaScript() && Line->MustBeDeclaration)
  1166. // field/method declaration.
  1167. break;
  1168. parseIfThenElse(IfKind);
  1169. return;
  1170. case tok::kw_for:
  1171. case tok::kw_while:
  1172. if (Style.isJavaScript() && Line->MustBeDeclaration)
  1173. // field/method declaration.
  1174. break;
  1175. parseForOrWhileLoop();
  1176. return;
  1177. case tok::kw_do:
  1178. if (Style.isJavaScript() && Line->MustBeDeclaration)
  1179. // field/method declaration.
  1180. break;
  1181. parseDoWhile();
  1182. return;
  1183. case tok::kw_switch:
  1184. if (Style.isJavaScript() && Line->MustBeDeclaration)
  1185. // 'switch: string' field declaration.
  1186. break;
  1187. parseSwitch();
  1188. return;
  1189. case tok::kw_default:
  1190. if (Style.isJavaScript() && Line->MustBeDeclaration)
  1191. // 'default: string' field declaration.
  1192. break;
  1193. nextToken();
  1194. if (FormatTok->is(tok::colon)) {
  1195. parseLabel();
  1196. return;
  1197. }
  1198. // e.g. "default void f() {}" in a Java interface.
  1199. break;
  1200. case tok::kw_case:
  1201. if (Style.isJavaScript() && Line->MustBeDeclaration)
  1202. // 'case: string' field declaration.
  1203. break;
  1204. parseCaseLabel();
  1205. return;
  1206. case tok::kw_try:
  1207. case tok::kw___try:
  1208. if (Style.isJavaScript() && Line->MustBeDeclaration)
  1209. // field/method declaration.
  1210. break;
  1211. parseTryCatch();
  1212. return;
  1213. case tok::kw_extern:
  // Only `extern "<string>" {` is handled here; any other use of extern
  // continues with phase 2.
  1214. nextToken();
  1215. if (FormatTok->Tok.is(tok::string_literal)) {
  1216. nextToken();
  1217. if (FormatTok->Tok.is(tok::l_brace)) {
  1218. if (Style.BraceWrapping.AfterExternBlock)
  1219. addUnwrappedLine();
  1220. // Either we indent or for backwards compatibility we follow the
  1221. // AfterExternBlock style.
  1222. unsigned AddLevels =
  1223. (Style.IndentExternBlock == FormatStyle::IEBS_Indent) ||
  1224. (Style.BraceWrapping.AfterExternBlock &&
  1225. Style.IndentExternBlock ==
  1226. FormatStyle::IEBS_AfterExternBlock)
  1227. ? 1u
  1228. : 0u;
  1229. parseBlock(/*MustBeDeclaration=*/true, AddLevels);
  1230. addUnwrappedLine();
  1231. return;
  1232. }
  1233. }
  1234. break;
  1235. case tok::kw_export:
  1236. if (Style.isJavaScript()) {
  1237. parseJavaScriptEs6ImportExport();
  1238. return;
  1239. }
  1240. if (!Style.isCpp())
  1241. break;
  1242. // Handle C++ "(inline|export) namespace".
  1243. LLVM_FALLTHROUGH;
  1244. case tok::kw_inline:
  1245. nextToken();
  1246. if (FormatTok->Tok.is(tok::kw_namespace)) {
  1247. parseNamespace();
  1248. return;
  1249. }
  1250. break;
  1251. case tok::identifier:
  1252. if (FormatTok->is(TT_ForEachMacro)) {
  1253. parseForOrWhileLoop();
  1254. return;
  1255. }
  1256. if (FormatTok->is(TT_MacroBlockBegin)) {
  1257. parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
  1258. /*MunchSemi=*/false);
  1259. return;
  1260. }
  // `import` is handled per language: JavaScript ES6 import, Proto import
  // statement, or C++ module import.
  1261. if (FormatTok->is(Keywords.kw_import)) {
  1262. if (Style.isJavaScript()) {
  1263. parseJavaScriptEs6ImportExport();
  1264. return;
  1265. }
  1266. if (Style.Language == FormatStyle::LK_Proto) {
  1267. nextToken();
  1268. if (FormatTok->is(tok::kw_public))
  1269. nextToken();
  // NOTE(review): this early return does not call addUnwrappedLine(), unlike
  // the successful path below - confirm that this is intended.
  1270. if (!FormatTok->is(tok::string_literal))
  1271. return;
  1272. nextToken();
  1273. if (FormatTok->is(tok::semi))
  1274. nextToken();
  1275. addUnwrappedLine();
  1276. return;
  1277. }
  1278. if (Style.isCpp()) {
  1279. parseModuleImport();
  1280. return;
  1281. }
  1282. }
  // signals/slots style keywords followed by ':' form a line of their own.
  1283. if (Style.isCpp() &&
  1284. FormatTok->isOneOf(Keywords.kw_signals, Keywords.kw_qsignals,
  1285. Keywords.kw_slots, Keywords.kw_qslots)) {
  1286. nextToken();
  1287. if (FormatTok->is(tok::colon)) {
  1288. nextToken();
  1289. addUnwrappedLine();
  1290. return;
  1291. }
  1292. }
  1293. if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) {
  1294. parseStatementMacro();
  1295. return;
  1296. }
  1297. if (Style.isCpp() && FormatTok->is(TT_NamespaceMacro)) {
  1298. parseNamespace();
  1299. return;
  1300. }
  1301. // In all other cases, parse the declaration.
  1302. break;
  1303. default:
  1304. break;
  1305. }
  // Phase 2: consume the rest of the element token by token. Previous is the
  // token before the one currently being examined.
  1306. do {
  1307. const FormatToken *Previous = FormatTok->Previous;
  1308. switch (FormatTok->Tok.getKind()) {
  1309. case tok::at:
  1310. nextToken();
  1311. if (FormatTok->Tok.is(tok::l_brace)) {
  1312. nextToken();
  1313. parseBracedList();
  1314. break;
  1315. } else if (Style.Language == FormatStyle::LK_Java &&
  1316. FormatTok->is(Keywords.kw_interface)) {
  1317. nextToken();
  1318. break;
  1319. }
  // Objective-C '@' keywords.
  1320. switch (FormatTok->Tok.getObjCKeywordID()) {
  1321. case tok::objc_public:
  1322. case tok::objc_protected:
  1323. case tok::objc_package:
  1324. case tok::objc_private:
  1325. return parseAccessSpecifier();
  1326. case tok::objc_interface:
  1327. case tok::objc_implementation:
  1328. return parseObjCInterfaceOrImplementation();
  1329. case tok::objc_protocol:
  1330. if (parseObjCProtocol())
  1331. return;
  1332. break;
  1333. case tok::objc_end:
  1334. return; // Handled by the caller.
  1335. case tok::objc_optional:
  1336. case tok::objc_required:
  1337. nextToken();
  1338. addUnwrappedLine();
  1339. return;
  1340. case tok::objc_autoreleasepool:
  1341. nextToken();
  1342. if (FormatTok->Tok.is(tok::l_brace)) {
  1343. if (Style.BraceWrapping.AfterControlStatement ==
  1344. FormatStyle::BWACS_Always)
  1345. addUnwrappedLine();
  1346. parseBlock();
  1347. }
  1348. addUnwrappedLine();
  1349. return;
  1350. case tok::objc_synchronized:
  1351. nextToken();
  1352. if (FormatTok->Tok.is(tok::l_paren))
  1353. // Skip synchronization object
  1354. parseParens();
  1355. if (FormatTok->Tok.is(tok::l_brace)) {
  1356. if (Style.BraceWrapping.AfterControlStatement ==
  1357. FormatStyle::BWACS_Always)
  1358. addUnwrappedLine();
  1359. parseBlock();
  1360. }
  1361. addUnwrappedLine();
  1362. return;
  1363. case tok::objc_try:
  1364. // This branch isn't strictly necessary (the kw_try case below would
  1365. // do this too after the tok::at is parsed above). But be explicit.
  1366. parseTryCatch();
  1367. return;
  1368. default:
  1369. break;
  1370. }
  1371. break;
  1372. case tok::kw_concept:
  1373. parseConcept();
  1374. return;
  1375. case tok::kw_requires:
  1376. parseRequires();
  1377. return;
  1378. case tok::kw_enum:
  1379. // Ignore if this is part of "template <enum ...".
  1380. if (Previous && Previous->is(tok::less)) {
  1381. nextToken();
  1382. break;
  1383. }
  1384. // parseEnum falls through and does not yet add an unwrapped line as an
  1385. // enum definition can start a structural element.
  1386. if (!parseEnum())
  1387. break;
  1388. // This only applies for C++.
  1389. if (!Style.isCpp()) {
  1390. addUnwrappedLine();
  1391. return;
  1392. }
  1393. break;
  1394. case tok::kw_typedef:
  1395. nextToken();
  // A typedef followed by one of the NS_/CF_ enum macros is parsed as an enum.
  1396. if (FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS,
  1397. Keywords.kw_CF_ENUM, Keywords.kw_CF_OPTIONS,
  1398. Keywords.kw_CF_CLOSED_ENUM,
  1399. Keywords.kw_NS_CLOSED_ENUM))
  1400. parseEnum();
  1401. break;
  1402. case tok::kw_struct:
  1403. case tok::kw_union:
  1404. case tok::kw_class:
  1405. if (parseStructLike()) {
  1406. return;
  1407. }
  1408. break;
  1409. case tok::period:
  1410. nextToken();
  1411. // In Java, classes have an implicit static member "class".
  1412. if (Style.Language == FormatStyle::LK_Java && FormatTok &&
  1413. FormatTok->is(tok::kw_class))
  1414. nextToken();
  1415. if (Style.isJavaScript() && FormatTok &&
  1416. FormatTok->Tok.getIdentifierInfo())
  1417. // JavaScript only has pseudo keywords, all keywords are allowed to
  1418. // appear in "IdentifierName" positions. See http://es5.github.io/#x7.6
  1419. nextToken();
  1420. break;
  1421. case tok::semi:
  1422. nextToken();
  1423. addUnwrappedLine();
  1424. return;
  1425. case tok::r_brace:
  1426. addUnwrappedLine();
  1427. return;
  1428. case tok::l_paren: {
  1429. parseParens();
  1430. // Break the unwrapped line if a K&R C function definition has a parameter
  1431. // declaration.
  1432. if (!IsTopLevel || !Style.isCpp() || !Previous || FormatTok->is(tok::eof))
  1433. break;
  1434. if (isC78ParameterDecl(FormatTok, Tokens->peekNextToken(), Previous)) {
  1435. addUnwrappedLine();
  1436. return;
  1437. }
  1438. break;
  1439. }
  1440. case tok::kw_operator:
  1441. nextToken();
  1442. if (FormatTok->isBinaryOperator())
  1443. nextToken();
  1444. break;
  1445. case tok::caret:
  // '^' optionally followed by a type or identifier, a parenthesised list and
  // a braced body that is parsed as a child block.
  1446. nextToken();
  1447. if (FormatTok->Tok.isAnyIdentifier() ||
  1448. FormatTok->isSimpleTypeSpecifier())
  1449. nextToken();
  1450. if (FormatTok->is(tok::l_paren))
  1451. parseParens();
  1452. if (FormatTok->is(tok::l_brace))
  1453. parseChildBlock();
  1454. break;
  1455. case tok::l_brace:
  1456. if (!tryToParsePropertyAccessor() && !tryToParseBracedList()) {
  1457. // A block outside of parentheses must be the last part of a
  1458. // structural element.
  1459. // FIXME: Figure out cases where this is not true, and add projections
  1460. // for them (the one we know is missing are lambdas).
  1461. if (Style.Language == FormatStyle::LK_Java &&
  1462. Line->Tokens.front().Tok->is(Keywords.kw_synchronized)) {
  1463. // If necessary, we could set the type to something different than
  1464. // TT_FunctionLBrace.
  1465. if (Style.BraceWrapping.AfterControlStatement ==
  1466. FormatStyle::BWACS_Always)
  1467. addUnwrappedLine();
  1468. } else if (Style.BraceWrapping.AfterFunction) {
  1469. addUnwrappedLine();
  1470. }
  1471. FormatTok->setType(TT_FunctionLBrace);
  1472. parseBlock();
  1473. addUnwrappedLine();
  1474. return;
  1475. }
  1476. // Otherwise this was a braced init list, and the structural
  1477. // element continues.
  1478. break;
  1479. case tok::kw_try:
  1480. if (Style.isJavaScript() && Line->MustBeDeclaration) {
  1481. // field/method declaration.
  1482. nextToken();
  1483. break;
  1484. }
  1485. // We arrive here when parsing function-try blocks.
  1486. if (Style.BraceWrapping.AfterFunction)
  1487. addUnwrappedLine();
  1488. parseTryCatch();
  1489. return;
  1490. case tok::identifier: {
  1491. if (Style.isCSharp() && FormatTok->is(Keywords.kw_where) &&
  1492. Line->MustBeDeclaration) {
  1493. addUnwrappedLine();
  1494. parseCSharpGenericTypeConstraint();
  1495. break;
  1496. }
  1497. if (FormatTok->is(TT_MacroBlockEnd)) {
  1498. addUnwrappedLine();
  1499. return;
  1500. }
  1501. // Function declarations (as opposed to function expressions) are parsed
  1502. // on their own unwrapped line by continuing this loop. Function
  1503. // expressions (functions that are not on their own line) must not create
  1504. // a new unwrapped line, so they are special cased below.
  1505. size_t TokenCount = Line->Tokens.size();
  1506. if (Style.isJavaScript() && FormatTok->is(Keywords.kw_function) &&
  1507. (TokenCount > 1 || (TokenCount == 1 && !Line->Tokens.front().Tok->is(
  1508. Keywords.kw_async)))) {
  1509. tryToParseJSFunction();
  1510. break;
  1511. }
  1512. if ((Style.isJavaScript() || Style.Language == FormatStyle::LK_Java) &&
  1513. FormatTok->is(Keywords.kw_interface)) {
  1514. if (Style.isJavaScript()) {
  1515. // In JavaScript/TypeScript, "interface" can be used as a standalone
  1516. // identifier, e.g. in `var interface = 1;`. If "interface" is
  1517. // followed by another identifier, it is very likely to be an actual
  1518. // interface declaration.
  // Peek at the following token, then rewind the token source.
  1519. unsigned StoredPosition = Tokens->getPosition();
  1520. FormatToken *Next = Tokens->getNextToken();
  1521. FormatTok = Tokens->setPosition(StoredPosition);
  1522. if (!mustBeJSIdent(Keywords, Next)) {
  1523. nextToken();
  1524. break;
  1525. }
  1526. }
  1527. parseRecord();
  1528. addUnwrappedLine();
  1529. return;
  1530. }
  1531. if (FormatTok->is(Keywords.kw_interface)) {
  1532. if (parseStructLike()) {
  1533. return;
  1534. }
  1535. break;
  1536. }
  1537. if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) {
  1538. parseStatementMacro();
  1539. return;
  1540. }
  1541. // See if the following token should start a new unwrapped line.
  // Text and PreviousToken capture the identifier before it is consumed.
  1542. auto isAttr = FormatTok->is(TT_AttributeMacro);
  1543. StringRef Text = FormatTok->TokenText;
  1544. FormatToken *PreviousToken = FormatTok;
  1545. nextToken();
  1546. // JS doesn't have macros, and within classes colons indicate fields, not
  1547. // labels.
  1548. if (Style.isJavaScript())
  1549. break;
  // Only the first token of a line (optionally preceded by one comment) is
  // considered as a label or macro invocation.
  1550. TokenCount = Line->Tokens.size();
  1551. if (TokenCount == 1 ||
  1552. (TokenCount == 2 && Line->Tokens.front().Tok->is(tok::comment))) {
  1553. if (FormatTok->Tok.is(tok::colon) && !Line->MustBeDeclaration) {
  1554. Line->Tokens.begin()->Tok->MustBreakBefore = true;
  1555. parseLabel(!Style.IndentGotoLabels);
  1556. return;
  1557. }
  1558. // Recognize function-like macro usages without trailing semicolon as
  1559. // well as free-standing macros like Q_OBJECT.
  1560. bool FunctionLike = FormatTok->is(tok::l_paren);
  1561. if (FunctionLike)
  1562. parseParens();
  1563. bool FollowedByNewline =
  1564. CommentsBeforeNextToken.empty()
  1565. ? FormatTok->NewlinesBefore > 0
  1566. : CommentsBeforeNextToken.front()->NewlinesBefore > 0;
  // Heuristic: an all-uppercase name (at least five characters, or followed by
  // parentheses) at the end of its source line, not an attribute macro, and
  // followed by a token that can start a line.
  1567. if (FollowedByNewline && (Text.size() >= 5 || FunctionLike) &&
  1568. tokenCanStartNewLine(*FormatTok) && Text == Text.upper() && !isAttr) {
  1569. PreviousToken->setType(TT_FunctionLikeOrFreestandingMacro);
  1570. addUnwrappedLine();
  1571. return;
  1572. }
  1573. }
  1574. break;
  1575. }
  1576. case tok::equal:
  1577. if ((Style.isJavaScript() || Style.isCSharp()) &&
  1578. FormatTok->is(TT_FatArrow)) {
  1579. tryToParseChildBlock();
  1580. break;
  1581. }
  1582. nextToken();
  1583. if (FormatTok->Tok.is(tok::l_brace)) {
  1584. // Block kind should probably be set to BK_BracedInit for any language.
  1585. // C# needs this change to ensure that array initialisers and object
  1586. // initialisers are indented the same way.
  1587. if (Style.isCSharp())
  1588. FormatTok->setBlockKind(BK_BracedInit);
  1589. nextToken();
  1590. parseBracedList();
  1591. } else if (Style.Language == FormatStyle::LK_Proto &&
  1592. FormatTok->Tok.is(tok::less)) {
  1593. nextToken();
  1594. parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
  1595. /*ClosingBraceKind=*/tok::greater);
  1596. }
  1597. break;
  1598. case tok::l_square:
  1599. parseSquare();
  1600. break;
  1601. case tok::kw_new:
  1602. parseNew();
  1603. break;
  1604. default:
  1605. nextToken();
  1606. break;
  1607. }
  1608. } while (!eof());
  1609. }
  1610. bool UnwrappedLineParser::tryToParsePropertyAccessor() {
  1611. assert(FormatTok->is(tok::l_brace));
  1612. if (!Style.isCSharp())
  1613. return false;
  1614. // See if it's a property accessor.
  1615. if (FormatTok->Previous->isNot(tok::identifier))
  1616. return false;
  1617. // See if we are inside a property accessor.
  1618. //
  1619. // Record the current tokenPosition so that we can advance and
  1620. // reset the current token. `Next` is not set yet so we need
  1621. // another way to advance along the token stream.
  1622. unsigned int StoredPosition = Tokens->getPosition();
  1623. FormatToken *Tok = Tokens->getNextToken();
  1624. // A trivial property accessor is of the form:
  1625. // { [ACCESS_SPECIFIER] [get]; [ACCESS_SPECIFIER] [set] }
  1626. // Track these as they do not require line breaks to be introduced.
  1627. bool HasGetOrSet = false;
  1628. bool IsTrivialPropertyAccessor = true;
  1629. while (!eof()) {
  1630. if (Tok->isOneOf(tok::semi, tok::kw_public, tok::kw_private,
  1631. tok::kw_protected, Keywords.kw_internal, Keywords.kw_get,
  1632. Keywords.kw_set)) {
  1633. if (Tok->isOneOf(Keywords.kw_get, Keywords.kw_set))
  1634. HasGetOrSet = true;
  1635. Tok = Tokens->getNextToken();
  1636. continue;
  1637. }
  1638. if (Tok->isNot(tok::r_brace))
  1639. IsTrivialPropertyAccessor = false;
  1640. break;
  1641. }
  1642. if (!HasGetOrSet) {
  1643. Tokens->setPosition(StoredPosition);
  1644. return false;
  1645. }
  1646. // Try to parse the property accessor:
  1647. // https://docs.microsoft.com/en-us/dotnet/csharp/programming-guide/classes-and-structs/properties
  1648. Tokens->setPosition(StoredPosition);
  1649. if (!IsTrivialPropertyAccessor && Style.BraceWrapping.AfterFunction)
  1650. addUnwrappedLine();
  1651. nextToken();
  1652. do {
  1653. switch (FormatTok->Tok.getKind()) {
  1654. case tok::r_brace:
  1655. nextToken();
  1656. if (FormatTok->is(tok::equal)) {
  1657. while (!eof() && FormatTok->isNot(tok::semi))
  1658. nextToken();
  1659. nextToken();
  1660. }
  1661. addUnwrappedLine();
  1662. return true;
  1663. case tok::l_brace:
  1664. ++Line->Level;
  1665. parseBlock(/*MustBeDeclaration=*/true);
  1666. addUnwrappedLine();
  1667. --Line->Level;
  1668. break;
  1669. case tok::equal:
  1670. if (FormatTok->is(TT_FatArrow)) {
  1671. ++Line->Level;
  1672. do {
  1673. nextToken();
  1674. } while (!eof() && FormatTok->isNot(tok::semi));
  1675. nextToken();
  1676. addUnwrappedLine();
  1677. --Line->Level;
  1678. break;
  1679. }
  1680. nextToken();
  1681. break;
  1682. default:
  1683. if (FormatTok->isOneOf(Keywords.kw_get, Keywords.kw_set) &&
  1684. !IsTrivialPropertyAccessor) {
  1685. // Non-trivial get/set needs to be on its own line.
  1686. addUnwrappedLine();
  1687. }
  1688. nextToken();
  1689. }
  1690. } while (!eof());
  1691. // Unreachable for well-formed code (paired '{' and '}').
  1692. return true;
  1693. }
  1694. bool UnwrappedLineParser::tryToParseLambda() {
  1695. if (!Style.isCpp()) {
  1696. nextToken();
  1697. return false;
  1698. }
  1699. assert(FormatTok->is(tok::l_square));
  1700. FormatToken &LSquare = *FormatTok;
  1701. if (!tryToParseLambdaIntroducer())
  1702. return false;
  1703. bool SeenArrow = false;
  1704. bool InTemplateParameterList = false;
  1705. while (FormatTok->isNot(tok::l_brace)) {
  1706. if (FormatTok->isSimpleTypeSpecifier()) {
  1707. nextToken();
  1708. continue;
  1709. }
  1710. switch (FormatTok->Tok.getKind()) {
  1711. case tok::l_brace:
  1712. break;
  1713. case tok::l_paren:
  1714. parseParens();
  1715. break;
  1716. case tok::l_square:
  1717. parseSquare();
  1718. break;
  1719. case tok::kw_class:
  1720. case tok::kw_template:
  1721. case tok::kw_typename:
  1722. assert(FormatTok->Previous);
  1723. if (FormatTok->Previous->is(tok::less))
  1724. InTemplateParameterList = true;
  1725. nextToken();
  1726. break;
  1727. case tok::amp:
  1728. case tok::star:
  1729. case tok::kw_const:
  1730. case tok::comma:
  1731. case tok::less:
  1732. case tok::greater:
  1733. case tok::identifier:
  1734. case tok::numeric_constant:
  1735. case tok::coloncolon:
  1736. case tok::kw_mutable:
  1737. case tok::kw_noexcept:
  1738. nextToken();
  1739. break;
  1740. // Specialization of a template with an integer parameter can contain
  1741. // arithmetic, logical, comparison and ternary operators.
  1742. //
  1743. // FIXME: This also accepts sequences of operators that are not in the scope
  1744. // of a template argument list.
  1745. //
  1746. // In a C++ lambda a template type can only occur after an arrow. We use
  1747. // this as an heuristic to distinguish between Objective-C expressions
  1748. // followed by an `a->b` expression, such as:
  1749. // ([obj func:arg] + a->b)
  1750. // Otherwise the code below would parse as a lambda.
  1751. //
  1752. // FIXME: This heuristic is incorrect for C++20 generic lambdas with
  1753. // explicit template lists: []<bool b = true && false>(U &&u){}
  1754. case tok::plus:
  1755. case tok::minus:
  1756. case tok::exclaim:
  1757. case tok::tilde:
  1758. case tok::slash:
  1759. case tok::percent:
  1760. case tok::lessless:
  1761. case tok::pipe:
  1762. case tok::pipepipe:
  1763. case tok::ampamp:
  1764. case tok::caret:
  1765. case tok::equalequal:
  1766. case tok::exclaimequal:
  1767. case tok::greaterequal:
  1768. case tok::lessequal:
  1769. case tok::question:
  1770. case tok::colon:
  1771. case tok::ellipsis:
  1772. case tok::kw_true:
  1773. case tok::kw_false:
  1774. if (SeenArrow || InTemplateParameterList) {
  1775. nextToken();
  1776. break;
  1777. }
  1778. return true;
  1779. case tok::arrow:
  1780. // This might or might not actually be a lambda arrow (this could be an
  1781. // ObjC method invocation followed by a dereferencing arrow). We might
  1782. // reset this back to TT_Unknown in TokenAnnotator.
  1783. FormatTok->setType(TT_LambdaArrow);
  1784. SeenArrow = true;
  1785. nextToken();
  1786. break;
  1787. default:
  1788. return true;
  1789. }
  1790. }
  1791. FormatTok->setType(TT_LambdaLBrace);
  1792. LSquare.setType(TT_LambdaLSquare);
  1793. parseChildBlock();
  1794. return true;
  1795. }
  1796. bool UnwrappedLineParser::tryToParseLambdaIntroducer() {
  1797. const FormatToken *Previous = FormatTok->Previous;
  1798. if (Previous &&
  1799. (Previous->isOneOf(tok::identifier, tok::kw_operator, tok::kw_new,
  1800. tok::kw_delete, tok::l_square) ||
  1801. FormatTok->isCppStructuredBinding(Style) || Previous->closesScope() ||
  1802. Previous->isSimpleTypeSpecifier())) {
  1803. nextToken();
  1804. return false;
  1805. }
  1806. nextToken();
  1807. if (FormatTok->is(tok::l_square)) {
  1808. return false;
  1809. }
  1810. parseSquare(/*LambdaIntroducer=*/true);
  1811. return true;
  1812. }
  1813. void UnwrappedLineParser::tryToParseJSFunction() {
  1814. assert(FormatTok->is(Keywords.kw_function) ||
  1815. FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function));
  1816. if (FormatTok->is(Keywords.kw_async))
  1817. nextToken();
  1818. // Consume "function".
  1819. nextToken();
  1820. // Consume * (generator function). Treat it like C++'s overloaded operators.
  1821. if (FormatTok->is(tok::star)) {
  1822. FormatTok->setType(TT_OverloadedOperator);
  1823. nextToken();
  1824. }
  1825. // Consume function name.
  1826. if (FormatTok->is(tok::identifier))
  1827. nextToken();
  1828. if (FormatTok->isNot(tok::l_paren))
  1829. return;
  1830. // Parse formal parameter list.
  1831. parseParens();
  1832. if (FormatTok->is(tok::colon)) {
  1833. // Parse a type definition.
  1834. nextToken();
  1835. // Eat the type declaration. For braced inline object types, balance braces,
  1836. // otherwise just parse until finding an l_brace for the function body.
  1837. if (FormatTok->is(tok::l_brace))
  1838. tryToParseBracedList();
  1839. else
  1840. while (!FormatTok->isOneOf(tok::l_brace, tok::semi) && !eof())
  1841. nextToken();
  1842. }
  1843. if (FormatTok->is(tok::semi))
  1844. return;
  1845. parseChildBlock();
  1846. }
  1847. bool UnwrappedLineParser::tryToParseBracedList() {
  1848. if (FormatTok->is(BK_Unknown))
  1849. calculateBraceTypes();
  1850. assert(FormatTok->isNot(BK_Unknown));
  1851. if (FormatTok->is(BK_Block))
  1852. return false;
  1853. nextToken();
  1854. parseBracedList();
  1855. return true;
  1856. }
  1857. bool UnwrappedLineParser::tryToParseChildBlock() {
  1858. assert(Style.isJavaScript() || Style.isCSharp());
  1859. assert(FormatTok->is(TT_FatArrow));
  1860. // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType TT_FatArrow.
  1861. // They always start an expression or a child block if followed by a curly
  1862. // brace.
  1863. nextToken();
  1864. if (FormatTok->isNot(tok::l_brace))
  1865. return false;
  1866. parseChildBlock();
  1867. return true;
  1868. }
  1869. bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons,
  1870. bool IsEnum,
  1871. tok::TokenKind ClosingBraceKind) {
  1872. bool HasError = false;
  1873. // FIXME: Once we have an expression parser in the UnwrappedLineParser,
  1874. // replace this by using parseAssignmentExpression() inside.
  1875. do {
  1876. if (Style.isCSharp() && FormatTok->is(TT_FatArrow) &&
  1877. tryToParseChildBlock())
  1878. continue;
  1879. if (Style.isJavaScript()) {
  1880. if (FormatTok->is(Keywords.kw_function) ||
  1881. FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)) {
  1882. tryToParseJSFunction();
  1883. continue;
  1884. }
  1885. if (FormatTok->is(tok::l_brace)) {
  1886. // Could be a method inside of a braced list `{a() { return 1; }}`.
  1887. if (tryToParseBracedList())
  1888. continue;
  1889. parseChildBlock();
  1890. }
  1891. }
  1892. if (FormatTok->Tok.getKind() == ClosingBraceKind) {
  1893. if (IsEnum && !Style.AllowShortEnumsOnASingleLine)
  1894. addUnwrappedLine();
  1895. nextToken();
  1896. return !HasError;
  1897. }
  1898. switch (FormatTok->Tok.getKind()) {
  1899. case tok::l_square:
  1900. if (Style.isCSharp())
  1901. parseSquare();
  1902. else
  1903. tryToParseLambda();
  1904. break;
  1905. case tok::l_paren:
  1906. parseParens();
  1907. // JavaScript can just have free standing methods and getters/setters in
  1908. // object literals. Detect them by a "{" following ")".
  1909. if (Style.isJavaScript()) {
  1910. if (FormatTok->is(tok::l_brace))
  1911. parseChildBlock();
  1912. break;
  1913. }
  1914. break;
  1915. case tok::l_brace:
  1916. // Assume there are no blocks inside a braced init list apart
  1917. // from the ones we explicitly parse out (like lambdas).
  1918. FormatTok->setBlockKind(BK_BracedInit);
  1919. nextToken();
  1920. parseBracedList();
  1921. break;
  1922. case tok::less:
  1923. if (Style.Language == FormatStyle::LK_Proto) {
  1924. nextToken();
  1925. parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
  1926. /*ClosingBraceKind=*/tok::greater);
  1927. } else {
  1928. nextToken();
  1929. }
  1930. break;
  1931. case tok::semi:
  1932. // JavaScript (or more precisely TypeScript) can have semicolons in braced
  1933. // lists (in so-called TypeMemberLists). Thus, the semicolon cannot be
  1934. // used for error recovery if we have otherwise determined that this is
  1935. // a braced list.
  1936. if (Style.isJavaScript()) {
  1937. nextToken();
  1938. break;
  1939. }
  1940. HasError = true;
  1941. if (!ContinueOnSemicolons)
  1942. return !HasError;
  1943. nextToken();
  1944. break;
  1945. case tok::comma:
  1946. nextToken();
  1947. if (IsEnum && !Style.AllowShortEnumsOnASingleLine)
  1948. addUnwrappedLine();
  1949. break;
  1950. default:
  1951. nextToken();
  1952. break;
  1953. }
  1954. } while (!eof());
  1955. return false;
  1956. }
  1957. void UnwrappedLineParser::parseParens() {
  1958. assert(FormatTok->Tok.is(tok::l_paren) && "'(' expected.");
  1959. nextToken();
  1960. do {
  1961. switch (FormatTok->Tok.getKind()) {
  1962. case tok::l_paren:
  1963. parseParens();
  1964. if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_brace))
  1965. parseChildBlock();
  1966. break;
  1967. case tok::r_paren:
  1968. nextToken();
  1969. return;
  1970. case tok::r_brace:
  1971. // A "}" inside parenthesis is an error if there wasn't a matching "{".
  1972. return;
  1973. case tok::l_square:
  1974. tryToParseLambda();
  1975. break;
  1976. case tok::l_brace:
  1977. if (!tryToParseBracedList())
  1978. parseChildBlock();
  1979. break;
  1980. case tok::at:
  1981. nextToken();
  1982. if (FormatTok->Tok.is(tok::l_brace)) {
  1983. nextToken();
  1984. parseBracedList();
  1985. }
  1986. break;
  1987. case tok::equal:
  1988. if (Style.isCSharp() && FormatTok->is(TT_FatArrow))
  1989. tryToParseChildBlock();
  1990. else
  1991. nextToken();
  1992. break;
  1993. case tok::kw_class:
  1994. if (Style.isJavaScript())
  1995. parseRecord(/*ParseAsExpr=*/true);
  1996. else
  1997. nextToken();
  1998. break;
  1999. case tok::identifier:
  2000. if (Style.isJavaScript() &&
  2001. (FormatTok->is(Keywords.kw_function) ||
  2002. FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)))
  2003. tryToParseJSFunction();
  2004. else
  2005. nextToken();
  2006. break;
  2007. default:
  2008. nextToken();
  2009. break;
  2010. }
  2011. } while (!eof());
  2012. }
  2013. void UnwrappedLineParser::parseSquare(bool LambdaIntroducer) {
  2014. if (!LambdaIntroducer) {
  2015. assert(FormatTok->Tok.is(tok::l_square) && "'[' expected.");
  2016. if (tryToParseLambda())
  2017. return;
  2018. }
  2019. do {
  2020. switch (FormatTok->Tok.getKind()) {
  2021. case tok::l_paren:
  2022. parseParens();
  2023. break;
  2024. case tok::r_square:
  2025. nextToken();
  2026. return;
  2027. case tok::r_brace:
  2028. // A "}" inside parenthesis is an error if there wasn't a matching "{".
  2029. return;
  2030. case tok::l_square:
  2031. parseSquare();
  2032. break;
  2033. case tok::l_brace: {
  2034. if (!tryToParseBracedList())
  2035. parseChildBlock();
  2036. break;
  2037. }
  2038. case tok::at:
  2039. nextToken();
  2040. if (FormatTok->Tok.is(tok::l_brace)) {
  2041. nextToken();
  2042. parseBracedList();
  2043. }
  2044. break;
  2045. default:
  2046. nextToken();
  2047. break;
  2048. }
  2049. } while (!eof());
  2050. }
  2051. void UnwrappedLineParser::keepAncestorBraces() {
  2052. if (!Style.RemoveBracesLLVM)
  2053. return;
  2054. const int MaxNestingLevels = 2;
  2055. const int Size = NestedTooDeep.size();
  2056. if (Size >= MaxNestingLevels)
  2057. NestedTooDeep[Size - MaxNestingLevels] = true;
  2058. NestedTooDeep.push_back(false);
  2059. }
  2060. static void markOptionalBraces(FormatToken *LeftBrace) {
  2061. if (!LeftBrace)
  2062. return;
  2063. assert(LeftBrace->is(tok::l_brace));
  2064. FormatToken *RightBrace = LeftBrace->MatchingParen;
  2065. if (!RightBrace) {
  2066. assert(!LeftBrace->Optional);
  2067. return;
  2068. }
  2069. assert(RightBrace->is(tok::r_brace));
  2070. assert(RightBrace->MatchingParen == LeftBrace);
  2071. assert(LeftBrace->Optional == RightBrace->Optional);
  2072. LeftBrace->Optional = true;
  2073. RightBrace->Optional = true;
  2074. }
  2075. FormatToken *UnwrappedLineParser::parseIfThenElse(IfStmtKind *IfKind,
  2076. bool KeepBraces) {
  2077. auto HandleAttributes = [this]() {
  2078. // Handle AttributeMacro, e.g. `if (x) UNLIKELY`.
  2079. if (FormatTok->is(TT_AttributeMacro))
  2080. nextToken();
  2081. // Handle [[likely]] / [[unlikely]] attributes.
  2082. if (FormatTok->is(tok::l_square) && tryToParseSimpleAttribute())
  2083. parseSquare();
  2084. };
  2085. assert(FormatTok->Tok.is(tok::kw_if) && "'if' expected");
  2086. nextToken();
  2087. if (FormatTok->Tok.isOneOf(tok::kw_constexpr, tok::identifier))
  2088. nextToken();
  2089. if (FormatTok->Tok.is(tok::l_paren))
  2090. parseParens();
  2091. HandleAttributes();
  2092. bool NeedsUnwrappedLine = false;
  2093. keepAncestorBraces();
  2094. FormatToken *IfLeftBrace = nullptr;
  2095. IfStmtKind IfBlockKind = IfStmtKind::NotIf;
  2096. if (FormatTok->Tok.is(tok::l_brace)) {
  2097. IfLeftBrace = FormatTok;
  2098. CompoundStatementIndenter Indenter(this, Style, Line->Level);
  2099. IfBlockKind = parseBlock();
  2100. if (Style.BraceWrapping.BeforeElse)
  2101. addUnwrappedLine();
  2102. else
  2103. NeedsUnwrappedLine = true;
  2104. } else {
  2105. addUnwrappedLine();
  2106. ++Line->Level;
  2107. parseStructuralElement();
  2108. --Line->Level;
  2109. }
  2110. bool KeepIfBraces = false;
  2111. if (Style.RemoveBracesLLVM) {
  2112. assert(!NestedTooDeep.empty());
  2113. KeepIfBraces = (IfLeftBrace && !IfLeftBrace->MatchingParen) ||
  2114. NestedTooDeep.back() || IfBlockKind == IfStmtKind::IfOnly ||
  2115. IfBlockKind == IfStmtKind::IfElseIf;
  2116. }
  2117. FormatToken *ElseLeftBrace = nullptr;
  2118. IfStmtKind Kind = IfStmtKind::IfOnly;
  2119. if (FormatTok->Tok.is(tok::kw_else)) {
  2120. if (Style.RemoveBracesLLVM) {
  2121. NestedTooDeep.back() = false;
  2122. Kind = IfStmtKind::IfElse;
  2123. }
  2124. nextToken();
  2125. HandleAttributes();
  2126. if (FormatTok->Tok.is(tok::l_brace)) {
  2127. ElseLeftBrace = FormatTok;
  2128. CompoundStatementIndenter Indenter(this, Style, Line->Level);
  2129. if (parseBlock() == IfStmtKind::IfOnly)
  2130. Kind = IfStmtKind::IfElseIf;
  2131. addUnwrappedLine();
  2132. } else if (FormatTok->Tok.is(tok::kw_if)) {
  2133. FormatToken *Previous = Tokens->getPreviousToken();
  2134. const bool IsPrecededByComment = Previous && Previous->is(tok::comment);
  2135. if (IsPrecededByComment) {
  2136. addUnwrappedLine();
  2137. ++Line->Level;
  2138. }
  2139. bool TooDeep = true;
  2140. if (Style.RemoveBracesLLVM) {
  2141. Kind = IfStmtKind::IfElseIf;
  2142. TooDeep = NestedTooDeep.pop_back_val();
  2143. }
  2144. ElseLeftBrace =
  2145. parseIfThenElse(/*IfKind=*/nullptr, KeepBraces || KeepIfBraces);
  2146. if (Style.RemoveBracesLLVM)
  2147. NestedTooDeep.push_back(TooDeep);
  2148. if (IsPrecededByComment)
  2149. --Line->Level;
  2150. } else {
  2151. addUnwrappedLine();
  2152. ++Line->Level;
  2153. parseStructuralElement();
  2154. if (FormatTok->is(tok::eof))
  2155. addUnwrappedLine();
  2156. --Line->Level;
  2157. }
  2158. } else {
  2159. if (Style.RemoveBracesLLVM)
  2160. KeepIfBraces = KeepIfBraces || IfBlockKind == IfStmtKind::IfElse;
  2161. if (NeedsUnwrappedLine)
  2162. addUnwrappedLine();
  2163. }
  2164. if (!Style.RemoveBracesLLVM)
  2165. return nullptr;
  2166. assert(!NestedTooDeep.empty());
  2167. const bool KeepElseBraces =
  2168. (ElseLeftBrace && !ElseLeftBrace->MatchingParen) || NestedTooDeep.back();
  2169. NestedTooDeep.pop_back();
  2170. if (!KeepBraces && !KeepIfBraces && !KeepElseBraces) {
  2171. markOptionalBraces(IfLeftBrace);
  2172. markOptionalBraces(ElseLeftBrace);
  2173. } else if (IfLeftBrace) {
  2174. FormatToken *IfRightBrace = IfLeftBrace->MatchingParen;
  2175. if (IfRightBrace) {
  2176. assert(IfRightBrace->MatchingParen == IfLeftBrace);
  2177. assert(!IfLeftBrace->Optional);
  2178. assert(!IfRightBrace->Optional);
  2179. IfLeftBrace->MatchingParen = nullptr;
  2180. IfRightBrace->MatchingParen = nullptr;
  2181. }
  2182. }
  2183. if (IfKind)
  2184. *IfKind = Kind;
  2185. return IfLeftBrace;
  2186. }
  2187. void UnwrappedLineParser::parseTryCatch() {
  2188. assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected");
  2189. nextToken();
  2190. bool NeedsUnwrappedLine = false;
  2191. if (FormatTok->is(tok::colon)) {
  2192. // We are in a function try block, what comes is an initializer list.
  2193. nextToken();
  2194. // In case identifiers were removed by clang-tidy, what might follow is
  2195. // multiple commas in sequence - before the first identifier.
  2196. while (FormatTok->is(tok::comma))
  2197. nextToken();
  2198. while (FormatTok->is(tok::identifier)) {
  2199. nextToken();
  2200. if (FormatTok->is(tok::l_paren))
  2201. parseParens();
  2202. if (FormatTok->Previous && FormatTok->Previous->is(tok::identifier) &&
  2203. FormatTok->is(tok::l_brace)) {
  2204. do {
  2205. nextToken();
  2206. } while (!FormatTok->is(tok::r_brace));
  2207. nextToken();
  2208. }
  2209. // In case identifiers were removed by clang-tidy, what might follow is
  2210. // multiple commas in sequence - after the first identifier.
  2211. while (FormatTok->is(tok::comma))
  2212. nextToken();
  2213. }
  2214. }
  2215. // Parse try with resource.
  2216. if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_paren)) {
  2217. parseParens();
  2218. }
  2219. keepAncestorBraces();
  2220. if (FormatTok->is(tok::l_brace)) {
  2221. CompoundStatementIndenter Indenter(this, Style, Line->Level);
  2222. parseBlock();
  2223. if (Style.BraceWrapping.BeforeCatch) {
  2224. addUnwrappedLine();
  2225. } else {
  2226. NeedsUnwrappedLine = true;
  2227. }
  2228. } else if (!FormatTok->is(tok::kw_catch)) {
  2229. // The C++ standard requires a compound-statement after a try.
  2230. // If there's none, we try to assume there's a structuralElement
  2231. // and try to continue.
  2232. addUnwrappedLine();
  2233. ++Line->Level;
  2234. parseStructuralElement();
  2235. --Line->Level;
  2236. }
  2237. while (true) {
  2238. if (FormatTok->is(tok::at))
  2239. nextToken();
  2240. if (!(FormatTok->isOneOf(tok::kw_catch, Keywords.kw___except,
  2241. tok::kw___finally) ||
  2242. ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) &&
  2243. FormatTok->is(Keywords.kw_finally)) ||
  2244. (FormatTok->Tok.isObjCAtKeyword(tok::objc_catch) ||
  2245. FormatTok->Tok.isObjCAtKeyword(tok::objc_finally))))
  2246. break;
  2247. nextToken();
  2248. while (FormatTok->isNot(tok::l_brace)) {
  2249. if (FormatTok->is(tok::l_paren)) {
  2250. parseParens();
  2251. continue;
  2252. }
  2253. if (FormatTok->isOneOf(tok::semi, tok::r_brace, tok::eof)) {
  2254. if (Style.RemoveBracesLLVM)
  2255. NestedTooDeep.pop_back();
  2256. return;
  2257. }
  2258. nextToken();
  2259. }
  2260. NeedsUnwrappedLine = false;
  2261. CompoundStatementIndenter Indenter(this, Style, Line->Level);
  2262. parseBlock();
  2263. if (Style.BraceWrapping.BeforeCatch)
  2264. addUnwrappedLine();
  2265. else
  2266. NeedsUnwrappedLine = true;
  2267. }
  2268. if (Style.RemoveBracesLLVM)
  2269. NestedTooDeep.pop_back();
  2270. if (NeedsUnwrappedLine)
  2271. addUnwrappedLine();
  2272. }
  2273. void UnwrappedLineParser::parseNamespace() {
  2274. assert(FormatTok->isOneOf(tok::kw_namespace, TT_NamespaceMacro) &&
  2275. "'namespace' expected");
  2276. const FormatToken &InitialToken = *FormatTok;
  2277. nextToken();
  2278. if (InitialToken.is(TT_NamespaceMacro)) {
  2279. parseParens();
  2280. } else {
  2281. while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::kw_inline,
  2282. tok::l_square, tok::period) ||
  2283. (Style.isCSharp() && FormatTok->is(tok::kw_union))) {
  2284. if (FormatTok->is(tok::l_square))
  2285. parseSquare();
  2286. else
  2287. nextToken();
  2288. }
  2289. }
  2290. if (FormatTok->Tok.is(tok::l_brace)) {
  2291. if (ShouldBreakBeforeBrace(Style, InitialToken))
  2292. addUnwrappedLine();
  2293. unsigned AddLevels =
  2294. Style.NamespaceIndentation == FormatStyle::NI_All ||
  2295. (Style.NamespaceIndentation == FormatStyle::NI_Inner &&
  2296. DeclarationScopeStack.size() > 1)
  2297. ? 1u
  2298. : 0u;
  2299. bool ManageWhitesmithsBraces =
  2300. AddLevels == 0u &&
  2301. Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths;
  2302. // If we're in Whitesmiths mode, indent the brace if we're not indenting
  2303. // the whole block.
  2304. if (ManageWhitesmithsBraces)
  2305. ++Line->Level;
  2306. parseBlock(/*MustBeDeclaration=*/true, AddLevels,
  2307. /*MunchSemi=*/true,
  2308. /*UnindentWhitesmithsBraces=*/ManageWhitesmithsBraces);
  2309. // Munch the semicolon after a namespace. This is more common than one would
  2310. // think. Putting the semicolon into its own line is very ugly.
  2311. if (FormatTok->Tok.is(tok::semi))
  2312. nextToken();
  2313. addUnwrappedLine(AddLevels > 0 ? LineLevel::Remove : LineLevel::Keep);
  2314. if (ManageWhitesmithsBraces)
  2315. --Line->Level;
  2316. }
  2317. // FIXME: Add error handling.
  2318. }
  2319. void UnwrappedLineParser::parseNew() {
  2320. assert(FormatTok->is(tok::kw_new) && "'new' expected");
  2321. nextToken();
  2322. if (Style.isCSharp()) {
  2323. do {
  2324. if (FormatTok->is(tok::l_brace))
  2325. parseBracedList();
  2326. if (FormatTok->isOneOf(tok::semi, tok::comma))
  2327. return;
  2328. nextToken();
  2329. } while (!eof());
  2330. }
  2331. if (Style.Language != FormatStyle::LK_Java)
  2332. return;
  2333. // In Java, we can parse everything up to the parens, which aren't optional.
  2334. do {
  2335. // There should not be a ;, { or } before the new's open paren.
  2336. if (FormatTok->isOneOf(tok::semi, tok::l_brace, tok::r_brace))
  2337. return;
  2338. // Consume the parens.
  2339. if (FormatTok->is(tok::l_paren)) {
  2340. parseParens();
  2341. // If there is a class body of an anonymous class, consume that as child.
  2342. if (FormatTok->is(tok::l_brace))
  2343. parseChildBlock();
  2344. return;
  2345. }
  2346. nextToken();
  2347. } while (!eof());
  2348. }
  2349. void UnwrappedLineParser::parseForOrWhileLoop() {
  2350. assert(FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) &&
  2351. "'for', 'while' or foreach macro expected");
  2352. nextToken();
  2353. // JS' for await ( ...
  2354. if (Style.isJavaScript() && FormatTok->is(Keywords.kw_await))
  2355. nextToken();
  2356. if (Style.isCpp() && FormatTok->is(tok::kw_co_await))
  2357. nextToken();
  2358. if (FormatTok->Tok.is(tok::l_paren))
  2359. parseParens();
  2360. keepAncestorBraces();
  2361. if (FormatTok->Tok.is(tok::l_brace)) {
  2362. FormatToken *LeftBrace = FormatTok;
  2363. CompoundStatementIndenter Indenter(this, Style, Line->Level);
  2364. parseBlock();
  2365. if (Style.RemoveBracesLLVM) {
  2366. assert(!NestedTooDeep.empty());
  2367. if (!NestedTooDeep.back())
  2368. markOptionalBraces(LeftBrace);
  2369. }
  2370. addUnwrappedLine();
  2371. } else {
  2372. addUnwrappedLine();
  2373. ++Line->Level;
  2374. parseStructuralElement();
  2375. --Line->Level;
  2376. }
  2377. if (Style.RemoveBracesLLVM)
  2378. NestedTooDeep.pop_back();
  2379. }
  2380. void UnwrappedLineParser::parseDoWhile() {
  2381. assert(FormatTok->Tok.is(tok::kw_do) && "'do' expected");
  2382. nextToken();
  2383. keepAncestorBraces();
  2384. if (FormatTok->Tok.is(tok::l_brace)) {
  2385. CompoundStatementIndenter Indenter(this, Style, Line->Level);
  2386. parseBlock();
  2387. if (Style.BraceWrapping.BeforeWhile)
  2388. addUnwrappedLine();
  2389. } else {
  2390. addUnwrappedLine();
  2391. ++Line->Level;
  2392. parseStructuralElement();
  2393. --Line->Level;
  2394. }
  2395. if (Style.RemoveBracesLLVM)
  2396. NestedTooDeep.pop_back();
  2397. // FIXME: Add error handling.
  2398. if (!FormatTok->Tok.is(tok::kw_while)) {
  2399. addUnwrappedLine();
  2400. return;
  2401. }
  2402. // If in Whitesmiths mode, the line with the while() needs to be indented
  2403. // to the same level as the block.
  2404. if (Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths)
  2405. ++Line->Level;
  2406. nextToken();
  2407. parseStructuralElement();
  2408. }
  2409. void UnwrappedLineParser::parseLabel(bool LeftAlignLabel) {
  2410. nextToken();
  2411. unsigned OldLineLevel = Line->Level;
  2412. if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0))
  2413. --Line->Level;
  2414. if (LeftAlignLabel)
  2415. Line->Level = 0;
  2416. if (!Style.IndentCaseBlocks && CommentsBeforeNextToken.empty() &&
  2417. FormatTok->Tok.is(tok::l_brace)) {
  2418. CompoundStatementIndenter Indenter(this, Line->Level,
  2419. Style.BraceWrapping.AfterCaseLabel,
  2420. Style.BraceWrapping.IndentBraces);
  2421. parseBlock();
  2422. if (FormatTok->Tok.is(tok::kw_break)) {
  2423. if (Style.BraceWrapping.AfterControlStatement ==
  2424. FormatStyle::BWACS_Always) {
  2425. addUnwrappedLine();
  2426. if (!Style.IndentCaseBlocks &&
  2427. Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths) {
  2428. ++Line->Level;
  2429. }
  2430. }
  2431. parseStructuralElement();
  2432. }
  2433. addUnwrappedLine();
  2434. } else {
  2435. if (FormatTok->is(tok::semi))
  2436. nextToken();
  2437. addUnwrappedLine();
  2438. }
  2439. Line->Level = OldLineLevel;
  2440. if (FormatTok->isNot(tok::l_brace)) {
  2441. parseStructuralElement();
  2442. addUnwrappedLine();
  2443. }
  2444. }
  2445. void UnwrappedLineParser::parseCaseLabel() {
  2446. assert(FormatTok->Tok.is(tok::kw_case) && "'case' expected");
  2447. // FIXME: fix handling of complex expressions here.
  2448. do {
  2449. nextToken();
  2450. } while (!eof() && !FormatTok->Tok.is(tok::colon));
  2451. parseLabel();
  2452. }
  2453. void UnwrappedLineParser::parseSwitch() {
  2454. assert(FormatTok->Tok.is(tok::kw_switch) && "'switch' expected");
  2455. nextToken();
  2456. if (FormatTok->Tok.is(tok::l_paren))
  2457. parseParens();
  2458. keepAncestorBraces();
  2459. if (FormatTok->Tok.is(tok::l_brace)) {
  2460. CompoundStatementIndenter Indenter(this, Style, Line->Level);
  2461. parseBlock();
  2462. addUnwrappedLine();
  2463. } else {
  2464. addUnwrappedLine();
  2465. ++Line->Level;
  2466. parseStructuralElement();
  2467. --Line->Level;
  2468. }
  2469. if (Style.RemoveBracesLLVM)
  2470. NestedTooDeep.pop_back();
  2471. }
  2472. void UnwrappedLineParser::parseAccessSpecifier() {
  2473. FormatToken *AccessSpecifierCandidate = FormatTok;
  2474. nextToken();
  2475. // Understand Qt's slots.
  2476. if (FormatTok->isOneOf(Keywords.kw_slots, Keywords.kw_qslots))
  2477. nextToken();
  2478. // Otherwise, we don't know what it is, and we'd better keep the next token.
  2479. if (FormatTok->Tok.is(tok::colon)) {
  2480. nextToken();
  2481. addUnwrappedLine();
  2482. } else if (!FormatTok->Tok.is(tok::coloncolon) &&
  2483. !std::binary_search(COperatorsFollowingVar.begin(),
  2484. COperatorsFollowingVar.end(),
  2485. FormatTok->Tok.getKind())) {
  2486. // Not a variable name nor namespace name.
  2487. addUnwrappedLine();
  2488. } else if (AccessSpecifierCandidate) {
  2489. // Consider the access specifier to be a C identifier.
  2490. AccessSpecifierCandidate->Tok.setKind(tok::identifier);
  2491. }
  2492. }
  2493. void UnwrappedLineParser::parseConcept() {
  2494. assert(FormatTok->Tok.is(tok::kw_concept) && "'concept' expected");
  2495. nextToken();
  2496. if (!FormatTok->Tok.is(tok::identifier))
  2497. return;
  2498. nextToken();
  2499. if (!FormatTok->Tok.is(tok::equal))
  2500. return;
  2501. nextToken();
  2502. if (FormatTok->Tok.is(tok::kw_requires)) {
  2503. nextToken();
  2504. parseRequiresExpression(Line->Level);
  2505. } else {
  2506. parseConstraintExpression(Line->Level);
  2507. }
  2508. }
  2509. void UnwrappedLineParser::parseRequiresExpression(unsigned int OriginalLevel) {
  2510. // requires (R range)
  2511. if (FormatTok->Tok.is(tok::l_paren)) {
  2512. parseParens();
  2513. if (Style.IndentRequires && OriginalLevel != Line->Level) {
  2514. addUnwrappedLine();
  2515. --Line->Level;
  2516. }
  2517. }
  2518. if (FormatTok->Tok.is(tok::l_brace)) {
  2519. if (Style.BraceWrapping.AfterFunction)
  2520. addUnwrappedLine();
  2521. FormatTok->setType(TT_FunctionLBrace);
  2522. parseBlock();
  2523. addUnwrappedLine();
  2524. } else {
  2525. parseConstraintExpression(OriginalLevel);
  2526. }
  2527. }
  2528. void UnwrappedLineParser::parseConstraintExpression(
  2529. unsigned int OriginalLevel) {
  2530. // requires Id<T> && Id<T> || Id<T>
  2531. while (
  2532. FormatTok->isOneOf(tok::identifier, tok::kw_requires, tok::coloncolon)) {
  2533. nextToken();
  2534. while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::less,
  2535. tok::greater, tok::comma, tok::ellipsis)) {
  2536. if (FormatTok->Tok.is(tok::less)) {
  2537. parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
  2538. /*ClosingBraceKind=*/tok::greater);
  2539. continue;
  2540. }
  2541. nextToken();
  2542. }
  2543. if (FormatTok->Tok.is(tok::kw_requires)) {
  2544. parseRequiresExpression(OriginalLevel);
  2545. }
  2546. if (FormatTok->Tok.is(tok::less)) {
  2547. parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
  2548. /*ClosingBraceKind=*/tok::greater);
  2549. }
  2550. if (FormatTok->Tok.is(tok::l_paren)) {
  2551. parseParens();
  2552. }
  2553. if (FormatTok->Tok.is(tok::l_brace)) {
  2554. if (Style.BraceWrapping.AfterFunction)
  2555. addUnwrappedLine();
  2556. FormatTok->setType(TT_FunctionLBrace);
  2557. parseBlock();
  2558. }
  2559. if (FormatTok->Tok.is(tok::semi)) {
  2560. // Eat any trailing semi.
  2561. nextToken();
  2562. addUnwrappedLine();
  2563. }
  2564. if (FormatTok->Tok.is(tok::colon)) {
  2565. return;
  2566. }
  2567. if (!FormatTok->Tok.isOneOf(tok::ampamp, tok::pipepipe)) {
  2568. if (FormatTok->Previous &&
  2569. !FormatTok->Previous->isOneOf(tok::identifier, tok::kw_requires,
  2570. tok::coloncolon)) {
  2571. addUnwrappedLine();
  2572. }
  2573. if (Style.IndentRequires && OriginalLevel != Line->Level) {
  2574. --Line->Level;
  2575. }
  2576. break;
  2577. } else {
  2578. FormatTok->setType(TT_ConstraintJunctions);
  2579. }
  2580. nextToken();
  2581. }
  2582. }
  2583. void UnwrappedLineParser::parseRequires() {
  2584. assert(FormatTok->Tok.is(tok::kw_requires) && "'requires' expected");
  2585. unsigned OriginalLevel = Line->Level;
  2586. if (FormatTok->Previous && FormatTok->Previous->is(tok::greater)) {
  2587. addUnwrappedLine();
  2588. if (Style.IndentRequires) {
  2589. ++Line->Level;
  2590. }
  2591. }
  2592. nextToken();
  2593. parseRequiresExpression(OriginalLevel);
  2594. }
  /// Parses an enum declaration or definition starting at the current token.
  ///
  /// \returns false on the early exits below, where the tokens are judged not
  /// to form an enum (JavaScript property name, protobuf field name, two
  /// consecutive identifiers in C++); true in every other case, including a
  /// plain declaration without a '{'.
  bool UnwrappedLineParser::parseEnum() {
    // Kept for the brace-wrapping decision further down.
    const FormatToken &InitialToken = *FormatTok;
    // Won't be 'enum' for NS_ENUMs.
    if (FormatTok->Tok.is(tok::kw_enum))
      nextToken();
    // In TypeScript, "enum" can also be used as property name, e.g. in interface
    // declarations. An "enum" keyword followed by a colon would be a syntax
    // error and thus assume it is just an identifier.
    if (Style.isJavaScript() && FormatTok->isOneOf(tok::colon, tok::question))
      return false;
    // In protobuf, "enum" can be used as a field name.
    if (Style.Language == FormatStyle::LK_Proto && FormatTok->is(tok::equal))
      return false;
    // Eat up enum class ...
    if (FormatTok->Tok.is(tok::kw_class) || FormatTok->Tok.is(tok::kw_struct))
      nextToken();
    // Skip everything that may sit between the keyword and the '{': any token
    // with identifier info plus ':', '::', '<', '>', ',' and '?'.
    while (FormatTok->Tok.getIdentifierInfo() ||
           FormatTok->isOneOf(tok::colon, tok::coloncolon, tok::less,
                              tok::greater, tok::comma, tok::question)) {
      nextToken();
      // We can have macros or attributes in between 'enum' and the enum name.
      if (FormatTok->is(tok::l_paren))
        parseParens();
      if (FormatTok->is(tok::identifier)) {
        nextToken();
        // If there are two identifiers in a row, this is likely an elaborate
        // return type. In Java, this can be "implements", etc.
        if (Style.isCpp() && FormatTok->is(tok::identifier))
          return false;
      }
    }
    // Just a declaration or something is wrong.
    if (FormatTok->isNot(tok::l_brace))
      return true;
    // From here on the current token is the '{' opening the enum body.
    FormatTok->setType(TT_RecordLBrace);
    FormatTok->setBlockKind(BK_Block);
    if (Style.Language == FormatStyle::LK_Java) {
      // Java enums are different.
      parseJavaEnumBody();
      return true;
    }
    if (Style.Language == FormatStyle::LK_Proto) {
      parseBlock(/*MustBeDeclaration=*/true);
      return true;
    }
    // A line break before the '{' is only considered when short enums may not
    // stay on a single line.
    if (!Style.AllowShortEnumsOnASingleLine &&
        ShouldBreakBeforeBrace(Style, InitialToken))
      addUnwrappedLine();
    // Parse enum body.
    nextToken();
    if (!Style.AllowShortEnumsOnASingleLine) {
      // End the line after '{' and parse the enumerators one level deeper.
      addUnwrappedLine();
      Line->Level += 1;
    }
    bool HasError = !parseBracedList(/*ContinueOnSemicolons=*/true,
                                     /*IsEnum=*/true);
    // Undo the level increase applied before the braced list.
    if (!Style.AllowShortEnumsOnASingleLine)
      Line->Level -= 1;
    if (HasError) {
      // parseBracedList() reported failure: swallow a ';' if present and end
      // the line here.
      if (FormatTok->is(tok::semi))
        nextToken();
      addUnwrappedLine();
    }
    return true;
    // There is no addUnwrappedLine() here so that we fall through to parsing a
    // structural element afterwards. Thus, in "enum A {} n, m;",
    // "} n, m;" will end up in one unwrapped line.
  }
  2663. bool UnwrappedLineParser::parseStructLike() {
  2664. // parseRecord falls through and does not yet add an unwrapped line as a
  2665. // record declaration or definition can start a structural element.
  2666. parseRecord();
  2667. // This does not apply to Java, JavaScript and C#.
  2668. if (Style.Language == FormatStyle::LK_Java || Style.isJavaScript() ||
  2669. Style.isCSharp()) {
  2670. if (FormatTok->is(tok::semi))
  2671. nextToken();
  2672. addUnwrappedLine();
  2673. return true;
  2674. }
  2675. return false;
  2676. }
  2677. namespace {
  2678. // A class used to set and restore the Token position when peeking
  2679. // ahead in the token source.
  2680. class ScopedTokenPosition {
  2681. unsigned StoredPosition;
  2682. FormatTokenSource *Tokens;
  2683. public:
  2684. ScopedTokenPosition(FormatTokenSource *Tokens) : Tokens(Tokens) {
  2685. assert(Tokens && "Tokens expected to not be null");
  2686. StoredPosition = Tokens->getPosition();
  2687. }
  2688. ~ScopedTokenPosition() { Tokens->setPosition(StoredPosition); }
  2689. };
  2690. } // namespace
  2691. // Look to see if we have [[ by looking ahead, if
  2692. // its not then rewind to the original position.
  2693. bool UnwrappedLineParser::tryToParseSimpleAttribute() {
  2694. ScopedTokenPosition AutoPosition(Tokens);
  2695. FormatToken *Tok = Tokens->getNextToken();
  2696. // We already read the first [ check for the second.
  2697. if (!Tok->is(tok::l_square)) {
  2698. return false;
  2699. }
  2700. // Double check that the attribute is just something
  2701. // fairly simple.
  2702. while (Tok->isNot(tok::eof)) {
  2703. if (Tok->is(tok::r_square)) {
  2704. break;
  2705. }
  2706. Tok = Tokens->getNextToken();
  2707. }
  2708. if (Tok->is(tok::eof))
  2709. return false;
  2710. Tok = Tokens->getNextToken();
  2711. if (!Tok->is(tok::r_square)) {
  2712. return false;
  2713. }
  2714. Tok = Tokens->getNextToken();
  2715. if (Tok->is(tok::semi)) {
  2716. return false;
  2717. }
  2718. return true;
  2719. }
  2720. void UnwrappedLineParser::parseJavaEnumBody() {
  2721. // Determine whether the enum is simple, i.e. does not have a semicolon or
  2722. // constants with class bodies. Simple enums can be formatted like braced
  2723. // lists, contracted to a single line, etc.
  2724. unsigned StoredPosition = Tokens->getPosition();
  2725. bool IsSimple = true;
  2726. FormatToken *Tok = Tokens->getNextToken();
  2727. while (!Tok->is(tok::eof)) {
  2728. if (Tok->is(tok::r_brace))
  2729. break;
  2730. if (Tok->isOneOf(tok::l_brace, tok::semi)) {
  2731. IsSimple = false;
  2732. break;
  2733. }
  2734. // FIXME: This will also mark enums with braces in the arguments to enum
  2735. // constants as "not simple". This is probably fine in practice, though.
  2736. Tok = Tokens->getNextToken();
  2737. }
  2738. FormatTok = Tokens->setPosition(StoredPosition);
  2739. if (IsSimple) {
  2740. nextToken();
  2741. parseBracedList();
  2742. addUnwrappedLine();
  2743. return;
  2744. }
  2745. // Parse the body of a more complex enum.
  2746. // First add a line for everything up to the "{".
  2747. nextToken();
  2748. addUnwrappedLine();
  2749. ++Line->Level;
  2750. // Parse the enum constants.
  2751. while (FormatTok) {
  2752. if (FormatTok->is(tok::l_brace)) {
  2753. // Parse the constant's class body.
  2754. parseBlock(/*MustBeDeclaration=*/true, /*AddLevels=*/1u,
  2755. /*MunchSemi=*/false);
  2756. } else if (FormatTok->is(tok::l_paren)) {
  2757. parseParens();
  2758. } else if (FormatTok->is(tok::comma)) {
  2759. nextToken();
  2760. addUnwrappedLine();
  2761. } else if (FormatTok->is(tok::semi)) {
  2762. nextToken();
  2763. addUnwrappedLine();
  2764. break;
  2765. } else if (FormatTok->is(tok::r_brace)) {
  2766. addUnwrappedLine();
  2767. break;
  2768. } else {
  2769. nextToken();
  2770. }
  2771. }
  2772. // Parse the class body after the enum's ";" if any.
  2773. parseLevel(/*HasOpeningBrace=*/true);
  2774. nextToken();
  2775. --Line->Level;
  2776. addUnwrappedLine();
  2777. }
  /// Parses a record (class/struct-like) declaration or definition. The
  /// current token is the introducing keyword, which is consumed first.
  ///
  /// \param ParseAsExpr when true, a record body is parsed with
  /// parseChildBlock() instead of parseBlock().
  ///
  /// No unwrapped line is added after the body; see the comment at the end of
  /// the function.
  void UnwrappedLineParser::parseRecord(bool ParseAsExpr) {
    // Kept for the brace-wrapping decision further down.
    const FormatToken &InitialToken = *FormatTok;
    nextToken();
    // The actual identifier can be a nested name specifier, and in macros
    // it is often token-pasted.
    // An [[attribute]] can be before the identifier.
    while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::hashhash,
                              tok::kw___attribute, tok::kw___declspec,
                              tok::kw_alignas, tok::l_square, tok::r_square) ||
           ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) &&
            FormatTok->isOneOf(tok::period, tok::comma))) {
      if (Style.isJavaScript() &&
          FormatTok->isOneOf(Keywords.kw_extends, Keywords.kw_implements)) {
        // JavaScript/TypeScript supports inline object types in
        // extends/implements positions:
        //     class Foo implements {bar: number} { }
        nextToken();
        if (FormatTok->is(tok::l_brace)) {
          tryToParseBracedList();
          continue;
        }
      }
      // An identifier counts as a macro candidate when its text equals its
      // own upper-cased form.
      bool IsNonMacroIdentifier =
          FormatTok->is(tok::identifier) &&
          FormatTok->TokenText != FormatTok->TokenText.upper();
      nextToken();
      // We can have macros or attributes in between 'class' and the class name.
      if (!IsNonMacroIdentifier) {
        if (FormatTok->Tok.is(tok::l_paren)) {
          parseParens();
        } else if (FormatTok->is(TT_AttributeSquare)) {
          parseSquare();
          // Consume the closing TT_AttributeSquare.
          if (FormatTok->Next && FormatTok->is(TT_AttributeSquare))
            nextToken();
        }
      }
    }
    // Note that parsing away template declarations here leads to incorrectly
    // accepting function declarations as record declarations.
    // In general, we cannot solve this problem. Consider:
    //   class A<int> B() {}
    // which can be a function definition or a class definition when B() is a
    // macro. If we find enough real-world cases where this is a problem, we
    // can parse for the 'template' keyword in the beginning of the statement,
    // and thus rule out the record production in case there is no template
    // (this would still leave us with an ambiguity between template function
    // and class declarations).
    if (FormatTok->isOneOf(tok::colon, tok::less)) {
      // Skip forward over what follows the ':' or '<' until the record body,
      // a ';', or the end of the file.
      while (!eof()) {
        if (FormatTok->is(tok::l_brace)) {
          calculateBraceTypes(/*ExpectClassBody=*/true);
          // Not a braced list: leave the '{' for the body handling below.
          if (!tryToParseBracedList())
            break;
        }
        if (FormatTok->is(tok::l_square)) {
          FormatToken *Previous = FormatTok->Previous;
          if (!Previous ||
              !(Previous->is(tok::r_paren) || Previous->isTypeOrIdentifier())) {
            // Don't try parsing a lambda if we had a closing parenthesis before,
            // it was probably a pointer to an array: int (*)[].
            if (!tryToParseLambda())
              break;
          }
        }
        // A ';' means there is no body; return without adding a line.
        if (FormatTok->Tok.is(tok::semi))
          return;
        if (Style.isCSharp() && FormatTok->is(Keywords.kw_where)) {
          // A C# 'where' starts a new unwrapped line and is handed to the
          // generic type constraint parser.
          addUnwrappedLine();
          nextToken();
          parseCSharpGenericTypeConstraint();
          break;
        }
        nextToken();
      }
    }
    if (FormatTok->Tok.is(tok::l_brace)) {
      FormatTok->setType(TT_RecordLBrace);
      if (ParseAsExpr) {
        parseChildBlock();
      } else {
        if (ShouldBreakBeforeBrace(Style, InitialToken))
          addUnwrappedLine();
        // Two levels are added when Style.IndentAccessModifiers is set, one
        // otherwise.
        unsigned AddLevels = Style.IndentAccessModifiers ? 2u : 1u;
        parseBlock(/*MustBeDeclaration=*/true, AddLevels, /*MunchSemi=*/false);
      }
    }
    // There is no addUnwrappedLine() here so that we fall through to parsing a
    // structural element afterwards. Thus, in "class A {} n, m;",
    // "} n, m;" will end up in one unwrapped line.
  }
  2869. void UnwrappedLineParser::parseObjCMethod() {
  2870. assert(FormatTok->Tok.isOneOf(tok::l_paren, tok::identifier) &&
  2871. "'(' or identifier expected.");
  2872. do {
  2873. if (FormatTok->Tok.is(tok::semi)) {
  2874. nextToken();
  2875. addUnwrappedLine();
  2876. return;
  2877. } else if (FormatTok->Tok.is(tok::l_brace)) {
  2878. if (Style.BraceWrapping.AfterFunction)
  2879. addUnwrappedLine();
  2880. parseBlock();
  2881. addUnwrappedLine();
  2882. return;
  2883. } else {
  2884. nextToken();
  2885. }
  2886. } while (!eof());
  2887. }
  2888. void UnwrappedLineParser::parseObjCProtocolList() {
  2889. assert(FormatTok->Tok.is(tok::less) && "'<' expected.");
  2890. do {
  2891. nextToken();
  2892. // Early exit in case someone forgot a close angle.
  2893. if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
  2894. FormatTok->Tok.isObjCAtKeyword(tok::objc_end))
  2895. return;
  2896. } while (!eof() && FormatTok->Tok.isNot(tok::greater));
  2897. nextToken(); // Skip '>'.
  2898. }
  2899. void UnwrappedLineParser::parseObjCUntilAtEnd() {
  2900. do {
  2901. if (FormatTok->Tok.isObjCAtKeyword(tok::objc_end)) {
  2902. nextToken();
  2903. addUnwrappedLine();
  2904. break;
  2905. }
  2906. if (FormatTok->is(tok::l_brace)) {
  2907. parseBlock();
  2908. // In ObjC interfaces, nothing should be following the "}".
  2909. addUnwrappedLine();
  2910. } else if (FormatTok->is(tok::r_brace)) {
  2911. // Ignore stray "}". parseStructuralElement doesn't consume them.
  2912. nextToken();
  2913. addUnwrappedLine();
  2914. } else if (FormatTok->isOneOf(tok::minus, tok::plus)) {
  2915. nextToken();
  2916. parseObjCMethod();
  2917. } else {
  2918. parseStructuralElement();
  2919. }
  2920. } while (!eof());
  2921. }
  2922. void UnwrappedLineParser::parseObjCInterfaceOrImplementation() {
  2923. assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_interface ||
  2924. FormatTok->Tok.getObjCKeywordID() == tok::objc_implementation);
  2925. nextToken();
  2926. nextToken(); // interface name
  2927. // @interface can be followed by a lightweight generic
  2928. // specialization list, then either a base class or a category.
  2929. if (FormatTok->Tok.is(tok::less)) {
  2930. parseObjCLightweightGenerics();
  2931. }
  2932. if (FormatTok->Tok.is(tok::colon)) {
  2933. nextToken();
  2934. nextToken(); // base class name
  2935. // The base class can also have lightweight generics applied to it.
  2936. if (FormatTok->Tok.is(tok::less)) {
  2937. parseObjCLightweightGenerics();
  2938. }
  2939. } else if (FormatTok->Tok.is(tok::l_paren))
  2940. // Skip category, if present.
  2941. parseParens();
  2942. if (FormatTok->Tok.is(tok::less))
  2943. parseObjCProtocolList();
  2944. if (FormatTok->Tok.is(tok::l_brace)) {
  2945. if (Style.BraceWrapping.AfterObjCDeclaration)
  2946. addUnwrappedLine();
  2947. parseBlock(/*MustBeDeclaration=*/true);
  2948. }
  2949. // With instance variables, this puts '}' on its own line. Without instance
  2950. // variables, this ends the @interface line.
  2951. addUnwrappedLine();
  2952. parseObjCUntilAtEnd();
  2953. }
  2954. void UnwrappedLineParser::parseObjCLightweightGenerics() {
  2955. assert(FormatTok->Tok.is(tok::less));
  2956. // Unlike protocol lists, generic parameterizations support
  2957. // nested angles:
  2958. //
  2959. // @interface Foo<ValueType : id <NSCopying, NSSecureCoding>> :
  2960. // NSObject <NSCopying, NSSecureCoding>
  2961. //
  2962. // so we need to count how many open angles we have left.
  2963. unsigned NumOpenAngles = 1;
  2964. do {
  2965. nextToken();
  2966. // Early exit in case someone forgot a close angle.
  2967. if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
  2968. FormatTok->Tok.isObjCAtKeyword(tok::objc_end))
  2969. break;
  2970. if (FormatTok->Tok.is(tok::less))
  2971. ++NumOpenAngles;
  2972. else if (FormatTok->Tok.is(tok::greater)) {
  2973. assert(NumOpenAngles > 0 && "'>' makes NumOpenAngles negative");
  2974. --NumOpenAngles;
  2975. }
  2976. } while (!eof() && NumOpenAngles != 0);
  2977. nextToken(); // Skip '>'.
  2978. }
  2979. // Returns true for the declaration/definition form of @protocol,
  2980. // false for the expression form.
  2981. bool UnwrappedLineParser::parseObjCProtocol() {
  2982. assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_protocol);
  2983. nextToken();
  2984. if (FormatTok->is(tok::l_paren))
  2985. // The expression form of @protocol, e.g. "Protocol* p = @protocol(foo);".
  2986. return false;
  2987. // The definition/declaration form,
  2988. // @protocol Foo
  2989. // - (int)someMethod;
  2990. // @end
  2991. nextToken(); // protocol name
  2992. if (FormatTok->Tok.is(tok::less))
  2993. parseObjCProtocolList();
  2994. // Check for protocol declaration.
  2995. if (FormatTok->Tok.is(tok::semi)) {
  2996. nextToken();
  2997. addUnwrappedLine();
  2998. return true;
  2999. }
  3000. addUnwrappedLine();
  3001. parseObjCUntilAtEnd();
  3002. return true;
  3003. }
  3004. void UnwrappedLineParser::parseJavaScriptEs6ImportExport() {
  3005. bool IsImport = FormatTok->is(Keywords.kw_import);
  3006. assert(IsImport || FormatTok->is(tok::kw_export));
  3007. nextToken();
  3008. // Consume the "default" in "export default class/function".
  3009. if (FormatTok->is(tok::kw_default))
  3010. nextToken();
  3011. // Consume "async function", "function" and "default function", so that these
  3012. // get parsed as free-standing JS functions, i.e. do not require a trailing
  3013. // semicolon.
  3014. if (FormatTok->is(Keywords.kw_async))
  3015. nextToken();
  3016. if (FormatTok->is(Keywords.kw_function)) {
  3017. nextToken();
  3018. return;
  3019. }
  3020. // For imports, `export *`, `export {...}`, consume the rest of the line up
  3021. // to the terminating `;`. For everything else, just return and continue
  3022. // parsing the structural element, i.e. the declaration or expression for
  3023. // `export default`.
  3024. if (!IsImport && !FormatTok->isOneOf(tok::l_brace, tok::star) &&
  3025. !FormatTok->isStringLiteral())
  3026. return;
  3027. while (!eof()) {
  3028. if (FormatTok->is(tok::semi))
  3029. return;
  3030. if (Line->Tokens.empty()) {
  3031. // Common issue: Automatic Semicolon Insertion wrapped the line, so the
  3032. // import statement should terminate.
  3033. return;
  3034. }
  3035. if (FormatTok->is(tok::l_brace)) {
  3036. FormatTok->setBlockKind(BK_Block);
  3037. nextToken();
  3038. parseBracedList();
  3039. } else {
  3040. nextToken();
  3041. }
  3042. }
  3043. }
  3044. void UnwrappedLineParser::parseStatementMacro() {
  3045. nextToken();
  3046. if (FormatTok->is(tok::l_paren))
  3047. parseParens();
  3048. if (FormatTok->is(tok::semi))
  3049. nextToken();
  3050. addUnwrappedLine();
  3051. }
  3052. LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line,
  3053. StringRef Prefix = "") {
  3054. llvm::dbgs() << Prefix << "Line(" << Line.Level
  3055. << ", FSC=" << Line.FirstStartColumn << ")"
  3056. << (Line.InPPDirective ? " MACRO" : "") << ": ";
  3057. for (const auto &Node : Line.Tokens) {
  3058. llvm::dbgs() << Node.Tok->Tok.getName() << "["
  3059. << "T=" << static_cast<unsigned>(Node.Tok->getType())
  3060. << ", OC=" << Node.Tok->OriginalColumn << "] ";
  3061. }
  3062. for (const auto &Node : Line.Tokens)
  3063. for (const auto &ChildNode : Node.Children)
  3064. printDebugInfo(ChildNode, "\nChild: ");
  3065. llvm::dbgs() << "\n";
  3066. }
  /// Finishes the current unwrapped line: appends it to CurrentLines and
  /// resets the per-line state so that a new line can be collected.
  ///
  /// Does nothing when the current line has no tokens.
  ///
  /// \param AdjustLevel when equal to LineLevel::Remove and the finished line
  /// closes a block in Whitesmiths brace style, Line->Level is decremented
  /// after the line has been added.
  void UnwrappedLineParser::addUnwrappedLine(LineLevel AdjustLevel) {
    if (Line->Tokens.empty())
      return;
    LLVM_DEBUG({
      // Only lines that go straight into Lines are dumped.
      if (CurrentLines == &Lines)
        printDebugInfo(*Line);
    });

    // If this line closes a block when in Whitesmiths mode, remember that
    // information so that the level can be decreased after the line is added.
    // This has to happen after the addition of the line since the line itself
    // needs to be indented.
    bool ClosesWhitesmithsBlock =
        Line->MatchingOpeningBlockLineIndex != UnwrappedLine::kInvalidIndex &&
        Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths;

    // *Line is moved from here; the assignments that follow put the fields
    // used for the next line back into a known state. Level is left as it is.
    CurrentLines->push_back(std::move(*Line));
    Line->Tokens.clear();
    Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex;
    Line->FirstStartColumn = 0;

    if (ClosesWhitesmithsBlock && AdjustLevel == LineLevel::Remove)
      --Line->Level;
    // Preprocessor directive lines that were held back are appended once a
    // line has been added to Lines itself.
    if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) {
      CurrentLines->append(
          std::make_move_iterator(PreprocessorDirectives.begin()),
          std::make_move_iterator(PreprocessorDirectives.end()));
      PreprocessorDirectives.clear();
    }
    // Disconnect the current token from the last token on the previous line.
    FormatTok->Previous = nullptr;
  }
  3096. bool UnwrappedLineParser::eof() const { return FormatTok->Tok.is(tok::eof); }
  3097. bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) {
  3098. return (Line->InPPDirective || FormatTok.HasUnescapedNewline) &&
  3099. FormatTok.NewlinesBefore > 0;
  3100. }
  // Checks if \p FormatTok is a line comment that continues the line comment
  // section on \p Line.
  //
  // Returns false when \p Line has no tokens, or when the comment text (with a
  // leading "//" or "/*" removed) matches \p CommentPragmasRegex.
  static bool
  continuesLineCommentSection(const FormatToken &FormatTok,
                              const UnwrappedLine &Line,
                              const llvm::Regex &CommentPragmasRegex) {
    if (Line.Tokens.empty())
      return false;

    // Match the pragma regex against the comment body, without its two-char
    // comment introducer.
    StringRef IndentContent = FormatTok.TokenText;
    if (FormatTok.TokenText.startswith("//") ||
        FormatTok.TokenText.startswith("/*"))
      IndentContent = FormatTok.TokenText.substr(2);
    if (CommentPragmasRegex.match(IndentContent))
      return false;

    // If Line starts with a line comment, then FormatTok continues the comment
    // section if its original column is greater or equal to the original start
    // column of the line.
    //
    // Define the min column token of a line as follows: if a line ends in '{' or
    // contains a '{' followed by a line comment, then the min column token is
    // that '{'. Otherwise, the min column token of the line is the first token of
    // the line.
    //
    // If Line starts with a token other than a line comment, then FormatTok
    // continues the comment section if its original column is greater than the
    // original start column of the min column token of the line.
    //
    // For example, the second line comment continues the first in these cases:
    //
    // // first line
    // // second line
    //
    // and:
    //
    // // first line
    //  // second line
    //
    // and:
    //
    // int i; // first line
    //  // second line
    //
    // and:
    //
    // do { // first line
    //      // second line
    //   int i;
    // } while (true);
    //
    // and:
    //
    // enum {
    //   a, // first line
    //    // second line
    //   b
    // };
    //
    // The second line comment doesn't continue the first in these cases:
    //
    //   // first line
    //  // second line
    //
    // and:
    //
    // int i; // first line
    // // second line
    //
    // and:
    //
    // do { // first line
    //   // second line
    //   int i;
    // } while (true);
    //
    // and:
    //
    // enum {
    //   a, // first line
    //   // second line
    // };
    const FormatToken *MinColumnToken = Line.Tokens.front().Tok;

    // Scan for '{//'. If found, use the column of '{' as a min column for line
    // comment section continuation.
    const FormatToken *PreviousToken = nullptr;
    for (const UnwrappedLineNode &Node : Line.Tokens) {
      if (PreviousToken && PreviousToken->is(tok::l_brace) &&
          isLineComment(*Node.Tok)) {
        MinColumnToken = PreviousToken;
        break;
      }
      PreviousToken = Node.Tok;

      // Grab the last newline preceding a token in this unwrapped line.
      if (Node.Tok->NewlinesBefore > 0) {
        MinColumnToken = Node.Tok;
      }
    }
    // PreviousToken is the last token visited by the loop above; if it is a
    // '{', it becomes the min column token.
    if (PreviousToken && PreviousToken->is(tok::l_brace)) {
      MinColumnToken = PreviousToken;
    }

    return continuesLineComment(FormatTok, /*Previous=*/Line.Tokens.back().Tok,
                                MinColumnToken);
  }
  3203. void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) {
  3204. bool JustComments = Line->Tokens.empty();
  3205. for (FormatToken *Tok : CommentsBeforeNextToken) {
  3206. // Line comments that belong to the same line comment section are put on the
  3207. // same line since later we might want to reflow content between them.
  3208. // Additional fine-grained breaking of line comment sections is controlled
  3209. // by the class BreakableLineCommentSection in case it is desirable to keep
  3210. // several line comment sections in the same unwrapped line.
  3211. //
  3212. // FIXME: Consider putting separate line comment sections as children to the
  3213. // unwrapped line instead.
  3214. Tok->ContinuesLineCommentSection =
  3215. continuesLineCommentSection(*Tok, *Line, CommentPragmasRegex);
  3216. if (isOnNewLine(*Tok) && JustComments && !Tok->ContinuesLineCommentSection)
  3217. addUnwrappedLine();
  3218. pushToken(Tok);
  3219. }
  3220. if (NewlineBeforeNext && JustComments)
  3221. addUnwrappedLine();
  3222. CommentsBeforeNextToken.clear();
  3223. }
  3224. void UnwrappedLineParser::nextToken(int LevelDifference) {
  3225. if (eof())
  3226. return;
  3227. flushComments(isOnNewLine(*FormatTok));
  3228. pushToken(FormatTok);
  3229. FormatToken *Previous = FormatTok;
  3230. if (!Style.isJavaScript())
  3231. readToken(LevelDifference);
  3232. else
  3233. readTokenWithJavaScriptASI();
  3234. FormatTok->Previous = Previous;
  3235. }
  3236. void UnwrappedLineParser::distributeComments(
  3237. const SmallVectorImpl<FormatToken *> &Comments,
  3238. const FormatToken *NextTok) {
  3239. // Whether or not a line comment token continues a line is controlled by
  3240. // the method continuesLineCommentSection, with the following caveat:
  3241. //
  3242. // Define a trail of Comments to be a nonempty proper postfix of Comments such
  3243. // that each comment line from the trail is aligned with the next token, if
  3244. // the next token exists. If a trail exists, the beginning of the maximal
  3245. // trail is marked as a start of a new comment section.
  3246. //
  3247. // For example in this code:
  3248. //
  3249. // int a; // line about a
  3250. // // line 1 about b
  3251. // // line 2 about b
  3252. // int b;
  3253. //
  3254. // the two lines about b form a maximal trail, so there are two sections, the
  3255. // first one consisting of the single comment "// line about a" and the
  3256. // second one consisting of the next two comments.
  3257. if (Comments.empty())
  3258. return;
  3259. bool ShouldPushCommentsInCurrentLine = true;
  3260. bool HasTrailAlignedWithNextToken = false;
  3261. unsigned StartOfTrailAlignedWithNextToken = 0;
  3262. if (NextTok) {
  3263. // We are skipping the first element intentionally.
  3264. for (unsigned i = Comments.size() - 1; i > 0; --i) {
  3265. if (Comments[i]->OriginalColumn == NextTok->OriginalColumn) {
  3266. HasTrailAlignedWithNextToken = true;
  3267. StartOfTrailAlignedWithNextToken = i;
  3268. }
  3269. }
  3270. }
  3271. for (unsigned i = 0, e = Comments.size(); i < e; ++i) {
  3272. FormatToken *FormatTok = Comments[i];
  3273. if (HasTrailAlignedWithNextToken && i == StartOfTrailAlignedWithNextToken) {
  3274. FormatTok->ContinuesLineCommentSection = false;
  3275. } else {
  3276. FormatTok->ContinuesLineCommentSection =
  3277. continuesLineCommentSection(*FormatTok, *Line, CommentPragmasRegex);
  3278. }
  3279. if (!FormatTok->ContinuesLineCommentSection &&
  3280. (isOnNewLine(*FormatTok) || FormatTok->IsFirst)) {
  3281. ShouldPushCommentsInCurrentLine = false;
  3282. }
  3283. if (ShouldPushCommentsInCurrentLine) {
  3284. pushToken(FormatTok);
  3285. } else {
  3286. CommentsBeforeNextToken.push_back(FormatTok);
  3287. }
  3288. }
  3289. }
  /// Reads tokens from the token source into FormatTok until a non-comment
  /// token is found or the file ends.
  ///
  /// Comments met on the way are collected and handed to distributeComments().
  /// Conflict-marker tokens and preprocessor directives found in between are
  /// processed here as well.
  ///
  /// \param LevelDifference added to Line->Level before a preprocessor
  /// directive found here is parsed.
  void UnwrappedLineParser::readToken(int LevelDifference) {
    // Comments seen since the last non-comment token.
    SmallVector<FormatToken *, 1> Comments;
    do {
      FormatTok = Tokens->getNextToken();
      assert(FormatTok);
      // Conflict markers are routed to the conditional-compilation handlers,
      // and the token following each marker is forced onto a new line.
      while (FormatTok->getType() == TT_ConflictStart ||
             FormatTok->getType() == TT_ConflictEnd ||
             FormatTok->getType() == TT_ConflictAlternative) {
        if (FormatTok->getType() == TT_ConflictStart) {
          conditionalCompilationStart(/*Unreachable=*/false);
        } else if (FormatTok->getType() == TT_ConflictAlternative) {
          conditionalCompilationAlternative();
        } else if (FormatTok->getType() == TT_ConflictEnd) {
          conditionalCompilationEnd();
        }
        FormatTok = Tokens->getNextToken();
        FormatTok->MustBreakBefore = true;
      }

      // A '#' that has an unescaped newline before it, or is the first token,
      // is parsed as a preprocessor directive, unless we are already inside
      // one.
      while (!Line->InPPDirective && FormatTok->Tok.is(tok::hash) &&
             (FormatTok->HasUnescapedNewline || FormatTok->IsFirst)) {
        distributeComments(Comments, FormatTok);
        Comments.clear();
        // If there is an unfinished unwrapped line, we flush the preprocessor
        // directives only after that unwrapped line was finished later.
        bool SwitchToPreprocessorLines = !Line->Tokens.empty();
        // NOTE(review): ScopedLineState is defined outside this excerpt; it
        // presumably saves the current line state and restores it when
        // BlockState goes out of scope — confirm.
        ScopedLineState BlockState(*this, SwitchToPreprocessorLines);
        assert((LevelDifference >= 0 ||
                static_cast<unsigned>(-LevelDifference) <= Line->Level) &&
               "LevelDifference makes Line->Level negative");
        Line->Level += LevelDifference;
        // Comments stored before the preprocessor directive need to be output
        // before the preprocessor directive, at the same level as the
        // preprocessor directive, as we consider them to apply to the directive.
        if (Style.IndentPPDirectives == FormatStyle::PPDIS_BeforeHash &&
            PPBranchLevel > 0)
          Line->Level += PPBranchLevel;
        flushComments(isOnNewLine(*FormatTok));
        parsePPDirective();
      }

      // Tokens in a branch marked PP_Unreachable are skipped. 'continue' in a
      // do/while jumps to the loop condition, so end of file still ends the
      // loop.
      if (!PPStack.empty() && (PPStack.back().Kind == PP_Unreachable) &&
          !Line->InPPDirective) {
        continue;
      }

      // A non-comment token ends the read; the collected comments are
      // distributed relative to it.
      if (!FormatTok->Tok.is(tok::comment)) {
        distributeComments(Comments, FormatTok);
        Comments.clear();
        return;
      }

      Comments.push_back(FormatTok);
    } while (!eof());

    // End of file: there is no next token to align the comments with.
    distributeComments(Comments, nullptr);
    Comments.clear();
  }
  3343. void UnwrappedLineParser::pushToken(FormatToken *Tok) {
  3344. Line->Tokens.push_back(UnwrappedLineNode(Tok));
  3345. if (MustBreakBeforeNextToken) {
  3346. Line->Tokens.back().Tok->MustBreakBefore = true;
  3347. MustBreakBeforeNextToken = false;
  3348. }
  3349. }
  3350. } // end namespace format
  3351. } // end namespace clang