UnwrappedLineParser.cpp 145 KB


  1. //===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. ///
  9. /// \file
  10. /// This file contains the implementation of the UnwrappedLineParser,
  11. /// which turns a stream of tokens into UnwrappedLines.
  12. ///
  13. //===----------------------------------------------------------------------===//
  14. #include "UnwrappedLineParser.h"
  15. #include "FormatToken.h"
  16. #include "TokenAnnotator.h"
  17. #include "clang/Basic/TokenKinds.h"
  18. #include "llvm/ADT/STLExtras.h"
  19. #include "llvm/Support/Debug.h"
  20. #include "llvm/Support/raw_ostream.h"
  21. #include <algorithm>
  22. #include <utility>
  23. #define DEBUG_TYPE "format-parser"
  24. namespace clang {
  25. namespace format {
  26. class FormatTokenSource {
  27. public:
  28. virtual ~FormatTokenSource() {}
  29. // Returns the next token in the token stream.
  30. virtual FormatToken *getNextToken() = 0;
  31. // Returns the token preceding the token returned by the last call to
  32. // getNextToken() in the token stream, or nullptr if no such token exists.
  33. virtual FormatToken *getPreviousToken() = 0;
  34. // Returns the token that would be returned by the next call to
  35. // getNextToken().
  36. virtual FormatToken *peekNextToken(bool SkipComment = false) = 0;
  37. // Returns whether we are at the end of the file.
  38. // This can be different from whether getNextToken() returned an eof token
  39. // when the FormatTokenSource is a view on a part of the token stream.
  40. virtual bool isEOF() = 0;
  41. // Gets the current position in the token stream, to be used by setPosition().
  42. virtual unsigned getPosition() = 0;
  43. // Resets the token stream to the state it was in when getPosition() returned
  44. // Position, and return the token at that position in the stream.
  45. virtual FormatToken *setPosition(unsigned Position) = 0;
  46. };
  47. namespace {
  48. void printLine(llvm::raw_ostream &OS, const UnwrappedLine &Line,
  49. StringRef Prefix = "", bool PrintText = false) {
  50. OS << Prefix << "Line(" << Line.Level << ", FSC=" << Line.FirstStartColumn
  51. << ")" << (Line.InPPDirective ? " MACRO" : "") << ": ";
  52. bool NewLine = false;
  53. for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
  54. E = Line.Tokens.end();
  55. I != E; ++I) {
  56. if (NewLine) {
  57. OS << Prefix;
  58. NewLine = false;
  59. }
  60. OS << I->Tok->Tok.getName() << "["
  61. << "T=" << (unsigned)I->Tok->getType()
  62. << ", OC=" << I->Tok->OriginalColumn << ", \"" << I->Tok->TokenText
  63. << "\"] ";
  64. for (SmallVectorImpl<UnwrappedLine>::const_iterator
  65. CI = I->Children.begin(),
  66. CE = I->Children.end();
  67. CI != CE; ++CI) {
  68. OS << "\n";
  69. printLine(OS, *CI, (Prefix + " ").str());
  70. NewLine = true;
  71. }
  72. }
  73. if (!NewLine)
  74. OS << "\n";
  75. }
  76. LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line) {
  77. printLine(llvm::dbgs(), Line);
  78. }
  79. class ScopedDeclarationState {
  80. public:
  81. ScopedDeclarationState(UnwrappedLine &Line, llvm::BitVector &Stack,
  82. bool MustBeDeclaration)
  83. : Line(Line), Stack(Stack) {
  84. Line.MustBeDeclaration = MustBeDeclaration;
  85. Stack.push_back(MustBeDeclaration);
  86. }
  87. ~ScopedDeclarationState() {
  88. Stack.pop_back();
  89. if (!Stack.empty())
  90. Line.MustBeDeclaration = Stack.back();
  91. else
  92. Line.MustBeDeclaration = true;
  93. }
  94. private:
  95. UnwrappedLine &Line;
  96. llvm::BitVector &Stack;
  97. };
  98. static bool isLineComment(const FormatToken &FormatTok) {
  99. return FormatTok.is(tok::comment) && !FormatTok.TokenText.startswith("/*");
  100. }
  101. // Checks if \p FormatTok is a line comment that continues the line comment
  102. // \p Previous. The original column of \p MinColumnToken is used to determine
  103. // whether \p FormatTok is indented enough to the right to continue \p Previous.
  104. static bool continuesLineComment(const FormatToken &FormatTok,
  105. const FormatToken *Previous,
  106. const FormatToken *MinColumnToken) {
  107. if (!Previous || !MinColumnToken)
  108. return false;
  109. unsigned MinContinueColumn =
  110. MinColumnToken->OriginalColumn + (isLineComment(*MinColumnToken) ? 0 : 1);
  111. return isLineComment(FormatTok) && FormatTok.NewlinesBefore == 1 &&
  112. isLineComment(*Previous) &&
  113. FormatTok.OriginalColumn >= MinContinueColumn;
  114. }
  115. class ScopedMacroState : public FormatTokenSource {
  116. public:
  117. ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource,
  118. FormatToken *&ResetToken)
  119. : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken),
  120. PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource),
  121. Token(nullptr), PreviousToken(nullptr) {
  122. FakeEOF.Tok.startToken();
  123. FakeEOF.Tok.setKind(tok::eof);
  124. TokenSource = this;
  125. Line.Level = 0;
  126. Line.InPPDirective = true;
  127. // InMacroBody gets set after the `#define x` part.
  128. }
  129. ~ScopedMacroState() override {
  130. TokenSource = PreviousTokenSource;
  131. ResetToken = Token;
  132. Line.InPPDirective = false;
  133. Line.InMacroBody = false;
  134. Line.Level = PreviousLineLevel;
  135. }
  136. FormatToken *getNextToken() override {
  137. // The \c UnwrappedLineParser guards against this by never calling
  138. // \c getNextToken() after it has encountered the first eof token.
  139. assert(!eof());
  140. PreviousToken = Token;
  141. Token = PreviousTokenSource->getNextToken();
  142. if (eof())
  143. return &FakeEOF;
  144. return Token;
  145. }
  146. FormatToken *getPreviousToken() override {
  147. return PreviousTokenSource->getPreviousToken();
  148. }
  149. FormatToken *peekNextToken(bool SkipComment) override {
  150. if (eof())
  151. return &FakeEOF;
  152. return PreviousTokenSource->peekNextToken(SkipComment);
  153. }
  154. bool isEOF() override { return PreviousTokenSource->isEOF(); }
  155. unsigned getPosition() override { return PreviousTokenSource->getPosition(); }
  156. FormatToken *setPosition(unsigned Position) override {
  157. PreviousToken = nullptr;
  158. Token = PreviousTokenSource->setPosition(Position);
  159. return Token;
  160. }
  161. private:
  162. bool eof() {
  163. return Token && Token->HasUnescapedNewline &&
  164. !continuesLineComment(*Token, PreviousToken,
  165. /*MinColumnToken=*/PreviousToken);
  166. }
  167. FormatToken FakeEOF;
  168. UnwrappedLine &Line;
  169. FormatTokenSource *&TokenSource;
  170. FormatToken *&ResetToken;
  171. unsigned PreviousLineLevel;
  172. FormatTokenSource *PreviousTokenSource;
  173. FormatToken *Token;
  174. FormatToken *PreviousToken;
  175. };
  176. } // end anonymous namespace
  177. class ScopedLineState {
  178. public:
  179. ScopedLineState(UnwrappedLineParser &Parser,
  180. bool SwitchToPreprocessorLines = false)
  181. : Parser(Parser), OriginalLines(Parser.CurrentLines) {
  182. if (SwitchToPreprocessorLines)
  183. Parser.CurrentLines = &Parser.PreprocessorDirectives;
  184. else if (!Parser.Line->Tokens.empty())
  185. Parser.CurrentLines = &Parser.Line->Tokens.back().Children;
  186. PreBlockLine = std::move(Parser.Line);
  187. Parser.Line = std::make_unique<UnwrappedLine>();
  188. Parser.Line->Level = PreBlockLine->Level;
  189. Parser.Line->PPLevel = PreBlockLine->PPLevel;
  190. Parser.Line->InPPDirective = PreBlockLine->InPPDirective;
  191. Parser.Line->InMacroBody = PreBlockLine->InMacroBody;
  192. }
  193. ~ScopedLineState() {
  194. if (!Parser.Line->Tokens.empty())
  195. Parser.addUnwrappedLine();
  196. assert(Parser.Line->Tokens.empty());
  197. Parser.Line = std::move(PreBlockLine);
  198. if (Parser.CurrentLines == &Parser.PreprocessorDirectives)
  199. Parser.MustBreakBeforeNextToken = true;
  200. Parser.CurrentLines = OriginalLines;
  201. }
  202. private:
  203. UnwrappedLineParser &Parser;
  204. std::unique_ptr<UnwrappedLine> PreBlockLine;
  205. SmallVectorImpl<UnwrappedLine> *OriginalLines;
  206. };
  207. class CompoundStatementIndenter {
  208. public:
  209. CompoundStatementIndenter(UnwrappedLineParser *Parser,
  210. const FormatStyle &Style, unsigned &LineLevel)
  211. : CompoundStatementIndenter(Parser, LineLevel,
  212. Style.BraceWrapping.AfterControlStatement,
  213. Style.BraceWrapping.IndentBraces) {}
  214. CompoundStatementIndenter(UnwrappedLineParser *Parser, unsigned &LineLevel,
  215. bool WrapBrace, bool IndentBrace)
  216. : LineLevel(LineLevel), OldLineLevel(LineLevel) {
  217. if (WrapBrace)
  218. Parser->addUnwrappedLine();
  219. if (IndentBrace)
  220. ++LineLevel;
  221. }
  222. ~CompoundStatementIndenter() { LineLevel = OldLineLevel; }
  223. private:
  224. unsigned &LineLevel;
  225. unsigned OldLineLevel;
  226. };
  227. namespace {
  228. class IndexedTokenSource : public FormatTokenSource {
  229. public:
  230. IndexedTokenSource(ArrayRef<FormatToken *> Tokens)
  231. : Tokens(Tokens), Position(-1) {}
  232. FormatToken *getNextToken() override {
  233. if (Position >= 0 && isEOF()) {
  234. LLVM_DEBUG({
  235. llvm::dbgs() << "Next ";
  236. dbgToken(Position);
  237. });
  238. return Tokens[Position];
  239. }
  240. ++Position;
  241. LLVM_DEBUG({
  242. llvm::dbgs() << "Next ";
  243. dbgToken(Position);
  244. });
  245. return Tokens[Position];
  246. }
  247. FormatToken *getPreviousToken() override {
  248. return Position > 0 ? Tokens[Position - 1] : nullptr;
  249. }
  250. FormatToken *peekNextToken(bool SkipComment) override {
  251. int Next = Position + 1;
  252. if (SkipComment)
  253. while (Tokens[Next]->is(tok::comment))
  254. ++Next;
  255. LLVM_DEBUG({
  256. llvm::dbgs() << "Peeking ";
  257. dbgToken(Next);
  258. });
  259. return Tokens[Next];
  260. }
  261. bool isEOF() override { return Tokens[Position]->is(tok::eof); }
  262. unsigned getPosition() override {
  263. LLVM_DEBUG(llvm::dbgs() << "Getting Position: " << Position << "\n");
  264. assert(Position >= 0);
  265. return Position;
  266. }
  267. FormatToken *setPosition(unsigned P) override {
  268. LLVM_DEBUG(llvm::dbgs() << "Setting Position: " << P << "\n");
  269. Position = P;
  270. return Tokens[Position];
  271. }
  272. void reset() { Position = -1; }
  273. private:
  274. void dbgToken(int Position, llvm::StringRef Indent = "") {
  275. FormatToken *Tok = Tokens[Position];
  276. llvm::dbgs() << Indent << "[" << Position
  277. << "] Token: " << Tok->Tok.getName() << " / " << Tok->TokenText
  278. << ", Macro: " << !!Tok->MacroCtx << "\n";
  279. }
  280. ArrayRef<FormatToken *> Tokens;
  281. int Position;
  282. };
  283. } // end anonymous namespace
  284. UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style,
  285. const AdditionalKeywords &Keywords,
  286. unsigned FirstStartColumn,
  287. ArrayRef<FormatToken *> Tokens,
  288. UnwrappedLineConsumer &Callback)
  289. : Line(new UnwrappedLine), MustBreakBeforeNextToken(false),
  290. CurrentLines(&Lines), Style(Style), Keywords(Keywords),
  291. CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr),
  292. Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1),
  293. IncludeGuard(Style.IndentPPDirectives == FormatStyle::PPDIS_None
  294. ? IG_Rejected
  295. : IG_Inited),
  296. IncludeGuardToken(nullptr), FirstStartColumn(FirstStartColumn) {}
  297. void UnwrappedLineParser::reset() {
  298. PPBranchLevel = -1;
  299. IncludeGuard = Style.IndentPPDirectives == FormatStyle::PPDIS_None
  300. ? IG_Rejected
  301. : IG_Inited;
  302. IncludeGuardToken = nullptr;
  303. Line.reset(new UnwrappedLine);
  304. CommentsBeforeNextToken.clear();
  305. FormatTok = nullptr;
  306. MustBreakBeforeNextToken = false;
  307. PreprocessorDirectives.clear();
  308. CurrentLines = &Lines;
  309. DeclarationScopeStack.clear();
  310. NestedTooDeep.clear();
  311. PPStack.clear();
  312. Line->FirstStartColumn = FirstStartColumn;
  313. }
  314. void UnwrappedLineParser::parse() {
  315. IndexedTokenSource TokenSource(AllTokens);
  316. Line->FirstStartColumn = FirstStartColumn;
  317. do {
  318. LLVM_DEBUG(llvm::dbgs() << "----\n");
  319. reset();
  320. Tokens = &TokenSource;
  321. TokenSource.reset();
  322. readToken();
  323. parseFile();
  324. // If we found an include guard then all preprocessor directives (other than
  325. // the guard) are over-indented by one.
  326. if (IncludeGuard == IG_Found) {
  327. for (auto &Line : Lines)
  328. if (Line.InPPDirective && Line.Level > 0)
  329. --Line.Level;
  330. }
  331. // Create line with eof token.
  332. pushToken(FormatTok);
  333. addUnwrappedLine();
  334. for (const UnwrappedLine &Line : Lines)
  335. Callback.consumeUnwrappedLine(Line);
  336. Callback.finishRun();
  337. Lines.clear();
  338. while (!PPLevelBranchIndex.empty() &&
  339. PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) {
  340. PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1);
  341. PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1);
  342. }
  343. if (!PPLevelBranchIndex.empty()) {
  344. ++PPLevelBranchIndex.back();
  345. assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size());
  346. assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back());
  347. }
  348. } while (!PPLevelBranchIndex.empty());
  349. }
  350. void UnwrappedLineParser::parseFile() {
  351. // The top-level context in a file always has declarations, except for pre-
  352. // processor directives and JavaScript files.
  353. bool MustBeDeclaration = !Line->InPPDirective && !Style.isJavaScript();
  354. ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
  355. MustBeDeclaration);
  356. if (Style.Language == FormatStyle::LK_TextProto)
  357. parseBracedList();
  358. else
  359. parseLevel();
  360. // Make sure to format the remaining tokens.
  361. //
  362. // LK_TextProto is special since its top-level is parsed as the body of a
  363. // braced list, which does not necessarily have natural line separators such
  364. // as a semicolon. Comments after the last entry that have been determined to
  365. // not belong to that line, as in:
  366. // key: value
  367. // // endfile comment
  368. // do not have a chance to be put on a line of their own until this point.
  369. // Here we add this newline before end-of-file comments.
  370. if (Style.Language == FormatStyle::LK_TextProto &&
  371. !CommentsBeforeNextToken.empty()) {
  372. addUnwrappedLine();
  373. }
  374. flushComments(true);
  375. addUnwrappedLine();
  376. }
  377. void UnwrappedLineParser::parseCSharpGenericTypeConstraint() {
  378. do {
  379. switch (FormatTok->Tok.getKind()) {
  380. case tok::l_brace:
  381. return;
  382. default:
  383. if (FormatTok->is(Keywords.kw_where)) {
  384. addUnwrappedLine();
  385. nextToken();
  386. parseCSharpGenericTypeConstraint();
  387. break;
  388. }
  389. nextToken();
  390. break;
  391. }
  392. } while (!eof());
  393. }
  394. void UnwrappedLineParser::parseCSharpAttribute() {
  395. int UnpairedSquareBrackets = 1;
  396. do {
  397. switch (FormatTok->Tok.getKind()) {
  398. case tok::r_square:
  399. nextToken();
  400. --UnpairedSquareBrackets;
  401. if (UnpairedSquareBrackets == 0) {
  402. addUnwrappedLine();
  403. return;
  404. }
  405. break;
  406. case tok::l_square:
  407. ++UnpairedSquareBrackets;
  408. nextToken();
  409. break;
  410. default:
  411. nextToken();
  412. break;
  413. }
  414. } while (!eof());
  415. }
  416. bool UnwrappedLineParser::precededByCommentOrPPDirective() const {
  417. if (!Lines.empty() && Lines.back().InPPDirective)
  418. return true;
  419. const FormatToken *Previous = Tokens->getPreviousToken();
  420. return Previous && Previous->is(tok::comment) &&
  421. (Previous->IsMultiline || Previous->NewlinesBefore > 0);
  422. }
  423. /// \brief Parses a level, that is the run of structural elements up to the
  /// '}' that closes \p OpeningBrace (which is left unconsumed) or, when there
  /// is no opening brace, up to end of file (stray '}' tokens are consumed and
  /// end the current line).
  424. /// \param OpeningBrace Opening brace (\p nullptr if absent) of that level
  425. /// \param CanContainBracedList If the content can contain (at any level) a
  426. /// braced list.
  427. /// \param NextLBracesType The type for left brace found in this level.
  428. /// \param IfKind The \p if statement kind in the level.
  429. /// \param IfLeftBrace The left brace of the \p if block in the level.
  430. /// \returns true if a simple block of if/else/for/while, or false otherwise.
  431. /// (A simple block has a single statement.)
  432. bool UnwrappedLineParser::parseLevel(const FormatToken *OpeningBrace,
  433. bool CanContainBracedList,
  434. TokenType NextLBracesType,
  435. IfStmtKind *IfKind,
  436. FormatToken **IfLeftBrace) {
  // Braces nested inside a compound requirement are typed as braced lists;
  // otherwise nested elements get no forced brace type.
  437. auto NextLevelLBracesType = NextLBracesType == TT_CompoundRequirementLBrace
  438. ? TT_BracedListLBrace
  439. : TT_Unknown;
  // Evaluated once on entry; always true when brace removal is disabled.
  440. const bool IsPrecededByCommentOrPPDirective =
  441. !Style.RemoveBracesLLVM || precededByCommentOrPPDirective();
  442. FormatToken *IfLBrace = nullptr;
  443. bool HasDoWhile = false;
  444. bool HasLabel = false;
  445. unsigned StatementCount = 0;
  446. bool SwitchLabelEncountered = false;
  447. do {
  // Attribute macros are skipped without starting a structural element.
  448. if (FormatTok->getType() == TT_AttributeMacro) {
  449. nextToken();
  450. continue;
  451. }
  // Macro block begin/end tokens are dispatched like '{' and '}'.
  452. tok::TokenKind kind = FormatTok->Tok.getKind();
  453. if (FormatTok->getType() == TT_MacroBlockBegin)
  454. kind = tok::l_brace;
  455. else if (FormatTok->getType() == TT_MacroBlockEnd)
  456. kind = tok::r_brace;
  // Parses one structural element and counts it as a statement. The
  // HasDoWhile/HasLabel out-parameters are only passed on until they have
  // been set once, so a later element cannot reset them.
  457. auto ParseDefault = [this, OpeningBrace, NextLevelLBracesType, IfKind,
  458. &IfLBrace, &HasDoWhile, &HasLabel, &StatementCount] {
  459. parseStructuralElement(!OpeningBrace, NextLevelLBracesType, IfKind,
  460. &IfLBrace, HasDoWhile ? nullptr : &HasDoWhile,
  461. HasLabel ? nullptr : &HasLabel);
  462. ++StatementCount;
  463. assert(StatementCount > 0 && "StatementCount overflow!");
  464. };
  465. switch (kind) {
  466. case tok::comment:
  467. nextToken();
  468. addUnwrappedLine();
  469. break;
  470. case tok::l_brace:
  471. if (NextLBracesType != TT_Unknown) {
  472. FormatTok->setFinalizedType(NextLBracesType);
  473. } else if (FormatTok->Previous &&
  474. FormatTok->Previous->ClosesRequiresClause) {
  475. // We need the 'default' case here to correctly parse a function
  476. // l_brace.
  477. ParseDefault();
  478. continue;
  479. }
  480. if (CanContainBracedList && !FormatTok->is(TT_MacroBlockBegin) &&
  481. tryToParseBracedList()) {
  482. continue;
  483. }
  // Not a braced list: parse a nested block and count it as one statement.
  484. parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
  485. /*MunchSemi=*/true, /*KeepBraces=*/true, /*IfKind=*/nullptr,
  486. /*UnindentWhitesmithsBraces=*/false, CanContainBracedList,
  487. NextLBracesType);
  488. ++StatementCount;
  489. assert(StatementCount > 0 && "StatementCount overflow!");
  490. addUnwrappedLine();
  491. break;
  492. case tok::r_brace:
  493. if (OpeningBrace) {
  // End of this level; the closing token is left for the caller. The
  // checks below decide whether the block is reported as "simple".
  494. if (!Style.RemoveBracesLLVM || Line->InPPDirective ||
  495. !OpeningBrace->isOneOf(TT_ControlStatementLBrace, TT_ElseLBrace)) {
  496. return false;
  497. }
  // `kind` may have been mapped from a macro-block-end token, hence the
  // explicit check that the token really is '}'.
  498. if (FormatTok->isNot(tok::r_brace) || StatementCount != 1 || HasLabel ||
  499. HasDoWhile || IsPrecededByCommentOrPPDirective ||
  500. precededByCommentOrPPDirective()) {
  501. return false;
  502. }
  // A comment on the same line as the closing brace disqualifies the
  // block as well.
  503. const FormatToken *Next = Tokens->peekNextToken();
  504. if (Next->is(tok::comment) && Next->NewlinesBefore == 0)
  505. return false;
  506. if (IfLeftBrace)
  507. *IfLeftBrace = IfLBrace;
  508. return true;
  509. }
  // No opening brace at this level: consume the stray '}' as its own line.
  510. nextToken();
  511. addUnwrappedLine();
  512. break;
  513. case tok::kw_default: {
  // Look ahead past comments to see whether a ':' follows, then rewind
  // the token source to where it was.
  514. unsigned StoredPosition = Tokens->getPosition();
  515. FormatToken *Next;
  516. do {
  517. Next = Tokens->getNextToken();
  518. assert(Next);
  519. } while (Next->is(tok::comment));
  520. FormatTok = Tokens->setPosition(StoredPosition);
  521. if (Next->isNot(tok::colon)) {
  522. // default not followed by ':' is not a case label; treat it like
  523. // an identifier.
  524. parseStructuralElement();
  525. break;
  526. }
  527. // Else, if it is 'default:', fall through to the case handling.
  528. [[fallthrough]];
  529. }
  530. case tok::kw_case:
  531. if (Style.isProto() || Style.isVerilog() ||
  532. (Style.isJavaScript() && Line->MustBeDeclaration)) {
  533. // Proto: there are no switch/case statements
  534. // Verilog: Case labels don't have this word. We handle case
  535. // labels including default in TokenAnnotator.
  536. // JavaScript: A 'case: string' style field declaration.
  537. ParseDefault();
  538. break;
  539. }
  // The level is bumped at most once, for the first label seen in this
  // level.
  540. if (!SwitchLabelEncountered &&
  541. (Style.IndentCaseLabels ||
  542. (Line->InPPDirective && Line->Level == 1))) {
  543. ++Line->Level;
  544. }
  545. SwitchLabelEncountered = true;
  546. parseStructuralElement();
  547. break;
  548. case tok::l_square:
  549. if (Style.isCSharp()) {
  550. nextToken();
  551. parseCSharpAttribute();
  552. break;
  553. }
  554. if (handleCppAttributes())
  555. break;
  556. [[fallthrough]];
  557. default:
  558. ParseDefault();
  559. break;
  560. }
  561. } while (!eof());
  562. return false;
  563. }
  // Looks ahead from the current '{' and records, via setBlockKind(), whether
  // each brace it walks over belongs to a block or to a braced init list. The
  // scan ends at eof or once the brace stack is empty again, after which the
  // token source is rewound to the position it had on entry.
  // \p ExpectClassBody: when set, a ';' after the '}' that closes the outermost
  // brace does not by itself mark that brace pair as a braced list.
  564. void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) {
  565. // We'll parse forward through the tokens until we hit
  566. // a closing brace or eof - note that getNextToken() will
  567. // parse macros, so this will magically work inside macro
  568. // definitions, too.
  569. unsigned StoredPosition = Tokens->getPosition();
  570. FormatToken *Tok = FormatTok;
  571. const FormatToken *PrevTok = Tok->Previous;
  572. // Keep a stack of positions of lbrace tokens. We will
  573. // update information about whether an lbrace starts a
  574. // braced init list or a different block during the loop.
  575. SmallVector<FormatToken *, 8> LBraceStack;
  576. assert(Tok->is(tok::l_brace));
  577. do {
  578. // Get next non-comment token.
  579. FormatToken *NextTok;
  580. do {
  581. NextTok = Tokens->getNextToken();
  582. } while (NextTok->is(tok::comment));
  // Tok is the token being classified; NextTok is the look-ahead.
  583. switch (Tok->Tok.getKind()) {
  584. case tok::l_brace:
  585. if (Style.isJavaScript() && PrevTok) {
  586. if (PrevTok->isOneOf(tok::colon, tok::less)) {
  587. // A ':' indicates this code is in a type, or a braced list
  588. // following a label in an object literal ({a: {b: 1}}).
  589. // A '<' could be an object used in a comparison, but that is nonsense
  590. // code (can never return true), so more likely it is a generic type
  591. // argument (`X<{a: string; b: number}>`).
  592. // The code below could be confused by semicolons between the
  593. // individual members in a type member list, which would normally
  594. // trigger BK_Block. In both cases, this must be parsed as an inline
  595. // braced init.
  596. Tok->setBlockKind(BK_BracedInit);
  597. } else if (PrevTok->is(tok::r_paren)) {
  598. // `) { }` can only occur in function or method declarations in JS.
  599. Tok->setBlockKind(BK_Block);
  600. }
  601. } else {
  602. Tok->setBlockKind(BK_Unknown);
  603. }
  604. LBraceStack.push_back(Tok);
  605. break;
  606. case tok::r_brace:
  607. if (LBraceStack.empty())
  608. break;
  // Only decide for brace pairs that have not been classified yet.
  609. if (LBraceStack.back()->is(BK_Unknown)) {
  610. bool ProbablyBracedList = false;
  611. if (Style.Language == FormatStyle::LK_Proto) {
  612. ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square);
  613. } else {
  614. // Skip NextTok over preprocessor lines, otherwise we may not
  615. // properly diagnose the block as a braced initializer
  616. // if the comma separator appears after the pp directive.
  617. while (NextTok->is(tok::hash)) {
  618. ScopedMacroState MacroState(*Line, Tokens, NextTok);
  619. do {
  620. NextTok = Tokens->getNextToken();
  621. } while (NextTok->isNot(tok::eof));
  622. }
  623. // Using OriginalColumn to distinguish between ObjC methods and
  624. // binary operators is a bit hacky.
  625. bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) &&
  626. NextTok->OriginalColumn == 0;
  627. // Try to detect a braced list. Note that regardless how we mark inner
  628. // braces here, we will overwrite the BlockKind later if we parse a
  629. // braced list (where all blocks inside are by default braced lists),
  630. // or when we explicitly detect blocks (for example while parsing
  631. // lambdas).
  632. // If we already marked the opening brace as braced list, the closing
  633. // must also be part of it.
  634. ProbablyBracedList = LBraceStack.back()->is(TT_BracedListLBrace);
  635. ProbablyBracedList = ProbablyBracedList ||
  636. (Style.isJavaScript() &&
  637. NextTok->isOneOf(Keywords.kw_of, Keywords.kw_in,
  638. Keywords.kw_as));
  639. ProbablyBracedList = ProbablyBracedList ||
  640. (Style.isCpp() && NextTok->is(tok::l_paren));
  641. // If there is a comma, semicolon or right paren after the closing
  642. // brace, we assume this is a braced initializer list.
  643. // FIXME: Some of these do not apply to JS, e.g. "} {" can never be a
  644. // braced list in JS.
  645. ProbablyBracedList =
  646. ProbablyBracedList ||
  647. NextTok->isOneOf(tok::comma, tok::period, tok::colon,
  648. tok::r_paren, tok::r_square, tok::l_brace,
  649. tok::ellipsis);
  // PrevTok is non-null here: the first token handled by the loop is
  // the '{' asserted above, and PrevTok is reassigned to the handled
  // token after every iteration.
  650. ProbablyBracedList =
  651. ProbablyBracedList ||
  652. (NextTok->is(tok::identifier) &&
  653. !PrevTok->isOneOf(tok::semi, tok::r_brace, tok::l_brace));
  654. ProbablyBracedList = ProbablyBracedList ||
  655. (NextTok->is(tok::semi) &&
  656. (!ExpectClassBody || LBraceStack.size() != 1));
  657. ProbablyBracedList =
  658. ProbablyBracedList ||
  659. (NextTok->isBinaryOperator() && !NextIsObjCMethod);
  660. if (!Style.isCSharp() && NextTok->is(tok::l_square)) {
  661. // We can have an array subscript after a braced init
  662. // list, but C++11 attributes are expected after blocks.
  663. NextTok = Tokens->getNextToken();
  664. ProbablyBracedList = NextTok->isNot(tok::l_square);
  665. }
  666. }
  // Record the verdict on both the closing and the matching opening brace.
  667. if (ProbablyBracedList) {
  668. Tok->setBlockKind(BK_BracedInit);
  669. LBraceStack.back()->setBlockKind(BK_BracedInit);
  670. } else {
  671. Tok->setBlockKind(BK_Block);
  672. LBraceStack.back()->setBlockKind(BK_Block);
  673. }
  674. }
  675. LBraceStack.pop_back();
  676. break;
  677. case tok::identifier:
  // Only statement macros are treated like the statement tokens below.
  678. if (!Tok->is(TT_StatementMacro))
  679. break;
  680. [[fallthrough]];
  681. case tok::at:
  682. case tok::semi:
  683. case tok::kw_if:
  684. case tok::kw_while:
  685. case tok::kw_for:
  686. case tok::kw_switch:
  687. case tok::kw_try:
  688. case tok::kw___try:
  // Any of these tokens inside a still-undecided brace marks it as a block.
  689. if (!LBraceStack.empty() && LBraceStack.back()->is(BK_Unknown))
  690. LBraceStack.back()->setBlockKind(BK_Block);
  691. break;
  692. default:
  693. break;
  694. }
  695. PrevTok = Tok;
  696. Tok = NextTok;
  697. } while (Tok->isNot(tok::eof) && !LBraceStack.empty());
  698. // Assume other blocks for all unclosed opening braces.
  699. for (FormatToken *LBrace : LBraceStack)
  700. if (LBrace->is(BK_Unknown))
  701. LBrace->setBlockKind(BK_Block);
  // Rewind so that regular parsing resumes at the token we started from.
  702. FormatTok = Tokens->setPosition(StoredPosition);
  703. }
  704. template <class T>
  705. static inline void hash_combine(std::size_t &seed, const T &v) {
  706. std::hash<T> hasher;
  707. seed ^= hasher(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
  708. }
  709. size_t UnwrappedLineParser::computePPHash() const {
  710. size_t h = 0;
  711. for (const auto &i : PPStack) {
  712. hash_combine(h, size_t(i.Kind));
  713. hash_combine(h, i.Line);
  714. }
  715. return h;
  716. }
  717. // Checks whether \p ParsedLine might fit on a single line. If \p OpeningBrace
  718. // is not null, subtracts its length (plus the preceding space) when computing
  719. // the length of \p ParsedLine. We must clone the tokens of \p ParsedLine before
  720. // running the token annotator on it so that we can restore them afterward.
  721. bool UnwrappedLineParser::mightFitOnOneLine(
  722. UnwrappedLine &ParsedLine, const FormatToken *OpeningBrace) const {
  723. const auto ColumnLimit = Style.ColumnLimit;
  724. if (ColumnLimit == 0)
  725. return true;
  726. auto &Tokens = ParsedLine.Tokens;
  727. assert(!Tokens.empty());
  728. const auto *LastToken = Tokens.back().Tok;
  729. assert(LastToken);
  730. SmallVector<UnwrappedLineNode> SavedTokens(Tokens.size());
  731. int Index = 0;
  732. for (const auto &Token : Tokens) {
  733. assert(Token.Tok);
  734. auto &SavedToken = SavedTokens[Index++];
  735. SavedToken.Tok = new FormatToken;
  736. SavedToken.Tok->copyFrom(*Token.Tok);
  737. SavedToken.Children = std::move(Token.Children);
  738. }
  739. AnnotatedLine Line(ParsedLine);
  740. assert(Line.Last == LastToken);
  741. TokenAnnotator Annotator(Style, Keywords);
  742. Annotator.annotate(Line);
  743. Annotator.calculateFormattingInformation(Line);
  744. auto Length = LastToken->TotalLength;
  745. if (OpeningBrace) {
  746. assert(OpeningBrace != Tokens.front().Tok);
  747. if (auto Prev = OpeningBrace->Previous;
  748. Prev && Prev->TotalLength + ColumnLimit == OpeningBrace->TotalLength) {
  749. Length -= ColumnLimit;
  750. }
  751. Length -= OpeningBrace->TokenText.size() + 1;
  752. }
  753. if (const auto *FirstToken = Line.First; FirstToken->is(tok::r_brace)) {
  754. assert(!OpeningBrace || OpeningBrace->is(TT_ControlStatementLBrace));
  755. Length -= FirstToken->TokenText.size() + 1;
  756. }
  757. Index = 0;
  758. for (auto &Token : Tokens) {
  759. const auto &SavedToken = SavedTokens[Index++];
  760. Token.Tok->copyFrom(*SavedToken.Tok);
  761. Token.Children = std::move(SavedToken.Children);
  762. delete SavedToken.Tok;
  763. }
  764. // If these change PPLevel needs to be used for get correct indentation.
  765. assert(!Line.InMacroBody);
  766. assert(!Line.InPPDirective);
  767. return Line.Level * Style.IndentWidth + Length <= ColumnLimit;
  768. }
  /// Parses a block whose opening token is the current token and produces the
  /// unwrapped lines for the opening line and the block's contents.
  ///
  /// The current token must be '{', a TT_MacroBlockBegin token or, for Verilog,
  /// a begin keyword or a hierarchy header (see the assert below).
  ///
  /// \param MustBeDeclaration Passed to the ScopedDeclarationState that covers
  ///        the block's contents.
  /// \param AddLevels Number of levels added to Line->Level for the block's
  ///        contents.
  /// \param MunchSemi If true, a ';' directly following the block is consumed.
  /// \param KeepBraces If true, RemoveBraces() reports false unless parseLevel()
  ///        handed back an IfLBrace that is not Optional.
  /// \param IfKind Forwarded to parseLevel().
  /// \param UnindentWhitesmithsBraces If true, Line->Level is decremented once
  ///        after the opening line has been added.
  /// \param CanContainBracedList Forwarded to parseLevel().
  /// \param NextLBracesType Forwarded to parseLevel().
  /// \returns The IfLBrace filled in by parseLevel() (may be null), or nullptr
  ///          if Line->Level exceeds 300 after the opening token was consumed.
  769. FormatToken *UnwrappedLineParser::parseBlock(
  770. bool MustBeDeclaration, unsigned AddLevels, bool MunchSemi, bool KeepBraces,
  771. IfStmtKind *IfKind, bool UnindentWhitesmithsBraces,
  772. bool CanContainBracedList, TokenType NextLBracesType) {
  // Consumes an optional Verilog block label (':' followed by an identifier).
  // Does nothing for other languages or when the current token is not ':'.
  773. auto HandleVerilogBlockLabel = [this]() {
  774. // ":" name
  775. if (Style.isVerilog() && FormatTok->is(tok::colon)) {
  776. nextToken();
  777. if (Keywords.isVerilogIdentifier(*FormatTok))
  778. nextToken();
  779. }
  780. };
  781. // Whether this is a Verilog-specific block that has a special header like a
  782. // module.
  783. const bool VerilogHierarchy =
  784. Style.isVerilog() && Keywords.isVerilogHierarchy(*FormatTok);
  785. assert((FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) ||
  786. (Style.isVerilog() &&
  787. (Keywords.isVerilogBegin(*FormatTok) || VerilogHierarchy))) &&
  788. "'{' or macro block token expected");
  // Remember the opening token and a few facts about it before advancing;
  // FormatTok changes with every nextToken() call below.
  789. FormatToken *Tok = FormatTok;
  790. const bool FollowedByComment = Tokens->peekNextToken()->is(tok::comment);
  // Number of lines emitted so far. RemoveBraces() below uses this as the index
  // of the line holding the opening brace.
  791. auto Index = CurrentLines->size();
  792. const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin);
  793. FormatTok->setBlockKind(BK_Block);
  794. // For Whitesmiths mode, jump to the next level prior to skipping over the
  795. // braces.
  796. if (!VerilogHierarchy && AddLevels > 0 &&
  797. Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths) {
  798. ++Line->Level;
  799. }
  // The preprocessor hash is sampled here and again at the closing token; the
  // matching line indices are only recorded at the end if both are equal.
  800. size_t PPStartHash = computePPHash();
  // Level to restore once the block has been parsed (or parsing is abandoned).
  801. const unsigned InitialLevel = Line->Level;
  802. if (VerilogHierarchy) {
  803. AddLevels += parseVerilogHierarchyHeader();
  804. } else {
  805. nextToken(/*LevelDifference=*/AddLevels);
  806. HandleVerilogBlockLabel();
  807. }
  808. // Bail out if there are too many levels. Otherwise, the stack might overflow.
  809. if (Line->Level > 300)
  810. return nullptr;
  811. if (MacroBlock && FormatTok->is(tok::l_paren))
  812. parseParens();
  // Preprocessor directives are only subtracted from the opening line index
  // when lines are being added to the top-level Lines container.
  813. size_t NbPreprocessorDirectives =
  814. CurrentLines == &Lines ? PreprocessorDirectives.size() : 0;
  815. addUnwrappedLine();
  816. size_t OpeningLineIndex =
  817. CurrentLines->empty()
  818. ? (UnwrappedLine::kInvalidIndex)
  819. : (CurrentLines->size() - 1 - NbPreprocessorDirectives);
  820. // Whitesmiths is weird here. The brace needs to be indented for the namespace
  821. // block, but the block itself may not be indented depending on the style
  822. // settings. This allows the format to back up one level in those cases.
  823. if (UnindentWhitesmithsBraces)
  824. --Line->Level;
  825. ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
  826. MustBeDeclaration);
  // For Whitesmiths the level was already raised before the opening token.
  827. if (AddLevels > 0u && Style.BreakBeforeBraces != FormatStyle::BS_Whitesmiths)
  828. Line->Level += AddLevels;
  829. FormatToken *IfLBrace = nullptr;
  830. const bool SimpleBlock =
  831. parseLevel(Tok, CanContainBracedList, NextLBracesType, IfKind, &IfLBrace);
  832. if (eof())
  833. return IfLBrace;
  // The block was not terminated by the expected closing token: restore the
  // level, mark the current token as a block token and give up on this block.
  834. if (MacroBlock ? !FormatTok->is(TT_MacroBlockEnd)
  835. : !FormatTok->is(tok::r_brace)) {
  836. Line->Level = InitialLevel;
  837. FormatTok->setBlockKind(BK_Block);
  838. return IfLBrace;
  839. }
  840. const bool IsFunctionRBrace =
  841. FormatTok->is(tok::r_brace) && Tok->is(TT_FunctionLBrace);
  // Decides whether the braces of this block are candidates for removal.
  // The lambda captures by copy and is mutable, so the changes it makes to Tok
  // and Index affect only its own copies, not the variables used after it.
  842. auto RemoveBraces = [=]() mutable {
  843. if (!SimpleBlock)
  844. return false;
  845. assert(Tok->isOneOf(TT_ControlStatementLBrace, TT_ElseLBrace));
  846. assert(FormatTok->is(tok::r_brace));
  // An opening brace without a previous token is the first token on its line.
  847. const bool WrappedOpeningBrace = !Tok->Previous;
  848. if (WrappedOpeningBrace && FollowedByComment)
  849. return false;
  850. const bool HasRequiredIfBraces = IfLBrace && !IfLBrace->Optional;
  851. if (KeepBraces && !HasRequiredIfBraces)
  852. return false;
  853. if (Tok->isNot(TT_ElseLBrace) || !HasRequiredIfBraces) {
  854. const FormatToken *Previous = Tokens->getPreviousToken();
  855. assert(Previous);
  856. if (Previous->is(tok::r_brace) && !Previous->Optional)
  857. return false;
  858. }
  859. assert(!CurrentLines->empty());
  860. auto &LastLine = CurrentLines->back();
  861. if (LastLine.Level == InitialLevel + 1 && !mightFitOnOneLine(LastLine))
  862. return false;
  863. if (Tok->is(TT_ElseLBrace))
  864. return true;
  865. if (WrappedOpeningBrace) {
  866. assert(Index > 0);
  867. --Index; // The line above the wrapped l_brace.
  868. Tok = nullptr;
  869. }
  870. return mightFitOnOneLine((*CurrentLines)[Index], Tok);
  871. };
  // Link the opening and closing brace to each other when they are removable.
  // NOTE(review): presumably a later pass reads MatchingParen to drop the
  // braces - confirm against the consumer.
  872. if (RemoveBraces()) {
  873. Tok->MatchingParen = FormatTok;
  874. FormatTok->MatchingParen = Tok;
  875. }
  876. size_t PPEndHash = computePPHash();
  877. // Munch the closing brace.
  // NOTE(review): AddLevels is unsigned, so -AddLevels wraps around; this
  // relies on the conversion to the LevelDifference parameter - confirm.
  878. nextToken(/*LevelDifference=*/-AddLevels);
  879. // When this is a function block and there is an unnecessary semicolon
  880. // afterwards then mark it as optional (so the RemoveSemi pass can get rid of
  881. // it later).
  882. if (Style.RemoveSemicolon && IsFunctionRBrace) {
  883. while (FormatTok->is(tok::semi)) {
  884. FormatTok->Optional = true;
  885. nextToken();
  886. }
  887. }
  888. HandleVerilogBlockLabel();
  889. if (MacroBlock && FormatTok->is(tok::l_paren))
  890. parseParens();
  891. Line->Level = InitialLevel;
  892. if (FormatTok->is(tok::kw_noexcept)) {
  893. // A noexcept in a requires expression.
  894. nextToken();
  895. }
  896. if (FormatTok->is(tok::arrow)) {
  897. // Following the } or noexcept we can find a trailing return type arrow
  898. // as part of an implicit conversion constraint.
  899. nextToken();
  900. parseStructuralElement();
  901. }
  902. if (MunchSemi && FormatTok->is(tok::semi))
  903. nextToken();
  // Cross-reference the opening and closing lines only when the preprocessor
  // hash did not change between the opening and the closing token.
  904. if (PPStartHash == PPEndHash) {
  905. Line->MatchingOpeningBlockLineIndex = OpeningLineIndex;
  906. if (OpeningLineIndex != UnwrappedLine::kInvalidIndex) {
  907. // Update the opening line to add the forward reference as well
  908. (*CurrentLines)[OpeningLineIndex].MatchingClosingBlockLineIndex =
  909. CurrentLines->size() - 1;
  910. }
  911. }
  912. return IfLBrace;
  913. }
  914. static bool isGoogScope(const UnwrappedLine &Line) {
  915. // FIXME: Closure-library specific stuff should not be hard-coded but be
  916. // configurable.
  917. if (Line.Tokens.size() < 4)
  918. return false;
  919. auto I = Line.Tokens.begin();
  920. if (I->Tok->TokenText != "goog")
  921. return false;
  922. ++I;
  923. if (I->Tok->isNot(tok::period))
  924. return false;
  925. ++I;
  926. if (I->Tok->TokenText != "scope")
  927. return false;
  928. ++I;
  929. return I->Tok->is(tok::l_paren);
  930. }
  931. static bool isIIFE(const UnwrappedLine &Line,
  932. const AdditionalKeywords &Keywords) {
  933. // Look for the start of an immediately invoked anonymous function.
  934. // https://en.wikipedia.org/wiki/Immediately-invoked_function_expression
  935. // This is commonly done in JavaScript to create a new, anonymous scope.
  936. // Example: (function() { ... })()
  937. if (Line.Tokens.size() < 3)
  938. return false;
  939. auto I = Line.Tokens.begin();
  940. if (I->Tok->isNot(tok::l_paren))
  941. return false;
  942. ++I;
  943. if (I->Tok->isNot(Keywords.kw_function))
  944. return false;
  945. ++I;
  946. return I->Tok->is(tok::l_paren);
  947. }
  948. static bool ShouldBreakBeforeBrace(const FormatStyle &Style,
  949. const FormatToken &InitialToken) {
  950. tok::TokenKind Kind = InitialToken.Tok.getKind();
  951. if (InitialToken.is(TT_NamespaceMacro))
  952. Kind = tok::kw_namespace;
  953. switch (Kind) {
  954. case tok::kw_namespace:
  955. return Style.BraceWrapping.AfterNamespace;
  956. case tok::kw_class:
  957. return Style.BraceWrapping.AfterClass;
  958. case tok::kw_union:
  959. return Style.BraceWrapping.AfterUnion;
  960. case tok::kw_struct:
  961. return Style.BraceWrapping.AfterStruct;
  962. case tok::kw_enum:
  963. return Style.BraceWrapping.AfterEnum;
  964. default:
  965. return false;
  966. }
  967. }
  968. void UnwrappedLineParser::parseChildBlock(
  969. bool CanContainBracedList, clang::format::TokenType NextLBracesType) {
  970. assert(FormatTok->is(tok::l_brace));
  971. FormatTok->setBlockKind(BK_Block);
  972. const FormatToken *OpeningBrace = FormatTok;
  973. nextToken();
  974. {
  975. bool SkipIndent = (Style.isJavaScript() &&
  976. (isGoogScope(*Line) || isIIFE(*Line, Keywords)));
  977. ScopedLineState LineState(*this);
  978. ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
  979. /*MustBeDeclaration=*/false);
  980. Line->Level += SkipIndent ? 0 : 1;
  981. parseLevel(OpeningBrace, CanContainBracedList, NextLBracesType);
  982. flushComments(isOnNewLine(*FormatTok));
  983. Line->Level -= SkipIndent ? 0 : 1;
  984. }
  985. nextToken();
  986. }
  987. void UnwrappedLineParser::parsePPDirective() {
  988. assert(FormatTok->is(tok::hash) && "'#' expected");
  989. ScopedMacroState MacroState(*Line, Tokens, FormatTok);
  990. nextToken();
  991. if (!FormatTok->Tok.getIdentifierInfo()) {
  992. parsePPUnknown();
  993. return;
  994. }
  995. switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) {
  996. case tok::pp_define:
  997. parsePPDefine();
  998. return;
  999. case tok::pp_if:
  1000. parsePPIf(/*IfDef=*/false);
  1001. break;
  1002. case tok::pp_ifdef:
  1003. case tok::pp_ifndef:
  1004. parsePPIf(/*IfDef=*/true);
  1005. break;
  1006. case tok::pp_else:
  1007. case tok::pp_elifdef:
  1008. case tok::pp_elifndef:
  1009. case tok::pp_elif:
  1010. parsePPElse();
  1011. break;
  1012. case tok::pp_endif:
  1013. parsePPEndIf();
  1014. break;
  1015. case tok::pp_pragma:
  1016. parsePPPragma();
  1017. break;
  1018. default:
  1019. parsePPUnknown();
  1020. break;
  1021. }
  1022. }
  1023. void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) {
  1024. size_t Line = CurrentLines->size();
  1025. if (CurrentLines == &PreprocessorDirectives)
  1026. Line += Lines.size();
  1027. if (Unreachable ||
  1028. (!PPStack.empty() && PPStack.back().Kind == PP_Unreachable)) {
  1029. PPStack.push_back({PP_Unreachable, Line});
  1030. } else {
  1031. PPStack.push_back({PP_Conditional, Line});
  1032. }
  1033. }
  1034. void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) {
  1035. ++PPBranchLevel;
  1036. assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size());
  1037. if (PPBranchLevel == (int)PPLevelBranchIndex.size()) {
  1038. PPLevelBranchIndex.push_back(0);
  1039. PPLevelBranchCount.push_back(0);
  1040. }
  1041. PPChainBranchIndex.push(Unreachable ? -1 : 0);
  1042. bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0;
  1043. conditionalCompilationCondition(Unreachable || Skip);
  1044. }
  1045. void UnwrappedLineParser::conditionalCompilationAlternative() {
  1046. if (!PPStack.empty())
  1047. PPStack.pop_back();
  1048. assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
  1049. if (!PPChainBranchIndex.empty())
  1050. ++PPChainBranchIndex.top();
  1051. conditionalCompilationCondition(
  1052. PPBranchLevel >= 0 && !PPChainBranchIndex.empty() &&
  1053. PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top());
  1054. }
  1055. void UnwrappedLineParser::conditionalCompilationEnd() {
  1056. assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
  1057. if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) {
  1058. if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel])
  1059. PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1;
  1060. }
  1061. // Guard against #endif's without #if.
  1062. if (PPBranchLevel > -1)
  1063. --PPBranchLevel;
  1064. if (!PPChainBranchIndex.empty())
  1065. PPChainBranchIndex.pop();
  1066. if (!PPStack.empty())
  1067. PPStack.pop_back();
  1068. }
  1069. void UnwrappedLineParser::parsePPIf(bool IfDef) {
  1070. bool IfNDef = FormatTok->is(tok::pp_ifndef);
  1071. nextToken();
  1072. bool Unreachable = false;
  1073. if (!IfDef && (FormatTok->is(tok::kw_false) || FormatTok->TokenText == "0"))
  1074. Unreachable = true;
  1075. if (IfDef && !IfNDef && FormatTok->TokenText == "SWIG")
  1076. Unreachable = true;
  1077. conditionalCompilationStart(Unreachable);
  1078. FormatToken *IfCondition = FormatTok;
  1079. // If there's a #ifndef on the first line, and the only lines before it are
  1080. // comments, it could be an include guard.
  1081. bool MaybeIncludeGuard = IfNDef;
  1082. if (IncludeGuard == IG_Inited && MaybeIncludeGuard) {
  1083. for (auto &Line : Lines) {
  1084. if (!Line.Tokens.front().Tok->is(tok::comment)) {
  1085. MaybeIncludeGuard = false;
  1086. IncludeGuard = IG_Rejected;
  1087. break;
  1088. }
  1089. }
  1090. }
  1091. --PPBranchLevel;
  1092. parsePPUnknown();
  1093. ++PPBranchLevel;
  1094. if (IncludeGuard == IG_Inited && MaybeIncludeGuard) {
  1095. IncludeGuard = IG_IfNdefed;
  1096. IncludeGuardToken = IfCondition;
  1097. }
  1098. }
  1099. void UnwrappedLineParser::parsePPElse() {
  1100. // If a potential include guard has an #else, it's not an include guard.
  1101. if (IncludeGuard == IG_Defined && PPBranchLevel == 0)
  1102. IncludeGuard = IG_Rejected;
  1103. // Don't crash when there is an #else without an #if.
  1104. assert(PPBranchLevel >= -1);
  1105. if (PPBranchLevel == -1)
  1106. conditionalCompilationStart(/*Unreachable=*/true);
  1107. conditionalCompilationAlternative();
  1108. --PPBranchLevel;
  1109. parsePPUnknown();
  1110. ++PPBranchLevel;
  1111. }
  1112. void UnwrappedLineParser::parsePPEndIf() {
  1113. conditionalCompilationEnd();
  1114. parsePPUnknown();
  1115. // If the #endif of a potential include guard is the last thing in the file,
  1116. // then we found an include guard.
  1117. if (IncludeGuard == IG_Defined && PPBranchLevel == -1 && Tokens->isEOF() &&
  1118. Style.IndentPPDirectives != FormatStyle::PPDIS_None) {
  1119. IncludeGuard = IG_Found;
  1120. }
  1121. }
  1122. void UnwrappedLineParser::parsePPDefine() {
  1123. nextToken();
  1124. if (!FormatTok->Tok.getIdentifierInfo()) {
  1125. IncludeGuard = IG_Rejected;
  1126. IncludeGuardToken = nullptr;
  1127. parsePPUnknown();
  1128. return;
  1129. }
  1130. if (IncludeGuard == IG_IfNdefed &&
  1131. IncludeGuardToken->TokenText == FormatTok->TokenText) {
  1132. IncludeGuard = IG_Defined;
  1133. IncludeGuardToken = nullptr;
  1134. for (auto &Line : Lines) {
  1135. if (!Line.Tokens.front().Tok->isOneOf(tok::comment, tok::hash)) {
  1136. IncludeGuard = IG_Rejected;
  1137. break;
  1138. }
  1139. }
  1140. }
  1141. // In the context of a define, even keywords should be treated as normal
  1142. // identifiers. Setting the kind to identifier is not enough, because we need
  1143. // to treat additional keywords like __except as well, which are already
  1144. // identifiers. Setting the identifier info to null interferes with include
  1145. // guard processing above, and changes preprocessing nesting.
  1146. FormatTok->Tok.setKind(tok::identifier);
  1147. FormatTok->Tok.setIdentifierInfo(Keywords.kw_internal_ident_after_define);
  1148. nextToken();
  1149. if (FormatTok->Tok.getKind() == tok::l_paren &&
  1150. !FormatTok->hasWhitespaceBefore()) {
  1151. parseParens();
  1152. }
  1153. if (Style.IndentPPDirectives != FormatStyle::PPDIS_None)
  1154. Line->Level += PPBranchLevel + 1;
  1155. addUnwrappedLine();
  1156. ++Line->Level;
  1157. Line->PPLevel = PPBranchLevel + (IncludeGuard == IG_Defined ? 0 : 1);
  1158. assert((int)Line->PPLevel >= 0);
  1159. Line->InMacroBody = true;
  1160. // Errors during a preprocessor directive can only affect the layout of the
  1161. // preprocessor directive, and thus we ignore them. An alternative approach
  1162. // would be to use the same approach we use on the file level (no
  1163. // re-indentation if there was a structural error) within the macro
  1164. // definition.
  1165. parseFile();
  1166. }
  1167. void UnwrappedLineParser::parsePPPragma() {
  1168. Line->InPragmaDirective = true;
  1169. parsePPUnknown();
  1170. }
  1171. void UnwrappedLineParser::parsePPUnknown() {
  1172. do {
  1173. nextToken();
  1174. } while (!eof());
  1175. if (Style.IndentPPDirectives != FormatStyle::PPDIS_None)
  1176. Line->Level += PPBranchLevel + 1;
  1177. addUnwrappedLine();
  1178. }
  1179. // Here we exclude certain tokens that are not usually the first token in an
  1180. // unwrapped line. This is used in attempt to distinguish macro calls without
  1181. // trailing semicolons from other constructs split to several lines.
  1182. static bool tokenCanStartNewLine(const FormatToken &Tok) {
  1183. // Semicolon can be a null-statement, l_square can be a start of a macro or
  1184. // a C++11 attribute, but this doesn't seem to be common.
  1185. return Tok.isNot(tok::semi) && Tok.isNot(tok::l_brace) &&
  1186. Tok.isNot(TT_AttributeSquare) &&
  1187. // Tokens that can only be used as binary operators and a part of
  1188. // overloaded operator names.
  1189. Tok.isNot(tok::period) && Tok.isNot(tok::periodstar) &&
  1190. Tok.isNot(tok::arrow) && Tok.isNot(tok::arrowstar) &&
  1191. Tok.isNot(tok::less) && Tok.isNot(tok::greater) &&
  1192. Tok.isNot(tok::slash) && Tok.isNot(tok::percent) &&
  1193. Tok.isNot(tok::lessless) && Tok.isNot(tok::greatergreater) &&
  1194. Tok.isNot(tok::equal) && Tok.isNot(tok::plusequal) &&
  1195. Tok.isNot(tok::minusequal) && Tok.isNot(tok::starequal) &&
  1196. Tok.isNot(tok::slashequal) && Tok.isNot(tok::percentequal) &&
  1197. Tok.isNot(tok::ampequal) && Tok.isNot(tok::pipeequal) &&
  1198. Tok.isNot(tok::caretequal) && Tok.isNot(tok::greatergreaterequal) &&
  1199. Tok.isNot(tok::lesslessequal) &&
  1200. // Colon is used in labels, base class lists, initializer lists,
  1201. // range-based for loops, ternary operator, but should never be the
  1202. // first token in an unwrapped line.
  1203. Tok.isNot(tok::colon) &&
  1204. // 'noexcept' is a trailing annotation.
  1205. Tok.isNot(tok::kw_noexcept);
  1206. }
  1207. static bool mustBeJSIdent(const AdditionalKeywords &Keywords,
  1208. const FormatToken *FormatTok) {
  1209. // FIXME: This returns true for C/C++ keywords like 'struct'.
  1210. return FormatTok->is(tok::identifier) &&
  1211. (FormatTok->Tok.getIdentifierInfo() == nullptr ||
  1212. !FormatTok->isOneOf(
  1213. Keywords.kw_in, Keywords.kw_of, Keywords.kw_as, Keywords.kw_async,
  1214. Keywords.kw_await, Keywords.kw_yield, Keywords.kw_finally,
  1215. Keywords.kw_function, Keywords.kw_import, Keywords.kw_is,
  1216. Keywords.kw_let, Keywords.kw_var, tok::kw_const,
  1217. Keywords.kw_abstract, Keywords.kw_extends, Keywords.kw_implements,
  1218. Keywords.kw_instanceof, Keywords.kw_interface,
  1219. Keywords.kw_override, Keywords.kw_throws, Keywords.kw_from));
  1220. }
  1221. static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords,
  1222. const FormatToken *FormatTok) {
  1223. return FormatTok->Tok.isLiteral() ||
  1224. FormatTok->isOneOf(tok::kw_true, tok::kw_false) ||
  1225. mustBeJSIdent(Keywords, FormatTok);
  1226. }
  1227. // isJSDeclOrStmt returns true if |FormatTok| starts a declaration or statement
  1228. // when encountered after a value (see mustBeJSIdentOrValue).
  1229. static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords,
  1230. const FormatToken *FormatTok) {
  1231. return FormatTok->isOneOf(
  1232. tok::kw_return, Keywords.kw_yield,
  1233. // conditionals
  1234. tok::kw_if, tok::kw_else,
  1235. // loops
  1236. tok::kw_for, tok::kw_while, tok::kw_do, tok::kw_continue, tok::kw_break,
  1237. // switch/case
  1238. tok::kw_switch, tok::kw_case,
  1239. // exceptions
  1240. tok::kw_throw, tok::kw_try, tok::kw_catch, Keywords.kw_finally,
  1241. // declaration
  1242. tok::kw_const, tok::kw_class, Keywords.kw_var, Keywords.kw_let,
  1243. Keywords.kw_async, Keywords.kw_function,
  1244. // import/export
  1245. Keywords.kw_import, tok::kw_export);
  1246. }
  1247. // Checks whether a token is a type in K&R C (aka C78).
  1248. static bool isC78Type(const FormatToken &Tok) {
  1249. return Tok.isOneOf(tok::kw_char, tok::kw_short, tok::kw_int, tok::kw_long,
  1250. tok::kw_unsigned, tok::kw_float, tok::kw_double,
  1251. tok::identifier);
  1252. }
  1253. // This function checks whether a token starts the first parameter declaration
  1254. // in a K&R C (aka C78) function definition, e.g.:
  1255. // int f(a, b)
  1256. // short a, b;
  1257. // {
  1258. // return a + b;
  1259. // }
  1260. static bool isC78ParameterDecl(const FormatToken *Tok, const FormatToken *Next,
  1261. const FormatToken *FuncName) {
  1262. assert(Tok);
  1263. assert(Next);
  1264. assert(FuncName);
  1265. if (FuncName->isNot(tok::identifier))
  1266. return false;
  1267. const FormatToken *Prev = FuncName->Previous;
  1268. if (!Prev || (Prev->isNot(tok::star) && !isC78Type(*Prev)))
  1269. return false;
  1270. if (!isC78Type(*Tok) &&
  1271. !Tok->isOneOf(tok::kw_register, tok::kw_struct, tok::kw_union)) {
  1272. return false;
  1273. }
  1274. if (Next->isNot(tok::star) && !Next->Tok.getIdentifierInfo())
  1275. return false;
  1276. Tok = Tok->Previous;
  1277. if (!Tok || Tok->isNot(tok::r_paren))
  1278. return false;
  1279. Tok = Tok->Previous;
  1280. if (!Tok || Tok->isNot(tok::identifier))
  1281. return false;
  1282. return Tok->Previous && Tok->Previous->isOneOf(tok::l_paren, tok::comma);
  1283. }
  1284. bool UnwrappedLineParser::parseModuleImport() {
  1285. assert(FormatTok->is(Keywords.kw_import) && "'import' expected");
  1286. if (auto Token = Tokens->peekNextToken(/*SkipComment=*/true);
  1287. !Token->Tok.getIdentifierInfo() &&
  1288. !Token->isOneOf(tok::colon, tok::less, tok::string_literal)) {
  1289. return false;
  1290. }
  1291. nextToken();
  1292. while (!eof()) {
  1293. if (FormatTok->is(tok::colon)) {
  1294. FormatTok->setFinalizedType(TT_ModulePartitionColon);
  1295. }
  1296. // Handle import <foo/bar.h> as we would an include statement.
  1297. else if (FormatTok->is(tok::less)) {
  1298. nextToken();
  1299. while (!FormatTok->isOneOf(tok::semi, tok::greater, tok::eof)) {
  1300. // Mark tokens up to the trailing line comments as implicit string
  1301. // literals.
  1302. if (FormatTok->isNot(tok::comment) &&
  1303. !FormatTok->TokenText.startswith("//")) {
  1304. FormatTok->setFinalizedType(TT_ImplicitStringLiteral);
  1305. }
  1306. nextToken();
  1307. }
  1308. }
  1309. if (FormatTok->is(tok::semi)) {
  1310. nextToken();
  1311. break;
  1312. }
  1313. nextToken();
  1314. }
  1315. addUnwrappedLine();
  1316. return true;
  1317. }
  1318. // readTokenWithJavaScriptASI reads the next token and terminates the current
  1319. // line if JavaScript Automatic Semicolon Insertion must
  1320. // happen between the current token and the next token.
  1321. //
  1322. // This method is conservative - it cannot cover all edge cases of JavaScript,
  1323. // but only aims to correctly handle certain well known cases. It *must not*
  1324. // return true in speculative cases.
  1325. void UnwrappedLineParser::readTokenWithJavaScriptASI() {
  1326. FormatToken *Previous = FormatTok;
  1327. readToken();
  1328. FormatToken *Next = FormatTok;
  1329. bool IsOnSameLine =
  1330. CommentsBeforeNextToken.empty()
  1331. ? Next->NewlinesBefore == 0
  1332. : CommentsBeforeNextToken.front()->NewlinesBefore == 0;
  1333. if (IsOnSameLine)
  1334. return;
  1335. bool PreviousMustBeValue = mustBeJSIdentOrValue(Keywords, Previous);
  1336. bool PreviousStartsTemplateExpr =
  1337. Previous->is(TT_TemplateString) && Previous->TokenText.endswith("${");
  1338. if (PreviousMustBeValue || Previous->is(tok::r_paren)) {
  1339. // If the line contains an '@' sign, the previous token might be an
  1340. // annotation, which can precede another identifier/value.
  1341. bool HasAt = llvm::any_of(Line->Tokens, [](UnwrappedLineNode &LineNode) {
  1342. return LineNode.Tok->is(tok::at);
  1343. });
  1344. if (HasAt)
  1345. return;
  1346. }
  1347. if (Next->is(tok::exclaim) && PreviousMustBeValue)
  1348. return addUnwrappedLine();
  1349. bool NextMustBeValue = mustBeJSIdentOrValue(Keywords, Next);
  1350. bool NextEndsTemplateExpr =
  1351. Next->is(TT_TemplateString) && Next->TokenText.startswith("}");
  1352. if (NextMustBeValue && !NextEndsTemplateExpr && !PreviousStartsTemplateExpr &&
  1353. (PreviousMustBeValue ||
  1354. Previous->isOneOf(tok::r_square, tok::r_paren, tok::plusplus,
  1355. tok::minusminus))) {
  1356. return addUnwrappedLine();
  1357. }
  1358. if ((PreviousMustBeValue || Previous->is(tok::r_paren)) &&
  1359. isJSDeclOrStmt(Keywords, Next)) {
  1360. return addUnwrappedLine();
  1361. }
  1362. }
  1363. void UnwrappedLineParser::parseStructuralElement(
  1364. bool IsTopLevel, TokenType NextLBracesType, IfStmtKind *IfKind,
  1365. FormatToken **IfLeftBrace, bool *HasDoWhile, bool *HasLabel) {
  1366. if (Style.Language == FormatStyle::LK_TableGen &&
  1367. FormatTok->is(tok::pp_include)) {
  1368. nextToken();
  1369. if (FormatTok->is(tok::string_literal))
  1370. nextToken();
  1371. addUnwrappedLine();
  1372. return;
  1373. }
  1374. if (Style.isVerilog()) {
  1375. // Skip things that can exist before keywords like 'if' and 'case'.
  1376. while (true) {
  1377. if (FormatTok->isOneOf(Keywords.kw_priority, Keywords.kw_unique,
  1378. Keywords.kw_unique0)) {
  1379. nextToken();
  1380. } else if (FormatTok->is(tok::l_paren) &&
  1381. Tokens->peekNextToken()->is(tok::star)) {
  1382. parseParens();
  1383. } else {
  1384. break;
  1385. }
  1386. }
  1387. }
  1388. // Tokens that only make sense at the beginning of a line.
  1389. switch (FormatTok->Tok.getKind()) {
  1390. case tok::kw_asm:
  1391. nextToken();
  1392. if (FormatTok->is(tok::l_brace)) {
  1393. FormatTok->setFinalizedType(TT_InlineASMBrace);
  1394. nextToken();
  1395. while (FormatTok && !eof()) {
  1396. if (FormatTok->is(tok::r_brace)) {
  1397. FormatTok->setFinalizedType(TT_InlineASMBrace);
  1398. nextToken();
  1399. addUnwrappedLine();
  1400. break;
  1401. }
  1402. FormatTok->Finalized = true;
  1403. nextToken();
  1404. }
  1405. }
  1406. break;
  1407. case tok::kw_namespace:
  1408. parseNamespace();
  1409. return;
  1410. case tok::kw_public:
  1411. case tok::kw_protected:
  1412. case tok::kw_private:
  1413. if (Style.Language == FormatStyle::LK_Java || Style.isJavaScript() ||
  1414. Style.isCSharp()) {
  1415. nextToken();
  1416. } else {
  1417. parseAccessSpecifier();
  1418. }
  1419. return;
  1420. case tok::kw_if: {
  1421. if (Style.isJavaScript() && Line->MustBeDeclaration) {
  1422. // field/method declaration.
  1423. break;
  1424. }
  1425. FormatToken *Tok = parseIfThenElse(IfKind);
  1426. if (IfLeftBrace)
  1427. *IfLeftBrace = Tok;
  1428. return;
  1429. }
  1430. case tok::kw_for:
  1431. case tok::kw_while:
  1432. if (Style.isJavaScript() && Line->MustBeDeclaration) {
  1433. // field/method declaration.
  1434. break;
  1435. }
  1436. parseForOrWhileLoop();
  1437. return;
  1438. case tok::kw_do:
  1439. if (Style.isJavaScript() && Line->MustBeDeclaration) {
  1440. // field/method declaration.
  1441. break;
  1442. }
  1443. parseDoWhile();
  1444. if (HasDoWhile)
  1445. *HasDoWhile = true;
  1446. return;
  1447. case tok::kw_switch:
  1448. if (Style.isJavaScript() && Line->MustBeDeclaration) {
  1449. // 'switch: string' field declaration.
  1450. break;
  1451. }
  1452. parseSwitch();
  1453. return;
  1454. case tok::kw_default:
  1455. // In Verilog default along with other labels are handled in the next loop.
  1456. if (Style.isVerilog())
  1457. break;
  1458. if (Style.isJavaScript() && Line->MustBeDeclaration) {
  1459. // 'default: string' field declaration.
  1460. break;
  1461. }
  1462. nextToken();
  1463. if (FormatTok->is(tok::colon)) {
  1464. parseLabel();
  1465. return;
  1466. }
  1467. // e.g. "default void f() {}" in a Java interface.
  1468. break;
  1469. case tok::kw_case:
  1470. // Proto: there are no switch/case statements.
  1471. if (Style.isProto()) {
  1472. nextToken();
  1473. return;
  1474. }
  1475. if (Style.isVerilog()) {
  1476. parseBlock();
  1477. addUnwrappedLine();
  1478. return;
  1479. }
  1480. if (Style.isJavaScript() && Line->MustBeDeclaration) {
  1481. // 'case: string' field declaration.
  1482. nextToken();
  1483. break;
  1484. }
  1485. parseCaseLabel();
  1486. return;
  1487. case tok::kw_try:
  1488. case tok::kw___try:
  1489. if (Style.isJavaScript() && Line->MustBeDeclaration) {
  1490. // field/method declaration.
  1491. break;
  1492. }
  1493. parseTryCatch();
  1494. return;
  1495. case tok::kw_extern:
  1496. nextToken();
  1497. if (Style.isVerilog()) {
  1498. // In Verilog and extern module declaration looks like a start of module.
  1499. // But there is no body and endmodule. So we handle it separately.
  1500. if (Keywords.isVerilogHierarchy(*FormatTok)) {
  1501. parseVerilogHierarchyHeader();
  1502. return;
  1503. }
  1504. } else if (FormatTok->is(tok::string_literal)) {
  1505. nextToken();
  1506. if (FormatTok->is(tok::l_brace)) {
  1507. if (Style.BraceWrapping.AfterExternBlock)
  1508. addUnwrappedLine();
  1509. // Either we indent or for backwards compatibility we follow the
  1510. // AfterExternBlock style.
  1511. unsigned AddLevels =
  1512. (Style.IndentExternBlock == FormatStyle::IEBS_Indent) ||
  1513. (Style.BraceWrapping.AfterExternBlock &&
  1514. Style.IndentExternBlock ==
  1515. FormatStyle::IEBS_AfterExternBlock)
  1516. ? 1u
  1517. : 0u;
  1518. parseBlock(/*MustBeDeclaration=*/true, AddLevels);
  1519. addUnwrappedLine();
  1520. return;
  1521. }
  1522. }
  1523. break;
  1524. case tok::kw_export:
  1525. if (Style.isJavaScript()) {
  1526. parseJavaScriptEs6ImportExport();
  1527. return;
  1528. }
  1529. if (Style.isCpp()) {
  1530. nextToken();
  1531. if (FormatTok->is(tok::kw_namespace)) {
  1532. parseNamespace();
  1533. return;
  1534. }
  1535. if (FormatTok->is(Keywords.kw_import) && parseModuleImport())
  1536. return;
  1537. }
  1538. break;
  1539. case tok::kw_inline:
  1540. nextToken();
  1541. if (FormatTok->is(tok::kw_namespace)) {
  1542. parseNamespace();
  1543. return;
  1544. }
  1545. break;
  1546. case tok::identifier:
  1547. if (FormatTok->is(TT_ForEachMacro)) {
  1548. parseForOrWhileLoop();
  1549. return;
  1550. }
  1551. if (FormatTok->is(TT_MacroBlockBegin)) {
  1552. parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
  1553. /*MunchSemi=*/false);
  1554. return;
  1555. }
  1556. if (FormatTok->is(Keywords.kw_import)) {
  1557. if (Style.isJavaScript()) {
  1558. parseJavaScriptEs6ImportExport();
  1559. return;
  1560. }
  1561. if (Style.Language == FormatStyle::LK_Proto) {
  1562. nextToken();
  1563. if (FormatTok->is(tok::kw_public))
  1564. nextToken();
  1565. if (!FormatTok->is(tok::string_literal))
  1566. return;
  1567. nextToken();
  1568. if (FormatTok->is(tok::semi))
  1569. nextToken();
  1570. addUnwrappedLine();
  1571. return;
  1572. }
  1573. if (Style.isCpp() && parseModuleImport())
  1574. return;
  1575. }
  1576. if (Style.isCpp() &&
  1577. FormatTok->isOneOf(Keywords.kw_signals, Keywords.kw_qsignals,
  1578. Keywords.kw_slots, Keywords.kw_qslots)) {
  1579. nextToken();
  1580. if (FormatTok->is(tok::colon)) {
  1581. nextToken();
  1582. addUnwrappedLine();
  1583. return;
  1584. }
  1585. }
  1586. if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) {
  1587. parseStatementMacro();
  1588. return;
  1589. }
  1590. if (Style.isCpp() && FormatTok->is(TT_NamespaceMacro)) {
  1591. parseNamespace();
  1592. return;
  1593. }
  1594. // In all other cases, parse the declaration.
  1595. break;
  1596. default:
  1597. break;
  1598. }
  1599. do {
  1600. const FormatToken *Previous = FormatTok->Previous;
  1601. switch (FormatTok->Tok.getKind()) {
  1602. case tok::at:
  1603. nextToken();
  1604. if (FormatTok->is(tok::l_brace)) {
  1605. nextToken();
  1606. parseBracedList();
  1607. break;
  1608. } else if (Style.Language == FormatStyle::LK_Java &&
  1609. FormatTok->is(Keywords.kw_interface)) {
  1610. nextToken();
  1611. break;
  1612. }
  1613. switch (FormatTok->Tok.getObjCKeywordID()) {
  1614. case tok::objc_public:
  1615. case tok::objc_protected:
  1616. case tok::objc_package:
  1617. case tok::objc_private:
  1618. return parseAccessSpecifier();
  1619. case tok::objc_interface:
  1620. case tok::objc_implementation:
  1621. return parseObjCInterfaceOrImplementation();
  1622. case tok::objc_protocol:
  1623. if (parseObjCProtocol())
  1624. return;
  1625. break;
  1626. case tok::objc_end:
  1627. return; // Handled by the caller.
  1628. case tok::objc_optional:
  1629. case tok::objc_required:
  1630. nextToken();
  1631. addUnwrappedLine();
  1632. return;
  1633. case tok::objc_autoreleasepool:
  1634. nextToken();
  1635. if (FormatTok->is(tok::l_brace)) {
  1636. if (Style.BraceWrapping.AfterControlStatement ==
  1637. FormatStyle::BWACS_Always) {
  1638. addUnwrappedLine();
  1639. }
  1640. parseBlock();
  1641. }
  1642. addUnwrappedLine();
  1643. return;
  1644. case tok::objc_synchronized:
  1645. nextToken();
  1646. if (FormatTok->is(tok::l_paren)) {
  1647. // Skip synchronization object
  1648. parseParens();
  1649. }
  1650. if (FormatTok->is(tok::l_brace)) {
  1651. if (Style.BraceWrapping.AfterControlStatement ==
  1652. FormatStyle::BWACS_Always) {
  1653. addUnwrappedLine();
  1654. }
  1655. parseBlock();
  1656. }
  1657. addUnwrappedLine();
  1658. return;
  1659. case tok::objc_try:
  1660. // This branch isn't strictly necessary (the kw_try case below would
  1661. // do this too after the tok::at is parsed above). But be explicit.
  1662. parseTryCatch();
  1663. return;
  1664. default:
  1665. break;
  1666. }
  1667. break;
  1668. case tok::kw_requires: {
  1669. if (Style.isCpp()) {
  1670. bool ParsedClause = parseRequires();
  1671. if (ParsedClause)
  1672. return;
  1673. } else {
  1674. nextToken();
  1675. }
  1676. break;
  1677. }
  1678. case tok::kw_enum:
  1679. // Ignore if this is part of "template <enum ...".
  1680. if (Previous && Previous->is(tok::less)) {
  1681. nextToken();
  1682. break;
  1683. }
  1684. // parseEnum falls through and does not yet add an unwrapped line as an
  1685. // enum definition can start a structural element.
  1686. if (!parseEnum())
  1687. break;
  1688. // This only applies for C++.
  1689. if (!Style.isCpp()) {
  1690. addUnwrappedLine();
  1691. return;
  1692. }
  1693. break;
  1694. case tok::kw_typedef:
  1695. nextToken();
  1696. if (FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS,
  1697. Keywords.kw_CF_ENUM, Keywords.kw_CF_OPTIONS,
  1698. Keywords.kw_CF_CLOSED_ENUM,
  1699. Keywords.kw_NS_CLOSED_ENUM)) {
  1700. parseEnum();
  1701. }
  1702. break;
  1703. case tok::kw_class:
  1704. if (Style.isVerilog()) {
  1705. parseBlock();
  1706. addUnwrappedLine();
  1707. return;
  1708. }
  1709. [[fallthrough]];
  1710. case tok::kw_struct:
  1711. case tok::kw_union:
  1712. if (parseStructLike())
  1713. return;
  1714. break;
  1715. case tok::period:
  1716. nextToken();
  1717. // In Java, classes have an implicit static member "class".
  1718. if (Style.Language == FormatStyle::LK_Java && FormatTok &&
  1719. FormatTok->is(tok::kw_class)) {
  1720. nextToken();
  1721. }
  1722. if (Style.isJavaScript() && FormatTok &&
  1723. FormatTok->Tok.getIdentifierInfo()) {
  1724. // JavaScript only has pseudo keywords, all keywords are allowed to
  1725. // appear in "IdentifierName" positions. See http://es5.github.io/#x7.6
  1726. nextToken();
  1727. }
  1728. break;
  1729. case tok::semi:
  1730. nextToken();
  1731. addUnwrappedLine();
  1732. return;
  1733. case tok::r_brace:
  1734. addUnwrappedLine();
  1735. return;
  1736. case tok::l_paren: {
  1737. parseParens();
  1738. // Break the unwrapped line if a K&R C function definition has a parameter
  1739. // declaration.
  1740. if (!IsTopLevel || !Style.isCpp() || !Previous || eof())
  1741. break;
  1742. if (isC78ParameterDecl(FormatTok,
  1743. Tokens->peekNextToken(/*SkipComment=*/true),
  1744. Previous)) {
  1745. addUnwrappedLine();
  1746. return;
  1747. }
  1748. break;
  1749. }
  1750. case tok::kw_operator:
  1751. nextToken();
  1752. if (FormatTok->isBinaryOperator())
  1753. nextToken();
  1754. break;
  1755. case tok::caret:
  1756. nextToken();
  1757. if (FormatTok->Tok.isAnyIdentifier() ||
  1758. FormatTok->isSimpleTypeSpecifier()) {
  1759. nextToken();
  1760. }
  1761. if (FormatTok->is(tok::l_paren))
  1762. parseParens();
  1763. if (FormatTok->is(tok::l_brace))
  1764. parseChildBlock();
  1765. break;
  1766. case tok::l_brace:
  1767. if (NextLBracesType != TT_Unknown)
  1768. FormatTok->setFinalizedType(NextLBracesType);
  1769. if (!tryToParsePropertyAccessor() && !tryToParseBracedList()) {
  1770. // A block outside of parentheses must be the last part of a
  1771. // structural element.
  1772. // FIXME: Figure out cases where this is not true, and add projections
  1773. // for them (the one we know is missing are lambdas).
  1774. if (Style.Language == FormatStyle::LK_Java &&
  1775. Line->Tokens.front().Tok->is(Keywords.kw_synchronized)) {
  1776. // If necessary, we could set the type to something different than
  1777. // TT_FunctionLBrace.
  1778. if (Style.BraceWrapping.AfterControlStatement ==
  1779. FormatStyle::BWACS_Always) {
  1780. addUnwrappedLine();
  1781. }
  1782. } else if (Style.BraceWrapping.AfterFunction) {
  1783. addUnwrappedLine();
  1784. }
  1785. FormatTok->setFinalizedType(TT_FunctionLBrace);
  1786. parseBlock();
  1787. addUnwrappedLine();
  1788. return;
  1789. }
  1790. // Otherwise this was a braced init list, and the structural
  1791. // element continues.
  1792. break;
  1793. case tok::kw_try:
  1794. if (Style.isJavaScript() && Line->MustBeDeclaration) {
  1795. // field/method declaration.
  1796. nextToken();
  1797. break;
  1798. }
  1799. // We arrive here when parsing function-try blocks.
  1800. if (Style.BraceWrapping.AfterFunction)
  1801. addUnwrappedLine();
  1802. parseTryCatch();
  1803. return;
  1804. case tok::identifier: {
  1805. if (Style.isCSharp() && FormatTok->is(Keywords.kw_where) &&
  1806. Line->MustBeDeclaration) {
  1807. addUnwrappedLine();
  1808. parseCSharpGenericTypeConstraint();
  1809. break;
  1810. }
  1811. if (FormatTok->is(TT_MacroBlockEnd)) {
  1812. addUnwrappedLine();
  1813. return;
  1814. }
  1815. // Function declarations (as opposed to function expressions) are parsed
  1816. // on their own unwrapped line by continuing this loop. Function
  1817. // expressions (functions that are not on their own line) must not create
  1818. // a new unwrapped line, so they are special cased below.
  1819. size_t TokenCount = Line->Tokens.size();
  1820. if (Style.isJavaScript() && FormatTok->is(Keywords.kw_function) &&
  1821. (TokenCount > 1 || (TokenCount == 1 && !Line->Tokens.front().Tok->is(
  1822. Keywords.kw_async)))) {
  1823. tryToParseJSFunction();
  1824. break;
  1825. }
  1826. if ((Style.isJavaScript() || Style.Language == FormatStyle::LK_Java) &&
  1827. FormatTok->is(Keywords.kw_interface)) {
  1828. if (Style.isJavaScript()) {
  1829. // In JavaScript/TypeScript, "interface" can be used as a standalone
  1830. // identifier, e.g. in `var interface = 1;`. If "interface" is
  1831. // followed by another identifier, it is very like to be an actual
  1832. // interface declaration.
  1833. unsigned StoredPosition = Tokens->getPosition();
  1834. FormatToken *Next = Tokens->getNextToken();
  1835. FormatTok = Tokens->setPosition(StoredPosition);
  1836. if (!mustBeJSIdent(Keywords, Next)) {
  1837. nextToken();
  1838. break;
  1839. }
  1840. }
  1841. parseRecord();
  1842. addUnwrappedLine();
  1843. return;
  1844. }
  1845. if (Style.isVerilog()) {
  1846. if (FormatTok->is(Keywords.kw_table)) {
  1847. parseVerilogTable();
  1848. return;
  1849. }
  1850. if (Keywords.isVerilogBegin(*FormatTok) ||
  1851. Keywords.isVerilogHierarchy(*FormatTok)) {
  1852. parseBlock();
  1853. addUnwrappedLine();
  1854. return;
  1855. }
  1856. }
  1857. if (FormatTok->is(Keywords.kw_interface)) {
  1858. if (parseStructLike())
  1859. return;
  1860. break;
  1861. }
  1862. if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) {
  1863. parseStatementMacro();
  1864. return;
  1865. }
  1866. // See if the following token should start a new unwrapped line.
  1867. auto isAttr = FormatTok->is(TT_AttributeMacro);
  1868. StringRef Text = FormatTok->TokenText;
  1869. FormatToken *PreviousToken = FormatTok;
  1870. nextToken();
  1871. // JS doesn't have macros, and within classes colons indicate fields, not
  1872. // labels.
  1873. if (Style.isJavaScript())
  1874. break;
  1875. auto OneTokenSoFar = [&]() {
  1876. auto I = Line->Tokens.begin(), E = Line->Tokens.end();
  1877. while (I != E && I->Tok->is(tok::comment))
  1878. ++I;
  1879. while (I != E && Style.isVerilog() && I->Tok->is(tok::hash))
  1880. ++I;
  1881. return I != E && (++I == E);
  1882. };
  1883. if (OneTokenSoFar()) {
  1884. // In Verilog labels can be any expression, so we don't do them here.
  1885. if (!Style.isVerilog() && FormatTok->is(tok::colon) &&
  1886. !Line->MustBeDeclaration) {
  1887. Line->Tokens.begin()->Tok->MustBreakBefore = true;
  1888. parseLabel(!Style.IndentGotoLabels);
  1889. if (HasLabel)
  1890. *HasLabel = true;
  1891. return;
  1892. }
  1893. // Recognize function-like macro usages without trailing semicolon as
  1894. // well as free-standing macros like Q_OBJECT.
  1895. bool FunctionLike = FormatTok->is(tok::l_paren);
  1896. if (FunctionLike)
  1897. parseParens();
  1898. bool FollowedByNewline =
  1899. CommentsBeforeNextToken.empty()
  1900. ? FormatTok->NewlinesBefore > 0
  1901. : CommentsBeforeNextToken.front()->NewlinesBefore > 0;
  1902. if (FollowedByNewline && (Text.size() >= 5 || FunctionLike) &&
  1903. tokenCanStartNewLine(*FormatTok) && Text == Text.upper() && !isAttr) {
  1904. if (PreviousToken->isNot(TT_UntouchableMacroFunc))
  1905. PreviousToken->setFinalizedType(TT_FunctionLikeOrFreestandingMacro);
  1906. addUnwrappedLine();
  1907. return;
  1908. }
  1909. }
  1910. break;
  1911. }
  1912. case tok::equal:
  1913. if ((Style.isJavaScript() || Style.isCSharp()) &&
  1914. FormatTok->is(TT_FatArrow)) {
  1915. tryToParseChildBlock();
  1916. break;
  1917. }
  1918. nextToken();
  1919. if (FormatTok->is(tok::l_brace)) {
  1920. // Block kind should probably be set to BK_BracedInit for any language.
  1921. // C# needs this change to ensure that array initialisers and object
  1922. // initialisers are indented the same way.
  1923. if (Style.isCSharp())
  1924. FormatTok->setBlockKind(BK_BracedInit);
  1925. nextToken();
  1926. parseBracedList();
  1927. } else if (Style.Language == FormatStyle::LK_Proto &&
  1928. FormatTok->is(tok::less)) {
  1929. nextToken();
  1930. parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
  1931. /*ClosingBraceKind=*/tok::greater);
  1932. }
  1933. break;
  1934. case tok::l_square:
  1935. parseSquare();
  1936. break;
  1937. case tok::kw_new:
  1938. parseNew();
  1939. break;
  1940. case tok::kw_case:
  1941. // Proto: there are no switch/case statements.
  1942. if (Style.isProto()) {
  1943. nextToken();
  1944. return;
  1945. }
  1946. // In Verilog switch is called case.
  1947. if (Style.isVerilog()) {
  1948. parseBlock();
  1949. addUnwrappedLine();
  1950. return;
  1951. }
  1952. if (Style.isJavaScript() && Line->MustBeDeclaration) {
  1953. // 'case: string' field declaration.
  1954. nextToken();
  1955. break;
  1956. }
  1957. parseCaseLabel();
  1958. break;
  1959. case tok::kw_default:
  1960. nextToken();
  1961. if (Style.isVerilog()) {
  1962. if (FormatTok->is(tok::colon)) {
  1963. // The label will be handled in the next iteration.
  1964. break;
  1965. }
  1966. if (FormatTok->is(Keywords.kw_clocking)) {
  1967. // A default clocking block.
  1968. parseBlock();
  1969. addUnwrappedLine();
  1970. return;
  1971. }
  1972. parseVerilogCaseLabel();
  1973. return;
  1974. }
  1975. break;
  1976. case tok::colon:
  1977. nextToken();
  1978. if (Style.isVerilog()) {
  1979. parseVerilogCaseLabel();
  1980. return;
  1981. }
  1982. break;
  1983. default:
  1984. nextToken();
  1985. break;
  1986. }
  1987. } while (!eof());
  1988. }
  1989. bool UnwrappedLineParser::tryToParsePropertyAccessor() {
  1990. assert(FormatTok->is(tok::l_brace));
  1991. if (!Style.isCSharp())
  1992. return false;
  1993. // See if it's a property accessor.
  1994. if (FormatTok->Previous->isNot(tok::identifier))
  1995. return false;
  1996. // See if we are inside a property accessor.
  1997. //
  1998. // Record the current tokenPosition so that we can advance and
  1999. // reset the current token. `Next` is not set yet so we need
  2000. // another way to advance along the token stream.
  2001. unsigned int StoredPosition = Tokens->getPosition();
  2002. FormatToken *Tok = Tokens->getNextToken();
  2003. // A trivial property accessor is of the form:
  2004. // { [ACCESS_SPECIFIER] [get]; [ACCESS_SPECIFIER] [set|init] }
  2005. // Track these as they do not require line breaks to be introduced.
  2006. bool HasSpecialAccessor = false;
  2007. bool IsTrivialPropertyAccessor = true;
  2008. while (!eof()) {
  2009. if (Tok->isOneOf(tok::semi, tok::kw_public, tok::kw_private,
  2010. tok::kw_protected, Keywords.kw_internal, Keywords.kw_get,
  2011. Keywords.kw_init, Keywords.kw_set)) {
  2012. if (Tok->isOneOf(Keywords.kw_get, Keywords.kw_init, Keywords.kw_set))
  2013. HasSpecialAccessor = true;
  2014. Tok = Tokens->getNextToken();
  2015. continue;
  2016. }
  2017. if (Tok->isNot(tok::r_brace))
  2018. IsTrivialPropertyAccessor = false;
  2019. break;
  2020. }
  2021. if (!HasSpecialAccessor) {
  2022. Tokens->setPosition(StoredPosition);
  2023. return false;
  2024. }
  2025. // Try to parse the property accessor:
  2026. // https://docs.microsoft.com/en-us/dotnet/csharp/programming-guide/classes-and-structs/properties
  2027. Tokens->setPosition(StoredPosition);
  2028. if (!IsTrivialPropertyAccessor && Style.BraceWrapping.AfterFunction)
  2029. addUnwrappedLine();
  2030. nextToken();
  2031. do {
  2032. switch (FormatTok->Tok.getKind()) {
  2033. case tok::r_brace:
  2034. nextToken();
  2035. if (FormatTok->is(tok::equal)) {
  2036. while (!eof() && FormatTok->isNot(tok::semi))
  2037. nextToken();
  2038. nextToken();
  2039. }
  2040. addUnwrappedLine();
  2041. return true;
  2042. case tok::l_brace:
  2043. ++Line->Level;
  2044. parseBlock(/*MustBeDeclaration=*/true);
  2045. addUnwrappedLine();
  2046. --Line->Level;
  2047. break;
  2048. case tok::equal:
  2049. if (FormatTok->is(TT_FatArrow)) {
  2050. ++Line->Level;
  2051. do {
  2052. nextToken();
  2053. } while (!eof() && FormatTok->isNot(tok::semi));
  2054. nextToken();
  2055. addUnwrappedLine();
  2056. --Line->Level;
  2057. break;
  2058. }
  2059. nextToken();
  2060. break;
  2061. default:
  2062. if (FormatTok->isOneOf(Keywords.kw_get, Keywords.kw_init,
  2063. Keywords.kw_set) &&
  2064. !IsTrivialPropertyAccessor) {
  2065. // Non-trivial get/set needs to be on its own line.
  2066. addUnwrappedLine();
  2067. }
  2068. nextToken();
  2069. }
  2070. } while (!eof());
  2071. // Unreachable for well-formed code (paired '{' and '}').
  2072. return true;
  2073. }
  2074. bool UnwrappedLineParser::tryToParseLambda() {
  2075. assert(FormatTok->is(tok::l_square));
  2076. if (!Style.isCpp()) {
  2077. nextToken();
  2078. return false;
  2079. }
  2080. FormatToken &LSquare = *FormatTok;
  2081. if (!tryToParseLambdaIntroducer())
  2082. return false;
  2083. bool SeenArrow = false;
  2084. bool InTemplateParameterList = false;
  2085. while (FormatTok->isNot(tok::l_brace)) {
  2086. if (FormatTok->isSimpleTypeSpecifier()) {
  2087. nextToken();
  2088. continue;
  2089. }
  2090. switch (FormatTok->Tok.getKind()) {
  2091. case tok::l_brace:
  2092. break;
  2093. case tok::l_paren:
  2094. parseParens();
  2095. break;
  2096. case tok::l_square:
  2097. parseSquare();
  2098. break;
  2099. case tok::less:
  2100. assert(FormatTok->Previous);
  2101. if (FormatTok->Previous->is(tok::r_square))
  2102. InTemplateParameterList = true;
  2103. nextToken();
  2104. break;
  2105. case tok::kw_auto:
  2106. case tok::kw_class:
  2107. case tok::kw_template:
  2108. case tok::kw_typename:
  2109. case tok::amp:
  2110. case tok::star:
  2111. case tok::kw_const:
  2112. case tok::kw_constexpr:
  2113. case tok::kw_consteval:
  2114. case tok::comma:
  2115. case tok::greater:
  2116. case tok::identifier:
  2117. case tok::numeric_constant:
  2118. case tok::coloncolon:
  2119. case tok::kw_mutable:
  2120. case tok::kw_noexcept:
  2121. case tok::kw_static:
  2122. nextToken();
  2123. break;
  2124. // Specialization of a template with an integer parameter can contain
  2125. // arithmetic, logical, comparison and ternary operators.
  2126. //
  2127. // FIXME: This also accepts sequences of operators that are not in the scope
  2128. // of a template argument list.
  2129. //
  2130. // In a C++ lambda a template type can only occur after an arrow. We use
  2131. // this as an heuristic to distinguish between Objective-C expressions
  2132. // followed by an `a->b` expression, such as:
  2133. // ([obj func:arg] + a->b)
  2134. // Otherwise the code below would parse as a lambda.
  2135. //
  2136. // FIXME: This heuristic is incorrect for C++20 generic lambdas with
  2137. // explicit template lists: []<bool b = true && false>(U &&u){}
  2138. case tok::plus:
  2139. case tok::minus:
  2140. case tok::exclaim:
  2141. case tok::tilde:
  2142. case tok::slash:
  2143. case tok::percent:
  2144. case tok::lessless:
  2145. case tok::pipe:
  2146. case tok::pipepipe:
  2147. case tok::ampamp:
  2148. case tok::caret:
  2149. case tok::equalequal:
  2150. case tok::exclaimequal:
  2151. case tok::greaterequal:
  2152. case tok::lessequal:
  2153. case tok::question:
  2154. case tok::colon:
  2155. case tok::ellipsis:
  2156. case tok::kw_true:
  2157. case tok::kw_false:
  2158. if (SeenArrow || InTemplateParameterList) {
  2159. nextToken();
  2160. break;
  2161. }
  2162. return true;
  2163. case tok::arrow:
  2164. // This might or might not actually be a lambda arrow (this could be an
  2165. // ObjC method invocation followed by a dereferencing arrow). We might
  2166. // reset this back to TT_Unknown in TokenAnnotator.
  2167. FormatTok->setFinalizedType(TT_LambdaArrow);
  2168. SeenArrow = true;
  2169. nextToken();
  2170. break;
  2171. default:
  2172. return true;
  2173. }
  2174. }
  2175. FormatTok->setFinalizedType(TT_LambdaLBrace);
  2176. LSquare.setFinalizedType(TT_LambdaLSquare);
  2177. parseChildBlock();
  2178. return true;
  2179. }
  2180. bool UnwrappedLineParser::tryToParseLambdaIntroducer() {
  2181. const FormatToken *Previous = FormatTok->Previous;
  2182. const FormatToken *LeftSquare = FormatTok;
  2183. nextToken();
  2184. if (Previous &&
  2185. (Previous->isOneOf(tok::identifier, tok::kw_operator, tok::kw_new,
  2186. tok::kw_delete, tok::l_square) ||
  2187. LeftSquare->isCppStructuredBinding(Style) || Previous->closesScope() ||
  2188. Previous->isSimpleTypeSpecifier())) {
  2189. return false;
  2190. }
  2191. if (FormatTok->is(tok::l_square))
  2192. return false;
  2193. if (FormatTok->is(tok::r_square)) {
  2194. const FormatToken *Next = Tokens->peekNextToken(/*SkipComment=*/true);
  2195. if (Next->is(tok::greater))
  2196. return false;
  2197. }
  2198. parseSquare(/*LambdaIntroducer=*/true);
  2199. return true;
  2200. }
  2201. void UnwrappedLineParser::tryToParseJSFunction() {
  2202. assert(FormatTok->is(Keywords.kw_function) ||
  2203. FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function));
  2204. if (FormatTok->is(Keywords.kw_async))
  2205. nextToken();
  2206. // Consume "function".
  2207. nextToken();
  2208. // Consume * (generator function). Treat it like C++'s overloaded operators.
  2209. if (FormatTok->is(tok::star)) {
  2210. FormatTok->setFinalizedType(TT_OverloadedOperator);
  2211. nextToken();
  2212. }
  2213. // Consume function name.
  2214. if (FormatTok->is(tok::identifier))
  2215. nextToken();
  2216. if (FormatTok->isNot(tok::l_paren))
  2217. return;
  2218. // Parse formal parameter list.
  2219. parseParens();
  2220. if (FormatTok->is(tok::colon)) {
  2221. // Parse a type definition.
  2222. nextToken();
  2223. // Eat the type declaration. For braced inline object types, balance braces,
  2224. // otherwise just parse until finding an l_brace for the function body.
  2225. if (FormatTok->is(tok::l_brace))
  2226. tryToParseBracedList();
  2227. else
  2228. while (!FormatTok->isOneOf(tok::l_brace, tok::semi) && !eof())
  2229. nextToken();
  2230. }
  2231. if (FormatTok->is(tok::semi))
  2232. return;
  2233. parseChildBlock();
  2234. }
  2235. bool UnwrappedLineParser::tryToParseBracedList() {
  2236. if (FormatTok->is(BK_Unknown))
  2237. calculateBraceTypes();
  2238. assert(FormatTok->isNot(BK_Unknown));
  2239. if (FormatTok->is(BK_Block))
  2240. return false;
  2241. nextToken();
  2242. parseBracedList();
  2243. return true;
  2244. }
  2245. bool UnwrappedLineParser::tryToParseChildBlock() {
  2246. assert(Style.isJavaScript() || Style.isCSharp());
  2247. assert(FormatTok->is(TT_FatArrow));
  2248. // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType TT_FatArrow.
  2249. // They always start an expression or a child block if followed by a curly
  2250. // brace.
  2251. nextToken();
  2252. if (FormatTok->isNot(tok::l_brace))
  2253. return false;
  2254. parseChildBlock();
  2255. return true;
  2256. }
  2257. bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons,
  2258. bool IsEnum,
  2259. tok::TokenKind ClosingBraceKind) {
  2260. bool HasError = false;
  2261. // FIXME: Once we have an expression parser in the UnwrappedLineParser,
  2262. // replace this by using parseAssignmentExpression() inside.
  2263. do {
  2264. if (Style.isCSharp() && FormatTok->is(TT_FatArrow) &&
  2265. tryToParseChildBlock()) {
  2266. continue;
  2267. }
  2268. if (Style.isJavaScript()) {
  2269. if (FormatTok->is(Keywords.kw_function) ||
  2270. FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)) {
  2271. tryToParseJSFunction();
  2272. continue;
  2273. }
  2274. if (FormatTok->is(tok::l_brace)) {
  2275. // Could be a method inside of a braced list `{a() { return 1; }}`.
  2276. if (tryToParseBracedList())
  2277. continue;
  2278. parseChildBlock();
  2279. }
  2280. }
  2281. if (FormatTok->Tok.getKind() == ClosingBraceKind) {
  2282. if (IsEnum && !Style.AllowShortEnumsOnASingleLine)
  2283. addUnwrappedLine();
  2284. nextToken();
  2285. return !HasError;
  2286. }
  2287. switch (FormatTok->Tok.getKind()) {
  2288. case tok::l_square:
  2289. if (Style.isCSharp())
  2290. parseSquare();
  2291. else
  2292. tryToParseLambda();
  2293. break;
  2294. case tok::l_paren:
  2295. parseParens();
  2296. // JavaScript can just have free standing methods and getters/setters in
  2297. // object literals. Detect them by a "{" following ")".
  2298. if (Style.isJavaScript()) {
  2299. if (FormatTok->is(tok::l_brace))
  2300. parseChildBlock();
  2301. break;
  2302. }
  2303. break;
  2304. case tok::l_brace:
  2305. // Assume there are no blocks inside a braced init list apart
  2306. // from the ones we explicitly parse out (like lambdas).
  2307. FormatTok->setBlockKind(BK_BracedInit);
  2308. nextToken();
  2309. parseBracedList();
  2310. break;
  2311. case tok::less:
  2312. if (Style.Language == FormatStyle::LK_Proto ||
  2313. ClosingBraceKind == tok::greater) {
  2314. nextToken();
  2315. parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
  2316. /*ClosingBraceKind=*/tok::greater);
  2317. } else {
  2318. nextToken();
  2319. }
  2320. break;
  2321. case tok::semi:
  2322. // JavaScript (or more precisely TypeScript) can have semicolons in braced
  2323. // lists (in so-called TypeMemberLists). Thus, the semicolon cannot be
  2324. // used for error recovery if we have otherwise determined that this is
  2325. // a braced list.
  2326. if (Style.isJavaScript()) {
  2327. nextToken();
  2328. break;
  2329. }
  2330. HasError = true;
  2331. if (!ContinueOnSemicolons)
  2332. return !HasError;
  2333. nextToken();
  2334. break;
  2335. case tok::comma:
  2336. nextToken();
  2337. if (IsEnum && !Style.AllowShortEnumsOnASingleLine)
  2338. addUnwrappedLine();
  2339. break;
  2340. default:
  2341. nextToken();
  2342. break;
  2343. }
  2344. } while (!eof());
  2345. return false;
  2346. }
  2347. /// \brief Parses a pair of parentheses (and everything between them).
  2348. /// \param AmpAmpTokenType If different than TT_Unknown sets this type for all
  2349. /// double ampersands. This only counts for the current parens scope.
  2350. void UnwrappedLineParser::parseParens(TokenType AmpAmpTokenType) {
  2351. assert(FormatTok->is(tok::l_paren) && "'(' expected.");
  2352. nextToken();
  2353. do {
  2354. switch (FormatTok->Tok.getKind()) {
  2355. case tok::l_paren:
  2356. parseParens();
  2357. if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_brace))
  2358. parseChildBlock();
  2359. break;
  2360. case tok::r_paren:
  2361. nextToken();
  2362. return;
  2363. case tok::r_brace:
  2364. // A "}" inside parenthesis is an error if there wasn't a matching "{".
  2365. return;
  2366. case tok::l_square:
  2367. tryToParseLambda();
  2368. break;
  2369. case tok::l_brace:
  2370. if (!tryToParseBracedList())
  2371. parseChildBlock();
  2372. break;
  2373. case tok::at:
  2374. nextToken();
  2375. if (FormatTok->is(tok::l_brace)) {
  2376. nextToken();
  2377. parseBracedList();
  2378. }
  2379. break;
  2380. case tok::equal:
  2381. if (Style.isCSharp() && FormatTok->is(TT_FatArrow))
  2382. tryToParseChildBlock();
  2383. else
  2384. nextToken();
  2385. break;
  2386. case tok::kw_class:
  2387. if (Style.isJavaScript())
  2388. parseRecord(/*ParseAsExpr=*/true);
  2389. else
  2390. nextToken();
  2391. break;
  2392. case tok::identifier:
  2393. if (Style.isJavaScript() &&
  2394. (FormatTok->is(Keywords.kw_function) ||
  2395. FormatTok->startsSequence(Keywords.kw_async,
  2396. Keywords.kw_function))) {
  2397. tryToParseJSFunction();
  2398. } else {
  2399. nextToken();
  2400. }
  2401. break;
  2402. case tok::kw_requires: {
  2403. auto RequiresToken = FormatTok;
  2404. nextToken();
  2405. parseRequiresExpression(RequiresToken);
  2406. break;
  2407. }
  2408. case tok::ampamp:
  2409. if (AmpAmpTokenType != TT_Unknown)
  2410. FormatTok->setFinalizedType(AmpAmpTokenType);
  2411. [[fallthrough]];
  2412. default:
  2413. nextToken();
  2414. break;
  2415. }
  2416. } while (!eof());
  2417. }
  2418. void UnwrappedLineParser::parseSquare(bool LambdaIntroducer) {
  2419. if (!LambdaIntroducer) {
  2420. assert(FormatTok->is(tok::l_square) && "'[' expected.");
  2421. if (tryToParseLambda())
  2422. return;
  2423. }
  2424. do {
  2425. switch (FormatTok->Tok.getKind()) {
  2426. case tok::l_paren:
  2427. parseParens();
  2428. break;
  2429. case tok::r_square:
  2430. nextToken();
  2431. return;
  2432. case tok::r_brace:
  2433. // A "}" inside parenthesis is an error if there wasn't a matching "{".
  2434. return;
  2435. case tok::l_square:
  2436. parseSquare();
  2437. break;
  2438. case tok::l_brace: {
  2439. if (!tryToParseBracedList())
  2440. parseChildBlock();
  2441. break;
  2442. }
  2443. case tok::at:
  2444. nextToken();
  2445. if (FormatTok->is(tok::l_brace)) {
  2446. nextToken();
  2447. parseBracedList();
  2448. }
  2449. break;
  2450. default:
  2451. nextToken();
  2452. break;
  2453. }
  2454. } while (!eof());
  2455. }
  2456. void UnwrappedLineParser::keepAncestorBraces() {
  2457. if (!Style.RemoveBracesLLVM)
  2458. return;
  2459. const int MaxNestingLevels = 2;
  2460. const int Size = NestedTooDeep.size();
  2461. if (Size >= MaxNestingLevels)
  2462. NestedTooDeep[Size - MaxNestingLevels] = true;
  2463. NestedTooDeep.push_back(false);
  2464. }
  2465. static FormatToken *getLastNonComment(const UnwrappedLine &Line) {
  2466. for (const auto &Token : llvm::reverse(Line.Tokens))
  2467. if (Token.Tok->isNot(tok::comment))
  2468. return Token.Tok;
  2469. return nullptr;
  2470. }
  2471. void UnwrappedLineParser::parseUnbracedBody(bool CheckEOF) {
  2472. FormatToken *Tok = nullptr;
  2473. if (Style.InsertBraces && !Line->InPPDirective && !Line->Tokens.empty() &&
  2474. PreprocessorDirectives.empty() && FormatTok->isNot(tok::semi)) {
  2475. Tok = Style.BraceWrapping.AfterControlStatement == FormatStyle::BWACS_Never
  2476. ? getLastNonComment(*Line)
  2477. : Line->Tokens.back().Tok;
  2478. assert(Tok);
  2479. if (Tok->BraceCount < 0) {
  2480. assert(Tok->BraceCount == -1);
  2481. Tok = nullptr;
  2482. } else {
  2483. Tok->BraceCount = -1;
  2484. }
  2485. }
  2486. addUnwrappedLine();
  2487. ++Line->Level;
  2488. parseStructuralElement();
  2489. if (Tok) {
  2490. assert(!Line->InPPDirective);
  2491. Tok = nullptr;
  2492. for (const auto &L : llvm::reverse(*CurrentLines)) {
  2493. if (!L.InPPDirective && getLastNonComment(L)) {
  2494. Tok = L.Tokens.back().Tok;
  2495. break;
  2496. }
  2497. }
  2498. assert(Tok);
  2499. ++Tok->BraceCount;
  2500. }
  2501. if (CheckEOF && eof())
  2502. addUnwrappedLine();
  2503. --Line->Level;
  2504. }
  2505. static void markOptionalBraces(FormatToken *LeftBrace) {
  2506. if (!LeftBrace)
  2507. return;
  2508. assert(LeftBrace->is(tok::l_brace));
  2509. FormatToken *RightBrace = LeftBrace->MatchingParen;
  2510. if (!RightBrace) {
  2511. assert(!LeftBrace->Optional);
  2512. return;
  2513. }
  2514. assert(RightBrace->is(tok::r_brace));
  2515. assert(RightBrace->MatchingParen == LeftBrace);
  2516. assert(LeftBrace->Optional == RightBrace->Optional);
  2517. LeftBrace->Optional = true;
  2518. RightBrace->Optional = true;
  2519. }
  2520. void UnwrappedLineParser::handleAttributes() {
  2521. // Handle AttributeMacro, e.g. `if (x) UNLIKELY`.
  2522. if (FormatTok->is(TT_AttributeMacro))
  2523. nextToken();
  2524. handleCppAttributes();
  2525. }
  2526. bool UnwrappedLineParser::handleCppAttributes() {
  2527. // Handle [[likely]] / [[unlikely]] attributes.
  2528. if (FormatTok->is(tok::l_square) && tryToParseSimpleAttribute()) {
  2529. parseSquare();
  2530. return true;
  2531. }
  2532. return false;
  2533. }
  2534. /// Returns whether \c Tok begins a block.
  2535. bool UnwrappedLineParser::isBlockBegin(const FormatToken &Tok) const {
  2536. // FIXME: rename the function or make
  2537. // Tok.isOneOf(tok::l_brace, TT_MacroBlockBegin) work.
  2538. return Style.isVerilog() ? Keywords.isVerilogBegin(Tok)
  2539. : Tok.is(tok::l_brace);
  2540. }
  2541. FormatToken *UnwrappedLineParser::parseIfThenElse(IfStmtKind *IfKind,
  2542. bool KeepBraces) {
  2543. assert(FormatTok->is(tok::kw_if) && "'if' expected");
  2544. nextToken();
  2545. if (FormatTok->is(tok::exclaim))
  2546. nextToken();
  2547. bool KeepIfBraces = true;
  2548. if (FormatTok->is(tok::kw_consteval)) {
  2549. nextToken();
  2550. } else {
  2551. KeepIfBraces = !Style.RemoveBracesLLVM || KeepBraces;
  2552. if (FormatTok->isOneOf(tok::kw_constexpr, tok::identifier))
  2553. nextToken();
  2554. if (FormatTok->is(tok::l_paren))
  2555. parseParens();
  2556. }
  2557. handleAttributes();
  2558. bool NeedsUnwrappedLine = false;
  2559. keepAncestorBraces();
  2560. FormatToken *IfLeftBrace = nullptr;
  2561. IfStmtKind IfBlockKind = IfStmtKind::NotIf;
  2562. if (isBlockBegin(*FormatTok)) {
  2563. FormatTok->setFinalizedType(TT_ControlStatementLBrace);
  2564. IfLeftBrace = FormatTok;
  2565. CompoundStatementIndenter Indenter(this, Style, Line->Level);
  2566. parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
  2567. /*MunchSemi=*/true, KeepIfBraces, &IfBlockKind);
  2568. if (Style.BraceWrapping.BeforeElse)
  2569. addUnwrappedLine();
  2570. else
  2571. NeedsUnwrappedLine = true;
  2572. } else {
  2573. parseUnbracedBody();
  2574. }
  2575. if (Style.RemoveBracesLLVM) {
  2576. assert(!NestedTooDeep.empty());
  2577. KeepIfBraces = KeepIfBraces ||
  2578. (IfLeftBrace && !IfLeftBrace->MatchingParen) ||
  2579. NestedTooDeep.back() || IfBlockKind == IfStmtKind::IfOnly ||
  2580. IfBlockKind == IfStmtKind::IfElseIf;
  2581. }
  2582. bool KeepElseBraces = KeepIfBraces;
  2583. FormatToken *ElseLeftBrace = nullptr;
  2584. IfStmtKind Kind = IfStmtKind::IfOnly;
  2585. if (FormatTok->is(tok::kw_else)) {
  2586. if (Style.RemoveBracesLLVM) {
  2587. NestedTooDeep.back() = false;
  2588. Kind = IfStmtKind::IfElse;
  2589. }
  2590. nextToken();
  2591. handleAttributes();
  2592. if (isBlockBegin(*FormatTok)) {
  2593. const bool FollowedByIf = Tokens->peekNextToken()->is(tok::kw_if);
  2594. FormatTok->setFinalizedType(TT_ElseLBrace);
  2595. ElseLeftBrace = FormatTok;
  2596. CompoundStatementIndenter Indenter(this, Style, Line->Level);
  2597. IfStmtKind ElseBlockKind = IfStmtKind::NotIf;
  2598. FormatToken *IfLBrace =
  2599. parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
  2600. /*MunchSemi=*/true, KeepElseBraces, &ElseBlockKind);
  2601. if (FormatTok->is(tok::kw_else)) {
  2602. KeepElseBraces = KeepElseBraces ||
  2603. ElseBlockKind == IfStmtKind::IfOnly ||
  2604. ElseBlockKind == IfStmtKind::IfElseIf;
  2605. } else if (FollowedByIf && IfLBrace && !IfLBrace->Optional) {
  2606. KeepElseBraces = true;
  2607. assert(ElseLeftBrace->MatchingParen);
  2608. markOptionalBraces(ElseLeftBrace);
  2609. }
  2610. addUnwrappedLine();
  2611. } else if (FormatTok->is(tok::kw_if)) {
  2612. const FormatToken *Previous = Tokens->getPreviousToken();
  2613. assert(Previous);
  2614. const bool IsPrecededByComment = Previous->is(tok::comment);
  2615. if (IsPrecededByComment) {
  2616. addUnwrappedLine();
  2617. ++Line->Level;
  2618. }
  2619. bool TooDeep = true;
  2620. if (Style.RemoveBracesLLVM) {
  2621. Kind = IfStmtKind::IfElseIf;
  2622. TooDeep = NestedTooDeep.pop_back_val();
  2623. }
  2624. ElseLeftBrace = parseIfThenElse(/*IfKind=*/nullptr, KeepIfBraces);
  2625. if (Style.RemoveBracesLLVM)
  2626. NestedTooDeep.push_back(TooDeep);
  2627. if (IsPrecededByComment)
  2628. --Line->Level;
  2629. } else {
  2630. parseUnbracedBody(/*CheckEOF=*/true);
  2631. }
  2632. } else {
  2633. KeepIfBraces = KeepIfBraces || IfBlockKind == IfStmtKind::IfElse;
  2634. if (NeedsUnwrappedLine)
  2635. addUnwrappedLine();
  2636. }
  2637. if (!Style.RemoveBracesLLVM)
  2638. return nullptr;
  2639. assert(!NestedTooDeep.empty());
  2640. KeepElseBraces = KeepElseBraces ||
  2641. (ElseLeftBrace && !ElseLeftBrace->MatchingParen) ||
  2642. NestedTooDeep.back();
  2643. NestedTooDeep.pop_back();
  2644. if (!KeepIfBraces && !KeepElseBraces) {
  2645. markOptionalBraces(IfLeftBrace);
  2646. markOptionalBraces(ElseLeftBrace);
  2647. } else if (IfLeftBrace) {
  2648. FormatToken *IfRightBrace = IfLeftBrace->MatchingParen;
  2649. if (IfRightBrace) {
  2650. assert(IfRightBrace->MatchingParen == IfLeftBrace);
  2651. assert(!IfLeftBrace->Optional);
  2652. assert(!IfRightBrace->Optional);
  2653. IfLeftBrace->MatchingParen = nullptr;
  2654. IfRightBrace->MatchingParen = nullptr;
  2655. }
  2656. }
  2657. if (IfKind)
  2658. *IfKind = Kind;
  2659. return IfLeftBrace;
  2660. }
  2661. void UnwrappedLineParser::parseTryCatch() {
  2662. assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected");
  2663. nextToken();
  2664. bool NeedsUnwrappedLine = false;
  2665. if (FormatTok->is(tok::colon)) {
  2666. // We are in a function try block, what comes is an initializer list.
  2667. nextToken();
  2668. // In case identifiers were removed by clang-tidy, what might follow is
  2669. // multiple commas in sequence - before the first identifier.
  2670. while (FormatTok->is(tok::comma))
  2671. nextToken();
  2672. while (FormatTok->is(tok::identifier)) {
  2673. nextToken();
  2674. if (FormatTok->is(tok::l_paren))
  2675. parseParens();
  2676. if (FormatTok->Previous && FormatTok->Previous->is(tok::identifier) &&
  2677. FormatTok->is(tok::l_brace)) {
  2678. do {
  2679. nextToken();
  2680. } while (!FormatTok->is(tok::r_brace));
  2681. nextToken();
  2682. }
  2683. // In case identifiers were removed by clang-tidy, what might follow is
  2684. // multiple commas in sequence - after the first identifier.
  2685. while (FormatTok->is(tok::comma))
  2686. nextToken();
  2687. }
  2688. }
  2689. // Parse try with resource.
  2690. if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_paren))
  2691. parseParens();
  2692. keepAncestorBraces();
  2693. if (FormatTok->is(tok::l_brace)) {
  2694. CompoundStatementIndenter Indenter(this, Style, Line->Level);
  2695. parseBlock();
  2696. if (Style.BraceWrapping.BeforeCatch)
  2697. addUnwrappedLine();
  2698. else
  2699. NeedsUnwrappedLine = true;
  2700. } else if (!FormatTok->is(tok::kw_catch)) {
  2701. // The C++ standard requires a compound-statement after a try.
  2702. // If there's none, we try to assume there's a structuralElement
  2703. // and try to continue.
  2704. addUnwrappedLine();
  2705. ++Line->Level;
  2706. parseStructuralElement();
  2707. --Line->Level;
  2708. }
  2709. while (true) {
  2710. if (FormatTok->is(tok::at))
  2711. nextToken();
  2712. if (!(FormatTok->isOneOf(tok::kw_catch, Keywords.kw___except,
  2713. tok::kw___finally) ||
  2714. ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) &&
  2715. FormatTok->is(Keywords.kw_finally)) ||
  2716. (FormatTok->isObjCAtKeyword(tok::objc_catch) ||
  2717. FormatTok->isObjCAtKeyword(tok::objc_finally)))) {
  2718. break;
  2719. }
  2720. nextToken();
  2721. while (FormatTok->isNot(tok::l_brace)) {
  2722. if (FormatTok->is(tok::l_paren)) {
  2723. parseParens();
  2724. continue;
  2725. }
  2726. if (FormatTok->isOneOf(tok::semi, tok::r_brace, tok::eof)) {
  2727. if (Style.RemoveBracesLLVM)
  2728. NestedTooDeep.pop_back();
  2729. return;
  2730. }
  2731. nextToken();
  2732. }
  2733. NeedsUnwrappedLine = false;
  2734. Line->MustBeDeclaration = false;
  2735. CompoundStatementIndenter Indenter(this, Style, Line->Level);
  2736. parseBlock();
  2737. if (Style.BraceWrapping.BeforeCatch)
  2738. addUnwrappedLine();
  2739. else
  2740. NeedsUnwrappedLine = true;
  2741. }
  2742. if (Style.RemoveBracesLLVM)
  2743. NestedTooDeep.pop_back();
  2744. if (NeedsUnwrappedLine)
  2745. addUnwrappedLine();
  2746. }
  2747. void UnwrappedLineParser::parseNamespace() {
  2748. assert(FormatTok->isOneOf(tok::kw_namespace, TT_NamespaceMacro) &&
  2749. "'namespace' expected");
  2750. const FormatToken &InitialToken = *FormatTok;
  2751. nextToken();
  2752. if (InitialToken.is(TT_NamespaceMacro)) {
  2753. parseParens();
  2754. } else {
  2755. while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::kw_inline,
  2756. tok::l_square, tok::period, tok::l_paren) ||
  2757. (Style.isCSharp() && FormatTok->is(tok::kw_union))) {
  2758. if (FormatTok->is(tok::l_square))
  2759. parseSquare();
  2760. else if (FormatTok->is(tok::l_paren))
  2761. parseParens();
  2762. else
  2763. nextToken();
  2764. }
  2765. }
  2766. if (FormatTok->is(tok::l_brace)) {
  2767. if (ShouldBreakBeforeBrace(Style, InitialToken))
  2768. addUnwrappedLine();
  2769. unsigned AddLevels =
  2770. Style.NamespaceIndentation == FormatStyle::NI_All ||
  2771. (Style.NamespaceIndentation == FormatStyle::NI_Inner &&
  2772. DeclarationScopeStack.size() > 1)
  2773. ? 1u
  2774. : 0u;
  2775. bool ManageWhitesmithsBraces =
  2776. AddLevels == 0u &&
  2777. Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths;
  2778. // If we're in Whitesmiths mode, indent the brace if we're not indenting
  2779. // the whole block.
  2780. if (ManageWhitesmithsBraces)
  2781. ++Line->Level;
  2782. parseBlock(/*MustBeDeclaration=*/true, AddLevels, /*MunchSemi=*/true,
  2783. /*KeepBraces=*/true, /*IfKind=*/nullptr,
  2784. ManageWhitesmithsBraces);
  2785. // Munch the semicolon after a namespace. This is more common than one would
  2786. // think. Putting the semicolon into its own line is very ugly.
  2787. if (FormatTok->is(tok::semi))
  2788. nextToken();
  2789. addUnwrappedLine(AddLevels > 0 ? LineLevel::Remove : LineLevel::Keep);
  2790. if (ManageWhitesmithsBraces)
  2791. --Line->Level;
  2792. }
  2793. // FIXME: Add error handling.
  2794. }
  2795. void UnwrappedLineParser::parseNew() {
  2796. assert(FormatTok->is(tok::kw_new) && "'new' expected");
  2797. nextToken();
  2798. if (Style.isCSharp()) {
  2799. do {
  2800. // Handle constructor invocation, e.g. `new(field: value)`.
  2801. if (FormatTok->is(tok::l_paren))
  2802. parseParens();
  2803. // Handle array initialization syntax, e.g. `new[] {10, 20, 30}`.
  2804. if (FormatTok->is(tok::l_brace))
  2805. parseBracedList();
  2806. if (FormatTok->isOneOf(tok::semi, tok::comma))
  2807. return;
  2808. nextToken();
  2809. } while (!eof());
  2810. }
  2811. if (Style.Language != FormatStyle::LK_Java)
  2812. return;
  2813. // In Java, we can parse everything up to the parens, which aren't optional.
  2814. do {
  2815. // There should not be a ;, { or } before the new's open paren.
  2816. if (FormatTok->isOneOf(tok::semi, tok::l_brace, tok::r_brace))
  2817. return;
  2818. // Consume the parens.
  2819. if (FormatTok->is(tok::l_paren)) {
  2820. parseParens();
  2821. // If there is a class body of an anonymous class, consume that as child.
  2822. if (FormatTok->is(tok::l_brace))
  2823. parseChildBlock();
  2824. return;
  2825. }
  2826. nextToken();
  2827. } while (!eof());
  2828. }
  2829. void UnwrappedLineParser::parseLoopBody(bool KeepBraces, bool WrapRightBrace) {
  2830. keepAncestorBraces();
  2831. if (isBlockBegin(*FormatTok)) {
  2832. if (!KeepBraces)
  2833. FormatTok->setFinalizedType(TT_ControlStatementLBrace);
  2834. FormatToken *LeftBrace = FormatTok;
  2835. CompoundStatementIndenter Indenter(this, Style, Line->Level);
  2836. parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
  2837. /*MunchSemi=*/true, KeepBraces);
  2838. if (!KeepBraces) {
  2839. assert(!NestedTooDeep.empty());
  2840. if (!NestedTooDeep.back())
  2841. markOptionalBraces(LeftBrace);
  2842. }
  2843. if (WrapRightBrace)
  2844. addUnwrappedLine();
  2845. } else {
  2846. parseUnbracedBody();
  2847. }
  2848. if (!KeepBraces)
  2849. NestedTooDeep.pop_back();
  2850. }
  2851. void UnwrappedLineParser::parseForOrWhileLoop() {
  2852. assert(FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) &&
  2853. "'for', 'while' or foreach macro expected");
  2854. const bool KeepBraces = !Style.RemoveBracesLLVM ||
  2855. !FormatTok->isOneOf(tok::kw_for, tok::kw_while);
  2856. nextToken();
  2857. // JS' for await ( ...
  2858. if (Style.isJavaScript() && FormatTok->is(Keywords.kw_await))
  2859. nextToken();
  2860. if (Style.isCpp() && FormatTok->is(tok::kw_co_await))
  2861. nextToken();
  2862. if (FormatTok->is(tok::l_paren))
  2863. parseParens();
  2864. handleAttributes();
  2865. parseLoopBody(KeepBraces, /*WrapRightBrace=*/true);
  2866. }
  2867. void UnwrappedLineParser::parseDoWhile() {
  2868. assert(FormatTok->is(tok::kw_do) && "'do' expected");
  2869. nextToken();
  2870. parseLoopBody(/*KeepBraces=*/true, Style.BraceWrapping.BeforeWhile);
  2871. // FIXME: Add error handling.
  2872. if (!FormatTok->is(tok::kw_while)) {
  2873. addUnwrappedLine();
  2874. return;
  2875. }
  2876. // If in Whitesmiths mode, the line with the while() needs to be indented
  2877. // to the same level as the block.
  2878. if (Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths)
  2879. ++Line->Level;
  2880. nextToken();
  2881. parseStructuralElement();
  2882. }
  2883. void UnwrappedLineParser::parseLabel(bool LeftAlignLabel) {
  2884. nextToken();
  2885. unsigned OldLineLevel = Line->Level;
  2886. if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0))
  2887. --Line->Level;
  2888. if (LeftAlignLabel)
  2889. Line->Level = 0;
  2890. if (!Style.IndentCaseBlocks && CommentsBeforeNextToken.empty() &&
  2891. FormatTok->is(tok::l_brace)) {
  2892. CompoundStatementIndenter Indenter(this, Line->Level,
  2893. Style.BraceWrapping.AfterCaseLabel,
  2894. Style.BraceWrapping.IndentBraces);
  2895. parseBlock();
  2896. if (FormatTok->is(tok::kw_break)) {
  2897. if (Style.BraceWrapping.AfterControlStatement ==
  2898. FormatStyle::BWACS_Always) {
  2899. addUnwrappedLine();
  2900. if (!Style.IndentCaseBlocks &&
  2901. Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths) {
  2902. ++Line->Level;
  2903. }
  2904. }
  2905. parseStructuralElement();
  2906. }
  2907. addUnwrappedLine();
  2908. } else {
  2909. if (FormatTok->is(tok::semi))
  2910. nextToken();
  2911. addUnwrappedLine();
  2912. }
  2913. Line->Level = OldLineLevel;
  2914. if (FormatTok->isNot(tok::l_brace)) {
  2915. parseStructuralElement();
  2916. addUnwrappedLine();
  2917. }
  2918. }
  2919. void UnwrappedLineParser::parseCaseLabel() {
  2920. assert(FormatTok->is(tok::kw_case) && "'case' expected");
  2921. // FIXME: fix handling of complex expressions here.
  2922. do {
  2923. nextToken();
  2924. } while (!eof() && !FormatTok->is(tok::colon));
  2925. parseLabel();
  2926. }
  2927. void UnwrappedLineParser::parseSwitch() {
  2928. assert(FormatTok->is(tok::kw_switch) && "'switch' expected");
  2929. nextToken();
  2930. if (FormatTok->is(tok::l_paren))
  2931. parseParens();
  2932. keepAncestorBraces();
  2933. if (FormatTok->is(tok::l_brace)) {
  2934. CompoundStatementIndenter Indenter(this, Style, Line->Level);
  2935. parseBlock();
  2936. addUnwrappedLine();
  2937. } else {
  2938. addUnwrappedLine();
  2939. ++Line->Level;
  2940. parseStructuralElement();
  2941. --Line->Level;
  2942. }
  2943. if (Style.RemoveBracesLLVM)
  2944. NestedTooDeep.pop_back();
  2945. }
  2946. // Operators that can follow a C variable.
  2947. static bool isCOperatorFollowingVar(tok::TokenKind kind) {
  2948. switch (kind) {
  2949. case tok::ampamp:
  2950. case tok::ampequal:
  2951. case tok::arrow:
  2952. case tok::caret:
  2953. case tok::caretequal:
  2954. case tok::comma:
  2955. case tok::ellipsis:
  2956. case tok::equal:
  2957. case tok::equalequal:
  2958. case tok::exclaim:
  2959. case tok::exclaimequal:
  2960. case tok::greater:
  2961. case tok::greaterequal:
  2962. case tok::greatergreater:
  2963. case tok::greatergreaterequal:
  2964. case tok::l_paren:
  2965. case tok::l_square:
  2966. case tok::less:
  2967. case tok::lessequal:
  2968. case tok::lessless:
  2969. case tok::lesslessequal:
  2970. case tok::minus:
  2971. case tok::minusequal:
  2972. case tok::minusminus:
  2973. case tok::percent:
  2974. case tok::percentequal:
  2975. case tok::period:
  2976. case tok::pipe:
  2977. case tok::pipeequal:
  2978. case tok::pipepipe:
  2979. case tok::plus:
  2980. case tok::plusequal:
  2981. case tok::plusplus:
  2982. case tok::question:
  2983. case tok::r_brace:
  2984. case tok::r_paren:
  2985. case tok::r_square:
  2986. case tok::semi:
  2987. case tok::slash:
  2988. case tok::slashequal:
  2989. case tok::star:
  2990. case tok::starequal:
  2991. return true;
  2992. default:
  2993. return false;
  2994. }
  2995. }
  2996. void UnwrappedLineParser::parseAccessSpecifier() {
  2997. FormatToken *AccessSpecifierCandidate = FormatTok;
  2998. nextToken();
  2999. // Understand Qt's slots.
  3000. if (FormatTok->isOneOf(Keywords.kw_slots, Keywords.kw_qslots))
  3001. nextToken();
  3002. // Otherwise, we don't know what it is, and we'd better keep the next token.
  3003. if (FormatTok->is(tok::colon)) {
  3004. nextToken();
  3005. addUnwrappedLine();
  3006. } else if (!FormatTok->is(tok::coloncolon) &&
  3007. !isCOperatorFollowingVar(FormatTok->Tok.getKind())) {
  3008. // Not a variable name nor namespace name.
  3009. addUnwrappedLine();
  3010. } else if (AccessSpecifierCandidate) {
  3011. // Consider the access specifier to be a C identifier.
  3012. AccessSpecifierCandidate->Tok.setKind(tok::identifier);
  3013. }
  3014. }
  3015. /// \brief Parses a requires, decides if it is a clause or an expression.
  3016. /// \pre The current token has to be the requires keyword.
  3017. /// \returns true if it parsed a clause.
  3018. bool clang::format::UnwrappedLineParser::parseRequires() {
  3019. assert(FormatTok->is(tok::kw_requires) && "'requires' expected");
  3020. auto RequiresToken = FormatTok;
  3021. // We try to guess if it is a requires clause, or a requires expression. For
  3022. // that we first consume the keyword and check the next token.
  3023. nextToken();
  3024. switch (FormatTok->Tok.getKind()) {
  3025. case tok::l_brace:
  3026. // This can only be an expression, never a clause.
  3027. parseRequiresExpression(RequiresToken);
  3028. return false;
  3029. case tok::l_paren:
  3030. // Clauses and expression can start with a paren, it's unclear what we have.
  3031. break;
  3032. default:
  3033. // All other tokens can only be a clause.
  3034. parseRequiresClause(RequiresToken);
  3035. return true;
  3036. }
  3037. // Looking forward we would have to decide if there are function declaration
  3038. // like arguments to the requires expression:
  3039. // requires (T t) {
  3040. // Or there is a constraint expression for the requires clause:
  3041. // requires (C<T> && ...
  3042. // But first let's look behind.
  3043. auto *PreviousNonComment = RequiresToken->getPreviousNonComment();
  3044. if (!PreviousNonComment ||
  3045. PreviousNonComment->is(TT_RequiresExpressionLBrace)) {
  3046. // If there is no token, or an expression left brace, we are a requires
  3047. // clause within a requires expression.
  3048. parseRequiresClause(RequiresToken);
  3049. return true;
  3050. }
  3051. switch (PreviousNonComment->Tok.getKind()) {
  3052. case tok::greater:
  3053. case tok::r_paren:
  3054. case tok::kw_noexcept:
  3055. case tok::kw_const:
  3056. // This is a requires clause.
  3057. parseRequiresClause(RequiresToken);
  3058. return true;
  3059. case tok::amp:
  3060. case tok::ampamp: {
  3061. // This can be either:
  3062. // if (... && requires (T t) ...)
  3063. // Or
  3064. // void member(...) && requires (C<T> ...
  3065. // We check the one token before that for a const:
  3066. // void member(...) const && requires (C<T> ...
  3067. auto PrevPrev = PreviousNonComment->getPreviousNonComment();
  3068. if (PrevPrev && PrevPrev->is(tok::kw_const)) {
  3069. parseRequiresClause(RequiresToken);
  3070. return true;
  3071. }
  3072. break;
  3073. }
  3074. default:
  3075. if (PreviousNonComment->isTypeOrIdentifier()) {
  3076. // This is a requires clause.
  3077. parseRequiresClause(RequiresToken);
  3078. return true;
  3079. }
  3080. // It's an expression.
  3081. parseRequiresExpression(RequiresToken);
  3082. return false;
  3083. }
  3084. // Now we look forward and try to check if the paren content is a parameter
  3085. // list. The parameters can be cv-qualified and contain references or
  3086. // pointers.
  3087. // So we want basically to check for TYPE NAME, but TYPE can contain all kinds
  3088. // of stuff: typename, const, *, &, &&, ::, identifiers.
  3089. unsigned StoredPosition = Tokens->getPosition();
  3090. FormatToken *NextToken = Tokens->getNextToken();
  3091. int Lookahead = 0;
  3092. auto PeekNext = [&Lookahead, &NextToken, this] {
  3093. ++Lookahead;
  3094. NextToken = Tokens->getNextToken();
  3095. };
  3096. bool FoundType = false;
  3097. bool LastWasColonColon = false;
  3098. int OpenAngles = 0;
  3099. for (; Lookahead < 50; PeekNext()) {
  3100. switch (NextToken->Tok.getKind()) {
  3101. case tok::kw_volatile:
  3102. case tok::kw_const:
  3103. case tok::comma:
  3104. FormatTok = Tokens->setPosition(StoredPosition);
  3105. parseRequiresExpression(RequiresToken);
  3106. return false;
  3107. case tok::r_paren:
  3108. case tok::pipepipe:
  3109. FormatTok = Tokens->setPosition(StoredPosition);
  3110. parseRequiresClause(RequiresToken);
  3111. return true;
  3112. case tok::eof:
  3113. // Break out of the loop.
  3114. Lookahead = 50;
  3115. break;
  3116. case tok::coloncolon:
  3117. LastWasColonColon = true;
  3118. break;
  3119. case tok::identifier:
  3120. if (FoundType && !LastWasColonColon && OpenAngles == 0) {
  3121. FormatTok = Tokens->setPosition(StoredPosition);
  3122. parseRequiresExpression(RequiresToken);
  3123. return false;
  3124. }
  3125. FoundType = true;
  3126. LastWasColonColon = false;
  3127. break;
  3128. case tok::less:
  3129. ++OpenAngles;
  3130. break;
  3131. case tok::greater:
  3132. --OpenAngles;
  3133. break;
  3134. default:
  3135. if (NextToken->isSimpleTypeSpecifier()) {
  3136. FormatTok = Tokens->setPosition(StoredPosition);
  3137. parseRequiresExpression(RequiresToken);
  3138. return false;
  3139. }
  3140. break;
  3141. }
  3142. }
  3143. // This seems to be a complicated expression, just assume it's a clause.
  3144. FormatTok = Tokens->setPosition(StoredPosition);
  3145. parseRequiresClause(RequiresToken);
  3146. return true;
  3147. }
  3148. /// \brief Parses a requires clause.
  3149. /// \param RequiresToken The requires keyword token, which starts this clause.
  3150. /// \pre We need to be on the next token after the requires keyword.
  3151. /// \sa parseRequiresExpression
  3152. ///
  3153. /// Returns if it either has finished parsing the clause, or it detects, that
  3154. /// the clause is incorrect.
  3155. void UnwrappedLineParser::parseRequiresClause(FormatToken *RequiresToken) {
  3156. assert(FormatTok->getPreviousNonComment() == RequiresToken);
  3157. assert(RequiresToken->is(tok::kw_requires) && "'requires' expected");
  3158. // If there is no previous token, we are within a requires expression,
  3159. // otherwise we will always have the template or function declaration in front
  3160. // of it.
  3161. bool InRequiresExpression =
  3162. !RequiresToken->Previous ||
  3163. RequiresToken->Previous->is(TT_RequiresExpressionLBrace);
  3164. RequiresToken->setFinalizedType(InRequiresExpression
  3165. ? TT_RequiresClauseInARequiresExpression
  3166. : TT_RequiresClause);
  3167. // NOTE: parseConstraintExpression is only ever called from this function.
  3168. // It could be inlined into here.
  3169. parseConstraintExpression();
  3170. if (!InRequiresExpression)
  3171. FormatTok->Previous->ClosesRequiresClause = true;
  3172. }
  3173. /// \brief Parses a requires expression.
  3174. /// \param RequiresToken The requires keyword token, which starts this clause.
  3175. /// \pre We need to be on the next token after the requires keyword.
  3176. /// \sa parseRequiresClause
  3177. ///
  3178. /// Returns if it either has finished parsing the expression, or it detects,
  3179. /// that the expression is incorrect.
  3180. void UnwrappedLineParser::parseRequiresExpression(FormatToken *RequiresToken) {
  3181. assert(FormatTok->getPreviousNonComment() == RequiresToken);
  3182. assert(RequiresToken->is(tok::kw_requires) && "'requires' expected");
  3183. RequiresToken->setFinalizedType(TT_RequiresExpression);
  3184. if (FormatTok->is(tok::l_paren)) {
  3185. FormatTok->setFinalizedType(TT_RequiresExpressionLParen);
  3186. parseParens();
  3187. }
  3188. if (FormatTok->is(tok::l_brace)) {
  3189. FormatTok->setFinalizedType(TT_RequiresExpressionLBrace);
  3190. parseChildBlock(/*CanContainBracedList=*/false,
  3191. /*NextLBracesType=*/TT_CompoundRequirementLBrace);
  3192. }
  3193. }
  3194. /// \brief Parses a constraint expression.
  3195. ///
  3196. /// This is the body of a requires clause. It returns, when the parsing is
  3197. /// complete, or the expression is incorrect.
  3198. void UnwrappedLineParser::parseConstraintExpression() {
  3199. // The special handling for lambdas is needed since tryToParseLambda() eats a
  3200. // token and if a requires expression is the last part of a requires clause
  3201. // and followed by an attribute like [[nodiscard]] the ClosesRequiresClause is
  3202. // not set on the correct token. Thus we need to be aware if we even expect a
  3203. // lambda to be possible.
  3204. // template <typename T> requires requires { ... } [[nodiscard]] ...;
  3205. bool LambdaNextTimeAllowed = true;
  3206. do {
  3207. bool LambdaThisTimeAllowed = std::exchange(LambdaNextTimeAllowed, false);
  3208. switch (FormatTok->Tok.getKind()) {
  3209. case tok::kw_requires: {
  3210. auto RequiresToken = FormatTok;
  3211. nextToken();
  3212. parseRequiresExpression(RequiresToken);
  3213. break;
  3214. }
  3215. case tok::l_paren:
  3216. parseParens(/*AmpAmpTokenType=*/TT_BinaryOperator);
  3217. break;
  3218. case tok::l_square:
  3219. if (!LambdaThisTimeAllowed || !tryToParseLambda())
  3220. return;
  3221. break;
  3222. case tok::kw_const:
  3223. case tok::semi:
  3224. case tok::kw_class:
  3225. case tok::kw_struct:
  3226. case tok::kw_union:
  3227. return;
  3228. case tok::l_brace:
  3229. // Potential function body.
  3230. return;
  3231. case tok::ampamp:
  3232. case tok::pipepipe:
  3233. FormatTok->setFinalizedType(TT_BinaryOperator);
  3234. nextToken();
  3235. LambdaNextTimeAllowed = true;
  3236. break;
  3237. case tok::comma:
  3238. case tok::comment:
  3239. LambdaNextTimeAllowed = LambdaThisTimeAllowed;
  3240. nextToken();
  3241. break;
  3242. case tok::kw_sizeof:
  3243. case tok::greater:
  3244. case tok::greaterequal:
  3245. case tok::greatergreater:
  3246. case tok::less:
  3247. case tok::lessequal:
  3248. case tok::lessless:
  3249. case tok::equalequal:
  3250. case tok::exclaim:
  3251. case tok::exclaimequal:
  3252. case tok::plus:
  3253. case tok::minus:
  3254. case tok::star:
  3255. case tok::slash:
  3256. LambdaNextTimeAllowed = true;
  3257. // Just eat them.
  3258. nextToken();
  3259. break;
  3260. case tok::numeric_constant:
  3261. case tok::coloncolon:
  3262. case tok::kw_true:
  3263. case tok::kw_false:
  3264. // Just eat them.
  3265. nextToken();
  3266. break;
  3267. case tok::kw_static_cast:
  3268. case tok::kw_const_cast:
  3269. case tok::kw_reinterpret_cast:
  3270. case tok::kw_dynamic_cast:
  3271. nextToken();
  3272. if (!FormatTok->is(tok::less))
  3273. return;
  3274. nextToken();
  3275. parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
  3276. /*ClosingBraceKind=*/tok::greater);
  3277. break;
  3278. case tok::kw_bool:
  3279. // bool is only allowed if it is directly followed by a paren for a cast:
  3280. // concept C = bool(...);
  3281. // and bool is the only type, all other types as cast must be inside a
  3282. // cast to bool an thus are handled by the other cases.
  3283. if (Tokens->peekNextToken()->isNot(tok::l_paren))
  3284. return;
  3285. nextToken();
  3286. parseParens();
  3287. break;
  3288. default:
  3289. if (!FormatTok->Tok.getIdentifierInfo()) {
  3290. // Identifiers are part of the default case, we check for more then
  3291. // tok::identifier to handle builtin type traits.
  3292. return;
  3293. }
  3294. // We need to differentiate identifiers for a template deduction guide,
  3295. // variables, or function return types (the constraint expression has
  3296. // ended before that), and basically all other cases. But it's easier to
  3297. // check the other way around.
  3298. assert(FormatTok->Previous);
  3299. switch (FormatTok->Previous->Tok.getKind()) {
  3300. case tok::coloncolon: // Nested identifier.
  3301. case tok::ampamp: // Start of a function or variable for the
  3302. case tok::pipepipe: // constraint expression. (binary)
  3303. case tok::exclaim: // The same as above, but unary.
  3304. case tok::kw_requires: // Initial identifier of a requires clause.
  3305. case tok::equal: // Initial identifier of a concept declaration.
  3306. break;
  3307. default:
  3308. return;
  3309. }
  3310. // Read identifier with optional template declaration.
  3311. nextToken();
  3312. if (FormatTok->is(tok::less)) {
  3313. nextToken();
  3314. parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
  3315. /*ClosingBraceKind=*/tok::greater);
  3316. }
  3317. break;
  3318. }
  3319. } while (!eof());
  3320. }
  3321. bool UnwrappedLineParser::parseEnum() {
  3322. const FormatToken &InitialToken = *FormatTok;
  3323. // Won't be 'enum' for NS_ENUMs.
  3324. if (FormatTok->is(tok::kw_enum))
  3325. nextToken();
  3326. // In TypeScript, "enum" can also be used as property name, e.g. in interface
  3327. // declarations. An "enum" keyword followed by a colon would be a syntax
  3328. // error and thus assume it is just an identifier.
  3329. if (Style.isJavaScript() && FormatTok->isOneOf(tok::colon, tok::question))
  3330. return false;
  3331. // In protobuf, "enum" can be used as a field name.
  3332. if (Style.Language == FormatStyle::LK_Proto && FormatTok->is(tok::equal))
  3333. return false;
  3334. // Eat up enum class ...
  3335. if (FormatTok->isOneOf(tok::kw_class, tok::kw_struct))
  3336. nextToken();
  3337. while (FormatTok->Tok.getIdentifierInfo() ||
  3338. FormatTok->isOneOf(tok::colon, tok::coloncolon, tok::less,
  3339. tok::greater, tok::comma, tok::question,
  3340. tok::l_square, tok::r_square)) {
  3341. nextToken();
  3342. // We can have macros or attributes in between 'enum' and the enum name.
  3343. if (FormatTok->is(tok::l_paren))
  3344. parseParens();
  3345. if (FormatTok->is(TT_AttributeSquare)) {
  3346. parseSquare();
  3347. // Consume the closing TT_AttributeSquare.
  3348. if (FormatTok->Next && FormatTok->is(TT_AttributeSquare))
  3349. nextToken();
  3350. }
  3351. if (FormatTok->is(tok::identifier)) {
  3352. nextToken();
  3353. // If there are two identifiers in a row, this is likely an elaborate
  3354. // return type. In Java, this can be "implements", etc.
  3355. if (Style.isCpp() && FormatTok->is(tok::identifier))
  3356. return false;
  3357. }
  3358. }
  3359. // Just a declaration or something is wrong.
  3360. if (FormatTok->isNot(tok::l_brace))
  3361. return true;
  3362. FormatTok->setFinalizedType(TT_EnumLBrace);
  3363. FormatTok->setBlockKind(BK_Block);
  3364. if (Style.Language == FormatStyle::LK_Java) {
  3365. // Java enums are different.
  3366. parseJavaEnumBody();
  3367. return true;
  3368. }
  3369. if (Style.Language == FormatStyle::LK_Proto) {
  3370. parseBlock(/*MustBeDeclaration=*/true);
  3371. return true;
  3372. }
  3373. if (!Style.AllowShortEnumsOnASingleLine &&
  3374. ShouldBreakBeforeBrace(Style, InitialToken)) {
  3375. addUnwrappedLine();
  3376. }
  3377. // Parse enum body.
  3378. nextToken();
  3379. if (!Style.AllowShortEnumsOnASingleLine) {
  3380. addUnwrappedLine();
  3381. Line->Level += 1;
  3382. }
  3383. bool HasError = !parseBracedList(/*ContinueOnSemicolons=*/true,
  3384. /*IsEnum=*/true);
  3385. if (!Style.AllowShortEnumsOnASingleLine)
  3386. Line->Level -= 1;
  3387. if (HasError) {
  3388. if (FormatTok->is(tok::semi))
  3389. nextToken();
  3390. addUnwrappedLine();
  3391. }
  3392. return true;
  3393. // There is no addUnwrappedLine() here so that we fall through to parsing a
  3394. // structural element afterwards. Thus, in "enum A {} n, m;",
  3395. // "} n, m;" will end up in one unwrapped line.
  3396. }
  3397. bool UnwrappedLineParser::parseStructLike() {
  3398. // parseRecord falls through and does not yet add an unwrapped line as a
  3399. // record declaration or definition can start a structural element.
  3400. parseRecord();
  3401. // This does not apply to Java, JavaScript and C#.
  3402. if (Style.Language == FormatStyle::LK_Java || Style.isJavaScript() ||
  3403. Style.isCSharp()) {
  3404. if (FormatTok->is(tok::semi))
  3405. nextToken();
  3406. addUnwrappedLine();
  3407. return true;
  3408. }
  3409. return false;
  3410. }
  3411. namespace {
  3412. // A class used to set and restore the Token position when peeking
  3413. // ahead in the token source.
  3414. class ScopedTokenPosition {
  3415. unsigned StoredPosition;
  3416. FormatTokenSource *Tokens;
  3417. public:
  3418. ScopedTokenPosition(FormatTokenSource *Tokens) : Tokens(Tokens) {
  3419. assert(Tokens && "Tokens expected to not be null");
  3420. StoredPosition = Tokens->getPosition();
  3421. }
  3422. ~ScopedTokenPosition() { Tokens->setPosition(StoredPosition); }
  3423. };
  3424. } // namespace
  3425. // Look to see if we have [[ by looking ahead, if
  3426. // its not then rewind to the original position.
  3427. bool UnwrappedLineParser::tryToParseSimpleAttribute() {
  3428. ScopedTokenPosition AutoPosition(Tokens);
  3429. FormatToken *Tok = Tokens->getNextToken();
  3430. // We already read the first [ check for the second.
  3431. if (!Tok->is(tok::l_square))
  3432. return false;
  3433. // Double check that the attribute is just something
  3434. // fairly simple.
  3435. while (Tok->isNot(tok::eof)) {
  3436. if (Tok->is(tok::r_square))
  3437. break;
  3438. Tok = Tokens->getNextToken();
  3439. }
  3440. if (Tok->is(tok::eof))
  3441. return false;
  3442. Tok = Tokens->getNextToken();
  3443. if (!Tok->is(tok::r_square))
  3444. return false;
  3445. Tok = Tokens->getNextToken();
  3446. if (Tok->is(tok::semi))
  3447. return false;
  3448. return true;
  3449. }
  3450. void UnwrappedLineParser::parseJavaEnumBody() {
  3451. assert(FormatTok->is(tok::l_brace));
  3452. const FormatToken *OpeningBrace = FormatTok;
  3453. // Determine whether the enum is simple, i.e. does not have a semicolon or
  3454. // constants with class bodies. Simple enums can be formatted like braced
  3455. // lists, contracted to a single line, etc.
  3456. unsigned StoredPosition = Tokens->getPosition();
  3457. bool IsSimple = true;
  3458. FormatToken *Tok = Tokens->getNextToken();
  3459. while (!Tok->is(tok::eof)) {
  3460. if (Tok->is(tok::r_brace))
  3461. break;
  3462. if (Tok->isOneOf(tok::l_brace, tok::semi)) {
  3463. IsSimple = false;
  3464. break;
  3465. }
  3466. // FIXME: This will also mark enums with braces in the arguments to enum
  3467. // constants as "not simple". This is probably fine in practice, though.
  3468. Tok = Tokens->getNextToken();
  3469. }
  3470. FormatTok = Tokens->setPosition(StoredPosition);
  3471. if (IsSimple) {
  3472. nextToken();
  3473. parseBracedList();
  3474. addUnwrappedLine();
  3475. return;
  3476. }
  3477. // Parse the body of a more complex enum.
  3478. // First add a line for everything up to the "{".
  3479. nextToken();
  3480. addUnwrappedLine();
  3481. ++Line->Level;
  3482. // Parse the enum constants.
  3483. while (!eof()) {
  3484. if (FormatTok->is(tok::l_brace)) {
  3485. // Parse the constant's class body.
  3486. parseBlock(/*MustBeDeclaration=*/true, /*AddLevels=*/1u,
  3487. /*MunchSemi=*/false);
  3488. } else if (FormatTok->is(tok::l_paren)) {
  3489. parseParens();
  3490. } else if (FormatTok->is(tok::comma)) {
  3491. nextToken();
  3492. addUnwrappedLine();
  3493. } else if (FormatTok->is(tok::semi)) {
  3494. nextToken();
  3495. addUnwrappedLine();
  3496. break;
  3497. } else if (FormatTok->is(tok::r_brace)) {
  3498. addUnwrappedLine();
  3499. break;
  3500. } else {
  3501. nextToken();
  3502. }
  3503. }
  3504. // Parse the class body after the enum's ";" if any.
  3505. parseLevel(OpeningBrace);
  3506. nextToken();
  3507. --Line->Level;
  3508. addUnwrappedLine();
  3509. }
  3510. void UnwrappedLineParser::parseRecord(bool ParseAsExpr) {
  3511. const FormatToken &InitialToken = *FormatTok;
  3512. nextToken();
  3513. handleAttributes();
  3514. // The actual identifier can be a nested name specifier, and in macros
  3515. // it is often token-pasted.
  3516. while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::hashhash,
  3517. tok::kw___attribute, tok::kw___declspec,
  3518. tok::kw_alignas) ||
  3519. ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) &&
  3520. FormatTok->isOneOf(tok::period, tok::comma))) {
  3521. if (Style.isJavaScript() &&
  3522. FormatTok->isOneOf(Keywords.kw_extends, Keywords.kw_implements)) {
  3523. // JavaScript/TypeScript supports inline object types in
  3524. // extends/implements positions:
  3525. // class Foo implements {bar: number} { }
  3526. nextToken();
  3527. if (FormatTok->is(tok::l_brace)) {
  3528. tryToParseBracedList();
  3529. continue;
  3530. }
  3531. }
  3532. bool IsNonMacroIdentifier =
  3533. FormatTok->is(tok::identifier) &&
  3534. FormatTok->TokenText != FormatTok->TokenText.upper();
  3535. nextToken();
  3536. // We can have macros in between 'class' and the class name.
  3537. if (!IsNonMacroIdentifier) {
  3538. if (FormatTok->is(tok::l_paren)) {
  3539. parseParens();
  3540. }
  3541. }
  3542. }
  3543. // Note that parsing away template declarations here leads to incorrectly
  3544. // accepting function declarations as record declarations.
  3545. // In general, we cannot solve this problem. Consider:
  3546. // class A<int> B() {}
  3547. // which can be a function definition or a class definition when B() is a
  3548. // macro. If we find enough real-world cases where this is a problem, we
  3549. // can parse for the 'template' keyword in the beginning of the statement,
  3550. // and thus rule out the record production in case there is no template
  3551. // (this would still leave us with an ambiguity between template function
  3552. // and class declarations).
  3553. if (FormatTok->isOneOf(tok::colon, tok::less)) {
  3554. do {
  3555. if (FormatTok->is(tok::l_brace)) {
  3556. calculateBraceTypes(/*ExpectClassBody=*/true);
  3557. if (!tryToParseBracedList())
  3558. break;
  3559. }
  3560. if (FormatTok->is(tok::l_square)) {
  3561. FormatToken *Previous = FormatTok->Previous;
  3562. if (!Previous ||
  3563. !(Previous->is(tok::r_paren) || Previous->isTypeOrIdentifier())) {
  3564. // Don't try parsing a lambda if we had a closing parenthesis before,
  3565. // it was probably a pointer to an array: int (*)[].
  3566. if (!tryToParseLambda())
  3567. break;
  3568. } else {
  3569. parseSquare();
  3570. continue;
  3571. }
  3572. }
  3573. if (FormatTok->is(tok::semi))
  3574. return;
  3575. if (Style.isCSharp() && FormatTok->is(Keywords.kw_where)) {
  3576. addUnwrappedLine();
  3577. nextToken();
  3578. parseCSharpGenericTypeConstraint();
  3579. break;
  3580. }
  3581. nextToken();
  3582. } while (!eof());
  3583. }
  3584. auto GetBraceType = [](const FormatToken &RecordTok) {
  3585. switch (RecordTok.Tok.getKind()) {
  3586. case tok::kw_class:
  3587. return TT_ClassLBrace;
  3588. case tok::kw_struct:
  3589. return TT_StructLBrace;
  3590. case tok::kw_union:
  3591. return TT_UnionLBrace;
  3592. default:
  3593. // Useful for e.g. interface.
  3594. return TT_RecordLBrace;
  3595. }
  3596. };
  3597. if (FormatTok->is(tok::l_brace)) {
  3598. FormatTok->setFinalizedType(GetBraceType(InitialToken));
  3599. if (ParseAsExpr) {
  3600. parseChildBlock();
  3601. } else {
  3602. if (ShouldBreakBeforeBrace(Style, InitialToken))
  3603. addUnwrappedLine();
  3604. unsigned AddLevels = Style.IndentAccessModifiers ? 2u : 1u;
  3605. parseBlock(/*MustBeDeclaration=*/true, AddLevels, /*MunchSemi=*/false);
  3606. }
  3607. }
  3608. // There is no addUnwrappedLine() here so that we fall through to parsing a
  3609. // structural element afterwards. Thus, in "class A {} n, m;",
  3610. // "} n, m;" will end up in one unwrapped line.
  3611. }
  3612. void UnwrappedLineParser::parseObjCMethod() {
  3613. assert(FormatTok->isOneOf(tok::l_paren, tok::identifier) &&
  3614. "'(' or identifier expected.");
  3615. do {
  3616. if (FormatTok->is(tok::semi)) {
  3617. nextToken();
  3618. addUnwrappedLine();
  3619. return;
  3620. } else if (FormatTok->is(tok::l_brace)) {
  3621. if (Style.BraceWrapping.AfterFunction)
  3622. addUnwrappedLine();
  3623. parseBlock();
  3624. addUnwrappedLine();
  3625. return;
  3626. } else {
  3627. nextToken();
  3628. }
  3629. } while (!eof());
  3630. }
  3631. void UnwrappedLineParser::parseObjCProtocolList() {
  3632. assert(FormatTok->is(tok::less) && "'<' expected.");
  3633. do {
  3634. nextToken();
  3635. // Early exit in case someone forgot a close angle.
  3636. if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
  3637. FormatTok->isObjCAtKeyword(tok::objc_end)) {
  3638. return;
  3639. }
  3640. } while (!eof() && FormatTok->isNot(tok::greater));
  3641. nextToken(); // Skip '>'.
  3642. }
  3643. void UnwrappedLineParser::parseObjCUntilAtEnd() {
  3644. do {
  3645. if (FormatTok->isObjCAtKeyword(tok::objc_end)) {
  3646. nextToken();
  3647. addUnwrappedLine();
  3648. break;
  3649. }
  3650. if (FormatTok->is(tok::l_brace)) {
  3651. parseBlock();
  3652. // In ObjC interfaces, nothing should be following the "}".
  3653. addUnwrappedLine();
  3654. } else if (FormatTok->is(tok::r_brace)) {
  3655. // Ignore stray "}". parseStructuralElement doesn't consume them.
  3656. nextToken();
  3657. addUnwrappedLine();
  3658. } else if (FormatTok->isOneOf(tok::minus, tok::plus)) {
  3659. nextToken();
  3660. parseObjCMethod();
  3661. } else {
  3662. parseStructuralElement();
  3663. }
  3664. } while (!eof());
  3665. }
  3666. void UnwrappedLineParser::parseObjCInterfaceOrImplementation() {
  3667. assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_interface ||
  3668. FormatTok->Tok.getObjCKeywordID() == tok::objc_implementation);
  3669. nextToken();
  3670. nextToken(); // interface name
  3671. // @interface can be followed by a lightweight generic
  3672. // specialization list, then either a base class or a category.
  3673. if (FormatTok->is(tok::less))
  3674. parseObjCLightweightGenerics();
  3675. if (FormatTok->is(tok::colon)) {
  3676. nextToken();
  3677. nextToken(); // base class name
  3678. // The base class can also have lightweight generics applied to it.
  3679. if (FormatTok->is(tok::less))
  3680. parseObjCLightweightGenerics();
  3681. } else if (FormatTok->is(tok::l_paren)) {
  3682. // Skip category, if present.
  3683. parseParens();
  3684. }
  3685. if (FormatTok->is(tok::less))
  3686. parseObjCProtocolList();
  3687. if (FormatTok->is(tok::l_brace)) {
  3688. if (Style.BraceWrapping.AfterObjCDeclaration)
  3689. addUnwrappedLine();
  3690. parseBlock(/*MustBeDeclaration=*/true);
  3691. }
  3692. // With instance variables, this puts '}' on its own line. Without instance
  3693. // variables, this ends the @interface line.
  3694. addUnwrappedLine();
  3695. parseObjCUntilAtEnd();
  3696. }
  3697. void UnwrappedLineParser::parseObjCLightweightGenerics() {
  3698. assert(FormatTok->is(tok::less));
  3699. // Unlike protocol lists, generic parameterizations support
  3700. // nested angles:
  3701. //
  3702. // @interface Foo<ValueType : id <NSCopying, NSSecureCoding>> :
  3703. // NSObject <NSCopying, NSSecureCoding>
  3704. //
  3705. // so we need to count how many open angles we have left.
  3706. unsigned NumOpenAngles = 1;
  3707. do {
  3708. nextToken();
  3709. // Early exit in case someone forgot a close angle.
  3710. if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
  3711. FormatTok->isObjCAtKeyword(tok::objc_end)) {
  3712. break;
  3713. }
  3714. if (FormatTok->is(tok::less)) {
  3715. ++NumOpenAngles;
  3716. } else if (FormatTok->is(tok::greater)) {
  3717. assert(NumOpenAngles > 0 && "'>' makes NumOpenAngles negative");
  3718. --NumOpenAngles;
  3719. }
  3720. } while (!eof() && NumOpenAngles != 0);
  3721. nextToken(); // Skip '>'.
  3722. }
  3723. // Returns true for the declaration/definition form of @protocol,
  3724. // false for the expression form.
  3725. bool UnwrappedLineParser::parseObjCProtocol() {
  3726. assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_protocol);
  3727. nextToken();
  3728. if (FormatTok->is(tok::l_paren)) {
  3729. // The expression form of @protocol, e.g. "Protocol* p = @protocol(foo);".
  3730. return false;
  3731. }
  3732. // The definition/declaration form,
  3733. // @protocol Foo
  3734. // - (int)someMethod;
  3735. // @end
  3736. nextToken(); // protocol name
  3737. if (FormatTok->is(tok::less))
  3738. parseObjCProtocolList();
  3739. // Check for protocol declaration.
  3740. if (FormatTok->is(tok::semi)) {
  3741. nextToken();
  3742. addUnwrappedLine();
  3743. return true;
  3744. }
  3745. addUnwrappedLine();
  3746. parseObjCUntilAtEnd();
  3747. return true;
  3748. }
  3749. void UnwrappedLineParser::parseJavaScriptEs6ImportExport() {
  3750. bool IsImport = FormatTok->is(Keywords.kw_import);
  3751. assert(IsImport || FormatTok->is(tok::kw_export));
  3752. nextToken();
  3753. // Consume the "default" in "export default class/function".
  3754. if (FormatTok->is(tok::kw_default))
  3755. nextToken();
  3756. // Consume "async function", "function" and "default function", so that these
  3757. // get parsed as free-standing JS functions, i.e. do not require a trailing
  3758. // semicolon.
  3759. if (FormatTok->is(Keywords.kw_async))
  3760. nextToken();
  3761. if (FormatTok->is(Keywords.kw_function)) {
  3762. nextToken();
  3763. return;
  3764. }
  3765. // For imports, `export *`, `export {...}`, consume the rest of the line up
  3766. // to the terminating `;`. For everything else, just return and continue
  3767. // parsing the structural element, i.e. the declaration or expression for
  3768. // `export default`.
  3769. if (!IsImport && !FormatTok->isOneOf(tok::l_brace, tok::star) &&
  3770. !FormatTok->isStringLiteral()) {
  3771. return;
  3772. }
  3773. while (!eof()) {
  3774. if (FormatTok->is(tok::semi))
  3775. return;
  3776. if (Line->Tokens.empty()) {
  3777. // Common issue: Automatic Semicolon Insertion wrapped the line, so the
  3778. // import statement should terminate.
  3779. return;
  3780. }
  3781. if (FormatTok->is(tok::l_brace)) {
  3782. FormatTok->setBlockKind(BK_Block);
  3783. nextToken();
  3784. parseBracedList();
  3785. } else {
  3786. nextToken();
  3787. }
  3788. }
  3789. }
  3790. void UnwrappedLineParser::parseStatementMacro() {
  3791. nextToken();
  3792. if (FormatTok->is(tok::l_paren))
  3793. parseParens();
  3794. if (FormatTok->is(tok::semi))
  3795. nextToken();
  3796. addUnwrappedLine();
  3797. }
  3798. void UnwrappedLineParser::parseVerilogHierarchyIdentifier() {
  3799. // consume things like a::`b.c[d:e] or a::*
  3800. while (true) {
  3801. if (FormatTok->isOneOf(tok::star, tok::period, tok::periodstar,
  3802. tok::coloncolon, tok::hash) ||
  3803. Keywords.isVerilogIdentifier(*FormatTok)) {
  3804. nextToken();
  3805. } else if (FormatTok->is(tok::l_square)) {
  3806. parseSquare();
  3807. } else {
  3808. break;
  3809. }
  3810. }
  3811. }
  3812. void UnwrappedLineParser::parseVerilogSensitivityList() {
  3813. if (!FormatTok->is(tok::at))
  3814. return;
  3815. nextToken();
  3816. // A block event expression has 2 at signs.
  3817. if (FormatTok->is(tok::at))
  3818. nextToken();
  3819. switch (FormatTok->Tok.getKind()) {
  3820. case tok::star:
  3821. nextToken();
  3822. break;
  3823. case tok::l_paren:
  3824. parseParens();
  3825. break;
  3826. default:
  3827. parseVerilogHierarchyIdentifier();
  3828. break;
  3829. }
  3830. }
  3831. unsigned UnwrappedLineParser::parseVerilogHierarchyHeader() {
  3832. unsigned AddLevels = 0;
  3833. if (FormatTok->is(Keywords.kw_clocking)) {
  3834. nextToken();
  3835. if (Keywords.isVerilogIdentifier(*FormatTok))
  3836. nextToken();
  3837. parseVerilogSensitivityList();
  3838. if (FormatTok->is(tok::semi))
  3839. nextToken();
  3840. } else if (FormatTok->isOneOf(tok::kw_case, Keywords.kw_casex,
  3841. Keywords.kw_casez, Keywords.kw_randcase,
  3842. Keywords.kw_randsequence)) {
  3843. if (Style.IndentCaseLabels)
  3844. AddLevels++;
  3845. nextToken();
  3846. if (FormatTok->is(tok::l_paren)) {
  3847. FormatTok->setFinalizedType(TT_ConditionLParen);
  3848. parseParens();
  3849. }
  3850. if (FormatTok->isOneOf(Keywords.kw_inside, Keywords.kw_matches))
  3851. nextToken();
  3852. // The case header has no semicolon.
  3853. } else {
  3854. // "module" etc.
  3855. nextToken();
  3856. // all the words like the name of the module and specifiers like
  3857. // "automatic" and the width of function return type
  3858. while (true) {
  3859. if (FormatTok->is(tok::l_square)) {
  3860. auto Prev = FormatTok->getPreviousNonComment();
  3861. if (Prev && Keywords.isVerilogIdentifier(*Prev))
  3862. Prev->setFinalizedType(TT_VerilogDimensionedTypeName);
  3863. parseSquare();
  3864. } else if (Keywords.isVerilogIdentifier(*FormatTok) ||
  3865. FormatTok->isOneOf(Keywords.kw_automatic, tok::kw_static)) {
  3866. nextToken();
  3867. } else {
  3868. break;
  3869. }
  3870. }
  3871. auto NewLine = [this]() {
  3872. addUnwrappedLine();
  3873. Line->IsContinuation = true;
  3874. };
  3875. // package imports
  3876. while (FormatTok->is(Keywords.kw_import)) {
  3877. NewLine();
  3878. nextToken();
  3879. parseVerilogHierarchyIdentifier();
  3880. if (FormatTok->is(tok::semi))
  3881. nextToken();
  3882. }
  3883. // parameters and ports
  3884. if (FormatTok->is(Keywords.kw_verilogHash)) {
  3885. NewLine();
  3886. nextToken();
  3887. if (FormatTok->is(tok::l_paren))
  3888. parseParens();
  3889. }
  3890. if (FormatTok->is(tok::l_paren)) {
  3891. NewLine();
  3892. parseParens();
  3893. }
  3894. // extends and implements
  3895. if (FormatTok->is(Keywords.kw_extends)) {
  3896. NewLine();
  3897. nextToken();
  3898. parseVerilogHierarchyIdentifier();
  3899. if (FormatTok->is(tok::l_paren))
  3900. parseParens();
  3901. }
  3902. if (FormatTok->is(Keywords.kw_implements)) {
  3903. NewLine();
  3904. do {
  3905. nextToken();
  3906. parseVerilogHierarchyIdentifier();
  3907. } while (FormatTok->is(tok::comma));
  3908. }
  3909. // Coverage event for cover groups.
  3910. if (FormatTok->is(tok::at)) {
  3911. NewLine();
  3912. parseVerilogSensitivityList();
  3913. }
  3914. if (FormatTok->is(tok::semi))
  3915. nextToken(/*LevelDifference=*/1);
  3916. addUnwrappedLine();
  3917. }
  3918. return AddLevels;
  3919. }
  3920. void UnwrappedLineParser::parseVerilogTable() {
  3921. assert(FormatTok->is(Keywords.kw_table));
  3922. nextToken(/*LevelDifference=*/1);
  3923. addUnwrappedLine();
  3924. auto InitialLevel = Line->Level++;
  3925. while (!eof() && !Keywords.isVerilogEnd(*FormatTok)) {
  3926. FormatToken *Tok = FormatTok;
  3927. nextToken();
  3928. if (Tok->is(tok::semi))
  3929. addUnwrappedLine();
  3930. else if (Tok->isOneOf(tok::star, tok::colon, tok::question, tok::minus))
  3931. Tok->setFinalizedType(TT_VerilogTableItem);
  3932. }
  3933. Line->Level = InitialLevel;
  3934. nextToken(/*LevelDifference=*/-1);
  3935. addUnwrappedLine();
  3936. }
  3937. void UnwrappedLineParser::parseVerilogCaseLabel() {
  3938. // The label will get unindented in AnnotatingParser. If there are no leading
  3939. // spaces, indent the rest here so that things inside the block will be
  3940. // indented relative to things outside. We don't use parseLabel because we
  3941. // don't know whether this colon is a label or a ternary expression at this
  3942. // point.
  3943. auto OrigLevel = Line->Level;
  3944. auto FirstLine = CurrentLines->size();
  3945. if (Line->Level == 0 || (Line->InPPDirective && Line->Level <= 1))
  3946. ++Line->Level;
  3947. else if (!Style.IndentCaseBlocks && Keywords.isVerilogBegin(*FormatTok))
  3948. --Line->Level;
  3949. parseStructuralElement();
  3950. // Restore the indentation in both the new line and the line that has the
  3951. // label.
  3952. if (CurrentLines->size() > FirstLine)
  3953. (*CurrentLines)[FirstLine].Level = OrigLevel;
  3954. Line->Level = OrigLevel;
  3955. }
  3956. void UnwrappedLineParser::addUnwrappedLine(LineLevel AdjustLevel) {
  3957. if (Line->Tokens.empty())
  3958. return;
  3959. LLVM_DEBUG({
  3960. if (CurrentLines == &Lines)
  3961. printDebugInfo(*Line);
  3962. });
  3963. // If this line closes a block when in Whitesmiths mode, remember that
  3964. // information so that the level can be decreased after the line is added.
  3965. // This has to happen after the addition of the line since the line itself
  3966. // needs to be indented.
  3967. bool ClosesWhitesmithsBlock =
  3968. Line->MatchingOpeningBlockLineIndex != UnwrappedLine::kInvalidIndex &&
  3969. Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths;
  3970. CurrentLines->push_back(std::move(*Line));
  3971. Line->Tokens.clear();
  3972. Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex;
  3973. Line->FirstStartColumn = 0;
  3974. Line->IsContinuation = false;
  3975. if (ClosesWhitesmithsBlock && AdjustLevel == LineLevel::Remove)
  3976. --Line->Level;
  3977. if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) {
  3978. CurrentLines->append(
  3979. std::make_move_iterator(PreprocessorDirectives.begin()),
  3980. std::make_move_iterator(PreprocessorDirectives.end()));
  3981. PreprocessorDirectives.clear();
  3982. }
  3983. // Disconnect the current token from the last token on the previous line.
  3984. FormatTok->Previous = nullptr;
  3985. }
  3986. bool UnwrappedLineParser::eof() const { return FormatTok->is(tok::eof); }
  3987. bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) {
  3988. return (Line->InPPDirective || FormatTok.HasUnescapedNewline) &&
  3989. FormatTok.NewlinesBefore > 0;
  3990. }
  3991. // Checks if \p FormatTok is a line comment that continues the line comment
  3992. // section on \p Line.
  3993. static bool
  3994. continuesLineCommentSection(const FormatToken &FormatTok,
  3995. const UnwrappedLine &Line,
  3996. const llvm::Regex &CommentPragmasRegex) {
  3997. if (Line.Tokens.empty())
  3998. return false;
  3999. StringRef IndentContent = FormatTok.TokenText;
  4000. if (FormatTok.TokenText.startswith("//") ||
  4001. FormatTok.TokenText.startswith("/*")) {
  4002. IndentContent = FormatTok.TokenText.substr(2);
  4003. }
  4004. if (CommentPragmasRegex.match(IndentContent))
  4005. return false;
  4006. // If Line starts with a line comment, then FormatTok continues the comment
  4007. // section if its original column is greater or equal to the original start
  4008. // column of the line.
  4009. //
  4010. // Define the min column token of a line as follows: if a line ends in '{' or
  4011. // contains a '{' followed by a line comment, then the min column token is
  4012. // that '{'. Otherwise, the min column token of the line is the first token of
  4013. // the line.
  4014. //
  4015. // If Line starts with a token other than a line comment, then FormatTok
  4016. // continues the comment section if its original column is greater than the
  4017. // original start column of the min column token of the line.
  4018. //
  4019. // For example, the second line comment continues the first in these cases:
  4020. //
  4021. // // first line
  4022. // // second line
  4023. //
  4024. // and:
  4025. //
  4026. // // first line
  4027. // // second line
  4028. //
  4029. // and:
  4030. //
  4031. // int i; // first line
  4032. // // second line
  4033. //
  4034. // and:
  4035. //
  4036. // do { // first line
  4037. // // second line
  4038. // int i;
  4039. // } while (true);
  4040. //
  4041. // and:
  4042. //
  4043. // enum {
  4044. // a, // first line
  4045. // // second line
  4046. // b
  4047. // };
  4048. //
  4049. // The second line comment doesn't continue the first in these cases:
  4050. //
  4051. // // first line
  4052. // // second line
  4053. //
  4054. // and:
  4055. //
  4056. // int i; // first line
  4057. // // second line
  4058. //
  4059. // and:
  4060. //
  4061. // do { // first line
  4062. // // second line
  4063. // int i;
  4064. // } while (true);
  4065. //
  4066. // and:
  4067. //
  4068. // enum {
  4069. // a, // first line
  4070. // // second line
  4071. // };
  4072. const FormatToken *MinColumnToken = Line.Tokens.front().Tok;
  4073. // Scan for '{//'. If found, use the column of '{' as a min column for line
  4074. // comment section continuation.
  4075. const FormatToken *PreviousToken = nullptr;
  4076. for (const UnwrappedLineNode &Node : Line.Tokens) {
  4077. if (PreviousToken && PreviousToken->is(tok::l_brace) &&
  4078. isLineComment(*Node.Tok)) {
  4079. MinColumnToken = PreviousToken;
  4080. break;
  4081. }
  4082. PreviousToken = Node.Tok;
  4083. // Grab the last newline preceding a token in this unwrapped line.
  4084. if (Node.Tok->NewlinesBefore > 0)
  4085. MinColumnToken = Node.Tok;
  4086. }
  4087. if (PreviousToken && PreviousToken->is(tok::l_brace))
  4088. MinColumnToken = PreviousToken;
  4089. return continuesLineComment(FormatTok, /*Previous=*/Line.Tokens.back().Tok,
  4090. MinColumnToken);
  4091. }
  4092. void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) {
  4093. bool JustComments = Line->Tokens.empty();
  4094. for (FormatToken *Tok : CommentsBeforeNextToken) {
  4095. // Line comments that belong to the same line comment section are put on the
  4096. // same line since later we might want to reflow content between them.
  4097. // Additional fine-grained breaking of line comment sections is controlled
  4098. // by the class BreakableLineCommentSection in case it is desirable to keep
  4099. // several line comment sections in the same unwrapped line.
  4100. //
  4101. // FIXME: Consider putting separate line comment sections as children to the
  4102. // unwrapped line instead.
  4103. Tok->ContinuesLineCommentSection =
  4104. continuesLineCommentSection(*Tok, *Line, CommentPragmasRegex);
  4105. if (isOnNewLine(*Tok) && JustComments && !Tok->ContinuesLineCommentSection)
  4106. addUnwrappedLine();
  4107. pushToken(Tok);
  4108. }
  4109. if (NewlineBeforeNext && JustComments)
  4110. addUnwrappedLine();
  4111. CommentsBeforeNextToken.clear();
  4112. }
  4113. void UnwrappedLineParser::nextToken(int LevelDifference) {
  4114. if (eof())
  4115. return;
  4116. flushComments(isOnNewLine(*FormatTok));
  4117. pushToken(FormatTok);
  4118. FormatToken *Previous = FormatTok;
  4119. if (!Style.isJavaScript())
  4120. readToken(LevelDifference);
  4121. else
  4122. readTokenWithJavaScriptASI();
  4123. FormatTok->Previous = Previous;
  4124. if (Style.isVerilog()) {
  4125. // Blocks in Verilog can have `begin` and `end` instead of braces. For
  4126. // keywords like `begin`, we can't treat them the same as left braces
  4127. // because some contexts require one of them. For example structs use
  4128. // braces and if blocks use keywords, and a left brace can occur in an if
  4129. // statement, but it is not a block. For keywords like `end`, we simply
  4130. // treat them the same as right braces.
  4131. if (Keywords.isVerilogEnd(*FormatTok))
  4132. FormatTok->Tok.setKind(tok::r_brace);
  4133. }
  4134. }
  4135. void UnwrappedLineParser::distributeComments(
  4136. const SmallVectorImpl<FormatToken *> &Comments,
  4137. const FormatToken *NextTok) {
  4138. // Whether or not a line comment token continues a line is controlled by
  4139. // the method continuesLineCommentSection, with the following caveat:
  4140. //
  4141. // Define a trail of Comments to be a nonempty proper postfix of Comments such
  4142. // that each comment line from the trail is aligned with the next token, if
  4143. // the next token exists. If a trail exists, the beginning of the maximal
  4144. // trail is marked as a start of a new comment section.
  4145. //
  4146. // For example in this code:
  4147. //
  4148. // int a; // line about a
  4149. // // line 1 about b
  4150. // // line 2 about b
  4151. // int b;
  4152. //
  4153. // the two lines about b form a maximal trail, so there are two sections, the
  4154. // first one consisting of the single comment "// line about a" and the
  4155. // second one consisting of the next two comments.
  4156. if (Comments.empty())
  4157. return;
  4158. bool ShouldPushCommentsInCurrentLine = true;
  4159. bool HasTrailAlignedWithNextToken = false;
  4160. unsigned StartOfTrailAlignedWithNextToken = 0;
  4161. if (NextTok) {
  4162. // We are skipping the first element intentionally.
  4163. for (unsigned i = Comments.size() - 1; i > 0; --i) {
  4164. if (Comments[i]->OriginalColumn == NextTok->OriginalColumn) {
  4165. HasTrailAlignedWithNextToken = true;
  4166. StartOfTrailAlignedWithNextToken = i;
  4167. }
  4168. }
  4169. }
  4170. for (unsigned i = 0, e = Comments.size(); i < e; ++i) {
  4171. FormatToken *FormatTok = Comments[i];
  4172. if (HasTrailAlignedWithNextToken && i == StartOfTrailAlignedWithNextToken) {
  4173. FormatTok->ContinuesLineCommentSection = false;
  4174. } else {
  4175. FormatTok->ContinuesLineCommentSection =
  4176. continuesLineCommentSection(*FormatTok, *Line, CommentPragmasRegex);
  4177. }
  4178. if (!FormatTok->ContinuesLineCommentSection &&
  4179. (isOnNewLine(*FormatTok) || FormatTok->IsFirst)) {
  4180. ShouldPushCommentsInCurrentLine = false;
  4181. }
  4182. if (ShouldPushCommentsInCurrentLine)
  4183. pushToken(FormatTok);
  4184. else
  4185. CommentsBeforeNextToken.push_back(FormatTok);
  4186. }
  4187. }
  4188. void UnwrappedLineParser::readToken(int LevelDifference) {
  4189. SmallVector<FormatToken *, 1> Comments;
  4190. bool PreviousWasComment = false;
  4191. bool FirstNonCommentOnLine = false;
  4192. do {
  4193. FormatTok = Tokens->getNextToken();
  4194. assert(FormatTok);
  4195. while (FormatTok->getType() == TT_ConflictStart ||
  4196. FormatTok->getType() == TT_ConflictEnd ||
  4197. FormatTok->getType() == TT_ConflictAlternative) {
  4198. if (FormatTok->getType() == TT_ConflictStart)
  4199. conditionalCompilationStart(/*Unreachable=*/false);
  4200. else if (FormatTok->getType() == TT_ConflictAlternative)
  4201. conditionalCompilationAlternative();
  4202. else if (FormatTok->getType() == TT_ConflictEnd)
  4203. conditionalCompilationEnd();
  4204. FormatTok = Tokens->getNextToken();
  4205. FormatTok->MustBreakBefore = true;
  4206. }
  4207. auto IsFirstNonCommentOnLine = [](bool FirstNonCommentOnLine,
  4208. const FormatToken &Tok,
  4209. bool PreviousWasComment) {
  4210. auto IsFirstOnLine = [](const FormatToken &Tok) {
  4211. return Tok.HasUnescapedNewline || Tok.IsFirst;
  4212. };
  4213. // Consider preprocessor directives preceded by block comments as first
  4214. // on line.
  4215. if (PreviousWasComment)
  4216. return FirstNonCommentOnLine || IsFirstOnLine(Tok);
  4217. return IsFirstOnLine(Tok);
  4218. };
  4219. FirstNonCommentOnLine = IsFirstNonCommentOnLine(
  4220. FirstNonCommentOnLine, *FormatTok, PreviousWasComment);
  4221. PreviousWasComment = FormatTok->is(tok::comment);
  4222. while (!Line->InPPDirective && FormatTok->is(tok::hash) &&
  4223. (!Style.isVerilog() ||
  4224. Keywords.isVerilogPPDirective(*Tokens->peekNextToken())) &&
  4225. FirstNonCommentOnLine) {
  4226. distributeComments(Comments, FormatTok);
  4227. Comments.clear();
  4228. // If there is an unfinished unwrapped line, we flush the preprocessor
  4229. // directives only after that unwrapped line was finished later.
  4230. bool SwitchToPreprocessorLines = !Line->Tokens.empty();
  4231. ScopedLineState BlockState(*this, SwitchToPreprocessorLines);
  4232. assert((LevelDifference >= 0 ||
  4233. static_cast<unsigned>(-LevelDifference) <= Line->Level) &&
  4234. "LevelDifference makes Line->Level negative");
  4235. Line->Level += LevelDifference;
  4236. // Comments stored before the preprocessor directive need to be output
  4237. // before the preprocessor directive, at the same level as the
  4238. // preprocessor directive, as we consider them to apply to the directive.
  4239. if (Style.IndentPPDirectives == FormatStyle::PPDIS_BeforeHash &&
  4240. PPBranchLevel > 0) {
  4241. Line->Level += PPBranchLevel;
  4242. }
  4243. flushComments(isOnNewLine(*FormatTok));
  4244. parsePPDirective();
  4245. PreviousWasComment = FormatTok->is(tok::comment);
  4246. FirstNonCommentOnLine = IsFirstNonCommentOnLine(
  4247. FirstNonCommentOnLine, *FormatTok, PreviousWasComment);
  4248. }
  4249. if (!PPStack.empty() && (PPStack.back().Kind == PP_Unreachable) &&
  4250. !Line->InPPDirective) {
  4251. continue;
  4252. }
  4253. if (!FormatTok->is(tok::comment)) {
  4254. distributeComments(Comments, FormatTok);
  4255. Comments.clear();
  4256. return;
  4257. }
  4258. Comments.push_back(FormatTok);
  4259. } while (!eof());
  4260. distributeComments(Comments, nullptr);
  4261. Comments.clear();
  4262. }
  4263. void UnwrappedLineParser::pushToken(FormatToken *Tok) {
  4264. Line->Tokens.push_back(UnwrappedLineNode(Tok));
  4265. if (MustBreakBeforeNextToken) {
  4266. Line->Tokens.back().Tok->MustBreakBefore = true;
  4267. MustBreakBeforeNextToken = false;
  4268. }
  4269. }
  4270. } // end namespace format
  4271. } // end namespace clang