ContinuationIndenter.h 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491
  1. //===--- ContinuationIndenter.h - Format C++ code ---------------*- C++ -*-===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. ///
  9. /// \file
  10. /// This file implements an indenter that manages the indentation of
  11. /// continuations.
  12. ///
  13. //===----------------------------------------------------------------------===//
  14. #ifndef LLVM_CLANG_LIB_FORMAT_CONTINUATIONINDENTER_H
  15. #define LLVM_CLANG_LIB_FORMAT_CONTINUATIONINDENTER_H
  16. #include "Encoding.h"
  17. #include "FormatToken.h"
  18. #include "clang/Format/Format.h"
  19. #include "llvm/Support/Regex.h"
  20. #include <map>
  21. #include <tuple>
  22. namespace clang {
  23. class SourceManager;
  24. namespace format {
  25. class AnnotatedLine;
  26. class BreakableToken;
  27. struct FormatToken;
  28. struct LineState;
  29. struct ParenState;
  30. struct RawStringFormatStyleManager;
  31. class WhitespaceManager;
  32. struct RawStringFormatStyleManager {
  33. llvm::StringMap<FormatStyle> DelimiterStyle;
  34. llvm::StringMap<FormatStyle> EnclosingFunctionStyle;
  35. RawStringFormatStyleManager(const FormatStyle &CodeStyle);
  36. llvm::Optional<FormatStyle> getDelimiterStyle(StringRef Delimiter) const;
  37. llvm::Optional<FormatStyle>
  38. getEnclosingFunctionStyle(StringRef EnclosingFunction) const;
  39. };
  40. class ContinuationIndenter {
  41. public:
  42. /// Constructs a \c ContinuationIndenter to format \p Line starting in
  43. /// column \p FirstIndent.
  44. ContinuationIndenter(const FormatStyle &Style,
  45. const AdditionalKeywords &Keywords,
  46. const SourceManager &SourceMgr,
  47. WhitespaceManager &Whitespaces,
  48. encoding::Encoding Encoding,
  49. bool BinPackInconclusiveFunctions);
  50. /// Get the initial state, i.e. the state after placing \p Line's
  51. /// first token at \p FirstIndent. When reformatting a fragment of code, as in
  52. /// the case of formatting inside raw string literals, \p FirstStartColumn is
  53. /// the column at which the state of the parent formatter is.
  54. LineState getInitialState(unsigned FirstIndent, unsigned FirstStartColumn,
  55. const AnnotatedLine *Line, bool DryRun);
  56. // FIXME: canBreak and mustBreak aren't strictly indentation-related. Find a
  57. // better home.
  58. /// Returns \c true, if a line break after \p State is allowed.
  59. bool canBreak(const LineState &State);
  60. /// Returns \c true, if a line break after \p State is mandatory.
  61. bool mustBreak(const LineState &State);
  62. /// Appends the next token to \p State and updates information
  63. /// necessary for indentation.
  64. ///
  65. /// Puts the token on the current line if \p Newline is \c false and adds a
  66. /// line break and necessary indentation otherwise.
  67. ///
  68. /// If \p DryRun is \c false, also creates and stores the required
  69. /// \c Replacement.
  70. unsigned addTokenToState(LineState &State, bool Newline, bool DryRun,
  71. unsigned ExtraSpaces = 0);
  72. /// Get the column limit for this line. This is the style's column
  73. /// limit, potentially reduced for preprocessor definitions.
  74. unsigned getColumnLimit(const LineState &State) const;
  75. private:
  76. /// Mark the next token as consumed in \p State and modify its stacks
  77. /// accordingly.
  78. unsigned moveStateToNextToken(LineState &State, bool DryRun, bool Newline);
  79. /// Update 'State' according to the next token's fake left parentheses.
  80. void moveStatePastFakeLParens(LineState &State, bool Newline);
  81. /// Update 'State' according to the next token's fake r_parens.
  82. void moveStatePastFakeRParens(LineState &State);
  83. /// Update 'State' according to the next token being one of "(<{[".
  84. void moveStatePastScopeOpener(LineState &State, bool Newline);
  85. /// Update 'State' according to the next token being one of ")>}]".
  86. void moveStatePastScopeCloser(LineState &State);
  87. /// Update 'State' with the next token opening a nested block.
  88. void moveStateToNewBlock(LineState &State);
  89. /// Reformats a raw string literal.
  90. ///
  91. /// \returns An extra penalty induced by reformatting the token.
  92. unsigned reformatRawStringLiteral(const FormatToken &Current,
  93. LineState &State,
  94. const FormatStyle &RawStringStyle,
  95. bool DryRun, bool Newline);
  96. /// If the current token is at the end of the current line, handle
  97. /// the transition to the next line.
  98. unsigned handleEndOfLine(const FormatToken &Current, LineState &State,
  99. bool DryRun, bool AllowBreak, bool Newline);
  100. /// If \p Current is a raw string that is configured to be reformatted,
  101. /// return the style to be used.
  102. llvm::Optional<FormatStyle> getRawStringStyle(const FormatToken &Current,
  103. const LineState &State);
  104. /// If the current token sticks out over the end of the line, break
  105. /// it if possible.
  106. ///
  107. /// \returns A pair (penalty, exceeded), where penalty is the extra penalty
  108. /// when tokens are broken or lines exceed the column limit, and exceeded
  109. /// indicates whether the algorithm purposefully left lines exceeding the
  110. /// column limit.
  111. ///
  112. /// The returned penalty will cover the cost of the additional line breaks
  113. /// and column limit violation in all lines except for the last one. The
  114. /// penalty for the column limit violation in the last line (and in single
  115. /// line tokens) is handled in \c addNextStateToQueue.
  116. ///
  117. /// \p Strict indicates whether reflowing is allowed to leave characters
  118. /// protruding the column limit; if true, lines will be split strictly within
  119. /// the column limit where possible; if false, words are allowed to protrude
  120. /// over the column limit as long as the penalty is less than the penalty
  121. /// of a break.
  122. std::pair<unsigned, bool> breakProtrudingToken(const FormatToken &Current,
  123. LineState &State,
  124. bool AllowBreak, bool DryRun,
  125. bool Strict);
  126. /// Returns the \c BreakableToken starting at \p Current, or nullptr
  127. /// if the current token cannot be broken.
  128. std::unique_ptr<BreakableToken>
  129. createBreakableToken(const FormatToken &Current, LineState &State,
  130. bool AllowBreak);
  131. /// Appends the next token to \p State and updates information
  132. /// necessary for indentation.
  133. ///
  134. /// Puts the token on the current line.
  135. ///
  136. /// If \p DryRun is \c false, also creates and stores the required
  137. /// \c Replacement.
  138. void addTokenOnCurrentLine(LineState &State, bool DryRun,
  139. unsigned ExtraSpaces);
  140. /// Appends the next token to \p State and updates information
  141. /// necessary for indentation.
  142. ///
  143. /// Adds a line break and necessary indentation.
  144. ///
  145. /// If \p DryRun is \c false, also creates and stores the required
  146. /// \c Replacement.
  147. unsigned addTokenOnNewLine(LineState &State, bool DryRun);
  148. /// Calculate the new column for a line wrap before the next token.
  149. unsigned getNewLineColumn(const LineState &State);
  150. /// Adds a multiline token to the \p State.
  151. ///
  152. /// \returns Extra penalty for the first line of the literal: last line is
  153. /// handled in \c addNextStateToQueue, and the penalty for other lines doesn't
  154. /// matter, as we don't change them.
  155. unsigned addMultilineToken(const FormatToken &Current, LineState &State);
  156. /// Returns \c true if the next token starts a multiline string
  157. /// literal.
  158. ///
  159. /// This includes implicitly concatenated strings, strings that will be broken
  160. /// by clang-format and string literals with escaped newlines.
  161. bool nextIsMultilineString(const LineState &State);
  162. FormatStyle Style;
  163. const AdditionalKeywords &Keywords;
  164. const SourceManager &SourceMgr;
  165. WhitespaceManager &Whitespaces;
  166. encoding::Encoding Encoding;
  167. bool BinPackInconclusiveFunctions;
  168. llvm::Regex CommentPragmasRegex;
  169. const RawStringFormatStyleManager RawStringFormats;
  170. };
  171. struct ParenState {
  172. ParenState(const FormatToken *Tok, unsigned Indent, unsigned LastSpace,
  173. bool AvoidBinPacking, bool NoLineBreak)
  174. : Tok(Tok), Indent(Indent), LastSpace(LastSpace),
  175. NestedBlockIndent(Indent), IsAligned(false),
  176. BreakBeforeClosingBrace(false), BreakBeforeClosingParen(false),
  177. AvoidBinPacking(AvoidBinPacking), BreakBeforeParameter(false),
  178. NoLineBreak(NoLineBreak), NoLineBreakInOperand(false),
  179. LastOperatorWrapped(true), ContainsLineBreak(false),
  180. ContainsUnwrappedBuilder(false), AlignColons(true),
  181. ObjCSelectorNameFound(false), HasMultipleNestedBlocks(false),
  182. NestedBlockInlined(false), IsInsideObjCArrayLiteral(false),
  183. IsCSharpGenericTypeConstraint(false), IsChainedConditional(false),
  184. IsWrappedConditional(false), UnindentOperator(false) {}
  185. /// \brief The token opening this parenthesis level, or nullptr if this level
  186. /// is opened by fake parenthesis.
  187. ///
  188. /// Not considered for memoization as it will always have the same value at
  189. /// the same token.
  190. const FormatToken *Tok;
  191. /// The position to which a specific parenthesis level needs to be
  192. /// indented.
  193. unsigned Indent;
  194. /// The position of the last space on each level.
  195. ///
  196. /// Used e.g. to break like:
  197. /// functionCall(Parameter, otherCall(
  198. /// OtherParameter));
  199. unsigned LastSpace;
  200. /// If a block relative to this parenthesis level gets wrapped, indent
  201. /// it this much.
  202. unsigned NestedBlockIndent;
  203. /// The position the first "<<" operator encountered on each level.
  204. ///
  205. /// Used to align "<<" operators. 0 if no such operator has been encountered
  206. /// on a level.
  207. unsigned FirstLessLess = 0;
  208. /// The column of a \c ? in a conditional expression;
  209. unsigned QuestionColumn = 0;
  210. /// The position of the colon in an ObjC method declaration/call.
  211. unsigned ColonPos = 0;
  212. /// The start of the most recent function in a builder-type call.
  213. unsigned StartOfFunctionCall = 0;
  214. /// Contains the start of array subscript expressions, so that they
  215. /// can be aligned.
  216. unsigned StartOfArraySubscripts = 0;
  217. /// If a nested name specifier was broken over multiple lines, this
  218. /// contains the start column of the second line. Otherwise 0.
  219. unsigned NestedNameSpecifierContinuation = 0;
  220. /// If a call expression was broken over multiple lines, this
  221. /// contains the start column of the second line. Otherwise 0.
  222. unsigned CallContinuation = 0;
  223. /// The column of the first variable name in a variable declaration.
  224. ///
  225. /// Used to align further variables if necessary.
  226. unsigned VariablePos = 0;
  227. /// Whether this block's indentation is used for alignment.
  228. bool IsAligned : 1;
  229. /// Whether a newline needs to be inserted before the block's closing
  230. /// brace.
  231. ///
  232. /// We only want to insert a newline before the closing brace if there also
  233. /// was a newline after the beginning left brace.
  234. bool BreakBeforeClosingBrace : 1;
  235. /// Whether a newline needs to be inserted before the block's closing
  236. /// paren.
  237. ///
  238. /// We only want to insert a newline before the closing paren if there also
  239. /// was a newline after the beginning left paren.
  240. bool BreakBeforeClosingParen : 1;
  241. /// Avoid bin packing, i.e. multiple parameters/elements on multiple
  242. /// lines, in this context.
  243. bool AvoidBinPacking : 1;
  244. /// Break after the next comma (or all the commas in this context if
  245. /// \c AvoidBinPacking is \c true).
  246. bool BreakBeforeParameter : 1;
  247. /// Line breaking in this context would break a formatting rule.
  248. bool NoLineBreak : 1;
  249. /// Same as \c NoLineBreak, but is restricted until the end of the
  250. /// operand (including the next ",").
  251. bool NoLineBreakInOperand : 1;
  252. /// True if the last binary operator on this level was wrapped to the
  253. /// next line.
  254. bool LastOperatorWrapped : 1;
  255. /// \c true if this \c ParenState already contains a line-break.
  256. ///
  257. /// The first line break in a certain \c ParenState causes extra penalty so
  258. /// that clang-format prefers similar breaks, i.e. breaks in the same
  259. /// parenthesis.
  260. bool ContainsLineBreak : 1;
  261. /// \c true if this \c ParenState contains multiple segments of a
  262. /// builder-type call on one line.
  263. bool ContainsUnwrappedBuilder : 1;
  264. /// \c true if the colons of the curren ObjC method expression should
  265. /// be aligned.
  266. ///
  267. /// Not considered for memoization as it will always have the same value at
  268. /// the same token.
  269. bool AlignColons : 1;
  270. /// \c true if at least one selector name was found in the current
  271. /// ObjC method expression.
  272. ///
  273. /// Not considered for memoization as it will always have the same value at
  274. /// the same token.
  275. bool ObjCSelectorNameFound : 1;
  276. /// \c true if there are multiple nested blocks inside these parens.
  277. ///
  278. /// Not considered for memoization as it will always have the same value at
  279. /// the same token.
  280. bool HasMultipleNestedBlocks : 1;
  281. /// The start of a nested block (e.g. lambda introducer in C++ or
  282. /// "function" in JavaScript) is not wrapped to a new line.
  283. bool NestedBlockInlined : 1;
  284. /// \c true if the current \c ParenState represents an Objective-C
  285. /// array literal.
  286. bool IsInsideObjCArrayLiteral : 1;
  287. bool IsCSharpGenericTypeConstraint : 1;
  288. /// \brief true if the current \c ParenState represents the false branch of
  289. /// a chained conditional expression (e.g. else-if)
  290. bool IsChainedConditional : 1;
  291. /// \brief true if there conditionnal was wrapped on the first operator (the
  292. /// question mark)
  293. bool IsWrappedConditional : 1;
  294. /// \brief Indicates the indent should be reduced by the length of the
  295. /// operator.
  296. bool UnindentOperator : 1;
  297. bool operator<(const ParenState &Other) const {
  298. if (Indent != Other.Indent)
  299. return Indent < Other.Indent;
  300. if (LastSpace != Other.LastSpace)
  301. return LastSpace < Other.LastSpace;
  302. if (NestedBlockIndent != Other.NestedBlockIndent)
  303. return NestedBlockIndent < Other.NestedBlockIndent;
  304. if (FirstLessLess != Other.FirstLessLess)
  305. return FirstLessLess < Other.FirstLessLess;
  306. if (IsAligned != Other.IsAligned)
  307. return IsAligned;
  308. if (BreakBeforeClosingBrace != Other.BreakBeforeClosingBrace)
  309. return BreakBeforeClosingBrace;
  310. if (BreakBeforeClosingParen != Other.BreakBeforeClosingParen)
  311. return BreakBeforeClosingParen;
  312. if (QuestionColumn != Other.QuestionColumn)
  313. return QuestionColumn < Other.QuestionColumn;
  314. if (AvoidBinPacking != Other.AvoidBinPacking)
  315. return AvoidBinPacking;
  316. if (BreakBeforeParameter != Other.BreakBeforeParameter)
  317. return BreakBeforeParameter;
  318. if (NoLineBreak != Other.NoLineBreak)
  319. return NoLineBreak;
  320. if (LastOperatorWrapped != Other.LastOperatorWrapped)
  321. return LastOperatorWrapped;
  322. if (ColonPos != Other.ColonPos)
  323. return ColonPos < Other.ColonPos;
  324. if (StartOfFunctionCall != Other.StartOfFunctionCall)
  325. return StartOfFunctionCall < Other.StartOfFunctionCall;
  326. if (StartOfArraySubscripts != Other.StartOfArraySubscripts)
  327. return StartOfArraySubscripts < Other.StartOfArraySubscripts;
  328. if (CallContinuation != Other.CallContinuation)
  329. return CallContinuation < Other.CallContinuation;
  330. if (VariablePos != Other.VariablePos)
  331. return VariablePos < Other.VariablePos;
  332. if (ContainsLineBreak != Other.ContainsLineBreak)
  333. return ContainsLineBreak;
  334. if (ContainsUnwrappedBuilder != Other.ContainsUnwrappedBuilder)
  335. return ContainsUnwrappedBuilder;
  336. if (NestedBlockInlined != Other.NestedBlockInlined)
  337. return NestedBlockInlined;
  338. if (IsCSharpGenericTypeConstraint != Other.IsCSharpGenericTypeConstraint)
  339. return IsCSharpGenericTypeConstraint;
  340. if (IsChainedConditional != Other.IsChainedConditional)
  341. return IsChainedConditional;
  342. if (IsWrappedConditional != Other.IsWrappedConditional)
  343. return IsWrappedConditional;
  344. if (UnindentOperator != Other.UnindentOperator)
  345. return UnindentOperator;
  346. return false;
  347. }
  348. };
  349. /// The current state when indenting a unwrapped line.
  350. ///
  351. /// As the indenting tries different combinations this is copied by value.
  352. struct LineState {
  353. /// The number of used columns in the current line.
  354. unsigned Column;
  355. /// The token that needs to be next formatted.
  356. FormatToken *NextToken;
  357. /// \c true if this line contains a continued for-loop section.
  358. bool LineContainsContinuedForLoopSection;
  359. /// \c true if \p NextToken should not continue this line.
  360. bool NoContinuation;
  361. /// The \c NestingLevel at the start of this line.
  362. unsigned StartOfLineLevel;
  363. /// The lowest \c NestingLevel on the current line.
  364. unsigned LowestLevelOnLine;
  365. /// The start column of the string literal, if we're in a string
  366. /// literal sequence, 0 otherwise.
  367. unsigned StartOfStringLiteral;
  368. /// A stack keeping track of properties applying to parenthesis
  369. /// levels.
  370. std::vector<ParenState> Stack;
  371. /// Ignore the stack of \c ParenStates for state comparison.
  372. ///
  373. /// In long and deeply nested unwrapped lines, the current algorithm can
  374. /// be insufficient for finding the best formatting with a reasonable amount
  375. /// of time and memory. Setting this flag will effectively lead to the
  376. /// algorithm not analyzing some combinations. However, these combinations
  377. /// rarely contain the optimal solution: In short, accepting a higher
  378. /// penalty early would need to lead to different values in the \c
  379. /// ParenState stack (in an otherwise identical state) and these different
  380. /// values would need to lead to a significant amount of avoided penalty
  381. /// later.
  382. ///
  383. /// FIXME: Come up with a better algorithm instead.
  384. bool IgnoreStackForComparison;
  385. /// The indent of the first token.
  386. unsigned FirstIndent;
  387. /// The line that is being formatted.
  388. ///
  389. /// Does not need to be considered for memoization because it doesn't change.
  390. const AnnotatedLine *Line;
  391. /// Comparison operator to be able to used \c LineState in \c map.
  392. bool operator<(const LineState &Other) const {
  393. if (NextToken != Other.NextToken)
  394. return NextToken < Other.NextToken;
  395. if (Column != Other.Column)
  396. return Column < Other.Column;
  397. if (LineContainsContinuedForLoopSection !=
  398. Other.LineContainsContinuedForLoopSection)
  399. return LineContainsContinuedForLoopSection;
  400. if (NoContinuation != Other.NoContinuation)
  401. return NoContinuation;
  402. if (StartOfLineLevel != Other.StartOfLineLevel)
  403. return StartOfLineLevel < Other.StartOfLineLevel;
  404. if (LowestLevelOnLine != Other.LowestLevelOnLine)
  405. return LowestLevelOnLine < Other.LowestLevelOnLine;
  406. if (StartOfStringLiteral != Other.StartOfStringLiteral)
  407. return StartOfStringLiteral < Other.StartOfStringLiteral;
  408. if (IgnoreStackForComparison || Other.IgnoreStackForComparison)
  409. return false;
  410. return Stack < Other.Stack;
  411. }
  412. };
  413. } // end namespace format
  414. } // end namespace clang
  415. #endif