WhitespaceManager.h 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359
  1. //===--- WhitespaceManager.h - Format C++ code ------------------*- C++ -*-===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. ///
  9. /// \file
  10. /// WhitespaceManager class manages whitespace around tokens and their
  11. /// replacements.
  12. ///
  13. //===----------------------------------------------------------------------===//
  14. #ifndef LLVM_CLANG_LIB_FORMAT_WHITESPACEMANAGER_H
  15. #define LLVM_CLANG_LIB_FORMAT_WHITESPACEMANAGER_H
  16. #include "TokenAnnotator.h"
  17. #include "clang/Basic/SourceManager.h"
  18. #include "clang/Format/Format.h"
  19. #include "llvm/ADT/SmallVector.h"
  20. #include <algorithm>
  21. #include <string>
  22. #include <tuple>
  23. namespace clang {
  24. namespace format {
  25. /// Manages the whitespaces around tokens and their replacements.
  26. ///
  27. /// This includes special handling for certain constructs, e.g. the alignment of
  28. /// trailing line comments.
  29. ///
  30. /// To guarantee correctness of alignment operations, the \c WhitespaceManager
  31. /// must be informed about every token in the source file; for each token, there
  32. /// must be exactly one call to either \c replaceWhitespace or
  33. /// \c addUntouchableToken.
  34. ///
/// There may be multiple calls to \c replaceWhitespaceInToken for a given
/// token.
  36. class WhitespaceManager {
  37. public:
/// Creates a manager bound to \p SourceMgr and \p Style.
///
/// Both are stored as references (see the \c SourceMgr and \c Style members),
/// so they must outlive this object. \p UseCRLF is stored as-is and reported
/// by \c useCRLF().
WhitespaceManager(const SourceManager &SourceMgr, const FormatStyle &Style,
                  bool UseCRLF)
    : SourceMgr(SourceMgr), Style(Style), UseCRLF(UseCRLF) {}

/// Returns the \c UseCRLF flag this manager was constructed with.
bool useCRLF() const { return UseCRLF; }

/// Infers whether the input is using CRLF.
///
/// NOTE(review): \p DefaultToCRLF is presumably the answer returned when
/// \p Text gives no clear indication; confirm against the definition.
static bool inputUsesCRLF(StringRef Text, bool DefaultToCRLF);
/// Replaces the whitespace in front of \p Tok. Only call once for
/// each \c AnnotatedToken.
///
/// \p StartOfTokenColumn is the column at which the token will start after
/// this replacement. It is needed for determining how \p Spaces is turned
/// into tabs and spaces for some format styles.
///
/// NOTE(review): \p Newlines, \p Spaces, \p isAligned and \p InPPDirective
/// presumably feed the same-named fields of the recorded \c Change (with
/// \p InPPDirective mapping to \c ContinuesPPDirective); confirm against the
/// definition. \p isAligned is spelled lower-case, unlike the other
/// parameters.
void replaceWhitespace(FormatToken &Tok, unsigned Newlines, unsigned Spaces,
                       unsigned StartOfTokenColumn, bool isAligned = false,
                       bool InPPDirective = false);

/// Adds information about an unchangeable token's whitespace.
///
/// Needs to be called for every token for which \c replaceWhitespace
/// was not called.
void addUntouchableToken(const FormatToken &Tok, bool InPPDirective);

/// Registers \p Replacement with this manager and reports failure through
/// the returned \c llvm::Error.
///
/// NOTE(review): presumably this forwards to the \c Replaces member, so the
/// error would signal a conflict with an existing replacement; confirm
/// against the definition.
llvm::Error addReplacement(const tooling::Replacement &Replacement);
/// Inserts or replaces whitespace in the middle of a token.
///
/// Inserts \p PreviousPostfix, \p Newlines, \p Spaces and \p CurrentPrefix
/// (in this order) at \p Offset inside \p Tok, replacing \p ReplaceChars
/// characters.
///
/// Note: \p Spaces can be negative to retain information about initial
/// relative column offset between a line of a block comment and the start of
/// the comment. This negative offset may be compensated by trailing comment
/// alignment here. In all other cases negative \p Spaces will be truncated to
/// 0.
///
/// When \p InPPDirective is true, escaped newlines are inserted. \p Spaces is
/// used to align backslashes correctly.
///
/// Unlike \c replaceWhitespace, \p Spaces is a signed \c int here so that the
/// negative offsets described above can be passed in.
void replaceWhitespaceInToken(const FormatToken &Tok, unsigned Offset,
                              unsigned ReplaceChars,
                              StringRef PreviousPostfix,
                              StringRef CurrentPrefix, bool InPPDirective,
                              unsigned Newlines, int Spaces);

/// Returns all the \c Replacements created during formatting.
///
/// The result is returned by const reference.
/// NOTE(review): presumably it refers to the \c Replaces member and is
/// therefore only valid while this manager is alive; confirm against the
/// definition.
const tooling::Replacements &generateReplacements();
/// Represents a change before a token, a break inside a token,
/// or the layout of an unchanged token (or whitespace within).
struct Change {
  /// Functor to sort changes in original source order.
  ///
  /// Keeps a reference to the \c SourceManager it was constructed with, so
  /// that object must outlive the functor.
  class IsBeforeInFile {
  public:
    IsBeforeInFile(const SourceManager &SourceMgr) : SourceMgr(SourceMgr) {}
    bool operator()(const Change &C1, const Change &C2) const;

  private:
    const SourceManager &SourceMgr;
  };

  /// Creates a \c Change.
  ///
  /// The generated \c Change will replace the characters at
  /// \p OriginalWhitespaceRange with a concatenation of
  /// \p PreviousLinePostfix, \p NewlinesBefore line breaks, \p Spaces spaces
  /// and \p CurrentLinePrefix.
  ///
  /// \p StartOfTokenColumn and \p ContinuesPPDirective will be used to lay
  /// out trailing comments and escaped newlines.
  Change(const FormatToken &Tok, bool CreateReplacement,
         SourceRange OriginalWhitespaceRange, int Spaces,
         unsigned StartOfTokenColumn, unsigned NewlinesBefore,
         StringRef PreviousLinePostfix, StringRef CurrentLinePrefix,
         bool IsAligned, bool ContinuesPPDirective, bool IsInsideToken);

  // The token whose whitespace this change replaces, or in which this change
  // inserts whitespace. Dereferenced by \c indentAndNestingLevel().
  // FIXME: Currently this is not set correctly for breaks inside comments, as
  // the \c BreakableToken is still doing its own alignment.
  const FormatToken *Tok;

  // NOTE(review): presumably false for changes that only record the layout of
  // an untouched token (see \c addUntouchableToken), so that no replacement is
  // emitted for them; confirm against the implementation.
  bool CreateReplacement;

  // Changes might be in the middle of a token, so we cannot just keep the
  // FormatToken around to query its information.
  SourceRange OriginalWhitespaceRange;
  unsigned StartOfTokenColumn;
  unsigned NewlinesBefore;
  std::string PreviousLinePostfix;
  std::string CurrentLinePrefix;
  bool IsAligned;
  bool ContinuesPPDirective;

  // The number of spaces in front of the token or broken part of the token.
  // This will be adapted when aligning tokens.
  // Can be negative to retain information about the initial relative offset
  // of the lines in a block comment. This is used when aligning trailing
  // comments. Uncompensated negative offset is truncated to 0.
  int Spaces;

  // If this change is inside of a token but not at the start of the token or
  // directly after a newline.
  bool IsInsideToken;

  // \c IsTrailingComment, \c TokenLength, \c PreviousEndOfTokenColumn and
  // \c EscapedNewlineColumn will be calculated in
  // \c calculateLineBreakInformation.
  bool IsTrailingComment;
  unsigned TokenLength;
  unsigned PreviousEndOfTokenColumn;
  unsigned EscapedNewlineColumn;

  // These fields are used to retain correct relative line indentation in a
  // block comment when aligning trailing comments.
  //
  // If this Change represents a continuation of a block comment,
  // \c StartOfBlockComment is pointer to the first Change in the block
  // comment. \c IndentationOffset is a relative column offset to this
  // change, so that the correct column can be reconstructed at the end of
  // the alignment process.
  const Change *StartOfBlockComment;
  int IndentationOffset;

  // Depth of conditionals. Computed from tracking fake parenthesis, except
  // it does not increase the indent for "chained" conditionals.
  int ConditionalsLevel;

  // A combination of indent, nesting and conditionals levels, which are used
  // in tandem to compute lexical scope, for the purposes of deciding
  // when to stop consecutive alignment runs.
  //
  // Requires \c Tok to be non-null. \c ConditionalsLevel is an \c int and is
  // converted to \c unsigned when stored in the returned tuple.
  std::tuple<unsigned, unsigned, unsigned> indentAndNestingLevel() const {
    return std::make_tuple(Tok->IndentLevel, Tok->NestingLevel,
                           ConditionalsLevel);
  }
};
  157. private:
  158. struct CellDescription {
  159. unsigned Index = 0;
  160. unsigned Cell = 0;
  161. unsigned EndIndex = 0;
  162. bool HasSplit = false;
  163. CellDescription *NextColumnElement = nullptr;
  164. constexpr bool operator==(const CellDescription &Other) const {
  165. return Index == Other.Index && Cell == Other.Cell &&
  166. EndIndex == Other.EndIndex;
  167. }
  168. constexpr bool operator!=(const CellDescription &Other) const {
  169. return !(*this == Other);
  170. }
  171. };
  172. struct CellDescriptions {
  173. SmallVector<CellDescription> Cells;
  174. SmallVector<unsigned> CellCounts;
  175. unsigned InitialSpaces = 0;
  176. // Determine if every row in the array
  177. // has the same number of columns.
  178. bool isRectangular() const {
  179. if (CellCounts.empty())
  180. return false;
  181. for (auto NumberOfColumns : CellCounts)
  182. if (NumberOfColumns != CellCounts[0])
  183. return false;
  184. return true;
  185. }
  186. };
/// Calculate \c IsTrailingComment, \c TokenLength for the last tokens
/// or token parts in a line and \c PreviousEndOfTokenColumn and
/// \c EscapedNewlineColumn for the first tokens or token parts in a line.
void calculateLineBreakInformation();

/// Align consecutive C/C++ preprocessor macros over all \c Changes.
void alignConsecutiveMacros();

/// Align consecutive assignments over all \c Changes.
void alignConsecutiveAssignments();

/// Align consecutive bitfields over all \c Changes.
void alignConsecutiveBitFields();

/// Align consecutive declarations over all \c Changes.
void alignConsecutiveDeclarations();

/// Align chained conditionals over all \c Changes.
void alignChainedConditionals();

/// Align trailing comments over all \c Changes.
void alignTrailingComments();
/// Align trailing comments from change \p Start to change \p End at
/// the specified \p Column.
///
/// \p Start and \p End identify changes in \c Changes.
/// NOTE(review): whether \p End is inclusive is not visible from this header;
/// check the definition.
void alignTrailingComments(unsigned Start, unsigned End, unsigned Column);

/// Align escaped newlines over all \c Changes.
void alignEscapedNewlines();

/// Align escaped newlines from change \p Start to change \p End at
/// the specified \p Column.
///
/// \p Start and \p End identify changes in \c Changes.
/// NOTE(review): whether \p End is inclusive is not visible from this header;
/// check the definition.
void alignEscapedNewlines(unsigned Start, unsigned End, unsigned Column);
/// Align Array Initializers over all \c Changes.
void alignArrayInitializers();

/// Align Array Initializers from change \p Start to change \p End.
void alignArrayInitializers(unsigned Start, unsigned End);

/// Align Array Initializers being careful to right justify the columns
/// as described by \p CellDescs.
///
/// \p CellDescs is taken by rvalue reference, so callers must pass a
/// temporary or \c std::move their descriptions in.
void alignArrayInitializersRightJustified(CellDescriptions &&CellDescs);

/// Align Array Initializers being careful to left justify the columns
/// as described by \p CellDescs.
///
/// \p CellDescs is taken by rvalue reference, so callers must pass a
/// temporary or \c std::move their descriptions in.
void alignArrayInitializersLeftJustified(CellDescriptions &&CellDescs);
/// Calculate the cell width between two indexes.
///
/// The inline helpers in this class call it with a cell's \c Index and
/// \c EndIndex, which are also used to index \c Changes.
/// NOTE(review): \p WithSpaces presumably makes the result include the
/// whitespace of the changes in the range; confirm against the definition.
unsigned calculateCellWidth(unsigned Start, unsigned End,
                            bool WithSpaces = false) const;

/// Get a set of fully specified CellDescriptions between \p Start and
/// \p End of the change list.
CellDescriptions getCells(unsigned Start, unsigned End);

/// Does this \p Cell contain a split element?
static bool isSplitCell(const CellDescription &Cell);
  230. /// Get the width of the preceding cells from \p Start to \p End.
  231. template <typename I>
  232. auto getNetWidth(const I &Start, const I &End, unsigned InitialSpaces) const {
  233. auto NetWidth = InitialSpaces;
  234. for (auto PrevIter = Start; PrevIter != End; ++PrevIter) {
  235. // If we broke the line the initial spaces are already
  236. // accounted for.
  237. if (Changes[PrevIter->Index].NewlinesBefore > 0)
  238. NetWidth = 0;
  239. NetWidth +=
  240. calculateCellWidth(PrevIter->Index, PrevIter->EndIndex, true) + 1;
  241. }
  242. return NetWidth;
  243. }
  244. /// Get the maximum width of a cell in a sequence of columns.
  245. template <typename I>
  246. unsigned getMaximumCellWidth(I CellIter, unsigned NetWidth) const {
  247. unsigned CellWidth =
  248. calculateCellWidth(CellIter->Index, CellIter->EndIndex, true);
  249. if (Changes[CellIter->Index].NewlinesBefore == 0)
  250. CellWidth += NetWidth;
  251. for (const auto *Next = CellIter->NextColumnElement; Next != nullptr;
  252. Next = Next->NextColumnElement) {
  253. auto ThisWidth = calculateCellWidth(Next->Index, Next->EndIndex, true);
  254. if (Changes[Next->Index].NewlinesBefore == 0)
  255. ThisWidth += NetWidth;
  256. CellWidth = std::max(CellWidth, ThisWidth);
  257. }
  258. return CellWidth;
  259. }
  260. /// Get The maximum width of all columns to a given cell.
  261. template <typename I>
  262. unsigned getMaximumNetWidth(const I &CellStart, const I &CellStop,
  263. unsigned InitialSpaces, unsigned CellCount,
  264. unsigned MaxRowCount) const {
  265. auto MaxNetWidth = getNetWidth(CellStart, CellStop, InitialSpaces);
  266. auto RowCount = 1U;
  267. auto Offset = std::distance(CellStart, CellStop);
  268. for (const auto *Next = CellStop->NextColumnElement; Next != nullptr;
  269. Next = Next->NextColumnElement) {
  270. if (RowCount > MaxRowCount)
  271. break;
  272. auto Start = (CellStart + RowCount * CellCount);
  273. auto End = Start + Offset;
  274. MaxNetWidth =
  275. std::max(MaxNetWidth, getNetWidth(Start, End, InitialSpaces));
  276. ++RowCount;
  277. }
  278. return MaxNetWidth;
  279. }
/// Align a split cell with a newline to the first element in the cell.
void alignToStartOfCell(unsigned Start, unsigned End);

/// Link the Cell pointers in the list of Cells.
///
/// NOTE(review): presumably this fills in \c NextColumnElement for each
/// \c CellDescription and returns the linked descriptions; confirm against
/// the definition.
static CellDescriptions linkCells(CellDescriptions &&CellDesc);

/// Fill \c Replaces with the replacements for all effective changes.
void generateChanges();

/// Stores \p Text as the replacement for the whitespace in \p Range.
void storeReplacement(SourceRange Range, StringRef Text);

// The helpers below write into the caller-provided \p Text buffer.
// NOTE(review): their exact output is defined out of line; the descriptions
// are inferred from the names and parameters and should be confirmed there.

/// Presumably appends \p Newlines line breaks to \p Text.
void appendNewlineText(std::string &Text, unsigned Newlines);

/// Presumably appends \p Newlines escaped (backslash-terminated) line breaks
/// to \p Text, using \p PreviousEndOfTokenColumn and \p EscapedNewlineColumn
/// to position the backslashes.
void appendEscapedNewlineText(std::string &Text, unsigned Newlines,
                              unsigned PreviousEndOfTokenColumn,
                              unsigned EscapedNewlineColumn);

/// Presumably appends the indentation for \p Spaces columns to \p Text,
/// choosing tabs or spaces from the style, \p IndentLevel,
/// \p WhitespaceStartColumn and \p IsAligned.
void appendIndentText(std::string &Text, unsigned IndentLevel,
                      unsigned Spaces, unsigned WhitespaceStartColumn,
                      bool IsAligned);

/// Presumably appends tabs covering \p Indentation columns to \p Text and
/// returns the number of \p Spaces still left to emit.
unsigned appendTabIndent(std::string &Text, unsigned Spaces,
                         unsigned Indentation);
// Every \c Change recorded so far. The alignment helpers address entries of
// this list by index.
SmallVector<Change, 16> Changes;
// Reference supplied to the constructor; not owned.
const SourceManager &SourceMgr;
// Replacements produced from \c Changes (see \c generateChanges).
tooling::Replacements Replaces;
// Reference supplied to the constructor; not owned.
const FormatStyle &Style;
// Flag supplied to the constructor and returned by \c useCRLF().
bool UseCRLF;
  302. };
  303. } // namespace format
  304. } // namespace clang
  305. #endif