TokenAnnotator.h 7.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228
  1. //===--- TokenAnnotator.h - Format C++ code ---------------------*- C++ -*-===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. ///
  9. /// \file
  10. /// This file declares a token annotator, i.e. creates \c AnnotatedLines out of
  11. /// \c UnwrappedLines and adds required extra information to their tokens.
  12. ///
  13. //===----------------------------------------------------------------------===//
  14. #ifndef LLVM_CLANG_LIB_FORMAT_TOKENANNOTATOR_H
  15. #define LLVM_CLANG_LIB_FORMAT_TOKENANNOTATOR_H
  16. #include "UnwrappedLineParser.h"
  17. #include "clang/Format/Format.h"
  18. namespace clang {
  19. namespace format {
  20. enum LineType {
  21. LT_Invalid,
  22. LT_ImportStatement,
  23. LT_ObjCDecl, // An @interface, @implementation, or @protocol line.
  24. LT_ObjCMethodDecl,
  25. LT_ObjCProperty, // An @property line.
  26. LT_Other,
  27. LT_PreprocessorDirective,
  28. LT_VirtualFunctionDecl,
  29. LT_ArrayOfStructInitializer,
  30. LT_CommentAbovePPDirective,
  31. };
  32. class AnnotatedLine {
  33. public:
  34. AnnotatedLine(const UnwrappedLine &Line)
  35. : First(Line.Tokens.front().Tok), Level(Line.Level),
  36. PPLevel(Line.PPLevel),
  37. MatchingOpeningBlockLineIndex(Line.MatchingOpeningBlockLineIndex),
  38. MatchingClosingBlockLineIndex(Line.MatchingClosingBlockLineIndex),
  39. InPPDirective(Line.InPPDirective),
  40. InPragmaDirective(Line.InPragmaDirective),
  41. InMacroBody(Line.InMacroBody),
  42. MustBeDeclaration(Line.MustBeDeclaration), MightBeFunctionDecl(false),
  43. IsMultiVariableDeclStmt(false), Affected(false),
  44. LeadingEmptyLinesAffected(false), ChildrenAffected(false),
  45. ReturnTypeWrapped(false), IsContinuation(Line.IsContinuation),
  46. FirstStartColumn(Line.FirstStartColumn) {
  47. assert(!Line.Tokens.empty());
  48. // Calculate Next and Previous for all tokens. Note that we must overwrite
  49. // Next and Previous for every token, as previous formatting runs might have
  50. // left them in a different state.
  51. First->Previous = nullptr;
  52. FormatToken *Current = First;
  53. for (const UnwrappedLineNode &Node : llvm::drop_begin(Line.Tokens)) {
  54. Current->Next = Node.Tok;
  55. Node.Tok->Previous = Current;
  56. Current = Current->Next;
  57. Current->Children.clear();
  58. for (const auto &Child : Node.Children) {
  59. Children.push_back(new AnnotatedLine(Child));
  60. Current->Children.push_back(Children.back());
  61. }
  62. }
  63. Last = Current;
  64. Last->Next = nullptr;
  65. }
  66. ~AnnotatedLine() {
  67. for (AnnotatedLine *Child : Children)
  68. delete Child;
  69. FormatToken *Current = First;
  70. while (Current) {
  71. Current->Children.clear();
  72. Current->Role.reset();
  73. Current = Current->Next;
  74. }
  75. }
  76. bool isComment() const {
  77. return First && First->is(tok::comment) && !First->getNextNonComment();
  78. }
  79. /// \c true if this line starts with the given tokens in order, ignoring
  80. /// comments.
  81. template <typename... Ts> bool startsWith(Ts... Tokens) const {
  82. return First && First->startsSequence(Tokens...);
  83. }
  84. /// \c true if this line ends with the given tokens in reversed order,
  85. /// ignoring comments.
  86. /// For example, given tokens [T1, T2, T3, ...], the function returns true if
  87. /// this line is like "... T3 T2 T1".
  88. template <typename... Ts> bool endsWith(Ts... Tokens) const {
  89. return Last && Last->endsSequence(Tokens...);
  90. }
  91. /// \c true if this line looks like a function definition instead of a
  92. /// function declaration. Asserts MightBeFunctionDecl.
  93. bool mightBeFunctionDefinition() const {
  94. assert(MightBeFunctionDecl);
  95. // Try to determine if the end of a stream of tokens is either the
  96. // Definition or the Declaration for a function. It does this by looking for
  97. // the ';' in foo(); and using that it ends with a ; to know this is the
  98. // Definition, however the line could end with
  99. // foo(); /* comment */
  100. // or
  101. // foo(); // comment
  102. // or
  103. // foo() // comment
  104. // endsWith() ignores the comment.
  105. return !endsWith(tok::semi);
  106. }
  107. /// \c true if this line starts a namespace definition.
  108. bool startsWithNamespace() const {
  109. return startsWith(tok::kw_namespace) || startsWith(TT_NamespaceMacro) ||
  110. startsWith(tok::kw_inline, tok::kw_namespace) ||
  111. startsWith(tok::kw_export, tok::kw_namespace);
  112. }
  113. FormatToken *First;
  114. FormatToken *Last;
  115. SmallVector<AnnotatedLine *, 0> Children;
  116. LineType Type;
  117. unsigned Level;
  118. unsigned PPLevel;
  119. size_t MatchingOpeningBlockLineIndex;
  120. size_t MatchingClosingBlockLineIndex;
  121. bool InPPDirective;
  122. bool InPragmaDirective;
  123. bool InMacroBody;
  124. bool MustBeDeclaration;
  125. bool MightBeFunctionDecl;
  126. bool IsMultiVariableDeclStmt;
  127. /// \c True if this line should be formatted, i.e. intersects directly or
  128. /// indirectly with one of the input ranges.
  129. bool Affected;
  130. /// \c True if the leading empty lines of this line intersect with one of the
  131. /// input ranges.
  132. bool LeadingEmptyLinesAffected;
  133. /// \c True if one of this line's children intersects with an input range.
  134. bool ChildrenAffected;
  135. /// \c True if breaking after last attribute group in function return type.
  136. bool ReturnTypeWrapped;
  137. /// \c True if this line should be indented by ContinuationIndent in addition
  138. /// to the normal indention level.
  139. bool IsContinuation;
  140. unsigned FirstStartColumn;
  141. private:
  142. // Disallow copying.
  143. AnnotatedLine(const AnnotatedLine &) = delete;
  144. void operator=(const AnnotatedLine &) = delete;
  145. };
  146. /// Determines extra information about the tokens comprising an
  147. /// \c UnwrappedLine.
  148. class TokenAnnotator {
  149. public:
  150. TokenAnnotator(const FormatStyle &Style, const AdditionalKeywords &Keywords)
  151. : Style(Style), Keywords(Keywords) {}
  152. /// Adapts the indent levels of comment lines to the indent of the
  153. /// subsequent line.
  154. // FIXME: Can/should this be done in the UnwrappedLineParser?
  155. void setCommentLineLevels(SmallVectorImpl<AnnotatedLine *> &Lines) const;
  156. void annotate(AnnotatedLine &Line) const;
  157. void calculateFormattingInformation(AnnotatedLine &Line) const;
  158. private:
  159. /// Calculate the penalty for splitting before \c Tok.
  160. unsigned splitPenalty(const AnnotatedLine &Line, const FormatToken &Tok,
  161. bool InFunctionDecl) const;
  162. bool spaceRequiredBeforeParens(const FormatToken &Right) const;
  163. bool spaceRequiredBetween(const AnnotatedLine &Line, const FormatToken &Left,
  164. const FormatToken &Right) const;
  165. bool spaceRequiredBefore(const AnnotatedLine &Line,
  166. const FormatToken &Right) const;
  167. bool mustBreakBefore(const AnnotatedLine &Line,
  168. const FormatToken &Right) const;
  169. bool canBreakBefore(const AnnotatedLine &Line,
  170. const FormatToken &Right) const;
  171. bool mustBreakForReturnType(const AnnotatedLine &Line) const;
  172. void printDebugInfo(const AnnotatedLine &Line) const;
  173. void calculateUnbreakableTailLengths(AnnotatedLine &Line) const;
  174. void calculateArrayInitializerColumnList(AnnotatedLine &Line) const;
  175. FormatToken *calculateInitializerColumnList(AnnotatedLine &Line,
  176. FormatToken *CurrentToken,
  177. unsigned Depth) const;
  178. FormatStyle::PointerAlignmentStyle
  179. getTokenReferenceAlignment(const FormatToken &PointerOrReference) const;
  180. FormatStyle::PointerAlignmentStyle getTokenPointerOrReferenceAlignment(
  181. const FormatToken &PointerOrReference) const;
  182. const FormatStyle &Style;
  183. const AdditionalKeywords &Keywords;
  184. };
  185. } // end namespace format
  186. } // end namespace clang
  187. #endif