NamespaceEndCommentsFixer.cpp 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375
  1. //===--- NamespaceEndCommentsFixer.cpp --------------------------*- C++ -*-===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. ///
  9. /// \file
  10. /// This file implements NamespaceEndCommentsFixer, a TokenAnalyzer that
  11. /// fixes namespace end comments.
  12. ///
  13. //===----------------------------------------------------------------------===//
  14. #include "NamespaceEndCommentsFixer.h"
  15. #include "clang/Basic/TokenKinds.h"
  16. #include "llvm/Support/Debug.h"
  17. #include "llvm/Support/Regex.h"
  18. #define DEBUG_TYPE "namespace-end-comments-fixer"
  19. namespace clang {
  20. namespace format {
  21. namespace {
  22. // Iterates all tokens starting from StartTok to EndTok and apply Fn to all
  23. // tokens between them including StartTok and EndTok. Returns the token after
  24. // EndTok.
  25. const FormatToken *
  26. processTokens(const FormatToken *Tok, tok::TokenKind StartTok,
  27. tok::TokenKind EndTok,
  28. llvm::function_ref<void(const FormatToken *)> Fn) {
  29. if (!Tok || Tok->isNot(StartTok))
  30. return Tok;
  31. int NestLevel = 0;
  32. do {
  33. if (Tok->is(StartTok))
  34. ++NestLevel;
  35. else if (Tok->is(EndTok))
  36. --NestLevel;
  37. if (Fn)
  38. Fn(Tok);
  39. Tok = Tok->getNextNonComment();
  40. } while (Tok && NestLevel > 0);
  41. return Tok;
  42. }
  43. const FormatToken *skipAttribute(const FormatToken *Tok) {
  44. if (!Tok)
  45. return nullptr;
  46. if (Tok->is(tok::kw___attribute)) {
  47. Tok = Tok->getNextNonComment();
  48. Tok = processTokens(Tok, tok::l_paren, tok::r_paren, nullptr);
  49. } else if (Tok->is(tok::l_square)) {
  50. Tok = processTokens(Tok, tok::l_square, tok::r_square, nullptr);
  51. }
  52. return Tok;
  53. }
  54. // Computes the name of a namespace given the namespace token.
  55. // Returns "" for anonymous namespace.
  56. std::string computeName(const FormatToken *NamespaceTok) {
  57. assert(NamespaceTok &&
  58. NamespaceTok->isOneOf(tok::kw_namespace, TT_NamespaceMacro) &&
  59. "expecting a namespace token");
  60. std::string name;
  61. const FormatToken *Tok = NamespaceTok->getNextNonComment();
  62. if (NamespaceTok->is(TT_NamespaceMacro)) {
  63. // Collects all the non-comment tokens between opening parenthesis
  64. // and closing parenthesis or comma.
  65. assert(Tok && Tok->is(tok::l_paren) && "expected an opening parenthesis");
  66. Tok = Tok->getNextNonComment();
  67. while (Tok && !Tok->isOneOf(tok::r_paren, tok::comma)) {
  68. name += Tok->TokenText;
  69. Tok = Tok->getNextNonComment();
  70. }
  71. return name;
  72. }
  73. Tok = skipAttribute(Tok);
  74. std::string FirstNSName;
  75. // For `namespace [[foo]] A::B::inline C {` or
  76. // `namespace MACRO1 MACRO2 A::B::inline C {`, returns "A::B::inline C".
  77. // Peek for the first '::' (or '{' or '(')) and then return all tokens from
  78. // one token before that up until the '{'. A '(' might be a macro with
  79. // arguments.
  80. const FormatToken *FirstNSTok = nullptr;
  81. while (Tok && !Tok->isOneOf(tok::l_brace, tok::coloncolon, tok::l_paren)) {
  82. if (FirstNSTok)
  83. FirstNSName += FirstNSTok->TokenText;
  84. FirstNSTok = Tok;
  85. Tok = Tok->getNextNonComment();
  86. }
  87. if (FirstNSTok)
  88. Tok = FirstNSTok;
  89. Tok = skipAttribute(Tok);
  90. FirstNSTok = nullptr;
  91. // Add everything from '(' to ')'.
  92. auto AddToken = [&name](const FormatToken *Tok) { name += Tok->TokenText; };
  93. bool IsPrevColoncolon = false;
  94. bool HasColoncolon = false;
  95. bool IsPrevInline = false;
  96. bool NameFinished = false;
  97. // If we found '::' in name, then it's the name. Otherwise, we can't tell
  98. // which one is name. For example, `namespace A B {`.
  99. while (Tok && Tok->isNot(tok::l_brace)) {
  100. if (FirstNSTok) {
  101. if (!IsPrevInline && HasColoncolon && !IsPrevColoncolon) {
  102. if (FirstNSTok->is(tok::l_paren)) {
  103. FirstNSTok = Tok =
  104. processTokens(FirstNSTok, tok::l_paren, tok::r_paren, AddToken);
  105. continue;
  106. }
  107. if (FirstNSTok->isNot(tok::coloncolon)) {
  108. NameFinished = true;
  109. break;
  110. }
  111. }
  112. name += FirstNSTok->TokenText;
  113. IsPrevColoncolon = FirstNSTok->is(tok::coloncolon);
  114. HasColoncolon = HasColoncolon || IsPrevColoncolon;
  115. if (FirstNSTok->is(tok::kw_inline)) {
  116. name += " ";
  117. IsPrevInline = true;
  118. }
  119. }
  120. FirstNSTok = Tok;
  121. Tok = Tok->getNextNonComment();
  122. const FormatToken *TokAfterAttr = skipAttribute(Tok);
  123. if (TokAfterAttr != Tok)
  124. FirstNSTok = Tok = TokAfterAttr;
  125. }
  126. if (!NameFinished && FirstNSTok && FirstNSTok->isNot(tok::l_brace))
  127. name += FirstNSTok->TokenText;
  128. if (FirstNSName.empty() || HasColoncolon)
  129. return name;
  130. return name.empty() ? FirstNSName : FirstNSName + " " + name;
  131. }
  132. std::string computeEndCommentText(StringRef NamespaceName, bool AddNewline,
  133. const FormatToken *NamespaceTok,
  134. unsigned SpacesToAdd) {
  135. std::string text = "//";
  136. text.append(SpacesToAdd, ' ');
  137. text += NamespaceTok->TokenText;
  138. if (NamespaceTok->is(TT_NamespaceMacro))
  139. text += "(";
  140. else if (!NamespaceName.empty())
  141. text += ' ';
  142. text += NamespaceName;
  143. if (NamespaceTok->is(TT_NamespaceMacro))
  144. text += ")";
  145. if (AddNewline)
  146. text += '\n';
  147. return text;
  148. }
  149. bool hasEndComment(const FormatToken *RBraceTok) {
  150. return RBraceTok->Next && RBraceTok->Next->is(tok::comment);
  151. }
  152. bool validEndComment(const FormatToken *RBraceTok, StringRef NamespaceName,
  153. const FormatToken *NamespaceTok) {
  154. assert(hasEndComment(RBraceTok));
  155. const FormatToken *Comment = RBraceTok->Next;
  156. // Matches a valid namespace end comment.
  157. // Valid namespace end comments don't need to be edited.
  158. static const llvm::Regex NamespaceCommentPattern =
  159. llvm::Regex("^/[/*] *(end (of )?)? *(anonymous|unnamed)? *"
  160. "namespace( +([a-zA-Z0-9:_]+))?\\.? *(\\*/)?$",
  161. llvm::Regex::IgnoreCase);
  162. static const llvm::Regex NamespaceMacroCommentPattern =
  163. llvm::Regex("^/[/*] *(end (of )?)? *(anonymous|unnamed)? *"
  164. "([a-zA-Z0-9_]+)\\(([a-zA-Z0-9:_]*)\\)\\.? *(\\*/)?$",
  165. llvm::Regex::IgnoreCase);
  166. SmallVector<StringRef, 8> Groups;
  167. if (NamespaceTok->is(TT_NamespaceMacro) &&
  168. NamespaceMacroCommentPattern.match(Comment->TokenText, &Groups)) {
  169. StringRef NamespaceTokenText = Groups.size() > 4 ? Groups[4] : "";
  170. // The name of the macro must be used.
  171. if (NamespaceTokenText != NamespaceTok->TokenText)
  172. return false;
  173. } else if (NamespaceTok->isNot(tok::kw_namespace) ||
  174. !NamespaceCommentPattern.match(Comment->TokenText, &Groups)) {
  175. // Comment does not match regex.
  176. return false;
  177. }
  178. StringRef NamespaceNameInComment = Groups.size() > 5 ? Groups[5] : "";
  179. // Anonymous namespace comments must not mention a namespace name.
  180. if (NamespaceName.empty() && !NamespaceNameInComment.empty())
  181. return false;
  182. StringRef AnonymousInComment = Groups.size() > 3 ? Groups[3] : "";
  183. // Named namespace comments must not mention anonymous namespace.
  184. if (!NamespaceName.empty() && !AnonymousInComment.empty())
  185. return false;
  186. if (NamespaceNameInComment == NamespaceName)
  187. return true;
  188. // Has namespace comment flowed onto the next line.
  189. // } // namespace
  190. // // verylongnamespacenamethatdidnotfitonthepreviouscommentline
  191. if (!(Comment->Next && Comment->Next->is(TT_LineComment)))
  192. return false;
  193. static const llvm::Regex CommentPattern = llvm::Regex(
  194. "^/[/*] *( +([a-zA-Z0-9:_]+))?\\.? *(\\*/)?$", llvm::Regex::IgnoreCase);
  195. // Pull out just the comment text.
  196. if (!CommentPattern.match(Comment->Next->TokenText, &Groups))
  197. return false;
  198. NamespaceNameInComment = Groups.size() > 2 ? Groups[2] : "";
  199. return NamespaceNameInComment == NamespaceName;
  200. }
  201. void addEndComment(const FormatToken *RBraceTok, StringRef EndCommentText,
  202. const SourceManager &SourceMgr,
  203. tooling::Replacements *Fixes) {
  204. auto EndLoc = RBraceTok->Tok.getEndLoc();
  205. auto Range = CharSourceRange::getCharRange(EndLoc, EndLoc);
  206. auto Err = Fixes->add(tooling::Replacement(SourceMgr, Range, EndCommentText));
  207. if (Err) {
  208. llvm::errs() << "Error while adding namespace end comment: "
  209. << llvm::toString(std::move(Err)) << "\n";
  210. }
  211. }
  212. void updateEndComment(const FormatToken *RBraceTok, StringRef EndCommentText,
  213. const SourceManager &SourceMgr,
  214. tooling::Replacements *Fixes) {
  215. assert(hasEndComment(RBraceTok));
  216. const FormatToken *Comment = RBraceTok->Next;
  217. auto Range = CharSourceRange::getCharRange(Comment->getStartOfNonWhitespace(),
  218. Comment->Tok.getEndLoc());
  219. auto Err = Fixes->add(tooling::Replacement(SourceMgr, Range, EndCommentText));
  220. if (Err) {
  221. llvm::errs() << "Error while updating namespace end comment: "
  222. << llvm::toString(std::move(Err)) << "\n";
  223. }
  224. }
  225. } // namespace
  226. const FormatToken *
  227. getNamespaceToken(const AnnotatedLine *Line,
  228. const SmallVectorImpl<AnnotatedLine *> &AnnotatedLines) {
  229. if (!Line->Affected || Line->InPPDirective || !Line->startsWith(tok::r_brace))
  230. return nullptr;
  231. size_t StartLineIndex = Line->MatchingOpeningBlockLineIndex;
  232. if (StartLineIndex == UnwrappedLine::kInvalidIndex)
  233. return nullptr;
  234. assert(StartLineIndex < AnnotatedLines.size());
  235. const FormatToken *NamespaceTok = AnnotatedLines[StartLineIndex]->First;
  236. if (NamespaceTok->is(tok::l_brace)) {
  237. // "namespace" keyword can be on the line preceding '{', e.g. in styles
  238. // where BraceWrapping.AfterNamespace is true.
  239. if (StartLineIndex > 0) {
  240. NamespaceTok = AnnotatedLines[StartLineIndex - 1]->First;
  241. if (AnnotatedLines[StartLineIndex - 1]->endsWith(tok::semi))
  242. return nullptr;
  243. }
  244. }
  245. return NamespaceTok->getNamespaceToken();
  246. }
  247. StringRef
  248. getNamespaceTokenText(const AnnotatedLine *Line,
  249. const SmallVectorImpl<AnnotatedLine *> &AnnotatedLines) {
  250. const FormatToken *NamespaceTok = getNamespaceToken(Line, AnnotatedLines);
  251. return NamespaceTok ? NamespaceTok->TokenText : StringRef();
  252. }
  253. NamespaceEndCommentsFixer::NamespaceEndCommentsFixer(const Environment &Env,
  254. const FormatStyle &Style)
  255. : TokenAnalyzer(Env, Style) {}
  256. std::pair<tooling::Replacements, unsigned> NamespaceEndCommentsFixer::analyze(
  257. TokenAnnotator &Annotator, SmallVectorImpl<AnnotatedLine *> &AnnotatedLines,
  258. FormatTokenLexer &Tokens) {
  259. const SourceManager &SourceMgr = Env.getSourceManager();
  260. AffectedRangeMgr.computeAffectedLines(AnnotatedLines);
  261. tooling::Replacements Fixes;
  262. // Spin through the lines and ensure we have balanced braces.
  263. int Braces = 0;
  264. for (AnnotatedLine *Line : AnnotatedLines) {
  265. FormatToken *Tok = Line->First;
  266. while (Tok) {
  267. Braces += Tok->is(tok::l_brace) ? 1 : Tok->is(tok::r_brace) ? -1 : 0;
  268. Tok = Tok->Next;
  269. }
  270. }
  271. // Don't attempt to comment unbalanced braces or this can
  272. // lead to comments being placed on the closing brace which isn't
  273. // the matching brace of the namespace. (occurs during incomplete editing).
  274. if (Braces != 0)
  275. return {Fixes, 0};
  276. std::string AllNamespaceNames;
  277. size_t StartLineIndex = SIZE_MAX;
  278. StringRef NamespaceTokenText;
  279. unsigned int CompactedNamespacesCount = 0;
  280. for (size_t I = 0, E = AnnotatedLines.size(); I != E; ++I) {
  281. const AnnotatedLine *EndLine = AnnotatedLines[I];
  282. const FormatToken *NamespaceTok =
  283. getNamespaceToken(EndLine, AnnotatedLines);
  284. if (!NamespaceTok)
  285. continue;
  286. FormatToken *RBraceTok = EndLine->First;
  287. if (RBraceTok->Finalized)
  288. continue;
  289. RBraceTok->Finalized = true;
  290. const FormatToken *EndCommentPrevTok = RBraceTok;
  291. // Namespaces often end with '};'. In that case, attach namespace end
  292. // comments to the semicolon tokens.
  293. if (RBraceTok->Next && RBraceTok->Next->is(tok::semi))
  294. EndCommentPrevTok = RBraceTok->Next;
  295. if (StartLineIndex == SIZE_MAX)
  296. StartLineIndex = EndLine->MatchingOpeningBlockLineIndex;
  297. std::string NamespaceName = computeName(NamespaceTok);
  298. if (Style.CompactNamespaces) {
  299. if (CompactedNamespacesCount == 0)
  300. NamespaceTokenText = NamespaceTok->TokenText;
  301. if ((I + 1 < E) &&
  302. NamespaceTokenText ==
  303. getNamespaceTokenText(AnnotatedLines[I + 1], AnnotatedLines) &&
  304. StartLineIndex - CompactedNamespacesCount - 1 ==
  305. AnnotatedLines[I + 1]->MatchingOpeningBlockLineIndex &&
  306. !AnnotatedLines[I + 1]->First->Finalized) {
  307. if (hasEndComment(EndCommentPrevTok)) {
  308. // remove end comment, it will be merged in next one
  309. updateEndComment(EndCommentPrevTok, std::string(), SourceMgr, &Fixes);
  310. }
  311. ++CompactedNamespacesCount;
  312. if (!NamespaceName.empty())
  313. AllNamespaceNames = "::" + NamespaceName + AllNamespaceNames;
  314. continue;
  315. }
  316. NamespaceName += AllNamespaceNames;
  317. CompactedNamespacesCount = 0;
  318. AllNamespaceNames = std::string();
  319. }
  320. // The next token in the token stream after the place where the end comment
  321. // token must be. This is either the next token on the current line or the
  322. // first token on the next line.
  323. const FormatToken *EndCommentNextTok = EndCommentPrevTok->Next;
  324. if (EndCommentNextTok && EndCommentNextTok->is(tok::comment))
  325. EndCommentNextTok = EndCommentNextTok->Next;
  326. if (!EndCommentNextTok && I + 1 < E)
  327. EndCommentNextTok = AnnotatedLines[I + 1]->First;
  328. bool AddNewline = EndCommentNextTok &&
  329. EndCommentNextTok->NewlinesBefore == 0 &&
  330. EndCommentNextTok->isNot(tok::eof);
  331. const std::string EndCommentText =
  332. computeEndCommentText(NamespaceName, AddNewline, NamespaceTok,
  333. Style.SpacesInLineCommentPrefix.Minimum);
  334. if (!hasEndComment(EndCommentPrevTok)) {
  335. bool isShort = I - StartLineIndex <= Style.ShortNamespaceLines + 1;
  336. if (!isShort)
  337. addEndComment(EndCommentPrevTok, EndCommentText, SourceMgr, &Fixes);
  338. } else if (!validEndComment(EndCommentPrevTok, NamespaceName,
  339. NamespaceTok)) {
  340. updateEndComment(EndCommentPrevTok, EndCommentText, SourceMgr, &Fixes);
  341. }
  342. StartLineIndex = SIZE_MAX;
  343. }
  344. return {Fixes, 0};
  345. }
  346. } // namespace format
  347. } // namespace clang