NamespaceEndCommentsFixer.cpp 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376
  1. //===--- NamespaceEndCommentsFixer.cpp --------------------------*- C++ -*-===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. ///
  9. /// \file
  10. /// This file implements NamespaceEndCommentsFixer, a TokenAnalyzer that
  11. /// fixes namespace end comments.
  12. ///
  13. //===----------------------------------------------------------------------===//
  14. #include "NamespaceEndCommentsFixer.h"
  15. #include "clang/Basic/TokenKinds.h"
  16. #include "llvm/Support/Debug.h"
  17. #include "llvm/Support/Regex.h"
  18. #define DEBUG_TYPE "namespace-end-comments-fixer"
  19. namespace clang {
  20. namespace format {
  21. namespace {
  22. // Iterates all tokens starting from StartTok to EndTok and apply Fn to all
  23. // tokens between them including StartTok and EndTok. Returns the token after
  24. // EndTok.
  25. const FormatToken *
  26. processTokens(const FormatToken *Tok, tok::TokenKind StartTok,
  27. tok::TokenKind EndTok,
  28. llvm::function_ref<void(const FormatToken *)> Fn) {
  29. if (!Tok || Tok->isNot(StartTok))
  30. return Tok;
  31. int NestLevel = 0;
  32. do {
  33. if (Tok->is(StartTok))
  34. ++NestLevel;
  35. else if (Tok->is(EndTok))
  36. --NestLevel;
  37. if (Fn)
  38. Fn(Tok);
  39. Tok = Tok->getNextNonComment();
  40. } while (Tok && NestLevel > 0);
  41. return Tok;
  42. }
  43. const FormatToken *skipAttribute(const FormatToken *Tok) {
  44. if (!Tok)
  45. return nullptr;
  46. if (Tok->is(tok::kw___attribute)) {
  47. Tok = Tok->getNextNonComment();
  48. Tok = processTokens(Tok, tok::l_paren, tok::r_paren, nullptr);
  49. } else if (Tok->is(tok::l_square)) {
  50. Tok = processTokens(Tok, tok::l_square, tok::r_square, nullptr);
  51. }
  52. return Tok;
  53. }
  54. // Computes the name of a namespace given the namespace token.
  55. // Returns "" for anonymous namespace.
  56. std::string computeName(const FormatToken *NamespaceTok) {
  57. assert(NamespaceTok &&
  58. NamespaceTok->isOneOf(tok::kw_namespace, TT_NamespaceMacro) &&
  59. "expecting a namespace token");
  60. std::string name;
  61. const FormatToken *Tok = NamespaceTok->getNextNonComment();
  62. if (NamespaceTok->is(TT_NamespaceMacro)) {
  63. // Collects all the non-comment tokens between opening parenthesis
  64. // and closing parenthesis or comma.
  65. assert(Tok && Tok->is(tok::l_paren) && "expected an opening parenthesis");
  66. Tok = Tok->getNextNonComment();
  67. while (Tok && !Tok->isOneOf(tok::r_paren, tok::comma)) {
  68. name += Tok->TokenText;
  69. Tok = Tok->getNextNonComment();
  70. }
  71. return name;
  72. }
  73. Tok = skipAttribute(Tok);
  74. std::string FirstNSName;
  75. // For `namespace [[foo]] A::B::inline C {` or
  76. // `namespace MACRO1 MACRO2 A::B::inline C {`, returns "A::B::inline C".
  77. // Peek for the first '::' (or '{' or '(')) and then return all tokens from
  78. // one token before that up until the '{'. A '(' might be a macro with
  79. // arguments.
  80. const FormatToken *FirstNSTok = nullptr;
  81. while (Tok && !Tok->isOneOf(tok::l_brace, tok::coloncolon, tok::l_paren)) {
  82. if (FirstNSTok)
  83. FirstNSName += FirstNSTok->TokenText;
  84. FirstNSTok = Tok;
  85. Tok = Tok->getNextNonComment();
  86. }
  87. if (FirstNSTok)
  88. Tok = FirstNSTok;
  89. Tok = skipAttribute(Tok);
  90. FirstNSTok = nullptr;
  91. // Add everything from '(' to ')'.
  92. auto AddToken = [&name](const FormatToken *Tok) { name += Tok->TokenText; };
  93. bool IsPrevColoncolon = false;
  94. bool HasColoncolon = false;
  95. bool IsPrevInline = false;
  96. bool NameFinished = false;
  97. // If we found '::' in name, then it's the name. Otherwise, we can't tell
  98. // which one is name. For example, `namespace A B {`.
  99. while (Tok && Tok->isNot(tok::l_brace)) {
  100. if (FirstNSTok) {
  101. if (!IsPrevInline && HasColoncolon && !IsPrevColoncolon) {
  102. if (FirstNSTok->is(tok::l_paren)) {
  103. FirstNSTok = Tok =
  104. processTokens(FirstNSTok, tok::l_paren, tok::r_paren, AddToken);
  105. continue;
  106. }
  107. if (FirstNSTok->isNot(tok::coloncolon)) {
  108. NameFinished = true;
  109. break;
  110. }
  111. }
  112. name += FirstNSTok->TokenText;
  113. IsPrevColoncolon = FirstNSTok->is(tok::coloncolon);
  114. HasColoncolon = HasColoncolon || IsPrevColoncolon;
  115. if (FirstNSTok->is(tok::kw_inline)) {
  116. name += " ";
  117. IsPrevInline = true;
  118. }
  119. }
  120. FirstNSTok = Tok;
  121. Tok = Tok->getNextNonComment();
  122. const FormatToken *TokAfterAttr = skipAttribute(Tok);
  123. if (TokAfterAttr != Tok)
  124. FirstNSTok = Tok = TokAfterAttr;
  125. }
  126. if (!NameFinished && FirstNSTok && FirstNSTok->isNot(tok::l_brace))
  127. name += FirstNSTok->TokenText;
  128. if (FirstNSName.empty() || HasColoncolon)
  129. return name;
  130. return name.empty() ? FirstNSName : FirstNSName + " " + name;
  131. }
  132. std::string computeEndCommentText(StringRef NamespaceName, bool AddNewline,
  133. const FormatToken *NamespaceTok,
  134. unsigned SpacesToAdd) {
  135. return "";
  136. std::string text = "//";
  137. text.append(SpacesToAdd, ' ');
  138. text += NamespaceTok->TokenText;
  139. if (NamespaceTok->is(TT_NamespaceMacro))
  140. text += "(";
  141. else if (!NamespaceName.empty())
  142. text += ' ';
  143. text += NamespaceName;
  144. if (NamespaceTok->is(TT_NamespaceMacro))
  145. text += ")";
  146. if (AddNewline)
  147. text += '\n';
  148. return text;
  149. }
  150. bool hasEndComment(const FormatToken *RBraceTok) {
  151. return RBraceTok->Next && RBraceTok->Next->is(tok::comment);
  152. }
  153. bool validEndComment(const FormatToken *RBraceTok, StringRef NamespaceName,
  154. const FormatToken *NamespaceTok) {
  155. assert(hasEndComment(RBraceTok));
  156. const FormatToken *Comment = RBraceTok->Next;
  157. // Matches a valid namespace end comment.
  158. // Valid namespace end comments don't need to be edited.
  159. static const llvm::Regex NamespaceCommentPattern =
  160. llvm::Regex("^/[/*] *(end (of )?)? *(anonymous|unnamed)? *"
  161. "namespace( +([a-zA-Z0-9:_]+))?\\.? *(\\*/)?$",
  162. llvm::Regex::IgnoreCase);
  163. static const llvm::Regex NamespaceMacroCommentPattern =
  164. llvm::Regex("^/[/*] *(end (of )?)? *(anonymous|unnamed)? *"
  165. "([a-zA-Z0-9_]+)\\(([a-zA-Z0-9:_]*)\\)\\.? *(\\*/)?$",
  166. llvm::Regex::IgnoreCase);
  167. SmallVector<StringRef, 8> Groups;
  168. if (NamespaceTok->is(TT_NamespaceMacro) &&
  169. NamespaceMacroCommentPattern.match(Comment->TokenText, &Groups)) {
  170. StringRef NamespaceTokenText = Groups.size() > 4 ? Groups[4] : "";
  171. // The name of the macro must be used.
  172. if (NamespaceTokenText != NamespaceTok->TokenText)
  173. return false;
  174. } else if (NamespaceTok->isNot(tok::kw_namespace) ||
  175. !NamespaceCommentPattern.match(Comment->TokenText, &Groups)) {
  176. // Comment does not match regex.
  177. return false;
  178. }
  179. StringRef NamespaceNameInComment = Groups.size() > 5 ? Groups[5] : "";
  180. // Anonymous namespace comments must not mention a namespace name.
  181. if (NamespaceName.empty() && !NamespaceNameInComment.empty())
  182. return false;
  183. StringRef AnonymousInComment = Groups.size() > 3 ? Groups[3] : "";
  184. // Named namespace comments must not mention anonymous namespace.
  185. if (!NamespaceName.empty() && !AnonymousInComment.empty())
  186. return false;
  187. if (NamespaceNameInComment == NamespaceName)
  188. return true;
  189. // Has namespace comment flowed onto the next line.
  190. // } // namespace
  191. // // verylongnamespacenamethatdidnotfitonthepreviouscommentline
  192. if (!(Comment->Next && Comment->Next->is(TT_LineComment)))
  193. return false;
  194. static const llvm::Regex CommentPattern = llvm::Regex(
  195. "^/[/*] *( +([a-zA-Z0-9:_]+))?\\.? *(\\*/)?$", llvm::Regex::IgnoreCase);
  196. // Pull out just the comment text.
  197. if (!CommentPattern.match(Comment->Next->TokenText, &Groups))
  198. return false;
  199. NamespaceNameInComment = Groups.size() > 2 ? Groups[2] : "";
  200. return NamespaceNameInComment == NamespaceName;
  201. }
  202. void addEndComment(const FormatToken *RBraceTok, StringRef EndCommentText,
  203. const SourceManager &SourceMgr,
  204. tooling::Replacements *Fixes) {
  205. auto EndLoc = RBraceTok->Tok.getEndLoc();
  206. auto Range = CharSourceRange::getCharRange(EndLoc, EndLoc);
  207. auto Err = Fixes->add(tooling::Replacement(SourceMgr, Range, EndCommentText));
  208. if (Err) {
  209. llvm::errs() << "Error while adding namespace end comment: "
  210. << llvm::toString(std::move(Err)) << "\n";
  211. }
  212. }
  213. void updateEndComment(const FormatToken *RBraceTok, StringRef EndCommentText,
  214. const SourceManager &SourceMgr,
  215. tooling::Replacements *Fixes) {
  216. assert(hasEndComment(RBraceTok));
  217. const FormatToken *Comment = RBraceTok->Next;
  218. auto Range = CharSourceRange::getCharRange(Comment->getStartOfNonWhitespace(),
  219. Comment->Tok.getEndLoc());
  220. auto Err = Fixes->add(tooling::Replacement(SourceMgr, Range, EndCommentText));
  221. if (Err) {
  222. llvm::errs() << "Error while updating namespace end comment: "
  223. << llvm::toString(std::move(Err)) << "\n";
  224. }
  225. }
  226. } // namespace
  227. const FormatToken *
  228. getNamespaceToken(const AnnotatedLine *Line,
  229. const SmallVectorImpl<AnnotatedLine *> &AnnotatedLines) {
  230. if (!Line->Affected || Line->InPPDirective || !Line->startsWith(tok::r_brace))
  231. return nullptr;
  232. size_t StartLineIndex = Line->MatchingOpeningBlockLineIndex;
  233. if (StartLineIndex == UnwrappedLine::kInvalidIndex)
  234. return nullptr;
  235. assert(StartLineIndex < AnnotatedLines.size());
  236. const FormatToken *NamespaceTok = AnnotatedLines[StartLineIndex]->First;
  237. if (NamespaceTok->is(tok::l_brace)) {
  238. // "namespace" keyword can be on the line preceding '{', e.g. in styles
  239. // where BraceWrapping.AfterNamespace is true.
  240. if (StartLineIndex > 0) {
  241. NamespaceTok = AnnotatedLines[StartLineIndex - 1]->First;
  242. if (AnnotatedLines[StartLineIndex - 1]->endsWith(tok::semi))
  243. return nullptr;
  244. }
  245. }
  246. return NamespaceTok->getNamespaceToken();
  247. }
  248. StringRef
  249. getNamespaceTokenText(const AnnotatedLine *Line,
  250. const SmallVectorImpl<AnnotatedLine *> &AnnotatedLines) {
  251. const FormatToken *NamespaceTok = getNamespaceToken(Line, AnnotatedLines);
  252. return NamespaceTok ? NamespaceTok->TokenText : StringRef();
  253. }
  254. NamespaceEndCommentsFixer::NamespaceEndCommentsFixer(const Environment &Env,
  255. const FormatStyle &Style)
  256. : TokenAnalyzer(Env, Style) {}
  257. std::pair<tooling::Replacements, unsigned> NamespaceEndCommentsFixer::analyze(
  258. TokenAnnotator &Annotator, SmallVectorImpl<AnnotatedLine *> &AnnotatedLines,
  259. FormatTokenLexer &Tokens) {
  260. const SourceManager &SourceMgr = Env.getSourceManager();
  261. AffectedRangeMgr.computeAffectedLines(AnnotatedLines);
  262. tooling::Replacements Fixes;
  263. // Spin through the lines and ensure we have balanced braces.
  264. int Braces = 0;
  265. for (AnnotatedLine *Line : AnnotatedLines) {
  266. FormatToken *Tok = Line->First;
  267. while (Tok) {
  268. Braces += Tok->is(tok::l_brace) ? 1 : Tok->is(tok::r_brace) ? -1 : 0;
  269. Tok = Tok->Next;
  270. }
  271. }
  272. // Don't attempt to comment unbalanced braces or this can
  273. // lead to comments being placed on the closing brace which isn't
  274. // the matching brace of the namespace. (occurs during incomplete editing).
  275. if (Braces != 0)
  276. return {Fixes, 0};
  277. std::string AllNamespaceNames;
  278. size_t StartLineIndex = SIZE_MAX;
  279. StringRef NamespaceTokenText;
  280. unsigned int CompactedNamespacesCount = 0;
  281. for (size_t I = 0, E = AnnotatedLines.size(); I != E; ++I) {
  282. const AnnotatedLine *EndLine = AnnotatedLines[I];
  283. const FormatToken *NamespaceTok =
  284. getNamespaceToken(EndLine, AnnotatedLines);
  285. if (!NamespaceTok)
  286. continue;
  287. FormatToken *RBraceTok = EndLine->First;
  288. if (RBraceTok->Finalized)
  289. continue;
  290. RBraceTok->Finalized = true;
  291. const FormatToken *EndCommentPrevTok = RBraceTok;
  292. // Namespaces often end with '};'. In that case, attach namespace end
  293. // comments to the semicolon tokens.
  294. if (RBraceTok->Next && RBraceTok->Next->is(tok::semi))
  295. EndCommentPrevTok = RBraceTok->Next;
  296. if (StartLineIndex == SIZE_MAX)
  297. StartLineIndex = EndLine->MatchingOpeningBlockLineIndex;
  298. std::string NamespaceName = computeName(NamespaceTok);
  299. if (Style.CompactNamespaces) {
  300. if (CompactedNamespacesCount == 0)
  301. NamespaceTokenText = NamespaceTok->TokenText;
  302. if ((I + 1 < E) &&
  303. NamespaceTokenText ==
  304. getNamespaceTokenText(AnnotatedLines[I + 1], AnnotatedLines) &&
  305. StartLineIndex - CompactedNamespacesCount - 1 ==
  306. AnnotatedLines[I + 1]->MatchingOpeningBlockLineIndex &&
  307. !AnnotatedLines[I + 1]->First->Finalized) {
  308. if (hasEndComment(EndCommentPrevTok)) {
  309. // remove end comment, it will be merged in next one
  310. updateEndComment(EndCommentPrevTok, std::string(), SourceMgr, &Fixes);
  311. }
  312. ++CompactedNamespacesCount;
  313. if (!NamespaceName.empty())
  314. AllNamespaceNames = "::" + NamespaceName + AllNamespaceNames;
  315. continue;
  316. }
  317. NamespaceName += AllNamespaceNames;
  318. CompactedNamespacesCount = 0;
  319. AllNamespaceNames = std::string();
  320. }
  321. // The next token in the token stream after the place where the end comment
  322. // token must be. This is either the next token on the current line or the
  323. // first token on the next line.
  324. const FormatToken *EndCommentNextTok = EndCommentPrevTok->Next;
  325. if (EndCommentNextTok && EndCommentNextTok->is(tok::comment))
  326. EndCommentNextTok = EndCommentNextTok->Next;
  327. if (!EndCommentNextTok && I + 1 < E)
  328. EndCommentNextTok = AnnotatedLines[I + 1]->First;
  329. bool AddNewline = EndCommentNextTok &&
  330. EndCommentNextTok->NewlinesBefore == 0 &&
  331. EndCommentNextTok->isNot(tok::eof);
  332. const std::string EndCommentText =
  333. computeEndCommentText(NamespaceName, AddNewline, NamespaceTok,
  334. Style.SpacesInLineCommentPrefix.Minimum);
  335. if (!hasEndComment(EndCommentPrevTok)) {
  336. bool isShort = I - StartLineIndex <= Style.ShortNamespaceLines + 1;
  337. if (!isShort)
  338. addEndComment(EndCommentPrevTok, EndCommentText, SourceMgr, &Fixes);
  339. } else if (!validEndComment(EndCommentPrevTok, NamespaceName,
  340. NamespaceTok)) {
  341. updateEndComment(EndCommentPrevTok, EndCommentText, SourceMgr, &Fixes);
  342. }
  343. StartLineIndex = SIZE_MAX;
  344. }
  345. return {Fixes, 0};
  346. }
  347. } // namespace format
  348. } // namespace clang