RawStringLiteralCheck.cpp 5.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148
  1. //===--- RawStringLiteralCheck.cpp - clang-tidy----------------------------===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. #include "RawStringLiteralCheck.h"
  9. #include "clang/AST/ASTContext.h"
  10. #include "clang/ASTMatchers/ASTMatchFinder.h"
  11. #include "clang/Lex/Lexer.h"
  12. using namespace clang::ast_matchers;
  13. namespace clang::tidy::modernize {
  14. namespace {
  15. bool containsEscapes(StringRef HayStack, StringRef Escapes) {
  16. size_t BackSlash = HayStack.find('\\');
  17. if (BackSlash == StringRef::npos)
  18. return false;
  19. while (BackSlash != StringRef::npos) {
  20. if (!Escapes.contains(HayStack[BackSlash + 1]))
  21. return false;
  22. BackSlash = HayStack.find('\\', BackSlash + 2);
  23. }
  24. return true;
  25. }
  26. bool isRawStringLiteral(StringRef Text) {
  27. // Already a raw string literal if R comes before ".
  28. const size_t QuotePos = Text.find('"');
  29. assert(QuotePos != StringRef::npos);
  30. return (QuotePos > 0) && (Text[QuotePos - 1] == 'R');
  31. }
  32. bool containsEscapedCharacters(const MatchFinder::MatchResult &Result,
  33. const StringLiteral *Literal,
  34. const CharsBitSet &DisallowedChars) {
  35. // FIXME: Handle L"", u8"", u"" and U"" literals.
  36. if (!Literal->isOrdinary())
  37. return false;
  38. for (const unsigned char C : Literal->getBytes())
  39. if (DisallowedChars.test(C))
  40. return false;
  41. CharSourceRange CharRange = Lexer::makeFileCharRange(
  42. CharSourceRange::getTokenRange(Literal->getSourceRange()),
  43. *Result.SourceManager, Result.Context->getLangOpts());
  44. StringRef Text = Lexer::getSourceText(CharRange, *Result.SourceManager,
  45. Result.Context->getLangOpts());
  46. if (Text.empty() || isRawStringLiteral(Text))
  47. return false;
  48. return containsEscapes(Text, R"('\"?x01)");
  49. }
  50. bool containsDelimiter(StringRef Bytes, const std::string &Delimiter) {
  51. return Bytes.find(Delimiter.empty()
  52. ? std::string(R"lit()")lit")
  53. : (")" + Delimiter + R"(")")) != StringRef::npos;
  54. }
  55. std::string asRawStringLiteral(const StringLiteral *Literal,
  56. const std::string &DelimiterStem) {
  57. const StringRef Bytes = Literal->getBytes();
  58. std::string Delimiter;
  59. for (int I = 0; containsDelimiter(Bytes, Delimiter); ++I) {
  60. Delimiter = (I == 0) ? DelimiterStem : DelimiterStem + std::to_string(I);
  61. }
  62. if (Delimiter.empty())
  63. return (R"(R"()" + Bytes + R"lit()")lit").str();
  64. return (R"(R")" + Delimiter + "(" + Bytes + ")" + Delimiter + R"(")").str();
  65. }
  66. } // namespace
  67. RawStringLiteralCheck::RawStringLiteralCheck(StringRef Name,
  68. ClangTidyContext *Context)
  69. : ClangTidyCheck(Name, Context),
  70. DelimiterStem(Options.get("DelimiterStem", "lit")),
  71. ReplaceShorterLiterals(Options.get("ReplaceShorterLiterals", false)) {
  72. // Non-printing characters are disallowed:
  73. // \007 = \a bell
  74. // \010 = \b backspace
  75. // \011 = \t horizontal tab
  76. // \012 = \n new line
  77. // \013 = \v vertical tab
  78. // \014 = \f form feed
  79. // \015 = \r carriage return
  80. // \177 = delete
  81. for (const unsigned char C : StringRef("\000\001\002\003\004\005\006\a"
  82. "\b\t\n\v\f\r\016\017"
  83. "\020\021\022\023\024\025\026\027"
  84. "\030\031\032\033\034\035\036\037"
  85. "\177",
  86. 33))
  87. DisallowedChars.set(C);
  88. // Non-ASCII are disallowed too.
  89. for (unsigned int C = 0x80u; C <= 0xFFu; ++C)
  90. DisallowedChars.set(static_cast<unsigned char>(C));
  91. }
  92. void RawStringLiteralCheck::storeOptions(ClangTidyOptions::OptionMap &Opts) {
  93. Options.store(Opts, "DelimiterStem", DelimiterStem);
  94. Options.store(Opts, "ReplaceShorterLiterals", ReplaceShorterLiterals);
  95. }
  96. void RawStringLiteralCheck::registerMatchers(MatchFinder *Finder) {
  97. Finder->addMatcher(
  98. stringLiteral(unless(hasParent(predefinedExpr()))).bind("lit"), this);
  99. }
  100. void RawStringLiteralCheck::check(const MatchFinder::MatchResult &Result) {
  101. const auto *Literal = Result.Nodes.getNodeAs<StringLiteral>("lit");
  102. if (Literal->getBeginLoc().isMacroID())
  103. return;
  104. if (containsEscapedCharacters(Result, Literal, DisallowedChars)) {
  105. std::string Replacement = asRawStringLiteral(Literal, DelimiterStem);
  106. if (ReplaceShorterLiterals ||
  107. Replacement.length() <=
  108. Lexer::MeasureTokenLength(Literal->getBeginLoc(),
  109. *Result.SourceManager, getLangOpts()))
  110. replaceWithRawStringLiteral(Result, Literal, Replacement);
  111. }
  112. }
  113. void RawStringLiteralCheck::replaceWithRawStringLiteral(
  114. const MatchFinder::MatchResult &Result, const StringLiteral *Literal,
  115. StringRef Replacement) {
  116. CharSourceRange CharRange = Lexer::makeFileCharRange(
  117. CharSourceRange::getTokenRange(Literal->getSourceRange()),
  118. *Result.SourceManager, getLangOpts());
  119. diag(Literal->getBeginLoc(),
  120. "escaped string literal can be written as a raw string literal")
  121. << FixItHint::CreateReplacement(CharRange, Replacement);
  122. }
  123. } // namespace clang::tidy::modernize