MisleadingBidirectional.cpp 4.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138
  1. //===--- MisleadingBidirectional.cpp - clang-tidy -------------------------===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. #include "MisleadingBidirectional.h"
  9. #include "clang/Frontend/CompilerInstance.h"
  10. #include "clang/Lex/Preprocessor.h"
  11. #include "llvm/Support/ConvertUTF.h"
  12. #include <optional>
  13. using namespace clang;
  14. using namespace clang::tidy::misc;
  15. static bool containsMisleadingBidi(StringRef Buffer,
  16. bool HonorLineBreaks = true) {
  17. const char *CurPtr = Buffer.begin();
  18. enum BidiChar {
  19. PS = 0x2029,
  20. RLO = 0x202E,
  21. RLE = 0x202B,
  22. LRO = 0x202D,
  23. LRE = 0x202A,
  24. PDF = 0x202C,
  25. RLI = 0x2067,
  26. LRI = 0x2066,
  27. FSI = 0x2068,
  28. PDI = 0x2069
  29. };
  30. SmallVector<BidiChar> BidiContexts;
  31. // Scan each character while maintaining a stack of opened bidi context.
  32. // RLO/RLE/LRO/LRE all are closed by PDF while RLI LRI and FSI are closed by
  33. // PDI. New lines reset the context count. Extra PDF / PDI are ignored.
  34. //
  35. // Warn if we end up with an unclosed context.
  36. while (CurPtr < Buffer.end()) {
  37. unsigned char C = *CurPtr;
  38. if (isASCII(C)) {
  39. ++CurPtr;
  40. bool IsParagrapSep =
  41. (C == 0xA || C == 0xD || (0x1C <= C && C <= 0x1E) || C == 0x85);
  42. bool IsSegmentSep = (C == 0x9 || C == 0xB || C == 0x1F);
  43. if (IsParagrapSep || IsSegmentSep)
  44. BidiContexts.clear();
  45. continue;
  46. }
  47. llvm::UTF32 CodePoint;
  48. llvm::ConversionResult Result = llvm::convertUTF8Sequence(
  49. (const llvm::UTF8 **)&CurPtr, (const llvm::UTF8 *)Buffer.end(),
  50. &CodePoint, llvm::strictConversion);
  51. // If conversion fails, utf-8 is designed so that we can just try next char.
  52. if (Result != llvm::conversionOK) {
  53. ++CurPtr;
  54. continue;
  55. }
  56. // Open a PDF context.
  57. if (CodePoint == RLO || CodePoint == RLE || CodePoint == LRO ||
  58. CodePoint == LRE)
  59. BidiContexts.push_back(PDF);
  60. // Close PDF Context.
  61. else if (CodePoint == PDF) {
  62. if (!BidiContexts.empty() && BidiContexts.back() == PDF)
  63. BidiContexts.pop_back();
  64. }
  65. // Open a PDI Context.
  66. else if (CodePoint == RLI || CodePoint == LRI || CodePoint == FSI)
  67. BidiContexts.push_back(PDI);
  68. // Close a PDI Context.
  69. else if (CodePoint == PDI) {
  70. auto R = llvm::find(llvm::reverse(BidiContexts), PDI);
  71. if (R != BidiContexts.rend())
  72. BidiContexts.resize(BidiContexts.rend() - R - 1);
  73. }
  74. // Line break or equivalent
  75. else if (CodePoint == PS)
  76. BidiContexts.clear();
  77. }
  78. return !BidiContexts.empty();
  79. }
  80. class MisleadingBidirectionalCheck::MisleadingBidirectionalHandler
  81. : public CommentHandler {
  82. public:
  83. MisleadingBidirectionalHandler(MisleadingBidirectionalCheck &Check)
  84. : Check(Check) {}
  85. bool HandleComment(Preprocessor &PP, SourceRange Range) override {
  86. // FIXME: check that we are in a /* */ comment
  87. StringRef Text =
  88. Lexer::getSourceText(CharSourceRange::getCharRange(Range),
  89. PP.getSourceManager(), PP.getLangOpts());
  90. if (containsMisleadingBidi(Text, true))
  91. Check.diag(
  92. Range.getBegin(),
  93. "comment contains misleading bidirectional Unicode characters");
  94. return false;
  95. }
  96. private:
  97. MisleadingBidirectionalCheck &Check;
  98. };
  99. MisleadingBidirectionalCheck::MisleadingBidirectionalCheck(
  100. StringRef Name, ClangTidyContext *Context)
  101. : ClangTidyCheck(Name, Context),
  102. Handler(std::make_unique<MisleadingBidirectionalHandler>(*this)) {}
  103. MisleadingBidirectionalCheck::~MisleadingBidirectionalCheck() = default;
  104. void MisleadingBidirectionalCheck::registerPPCallbacks(
  105. const SourceManager &SM, Preprocessor *PP, Preprocessor *ModuleExpanderPP) {
  106. PP->addCommentHandler(Handler.get());
  107. }
  108. void MisleadingBidirectionalCheck::check(
  109. const ast_matchers::MatchFinder::MatchResult &Result) {
  110. if (const auto *SL = Result.Nodes.getNodeAs<StringLiteral>("strlit")) {
  111. StringRef Literal = SL->getBytes();
  112. if (containsMisleadingBidi(Literal, false))
  113. diag(SL->getBeginLoc(), "string literal contains misleading "
  114. "bidirectional Unicode characters");
  115. }
  116. }
  117. void MisleadingBidirectionalCheck::registerMatchers(
  118. ast_matchers::MatchFinder *Finder) {
  119. Finder->addMatcher(ast_matchers::stringLiteral().bind("strlit"), this);
  120. }