BranchCloneCheck.cpp 8.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239
  1. //===--- BranchCloneCheck.cpp - clang-tidy --------------------------------===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. #include "BranchCloneCheck.h"
  9. #include "clang/AST/ASTContext.h"
  10. #include "clang/ASTMatchers/ASTMatchFinder.h"
  11. #include "clang/Analysis/CloneDetection.h"
  12. #include "clang/Lex/Lexer.h"
  13. #include "llvm/Support/Casting.h"
  14. using namespace clang;
  15. using namespace clang::ast_matchers;
  16. /// Returns true when the statements are Type I clones of each other.
  17. static bool areStatementsIdentical(const Stmt *LHS, const Stmt *RHS,
  18. const ASTContext &Context) {
  19. if (isa<Expr>(LHS) && isa<Expr>(RHS)) {
  20. // If we have errors in expressions, we will be unable
  21. // to accurately profile and compute hashes for each
  22. // of the left and right statements.
  23. const auto *LHSExpr = llvm::cast<Expr>(LHS);
  24. const auto *RHSExpr = llvm::cast<Expr>(RHS);
  25. if (LHSExpr->containsErrors() && RHSExpr->containsErrors()) {
  26. return false;
  27. }
  28. }
  29. llvm::FoldingSetNodeID DataLHS, DataRHS;
  30. LHS->Profile(DataLHS, Context, false);
  31. RHS->Profile(DataRHS, Context, false);
  32. return (DataLHS == DataRHS);
  33. }
  34. namespace {
  35. /// A branch in a switch may consist of several statements; while a branch in
  36. /// an if/else if/else chain is one statement (which may be a CompoundStmt).
  37. using SwitchBranch = llvm::SmallVector<const Stmt *, 2>;
  38. } // anonymous namespace
  39. /// Determines if the bodies of two branches in a switch statements are Type I
  40. /// clones of each other. This function only examines the body of the branch
  41. /// and ignores the `case X:` or `default:` at the start of the branch.
  42. static bool areSwitchBranchesIdentical(const SwitchBranch LHS,
  43. const SwitchBranch RHS,
  44. const ASTContext &Context) {
  45. if (LHS.size() != RHS.size())
  46. return false;
  47. for (size_t I = 0, Size = LHS.size(); I < Size; I++) {
  48. // NOTE: We strip goto labels and annotations in addition to stripping
  49. // the `case X:` or `default:` labels, but it is very unlikely that this
  50. // would cause false positives in real-world code.
  51. if (!areStatementsIdentical(LHS[I]->stripLabelLikeStatements(),
  52. RHS[I]->stripLabelLikeStatements(), Context)) {
  53. return false;
  54. }
  55. }
  56. return true;
  57. }
  58. namespace clang::tidy::bugprone {
  59. void BranchCloneCheck::registerMatchers(MatchFinder *Finder) {
  60. Finder->addMatcher(
  61. ifStmt(unless(allOf(isConstexpr(), isInTemplateInstantiation())),
  62. stmt().bind("if"),
  63. hasParent(stmt(unless(ifStmt(hasElse(equalsBoundNode("if")))))),
  64. hasElse(stmt().bind("else"))),
  65. this);
  66. Finder->addMatcher(switchStmt().bind("switch"), this);
  67. Finder->addMatcher(conditionalOperator().bind("condOp"), this);
  68. }
  69. void BranchCloneCheck::check(const MatchFinder::MatchResult &Result) {
  70. const ASTContext &Context = *Result.Context;
  71. if (const auto *IS = Result.Nodes.getNodeAs<IfStmt>("if")) {
  72. const Stmt *Then = IS->getThen();
  73. assert(Then && "An IfStmt must have a `then` branch!");
  74. const Stmt *Else = Result.Nodes.getNodeAs<Stmt>("else");
  75. assert(Else && "We only look for `if` statements with an `else` branch!");
  76. if (!isa<IfStmt>(Else)) {
  77. // Just a simple if with no `else if` branch.
  78. if (areStatementsIdentical(Then->IgnoreContainers(),
  79. Else->IgnoreContainers(), Context)) {
  80. diag(IS->getBeginLoc(), "if with identical then and else branches");
  81. diag(IS->getElseLoc(), "else branch starts here", DiagnosticIDs::Note);
  82. }
  83. return;
  84. }
  85. // This is the complicated case when we start an if/else if/else chain.
  86. // To find all the duplicates, we collect all the branches into a vector.
  87. llvm::SmallVector<const Stmt *, 4> Branches;
  88. const IfStmt *Cur = IS;
  89. while (true) {
  90. // Store the `then` branch.
  91. Branches.push_back(Cur->getThen());
  92. Else = Cur->getElse();
  93. // The chain ends if there is no `else` branch.
  94. if (!Else)
  95. break;
  96. // Check if there is another `else if`...
  97. Cur = dyn_cast<IfStmt>(Else);
  98. if (!Cur) {
  99. // ...this is just a plain `else` branch at the end of the chain.
  100. Branches.push_back(Else);
  101. break;
  102. }
  103. }
  104. size_t N = Branches.size();
  105. llvm::BitVector KnownAsClone(N);
  106. for (size_t I = 0; I + 1 < N; I++) {
  107. // We have already seen Branches[i] as a clone of an earlier branch.
  108. if (KnownAsClone[I])
  109. continue;
  110. int NumCopies = 1;
  111. for (size_t J = I + 1; J < N; J++) {
  112. if (KnownAsClone[J] ||
  113. !areStatementsIdentical(Branches[I]->IgnoreContainers(),
  114. Branches[J]->IgnoreContainers(), Context))
  115. continue;
  116. NumCopies++;
  117. KnownAsClone[J] = true;
  118. if (NumCopies == 2) {
  119. // We report the first occurrence only when we find the second one.
  120. diag(Branches[I]->getBeginLoc(),
  121. "repeated branch in conditional chain");
  122. SourceLocation End =
  123. Lexer::getLocForEndOfToken(Branches[I]->getEndLoc(), 0,
  124. *Result.SourceManager, getLangOpts());
  125. if (End.isValid()) {
  126. diag(End, "end of the original", DiagnosticIDs::Note);
  127. }
  128. }
  129. diag(Branches[J]->getBeginLoc(), "clone %0 starts here",
  130. DiagnosticIDs::Note)
  131. << (NumCopies - 1);
  132. }
  133. }
  134. return;
  135. }
  136. if (const auto *CO = Result.Nodes.getNodeAs<ConditionalOperator>("condOp")) {
  137. // We do not try to detect chains of ?: operators.
  138. if (areStatementsIdentical(CO->getTrueExpr(), CO->getFalseExpr(), Context))
  139. diag(CO->getQuestionLoc(),
  140. "conditional operator with identical true and false expressions");
  141. return;
  142. }
  143. if (const auto *SS = Result.Nodes.getNodeAs<SwitchStmt>("switch")) {
  144. const CompoundStmt *Body = dyn_cast_or_null<CompoundStmt>(SS->getBody());
  145. // Code like
  146. // switch (x) case 0: case 1: foobar();
  147. // is legal and calls foobar() if and only if x is either 0 or 1;
  148. // but we do not try to distinguish branches in such code.
  149. if (!Body)
  150. return;
  151. // We will first collect the branches of the switch statements. For the
  152. // sake of simplicity we say that branches are delimited by the SwitchCase
  153. // (`case:` or `default:`) children of Body; that is, we ignore `case:` or
  154. // `default:` labels embedded inside other statements and we do not follow
  155. // the effects of `break` and other manipulation of the control-flow.
  156. llvm::SmallVector<SwitchBranch, 4> Branches;
  157. for (const Stmt *S : Body->body()) {
  158. // If this is a `case` or `default`, we start a new, empty branch.
  159. if (isa<SwitchCase>(S))
  160. Branches.emplace_back();
  161. // There may be code before the first branch (which can be dead code
  162. // and can be code reached either through goto or through case labels
  163. // that are embedded inside e.g. inner compound statements); we do not
  164. // store those statements in branches.
  165. if (!Branches.empty())
  166. Branches.back().push_back(S);
  167. }
  168. auto *End = Branches.end();
  169. auto *BeginCurrent = Branches.begin();
  170. while (BeginCurrent < End) {
  171. auto *EndCurrent = BeginCurrent + 1;
  172. while (EndCurrent < End &&
  173. areSwitchBranchesIdentical(*BeginCurrent, *EndCurrent, Context)) {
  174. ++EndCurrent;
  175. }
  176. // At this point the iterator range {BeginCurrent, EndCurrent} contains a
  177. // complete family of consecutive identical branches.
  178. if (EndCurrent > BeginCurrent + 1) {
  179. diag(BeginCurrent->front()->getBeginLoc(),
  180. "switch has %0 consecutive identical branches")
  181. << static_cast<int>(std::distance(BeginCurrent, EndCurrent));
  182. SourceLocation EndLoc = (EndCurrent - 1)->back()->getEndLoc();
  183. // If the case statement is generated from a macro, it's SourceLocation
  184. // may be invalid, resulting in an assertion failure down the line.
  185. // While not optimal, try the begin location in this case, it's still
  186. // better then nothing.
  187. if (EndLoc.isInvalid())
  188. EndLoc = (EndCurrent - 1)->back()->getBeginLoc();
  189. if (EndLoc.isMacroID())
  190. EndLoc = Context.getSourceManager().getExpansionLoc(EndLoc);
  191. EndLoc = Lexer::getLocForEndOfToken(EndLoc, 0, *Result.SourceManager,
  192. getLangOpts());
  193. if (EndLoc.isValid()) {
  194. diag(EndLoc, "last of these clones ends here", DiagnosticIDs::Note);
  195. }
  196. }
  197. BeginCurrent = EndCurrent;
  198. }
  199. return;
  200. }
  201. llvm_unreachable("No if statement and no switch statement.");
  202. }
  203. } // namespace clang::tidy::bugprone