RangeSelector.cpp 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336
  1. //===--- RangeSelector.cpp - RangeSelector implementations ------*- C++ -*-===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. #include "clang/Tooling/Transformer/RangeSelector.h"
  9. #include "clang/AST/Expr.h"
  10. #include "clang/AST/TypeLoc.h"
  11. #include "clang/ASTMatchers/ASTMatchFinder.h"
  12. #include "clang/Basic/SourceLocation.h"
  13. #include "clang/Lex/Lexer.h"
  14. #include "clang/Tooling/Transformer/SourceCode.h"
  15. #include "llvm/ADT/StringRef.h"
  16. #include "llvm/Support/Errc.h"
  17. #include "llvm/Support/Error.h"
  18. #include <string>
  19. #include <utility>
  20. #include <vector>
  21. using namespace clang;
  22. using namespace transformer;
  23. using ast_matchers::MatchFinder;
  24. using llvm::Error;
  25. using llvm::StringError;
  26. using MatchResult = MatchFinder::MatchResult;
  27. static Error invalidArgumentError(Twine Message) {
  28. return llvm::make_error<StringError>(llvm::errc::invalid_argument, Message);
  29. }
  30. static Error typeError(StringRef ID, const ASTNodeKind &Kind) {
  31. return invalidArgumentError("mismatched type (node id=" + ID +
  32. " kind=" + Kind.asStringRef() + ")");
  33. }
  34. static Error typeError(StringRef ID, const ASTNodeKind &Kind,
  35. Twine ExpectedType) {
  36. return invalidArgumentError("mismatched type: expected one of " +
  37. ExpectedType + " (node id=" + ID +
  38. " kind=" + Kind.asStringRef() + ")");
  39. }
  40. static Error missingPropertyError(StringRef ID, Twine Description,
  41. StringRef Property) {
  42. return invalidArgumentError(Description + " requires property '" + Property +
  43. "' (node id=" + ID + ")");
  44. }
  45. static Expected<DynTypedNode> getNode(const ast_matchers::BoundNodes &Nodes,
  46. StringRef ID) {
  47. auto &NodesMap = Nodes.getMap();
  48. auto It = NodesMap.find(ID);
  49. if (It == NodesMap.end())
  50. return invalidArgumentError("ID not bound: " + ID);
  51. return It->second;
  52. }
  53. // FIXME: handling of macros should be configurable.
  54. static SourceLocation findPreviousTokenStart(SourceLocation Start,
  55. const SourceManager &SM,
  56. const LangOptions &LangOpts) {
  57. if (Start.isInvalid() || Start.isMacroID())
  58. return SourceLocation();
  59. SourceLocation BeforeStart = Start.getLocWithOffset(-1);
  60. if (BeforeStart.isInvalid() || BeforeStart.isMacroID())
  61. return SourceLocation();
  62. return Lexer::GetBeginningOfToken(BeforeStart, SM, LangOpts);
  63. }
  64. // Finds the start location of the previous token of kind \p TK.
  65. // FIXME: handling of macros should be configurable.
  66. static SourceLocation findPreviousTokenKind(SourceLocation Start,
  67. const SourceManager &SM,
  68. const LangOptions &LangOpts,
  69. tok::TokenKind TK) {
  70. while (true) {
  71. SourceLocation L = findPreviousTokenStart(Start, SM, LangOpts);
  72. if (L.isInvalid() || L.isMacroID())
  73. return SourceLocation();
  74. Token T;
  75. if (Lexer::getRawToken(L, T, SM, LangOpts, /*IgnoreWhiteSpace=*/true))
  76. return SourceLocation();
  77. if (T.is(TK))
  78. return T.getLocation();
  79. Start = L;
  80. }
  81. }
  82. static SourceLocation findOpenParen(const CallExpr &E, const SourceManager &SM,
  83. const LangOptions &LangOpts) {
  84. SourceLocation EndLoc =
  85. E.getNumArgs() == 0 ? E.getRParenLoc() : E.getArg(0)->getBeginLoc();
  86. return findPreviousTokenKind(EndLoc, SM, LangOpts, tok::TokenKind::l_paren);
  87. }
  88. RangeSelector transformer::before(RangeSelector Selector) {
  89. return [Selector](const MatchResult &Result) -> Expected<CharSourceRange> {
  90. Expected<CharSourceRange> SelectedRange = Selector(Result);
  91. if (!SelectedRange)
  92. return SelectedRange.takeError();
  93. return CharSourceRange::getCharRange(SelectedRange->getBegin());
  94. };
  95. }
  96. RangeSelector transformer::after(RangeSelector Selector) {
  97. return [Selector](const MatchResult &Result) -> Expected<CharSourceRange> {
  98. Expected<CharSourceRange> SelectedRange = Selector(Result);
  99. if (!SelectedRange)
  100. return SelectedRange.takeError();
  101. SourceLocation End = SelectedRange->getEnd();
  102. if (SelectedRange->isTokenRange()) {
  103. // We need to find the actual (exclusive) end location from which to
  104. // create a new source range. However, that's not guaranteed to be valid,
  105. // even if the token location itself is valid. So, we create a token range
  106. // consisting only of the last token, then map that range back to the
  107. // source file. If that succeeds, we have a valid location for the end of
  108. // the generated range.
  109. CharSourceRange Range = Lexer::makeFileCharRange(
  110. CharSourceRange::getTokenRange(SelectedRange->getEnd()),
  111. *Result.SourceManager, Result.Context->getLangOpts());
  112. if (Range.isInvalid())
  113. return invalidArgumentError(
  114. "after: can't resolve sub-range to valid source range");
  115. End = Range.getEnd();
  116. }
  117. return CharSourceRange::getCharRange(End);
  118. };
  119. }
  120. RangeSelector transformer::node(std::string ID) {
  121. return [ID](const MatchResult &Result) -> Expected<CharSourceRange> {
  122. Expected<DynTypedNode> Node = getNode(Result.Nodes, ID);
  123. if (!Node)
  124. return Node.takeError();
  125. return (Node->get<Decl>() != nullptr ||
  126. (Node->get<Stmt>() != nullptr && Node->get<Expr>() == nullptr))
  127. ? tooling::getExtendedRange(*Node, tok::TokenKind::semi,
  128. *Result.Context)
  129. : CharSourceRange::getTokenRange(Node->getSourceRange());
  130. };
  131. }
  132. RangeSelector transformer::statement(std::string ID) {
  133. return [ID](const MatchResult &Result) -> Expected<CharSourceRange> {
  134. Expected<DynTypedNode> Node = getNode(Result.Nodes, ID);
  135. if (!Node)
  136. return Node.takeError();
  137. return tooling::getExtendedRange(*Node, tok::TokenKind::semi,
  138. *Result.Context);
  139. };
  140. }
  141. RangeSelector transformer::enclose(RangeSelector Begin, RangeSelector End) {
  142. return [Begin, End](const MatchResult &Result) -> Expected<CharSourceRange> {
  143. Expected<CharSourceRange> BeginRange = Begin(Result);
  144. if (!BeginRange)
  145. return BeginRange.takeError();
  146. Expected<CharSourceRange> EndRange = End(Result);
  147. if (!EndRange)
  148. return EndRange.takeError();
  149. SourceLocation B = BeginRange->getBegin();
  150. SourceLocation E = EndRange->getEnd();
  151. // Note: we are precluding the possibility of sub-token ranges in the case
  152. // that EndRange is a token range.
  153. if (Result.SourceManager->isBeforeInTranslationUnit(E, B)) {
  154. return invalidArgumentError("Bad range: out of order");
  155. }
  156. return CharSourceRange(SourceRange(B, E), EndRange->isTokenRange());
  157. };
  158. }
  159. RangeSelector transformer::encloseNodes(std::string BeginID,
  160. std::string EndID) {
  161. return transformer::enclose(node(std::move(BeginID)), node(std::move(EndID)));
  162. }
  163. RangeSelector transformer::member(std::string ID) {
  164. return [ID](const MatchResult &Result) -> Expected<CharSourceRange> {
  165. Expected<DynTypedNode> Node = getNode(Result.Nodes, ID);
  166. if (!Node)
  167. return Node.takeError();
  168. if (auto *M = Node->get<clang::MemberExpr>())
  169. return CharSourceRange::getTokenRange(
  170. M->getMemberNameInfo().getSourceRange());
  171. return typeError(ID, Node->getNodeKind(), "MemberExpr");
  172. };
  173. }
  174. RangeSelector transformer::name(std::string ID) {
  175. return [ID](const MatchResult &Result) -> Expected<CharSourceRange> {
  176. Expected<DynTypedNode> N = getNode(Result.Nodes, ID);
  177. if (!N)
  178. return N.takeError();
  179. auto &Node = *N;
  180. if (const auto *D = Node.get<NamedDecl>()) {
  181. if (!D->getDeclName().isIdentifier())
  182. return missingPropertyError(ID, "name", "identifier");
  183. SourceLocation L = D->getLocation();
  184. auto R = CharSourceRange::getTokenRange(L, L);
  185. // Verify that the range covers exactly the name.
  186. // FIXME: extend this code to support cases like `operator +` or
  187. // `foo<int>` for which this range will be too short. Doing so will
  188. // require subcasing `NamedDecl`, because it doesn't provide virtual
  189. // access to the \c DeclarationNameInfo.
  190. if (tooling::getText(R, *Result.Context) != D->getName())
  191. return CharSourceRange();
  192. return R;
  193. }
  194. if (const auto *E = Node.get<DeclRefExpr>()) {
  195. if (!E->getNameInfo().getName().isIdentifier())
  196. return missingPropertyError(ID, "name", "identifier");
  197. SourceLocation L = E->getLocation();
  198. return CharSourceRange::getTokenRange(L, L);
  199. }
  200. if (const auto *I = Node.get<CXXCtorInitializer>()) {
  201. if (!I->isMemberInitializer() && I->isWritten())
  202. return missingPropertyError(ID, "name", "explicit member initializer");
  203. SourceLocation L = I->getMemberLocation();
  204. return CharSourceRange::getTokenRange(L, L);
  205. }
  206. if (const auto *T = Node.get<TypeLoc>()) {
  207. TypeLoc Loc = *T;
  208. auto ET = Loc.getAs<ElaboratedTypeLoc>();
  209. if (!ET.isNull()) {
  210. Loc = ET.getNamedTypeLoc();
  211. }
  212. return CharSourceRange::getTokenRange(Loc.getSourceRange());
  213. }
  214. return typeError(ID, Node.getNodeKind(),
  215. "DeclRefExpr, NamedDecl, CXXCtorInitializer, TypeLoc");
  216. };
  217. }
  218. namespace {
  219. // FIXME: make this available in the public API for users to easily create their
  220. // own selectors.
  221. // Creates a selector from a range-selection function \p Func, which selects a
  222. // range that is relative to a bound node id. \c T is the node type expected by
  223. // \p Func.
  224. template <typename T, CharSourceRange (*Func)(const MatchResult &, const T &)>
  225. class RelativeSelector {
  226. std::string ID;
  227. public:
  228. RelativeSelector(std::string ID) : ID(std::move(ID)) {}
  229. Expected<CharSourceRange> operator()(const MatchResult &Result) {
  230. Expected<DynTypedNode> N = getNode(Result.Nodes, ID);
  231. if (!N)
  232. return N.takeError();
  233. if (const auto *Arg = N->get<T>())
  234. return Func(Result, *Arg);
  235. return typeError(ID, N->getNodeKind());
  236. }
  237. };
  238. } // namespace
  239. // FIXME: Change the following functions from being in an anonymous namespace
  240. // to static functions, after the minimum Visual C++ has _MSC_VER >= 1915
  241. // (equivalent to Visual Studio 2017 v15.8 or higher). Using the anonymous
  242. // namespace works around a bug in earlier versions.
  243. namespace {
  244. // Returns the range of the statements (all source between the braces).
  245. CharSourceRange getStatementsRange(const MatchResult &,
  246. const CompoundStmt &CS) {
  247. return CharSourceRange::getCharRange(CS.getLBracLoc().getLocWithOffset(1),
  248. CS.getRBracLoc());
  249. }
  250. } // namespace
  251. RangeSelector transformer::statements(std::string ID) {
  252. return RelativeSelector<CompoundStmt, getStatementsRange>(std::move(ID));
  253. }
  254. namespace {
  255. // Returns the range of the source between the call's parentheses.
  256. CharSourceRange getCallArgumentsRange(const MatchResult &Result,
  257. const CallExpr &CE) {
  258. return CharSourceRange::getCharRange(
  259. findOpenParen(CE, *Result.SourceManager, Result.Context->getLangOpts())
  260. .getLocWithOffset(1),
  261. CE.getRParenLoc());
  262. }
  263. } // namespace
  264. RangeSelector transformer::callArgs(std::string ID) {
  265. return RelativeSelector<CallExpr, getCallArgumentsRange>(std::move(ID));
  266. }
  267. namespace {
  268. // Returns the range of the elements of the initializer list. Includes all
  269. // source between the braces.
  270. CharSourceRange getElementsRange(const MatchResult &,
  271. const InitListExpr &E) {
  272. return CharSourceRange::getCharRange(E.getLBraceLoc().getLocWithOffset(1),
  273. E.getRBraceLoc());
  274. }
  275. } // namespace
  276. RangeSelector transformer::initListElements(std::string ID) {
  277. return RelativeSelector<InitListExpr, getElementsRange>(std::move(ID));
  278. }
  279. namespace {
  280. // Returns the range of the else branch, including the `else` keyword.
  281. CharSourceRange getElseRange(const MatchResult &Result, const IfStmt &S) {
  282. return tooling::maybeExtendRange(
  283. CharSourceRange::getTokenRange(S.getElseLoc(), S.getEndLoc()),
  284. tok::TokenKind::semi, *Result.Context);
  285. }
  286. } // namespace
  287. RangeSelector transformer::elseBranch(std::string ID) {
  288. return RelativeSelector<IfStmt, getElseRange>(std::move(ID));
  289. }
  290. RangeSelector transformer::expansion(RangeSelector S) {
  291. return [S](const MatchResult &Result) -> Expected<CharSourceRange> {
  292. Expected<CharSourceRange> SRange = S(Result);
  293. if (!SRange)
  294. return SRange.takeError();
  295. return Result.SourceManager->getExpansionRange(*SRange);
  296. };
  297. }