Parsing.cpp 9.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278
  1. //===--- Parsing.cpp - Parsing function implementations ---------*- C++ -*-===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. #include "clang/Tooling/Transformer/Parsing.h"
  9. #include "clang/AST/Expr.h"
  10. #include "clang/ASTMatchers/ASTMatchFinder.h"
  11. #include "clang/Basic/CharInfo.h"
  12. #include "clang/Basic/SourceLocation.h"
  13. #include "clang/Lex/Lexer.h"
  14. #include "clang/Tooling/Transformer/RangeSelector.h"
  15. #include "clang/Tooling/Transformer/SourceCode.h"
  16. #include "llvm/ADT/None.h"
  17. #include "llvm/ADT/StringMap.h"
  18. #include "llvm/ADT/StringRef.h"
  19. #include "llvm/Support/Errc.h"
  20. #include "llvm/Support/Error.h"
  21. #include <string>
  22. #include <utility>
  23. #include <vector>
  24. using namespace clang;
  25. using namespace transformer;
  26. // FIXME: This implementation is entirely separate from that of the AST
  27. // matchers. Given the similarity of the languages and uses of the two parsers,
  28. // the two should share a common parsing infrastructure, as should other
  29. // Transformer types. We intend to unify this implementation soon to share as
  30. // much as possible with the AST Matchers parsing.
  31. namespace {
  32. using llvm::Expected;
  33. template <typename... Ts> using RangeSelectorOp = RangeSelector (*)(Ts...);
  34. struct ParseState {
  35. // The remaining input to be processed.
  36. StringRef Input;
  37. // The original input. Not modified during parsing; only for reference in
  38. // error reporting.
  39. StringRef OriginalInput;
  40. };
  41. // Represents an intermediate result returned by a parsing function. Functions
  42. // that don't generate values should use `llvm::None`
  43. template <typename ResultType> struct ParseProgress {
  44. ParseState State;
  45. // Intermediate result generated by the Parser.
  46. ResultType Value;
  47. };
  48. template <typename T> using ExpectedProgress = llvm::Expected<ParseProgress<T>>;
  49. template <typename T> using ParseFunction = ExpectedProgress<T> (*)(ParseState);
  50. class ParseError : public llvm::ErrorInfo<ParseError> {
  51. public:
  52. // Required field for all ErrorInfo derivatives.
  53. static char ID;
  54. ParseError(size_t Pos, std::string ErrorMsg, std::string InputExcerpt)
  55. : Pos(Pos), ErrorMsg(std::move(ErrorMsg)),
  56. Excerpt(std::move(InputExcerpt)) {}
  57. void log(llvm::raw_ostream &OS) const override {
  58. OS << "parse error at position (" << Pos << "): " << ErrorMsg
  59. << ": " + Excerpt;
  60. }
  61. std::error_code convertToErrorCode() const override {
  62. return llvm::inconvertibleErrorCode();
  63. }
  64. // Position of the error in the input string.
  65. size_t Pos;
  66. std::string ErrorMsg;
  67. // Excerpt of the input starting at the error position.
  68. std::string Excerpt;
  69. };
  70. char ParseError::ID;
  71. } // namespace
  72. static const llvm::StringMap<RangeSelectorOp<std::string>> &
  73. getUnaryStringSelectors() {
  74. static const llvm::StringMap<RangeSelectorOp<std::string>> M = {
  75. {"name", name},
  76. {"node", node},
  77. {"statement", statement},
  78. {"statements", statements},
  79. {"member", member},
  80. {"callArgs", callArgs},
  81. {"elseBranch", elseBranch},
  82. {"initListElements", initListElements}};
  83. return M;
  84. }
  85. static const llvm::StringMap<RangeSelectorOp<RangeSelector>> &
  86. getUnaryRangeSelectors() {
  87. static const llvm::StringMap<RangeSelectorOp<RangeSelector>> M = {
  88. {"before", before}, {"after", after}, {"expansion", expansion}};
  89. return M;
  90. }
  91. static const llvm::StringMap<RangeSelectorOp<std::string, std::string>> &
  92. getBinaryStringSelectors() {
  93. static const llvm::StringMap<RangeSelectorOp<std::string, std::string>> M = {
  94. {"encloseNodes", encloseNodes}};
  95. return M;
  96. }
  97. static const llvm::StringMap<RangeSelectorOp<RangeSelector, RangeSelector>> &
  98. getBinaryRangeSelectors() {
  99. static const llvm::StringMap<RangeSelectorOp<RangeSelector, RangeSelector>>
  100. M = {{"enclose", enclose}, {"between", between}};
  101. return M;
  102. }
  103. template <typename Element>
  104. llvm::Optional<Element> findOptional(const llvm::StringMap<Element> &Map,
  105. llvm::StringRef Key) {
  106. auto it = Map.find(Key);
  107. if (it == Map.end())
  108. return llvm::None;
  109. return it->second;
  110. }
  111. template <typename ResultType>
  112. ParseProgress<ResultType> makeParseProgress(ParseState State,
  113. ResultType Result) {
  114. return ParseProgress<ResultType>{State, std::move(Result)};
  115. }
  116. static llvm::Error makeParseError(const ParseState &S, std::string ErrorMsg) {
  117. size_t Pos = S.OriginalInput.size() - S.Input.size();
  118. return llvm::make_error<ParseError>(Pos, std::move(ErrorMsg),
  119. S.OriginalInput.substr(Pos, 20).str());
  120. }
  121. // Returns a new ParseState that advances \c S by \c N characters.
  122. static ParseState advance(ParseState S, size_t N) {
  123. S.Input = S.Input.drop_front(N);
  124. return S;
  125. }
  126. static StringRef consumeWhitespace(StringRef S) {
  127. return S.drop_while([](char c) { return isASCII(c) && isWhitespace(c); });
  128. }
  129. // Parses a single expected character \c c from \c State, skipping preceding
  130. // whitespace. Error if the expected character isn't found.
  131. static ExpectedProgress<llvm::NoneType> parseChar(char c, ParseState State) {
  132. State.Input = consumeWhitespace(State.Input);
  133. if (State.Input.empty() || State.Input.front() != c)
  134. return makeParseError(State,
  135. ("expected char not found: " + llvm::Twine(c)).str());
  136. return makeParseProgress(advance(State, 1), llvm::None);
  137. }
  138. // Parses an identitifer "token" -- handles preceding whitespace.
  139. static ExpectedProgress<std::string> parseId(ParseState State) {
  140. State.Input = consumeWhitespace(State.Input);
  141. auto Id = State.Input.take_while(
  142. [](char c) { return isASCII(c) && isAsciiIdentifierContinue(c); });
  143. if (Id.empty())
  144. return makeParseError(State, "failed to parse name");
  145. return makeParseProgress(advance(State, Id.size()), Id.str());
  146. }
  147. // For consistency with the AST matcher parser and C++ code, node ids are
  148. // written as strings. However, we do not support escaping in the string.
  149. static ExpectedProgress<std::string> parseStringId(ParseState State) {
  150. State.Input = consumeWhitespace(State.Input);
  151. if (State.Input.empty())
  152. return makeParseError(State, "unexpected end of input");
  153. if (!State.Input.consume_front("\""))
  154. return makeParseError(
  155. State,
  156. "expecting string, but encountered other character or end of input");
  157. StringRef Id = State.Input.take_until([](char c) { return c == '"'; });
  158. if (State.Input.size() == Id.size())
  159. return makeParseError(State, "unterminated string");
  160. // Advance past the trailing quote as well.
  161. return makeParseProgress(advance(State, Id.size() + 1), Id.str());
  162. }
  163. // Parses a single element surrounded by parens. `Op` is applied to the parsed
  164. // result to create the result of this function call.
  165. template <typename T>
  166. ExpectedProgress<RangeSelector> parseSingle(ParseFunction<T> ParseElement,
  167. RangeSelectorOp<T> Op,
  168. ParseState State) {
  169. auto P = parseChar('(', State);
  170. if (!P)
  171. return P.takeError();
  172. auto E = ParseElement(P->State);
  173. if (!E)
  174. return E.takeError();
  175. P = parseChar(')', E->State);
  176. if (!P)
  177. return P.takeError();
  178. return makeParseProgress(P->State, Op(std::move(E->Value)));
  179. }
  180. // Parses a pair of elements surrounded by parens and separated by comma. `Op`
  181. // is applied to the parsed results to create the result of this function call.
  182. template <typename T>
  183. ExpectedProgress<RangeSelector> parsePair(ParseFunction<T> ParseElement,
  184. RangeSelectorOp<T, T> Op,
  185. ParseState State) {
  186. auto P = parseChar('(', State);
  187. if (!P)
  188. return P.takeError();
  189. auto Left = ParseElement(P->State);
  190. if (!Left)
  191. return Left.takeError();
  192. P = parseChar(',', Left->State);
  193. if (!P)
  194. return P.takeError();
  195. auto Right = ParseElement(P->State);
  196. if (!Right)
  197. return Right.takeError();
  198. P = parseChar(')', Right->State);
  199. if (!P)
  200. return P.takeError();
  201. return makeParseProgress(P->State,
  202. Op(std::move(Left->Value), std::move(Right->Value)));
  203. }
  204. // Parses input for a stencil operator(single arg ops like AsValue, MemberOp or
  205. // Id operator). Returns StencilType representing the operator on success and
  206. // error if it fails to parse input for an operator.
  207. static ExpectedProgress<RangeSelector>
  208. parseRangeSelectorImpl(ParseState State) {
  209. auto Id = parseId(State);
  210. if (!Id)
  211. return Id.takeError();
  212. std::string OpName = std::move(Id->Value);
  213. if (auto Op = findOptional(getUnaryStringSelectors(), OpName))
  214. return parseSingle(parseStringId, *Op, Id->State);
  215. if (auto Op = findOptional(getUnaryRangeSelectors(), OpName))
  216. return parseSingle(parseRangeSelectorImpl, *Op, Id->State);
  217. if (auto Op = findOptional(getBinaryStringSelectors(), OpName))
  218. return parsePair(parseStringId, *Op, Id->State);
  219. if (auto Op = findOptional(getBinaryRangeSelectors(), OpName))
  220. return parsePair(parseRangeSelectorImpl, *Op, Id->State);
  221. return makeParseError(State, "unknown selector name: " + OpName);
  222. }
  223. Expected<RangeSelector> transformer::parseRangeSelector(llvm::StringRef Input) {
  224. ParseState State = {Input, Input};
  225. ExpectedProgress<RangeSelector> Result = parseRangeSelectorImpl(State);
  226. if (!Result)
  227. return Result.takeError();
  228. State = Result->State;
  229. // Discard any potentially trailing whitespace.
  230. State.Input = consumeWhitespace(State.Input);
  231. if (State.Input.empty())
  232. return Result->Value;
  233. return makeParseError(State, "unexpected input after selector");
  234. }