ast_builder.cpp 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499
  1. #include "ast_builder.h"
  2. #include "ast_nodes.h"
  3. #include "parse_double.h"
  4. #include <yql/essentials/core/issue/protos/issue_id.pb.h>
  5. #include <yql/essentials/minikql/jsonpath/rewrapper/proto/serialization.pb.h>
  6. #include <yql/essentials/ast/yql_ast_escaping.h>
  7. #include <util/generic/singleton.h>
  8. #include <util/system/compiler.h>
  9. #include <util/string/cast.h>
  10. #include <util/string/builder.h>
  11. #include <util/charset/utf8.h>
  12. #include <util/system/cpu_id.h>
  13. #include <cmath>
  14. using namespace NYql;
  15. using namespace NYql::NJsonPath;
  16. using namespace NJsonPathGenerated;
  17. using namespace NReWrapper;
  18. namespace {
  19. constexpr ui32 RegexpLibId = NReWrapper::TSerialization::YDB_REWRAPPER_LIB_ID;
  20. TPosition GetPos(const TToken& token) {
  21. return TPosition(token.GetColumn(), token.GetLine());
  22. }
  23. bool TryStringContent(const TString& str, TString& result, TString& error, bool onlyDoubleQuoted = true) {
  24. result.clear();
  25. error.clear();
  26. const bool doubleQuoted = str.StartsWith('"') && str.EndsWith('"');
  27. const bool singleQuoted = str.StartsWith('\'') && str.EndsWith('\'');
  28. if (!doubleQuoted && !singleQuoted) {
  29. error = "String must be quoted";
  30. return false;
  31. }
  32. if (singleQuoted && onlyDoubleQuoted) {
  33. error = "Only double quoted strings allowed";
  34. return false;
  35. }
  36. char quoteChar = doubleQuoted ? '"' : '\'';
  37. size_t readBytes = 0;
  38. TStringBuf atom(str);
  39. atom.Skip(1);
  40. TStringOutput sout(result);
  41. result.reserve(str.size());
  42. auto unescapeResult = UnescapeArbitraryAtom(atom, quoteChar, &sout, &readBytes);
  43. if (unescapeResult == EUnescapeResult::OK) {
  44. return true;
  45. } else {
  46. error = UnescapeResultToString(unescapeResult);
  47. return false;
  48. }
  49. }
  50. }
  51. TAstBuilder::TAstBuilder(TIssues& issues)
  52. : Issues(issues)
  53. {
  54. }
  55. void TAstBuilder::Error(TPosition pos, const TStringBuf message) {
  56. Issues.AddIssue(pos, message);
  57. Issues.back().SetCode(TIssuesIds::JSONPATH_PARSE_ERROR, TSeverityIds::S_ERROR);
  58. }
  59. TArrayAccessNode::TSubscript TAstBuilder::BuildArraySubscript(const TRule_array_subscript& node) {
  60. TAstNodePtr from = BuildExpr(node.GetRule_expr1());
  61. TAstNodePtr to = nullptr;
  62. if (node.HasBlock2()) {
  63. to = BuildExpr(node.GetBlock2().GetRule_expr2());
  64. }
  65. return {from, to};
  66. }
  67. TAstNodePtr TAstBuilder::BuildArrayAccessor(const TRule_array_accessor& node, TAstNodePtr input) {
  68. TVector<TArrayAccessNode::TSubscript> subscripts;
  69. subscripts.reserve(1 + node.Block3Size());
  70. subscripts.push_back(BuildArraySubscript(node.GetRule_array_subscript2()));
  71. for (size_t i = 0; i < node.Block3Size(); i++) {
  72. subscripts.push_back(BuildArraySubscript(node.GetBlock3(i).GetRule_array_subscript2()));
  73. }
  74. return new TArrayAccessNode(GetPos(node.GetToken1()), subscripts, input);
  75. }
  76. TAstNodePtr TAstBuilder::BuildWildcardArrayAccessor(const TRule_wildcard_array_accessor& node, TAstNodePtr input) {
  77. return new TWildcardArrayAccessNode(GetPos(node.GetToken1()), input);
  78. }
  79. TString TAstBuilder::BuildIdentifier(const TRule_identifier& node) {
  80. switch (node.GetAltCase()) {
  81. case TRule_identifier::kAltIdentifier1:
  82. return node.GetAlt_identifier1().GetToken1().GetValue();
  83. case TRule_identifier::kAltIdentifier2:
  84. return node.GetAlt_identifier2().GetRule_keyword1().GetToken1().GetValue();
  85. case TRule_identifier::ALT_NOT_SET:
  86. Y_ABORT("Alternative for 'identifier' rule is not set");
  87. }
  88. }
  89. TAstNodePtr TAstBuilder::BuildMemberAccessor(const TRule_member_accessor& node, TAstNodePtr input) {
  90. TString name;
  91. const auto& nameBlock = node.GetBlock2();
  92. switch (nameBlock.GetAltCase()) {
  93. case TRule_member_accessor_TBlock2::kAlt1:
  94. name = BuildIdentifier(nameBlock.GetAlt1().GetRule_identifier1());
  95. break;
  96. case TRule_member_accessor_TBlock2::kAlt2: {
  97. const auto& token = nameBlock.GetAlt2().GetToken1();
  98. TString error;
  99. if (!TryStringContent(token.GetValue(), name, error, /* onlyDoubleQuoted */ false)) {
  100. Error(GetPos(token), error);
  101. return nullptr;
  102. }
  103. break;
  104. }
  105. case TRule_member_accessor_TBlock2::ALT_NOT_SET:
  106. Y_ABORT("Alternative for 'member_accessor' rule is not set");
  107. }
  108. return new TMemberAccessNode(GetPos(node.GetToken1()), name, input);
  109. }
  110. TAstNodePtr TAstBuilder::BuildWildcardMemberAccessor(const TRule_wildcard_member_accessor& node, TAstNodePtr input) {
  111. const auto& token = node.GetToken2();
  112. return new TWildcardMemberAccessNode(GetPos(token), input);
  113. }
  114. TAstNodePtr TAstBuilder::BuildFilter(const TRule_filter& node, TAstNodePtr input) {
  115. const auto predicate = BuildExpr(node.GetRule_expr3());
  116. return new TFilterPredicateNode(GetPos(node.GetToken2()), predicate, input);
  117. }
  118. TAstNodePtr TAstBuilder::BuildMethod(const TRule_method& node, TAstNodePtr input) {
  119. const auto& token = node.GetToken2();
  120. const auto pos = GetPos(token);
  121. const auto& value = token.GetValue();
  122. auto type = EMethodType::Double;
  123. if (value == "abs") {
  124. type = EMethodType::Abs;
  125. } else if (value == "floor") {
  126. type = EMethodType::Floor;
  127. } else if (value == "ceiling") {
  128. type = EMethodType::Ceiling;
  129. } else if (value == "type") {
  130. type = EMethodType::Type;
  131. } else if (value == "size") {
  132. type = EMethodType::Size;
  133. } else if (value == "keyvalue") {
  134. type = EMethodType::KeyValue;
  135. }
  136. return new TMethodCallNode(pos, type, input);
  137. }
  138. TAstNodePtr TAstBuilder::BuildAccessorOp(const TRule_accessor_op& node, TAstNodePtr input) {
  139. switch (node.GetAltCase()) {
  140. case TRule_accessor_op::kAltAccessorOp1:
  141. return BuildMemberAccessor(node.GetAlt_accessor_op1().GetRule_member_accessor1(), input);
  142. case TRule_accessor_op::kAltAccessorOp2:
  143. return BuildWildcardMemberAccessor(node.GetAlt_accessor_op2().GetRule_wildcard_member_accessor1(), input);
  144. case TRule_accessor_op::kAltAccessorOp3:
  145. return BuildArrayAccessor(node.GetAlt_accessor_op3().GetRule_array_accessor1(), input);
  146. case TRule_accessor_op::kAltAccessorOp4:
  147. return BuildWildcardArrayAccessor(node.GetAlt_accessor_op4().GetRule_wildcard_array_accessor1(), input);
  148. case TRule_accessor_op::kAltAccessorOp5:
  149. return BuildFilter(node.GetAlt_accessor_op5().GetRule_filter1(), input);
  150. case TRule_accessor_op::kAltAccessorOp6:
  151. return BuildMethod(node.GetAlt_accessor_op6().GetRule_method1(), input);
  152. case TRule_accessor_op::ALT_NOT_SET:
  153. Y_ABORT("Alternative for 'accessor_op' rule is not set");
  154. }
  155. }
  156. TAstNodePtr TAstBuilder::BuildPrimary(const TRule_primary& node) {
  157. switch (node.GetAltCase()) {
  158. case TRule_primary::kAltPrimary1: {
  159. const auto& token = node.GetAlt_primary1().GetToken1();
  160. const auto& numberString = token.GetValue();
  161. const double parsedValue = ParseDouble(numberString);
  162. if (Y_UNLIKELY(std::isnan(parsedValue))) {
  163. Y_ABORT("Invalid number was allowed by JsonPath grammar");
  164. }
  165. if (Y_UNLIKELY(std::isinf(parsedValue))) {
  166. Error(GetPos(token), "Number literal is infinity");
  167. return nullptr;
  168. }
  169. return new TNumberLiteralNode(GetPos(token), parsedValue);
  170. }
  171. case TRule_primary::kAltPrimary2: {
  172. const auto& token = node.GetAlt_primary2().GetToken1();
  173. return new TContextObjectNode(GetPos(token));
  174. }
  175. case TRule_primary::kAltPrimary3: {
  176. const auto& token = node.GetAlt_primary3().GetToken1();
  177. return new TLastArrayIndexNode(GetPos(token));
  178. }
  179. case TRule_primary::kAltPrimary4: {
  180. const auto& primary = node.GetAlt_primary4().GetBlock1();
  181. const auto input = BuildExpr(primary.GetRule_expr2());
  182. if (primary.HasBlock4()) {
  183. const auto& token = primary.GetBlock4().GetToken1();
  184. return new TIsUnknownPredicateNode(GetPos(token), input);
  185. }
  186. return input;
  187. }
  188. case TRule_primary::kAltPrimary5: {
  189. const auto& token = node.GetAlt_primary5().GetToken1();
  190. return new TVariableNode(GetPos(token), token.GetValue().substr(1));
  191. }
  192. case TRule_primary::kAltPrimary6: {
  193. const auto& token = node.GetAlt_primary6().GetToken1();
  194. return new TBooleanLiteralNode(GetPos(token), true);
  195. }
  196. case TRule_primary::kAltPrimary7: {
  197. const auto& token = node.GetAlt_primary7().GetToken1();
  198. return new TBooleanLiteralNode(GetPos(token), false);
  199. }
  200. case TRule_primary::kAltPrimary8: {
  201. const auto& token = node.GetAlt_primary8().GetToken1();
  202. return new TNullLiteralNode(GetPos(token));
  203. }
  204. case TRule_primary::kAltPrimary9: {
  205. const auto& token = node.GetAlt_primary9().GetToken1();
  206. TString value;
  207. TString error;
  208. if (!TryStringContent(token.GetValue(), value, error)) {
  209. Error(GetPos(token), error);
  210. return nullptr;
  211. }
  212. return new TStringLiteralNode(GetPos(token), value);
  213. }
  214. case TRule_primary::kAltPrimary10: {
  215. const auto& token = node.GetAlt_primary10().GetToken1();
  216. return new TFilterObjectNode(GetPos(token));
  217. }
  218. case TRule_primary::ALT_NOT_SET:
  219. Y_ABORT("Alternative for 'primary' rule is not set");
  220. }
  221. }
  222. TAstNodePtr TAstBuilder::BuildAccessorExpr(const TRule_accessor_expr& node) {
  223. TAstNodePtr input = BuildPrimary(node.GetRule_primary1());
  224. for (size_t i = 0; i < node.Block2Size(); i++) {
  225. input = BuildAccessorOp(node.GetBlock2(i).GetRule_accessor_op1(), input);
  226. }
  227. return input;
  228. }
  229. TAstNodePtr TAstBuilder::BuildPlainExpr(const TRule_plain_expr& node) {
  230. return BuildAccessorExpr(node.GetRule_accessor_expr1());
  231. }
  232. TAstNodePtr TAstBuilder::BuildLikeRegexExpr(const TRule_like_regex_expr& node, TAstNodePtr input) {
  233. const auto& regexToken = node.GetToken2();
  234. TString regex;
  235. TString error;
  236. if (!TryStringContent(regexToken.GetValue(), regex, error)) {
  237. Error(GetPos(regexToken), error);
  238. return nullptr;
  239. }
  240. ui32 parsedFlags = 0;
  241. if (node.HasBlock3()) {
  242. TString flags;
  243. const auto& flagsToken = node.GetBlock3().GetToken2();
  244. if (!TryStringContent(flagsToken.GetValue(), flags, error)) {
  245. Error(GetPos(flagsToken), error);
  246. return nullptr;
  247. }
  248. for (char flag : flags) {
  249. switch (flag) {
  250. case 'i':
  251. parsedFlags |= FLAGS_CASELESS;
  252. break;
  253. default:
  254. Error(GetPos(flagsToken), TStringBuilder() << "Unsupported regex flag '" << flag << "'");
  255. break;
  256. }
  257. }
  258. }
  259. IRePtr compiledRegex;
  260. try {
  261. compiledRegex = NDispatcher::Compile(regex, parsedFlags, RegexpLibId);
  262. } catch (const NReWrapper::TCompileException& e) {
  263. Error(GetPos(regexToken), e.AsStrBuf());
  264. return nullptr;
  265. }
  266. return new TLikeRegexPredicateNode(GetPos(node.GetToken1()), input, std::move(compiledRegex));
  267. }
  268. TAstNodePtr TAstBuilder::BuildPredicateExpr(const TRule_predicate_expr& node) {
  269. switch (node.GetAltCase()) {
  270. case TRule_predicate_expr::kAltPredicateExpr1: {
  271. const auto& predicate = node.GetAlt_predicate_expr1().GetBlock1();
  272. const auto input = BuildPlainExpr(predicate.GetRule_plain_expr1());
  273. if (!predicate.HasBlock2()) {
  274. return input;
  275. }
  276. const auto& block = predicate.GetBlock2();
  277. switch (block.GetAltCase()) {
  278. case TRule_predicate_expr_TAlt1_TBlock1_TBlock2::kAlt1: {
  279. const auto& innerBlock = block.GetAlt1().GetRule_starts_with_expr1();
  280. const auto& prefix = BuildPlainExpr(innerBlock.GetRule_plain_expr3());
  281. return new TStartsWithPredicateNode(GetPos(innerBlock.GetToken1()), input, prefix);
  282. }
  283. case TRule_predicate_expr_TAlt1_TBlock1_TBlock2::kAlt2: {
  284. return BuildLikeRegexExpr(block.GetAlt2().GetRule_like_regex_expr1(), input);
  285. }
  286. case TRule_predicate_expr_TAlt1_TBlock1_TBlock2::ALT_NOT_SET:
  287. Y_ABORT("Alternative for inner block of 'predicate_expr' rule is not set");
  288. }
  289. Y_UNREACHABLE();
  290. }
  291. case TRule_predicate_expr::kAltPredicateExpr2: {
  292. const auto& predicate = node.GetAlt_predicate_expr2().GetBlock1();
  293. const auto input = BuildExpr(predicate.GetRule_expr3());
  294. return new TExistsPredicateNode(GetPos(predicate.GetToken1()), input);
  295. }
  296. case TRule_predicate_expr::ALT_NOT_SET:
  297. Y_ABORT("Alternative for 'predicate' rule is not set");
  298. }
  299. Y_UNREACHABLE();
  300. }
  301. TAstNodePtr TAstBuilder::BuildUnaryExpr(const TRule_unary_expr& node) {
  302. const auto predicateExpr = BuildPredicateExpr(node.GetRule_predicate_expr2());
  303. if (!node.HasBlock1()) {
  304. return predicateExpr;
  305. }
  306. const auto& opToken = node.GetBlock1().GetToken1();
  307. const auto& opValue = opToken.GetValue();
  308. auto operation = EUnaryOperation::Plus;
  309. if (opValue == "-") {
  310. operation = EUnaryOperation::Minus;
  311. } else if (opValue == "!") {
  312. operation = EUnaryOperation::Not;
  313. }
  314. return new TUnaryOperationNode(GetPos(opToken), operation, predicateExpr);
  315. }
  316. TAstNodePtr TAstBuilder::BuildMulExpr(const TRule_mul_expr& node) {
  317. TAstNodePtr result = BuildUnaryExpr(node.GetRule_unary_expr1());
  318. for (size_t i = 0; i < node.Block2Size(); i++) {
  319. const auto& block = node.GetBlock2(i);
  320. const auto& opToken = block.GetToken1();
  321. const auto& opValue = opToken.GetValue();
  322. auto operation = EBinaryOperation::Multiply;
  323. if (opValue == "/") {
  324. operation = EBinaryOperation::Divide;
  325. } else if (opValue == "%") {
  326. operation = EBinaryOperation::Modulo;
  327. }
  328. const auto rightOperand = BuildUnaryExpr(block.GetRule_unary_expr2());
  329. result = new TBinaryOperationNode(GetPos(opToken), operation, result, rightOperand);
  330. }
  331. return result;
  332. }
  333. TAstNodePtr TAstBuilder::BuildAddExpr(const TRule_add_expr& node) {
  334. TAstNodePtr result = BuildMulExpr(node.GetRule_mul_expr1());
  335. for (size_t i = 0; i < node.Block2Size(); i++) {
  336. const auto& block = node.GetBlock2(i);
  337. const auto& opToken = block.GetToken1();
  338. auto operation = EBinaryOperation::Add;
  339. if (opToken.GetValue() == "-") {
  340. operation = EBinaryOperation::Substract;
  341. }
  342. const auto rightOperand = BuildMulExpr(block.GetRule_mul_expr2());
  343. result = new TBinaryOperationNode(GetPos(opToken), operation, result, rightOperand);
  344. }
  345. return result;
  346. }
  347. TAstNodePtr TAstBuilder::BuildCompareExpr(const TRule_compare_expr& node) {
  348. TAstNodePtr result = BuildAddExpr(node.GetRule_add_expr1());
  349. if (node.HasBlock2()) {
  350. const auto& block = node.GetBlock2();
  351. const auto& opToken = block.GetToken1();
  352. const auto& opValue = opToken.GetValue();
  353. auto operation = EBinaryOperation::Less;
  354. if (opValue == "<=") {
  355. operation = EBinaryOperation::LessEqual;
  356. } else if (opValue == ">") {
  357. operation = EBinaryOperation::Greater;
  358. } else if (opValue == ">=") {
  359. operation = EBinaryOperation::GreaterEqual;
  360. }
  361. const auto rightOperand = BuildAddExpr(block.GetRule_add_expr2());
  362. result = new TBinaryOperationNode(GetPos(opToken), operation, result, rightOperand);
  363. }
  364. return result;
  365. }
  366. TAstNodePtr TAstBuilder::BuildEqualExpr(const TRule_equal_expr& node) {
  367. TAstNodePtr result = BuildCompareExpr(node.GetRule_compare_expr1());
  368. if (node.HasBlock2()) {
  369. const auto& block = node.GetBlock2();
  370. const auto& opToken = block.GetToken1();
  371. const auto& opValue = opToken.GetValue();
  372. auto operation = EBinaryOperation::Equal;
  373. if (opValue == "<>" || opValue == "!=") {
  374. operation = EBinaryOperation::NotEqual;
  375. }
  376. const auto rightOperand = BuildCompareExpr(block.GetRule_compare_expr2());
  377. result = new TBinaryOperationNode(GetPos(opToken), operation, result, rightOperand);
  378. }
  379. return result;
  380. }
  381. TAstNodePtr TAstBuilder::BuildAndExpr(const TRule_and_expr& node) {
  382. TAstNodePtr result = BuildEqualExpr(node.GetRule_equal_expr1());
  383. for (size_t i = 0; i < node.Block2Size(); i++) {
  384. const auto& block = node.GetBlock2(i);
  385. const auto& opToken = block.GetToken1();
  386. const auto rightOperand = BuildEqualExpr(block.GetRule_equal_expr2());
  387. result = new TBinaryOperationNode(GetPos(opToken), EBinaryOperation::And, result, rightOperand);
  388. }
  389. return result;
  390. }
  391. TAstNodePtr TAstBuilder::BuildOrExpr(const TRule_or_expr& node) {
  392. TAstNodePtr result = BuildAndExpr(node.GetRule_and_expr1());
  393. for (size_t i = 0; i < node.Block2Size(); i++) {
  394. const auto& block = node.GetBlock2(i);
  395. const auto& opToken = block.GetToken1();
  396. const auto rightOperand = BuildAndExpr(block.GetRule_and_expr2());
  397. result = new TBinaryOperationNode(GetPos(opToken), EBinaryOperation::Or, result, rightOperand);
  398. }
  399. return result;
  400. }
  401. TAstNodePtr TAstBuilder::BuildExpr(const TRule_expr& node) {
  402. return BuildOrExpr(node.GetRule_or_expr1());
  403. }
  404. TAstNodePtr TAstBuilder::BuildJsonPath(const TRule_jsonpath& node) {
  405. TPosition pos;
  406. auto mode = EJsonPathMode::Lax;
  407. if (node.HasBlock1()) {
  408. const auto& modeToken = node.GetBlock1().GetToken1();
  409. pos = GetPos(modeToken);
  410. if (modeToken.GetValue() == "strict") {
  411. mode = EJsonPathMode::Strict;
  412. }
  413. }
  414. const auto expr = BuildExpr(node.GetRule_expr2());
  415. return new TRootNode(pos, expr, mode);
  416. }
  417. TAstNodePtr TAstBuilder::Build(const TJsonPathParserAST& ast) {
  418. return BuildJsonPath(ast.GetRule_jsonpath());
  419. }
  420. namespace NYql::NJsonPath {
  421. ui32 GetReLibId() {
  422. return RegexpLibId;
  423. }
  424. }