ast_builder.cpp 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500
  1. #include "ast_builder.h"
  2. #include "ast_nodes.h"
  3. #include "parse_double.h"
  4. #include <yql/essentials/core/issue/protos/issue_id.pb.h>
  5. #include <yql/essentials/minikql/jsonpath/rewrapper/proto/serialization.pb.h>
  6. #include <yql/essentials/ast/yql_ast_escaping.h>
  7. #include <util/generic/singleton.h>
  8. #include <util/system/compiler.h>
  9. #include <util/string/cast.h>
  10. #include <util/string/builder.h>
  11. #include <util/charset/utf8.h>
  12. #include <util/system/cpu_id.h>
  13. #include <cmath>
  14. using namespace NYql;
  15. using namespace NYql::NJsonPath;
  16. using namespace NJsonPathGenerated;
  17. using namespace NReWrapper;
  18. namespace {
  19. constexpr ui32 RegexpLibId = NReWrapper::TSerialization::YDB_REWRAPPER_LIB_ID;
  20. TPosition GetPos(const TToken& token) {
  21. return TPosition(token.GetColumn(), token.GetLine());
  22. }
  23. bool TryStringContent(const TString& str, TString& result, TString& error, bool onlyDoubleQuoted = true) {
  24. result.clear();
  25. error.clear();
  26. const bool doubleQuoted = str.StartsWith('"') && str.EndsWith('"');
  27. const bool singleQuoted = str.StartsWith('\'') && str.EndsWith('\'');
  28. if (!doubleQuoted && !singleQuoted) {
  29. error = "String must be quoted";
  30. return false;
  31. }
  32. if (singleQuoted && onlyDoubleQuoted) {
  33. error = "Only double quoted strings allowed";
  34. return false;
  35. }
  36. char quoteChar = doubleQuoted ? '"' : '\'';
  37. size_t readBytes = 0;
  38. TStringBuf atom(str);
  39. atom.Skip(1);
  40. TStringOutput sout(result);
  41. result.reserve(str.size());
  42. auto unescapeResult = UnescapeArbitraryAtom(atom, quoteChar, &sout, &readBytes);
  43. if (unescapeResult == EUnescapeResult::OK) {
  44. return true;
  45. } else {
  46. error = UnescapeResultToString(unescapeResult);
  47. return false;
  48. }
  49. }
  50. }
  51. TAstBuilder::TAstBuilder(TIssues& issues)
  52. : Issues(issues)
  53. {
  54. }
  55. void TAstBuilder::Error(TPosition pos, const TStringBuf message) {
  56. Issues.AddIssue(pos, message);
  57. Issues.back().SetCode(TIssuesIds::JSONPATH_PARSE_ERROR, TSeverityIds::S_ERROR);
  58. }
  59. TArrayAccessNode::TSubscript TAstBuilder::BuildArraySubscript(const TRule_array_subscript& node) {
  60. TAstNodePtr from = BuildExpr(node.GetRule_expr1());
  61. TAstNodePtr to = nullptr;
  62. if (node.HasBlock2()) {
  63. to = BuildExpr(node.GetBlock2().GetRule_expr2());
  64. }
  65. return {from, to};
  66. }
  67. TAstNodePtr TAstBuilder::BuildArrayAccessor(const TRule_array_accessor& node, TAstNodePtr input) {
  68. TVector<TArrayAccessNode::TSubscript> subscripts;
  69. subscripts.reserve(1 + node.Block3Size());
  70. subscripts.push_back(BuildArraySubscript(node.GetRule_array_subscript2()));
  71. for (size_t i = 0; i < node.Block3Size(); i++) {
  72. subscripts.push_back(BuildArraySubscript(node.GetBlock3(i).GetRule_array_subscript2()));
  73. }
  74. return new TArrayAccessNode(GetPos(node.GetToken1()), subscripts, input);
  75. }
  76. TAstNodePtr TAstBuilder::BuildWildcardArrayAccessor(const TRule_wildcard_array_accessor& node, TAstNodePtr input) {
  77. return new TWildcardArrayAccessNode(GetPos(node.GetToken1()), input);
  78. }
  79. TString TAstBuilder::BuildIdentifier(const TRule_identifier& node) {
  80. switch (node.GetAltCase()) {
  81. case TRule_identifier::kAltIdentifier1:
  82. return node.GetAlt_identifier1().GetToken1().GetValue();
  83. case TRule_identifier::kAltIdentifier2:
  84. return node.GetAlt_identifier2().GetRule_keyword1().GetToken1().GetValue();
  85. case TRule_identifier::ALT_NOT_SET:
  86. Y_ABORT("Alternative for 'identifier' rule is not set");
  87. }
  88. }
  89. TAstNodePtr TAstBuilder::BuildMemberAccessor(const TRule_member_accessor& node, TAstNodePtr input) {
  90. TString name;
  91. const auto& nameBlock = node.GetBlock2();
  92. switch (nameBlock.GetAltCase()) {
  93. case TRule_member_accessor_TBlock2::kAlt1:
  94. name = BuildIdentifier(nameBlock.GetAlt1().GetRule_identifier1());
  95. break;
  96. case TRule_member_accessor_TBlock2::kAlt2: {
  97. const auto& token = nameBlock.GetAlt2().GetToken1();
  98. TString error;
  99. if (!TryStringContent(token.GetValue(), name, error, /* onlyDoubleQuoted */ false)) {
  100. Error(GetPos(token), error);
  101. return nullptr;
  102. }
  103. break;
  104. }
  105. case TRule_member_accessor_TBlock2::ALT_NOT_SET:
  106. Y_ABORT("Alternative for 'member_accessor' rule is not set");
  107. }
  108. return new TMemberAccessNode(GetPos(node.GetToken1()), name, input);
  109. }
  110. TAstNodePtr TAstBuilder::BuildWildcardMemberAccessor(const TRule_wildcard_member_accessor& node, TAstNodePtr input) {
  111. const auto& token = node.GetToken2();
  112. return new TWildcardMemberAccessNode(GetPos(token), input);
  113. }
  114. TAstNodePtr TAstBuilder::BuildFilter(const TRule_filter& node, TAstNodePtr input) {
  115. const auto predicate = BuildExpr(node.GetRule_expr3());
  116. return new TFilterPredicateNode(GetPos(node.GetToken2()), predicate, input);
  117. }
  118. TAstNodePtr TAstBuilder::BuildMethod(const TRule_method& node, TAstNodePtr input) {
  119. const auto& token = node.GetToken2();
  120. const auto pos = GetPos(token);
  121. const auto& value = token.GetValue();
  122. auto type = EMethodType::Double;
  123. if (value == "abs") {
  124. type = EMethodType::Abs;
  125. } else if (value == "floor") {
  126. type = EMethodType::Floor;
  127. } else if (value == "ceiling") {
  128. type = EMethodType::Ceiling;
  129. } else if (value == "type") {
  130. type = EMethodType::Type;
  131. } else if (value == "size") {
  132. type = EMethodType::Size;
  133. } else if (value == "keyvalue") {
  134. type = EMethodType::KeyValue;
  135. }
  136. return new TMethodCallNode(pos, type, input);
  137. }
  138. TAstNodePtr TAstBuilder::BuildAccessorOp(const TRule_accessor_op& node, TAstNodePtr input) {
  139. switch (node.GetAltCase()) {
  140. case TRule_accessor_op::kAltAccessorOp1:
  141. return BuildMemberAccessor(node.GetAlt_accessor_op1().GetRule_member_accessor1(), input);
  142. case TRule_accessor_op::kAltAccessorOp2:
  143. return BuildWildcardMemberAccessor(node.GetAlt_accessor_op2().GetRule_wildcard_member_accessor1(), input);
  144. case TRule_accessor_op::kAltAccessorOp3:
  145. return BuildArrayAccessor(node.GetAlt_accessor_op3().GetRule_array_accessor1(), input);
  146. case TRule_accessor_op::kAltAccessorOp4:
  147. return BuildWildcardArrayAccessor(node.GetAlt_accessor_op4().GetRule_wildcard_array_accessor1(), input);
  148. case TRule_accessor_op::kAltAccessorOp5:
  149. return BuildFilter(node.GetAlt_accessor_op5().GetRule_filter1(), input);
  150. case TRule_accessor_op::kAltAccessorOp6:
  151. return BuildMethod(node.GetAlt_accessor_op6().GetRule_method1(), input);
  152. case TRule_accessor_op::ALT_NOT_SET:
  153. Y_ABORT("Alternative for 'accessor_op' rule is not set");
  154. }
  155. }
  156. TAstNodePtr TAstBuilder::BuildPrimary(const TRule_primary& node) {
  157. switch (node.GetAltCase()) {
  158. case TRule_primary::kAltPrimary1: {
  159. const auto& token = node.GetAlt_primary1().GetToken1();
  160. const auto& numberString = token.GetValue();
  161. const double parsedValue = ParseDouble(numberString);
  162. if (Y_UNLIKELY(std::isnan(parsedValue))) {
  163. Y_ABORT("Invalid number was allowed by JsonPath grammar");
  164. }
  165. if (Y_UNLIKELY(std::isinf(parsedValue))) {
  166. Error(GetPos(token), "Number literal is infinity");
  167. return nullptr;
  168. }
  169. return new TNumberLiteralNode(GetPos(token), parsedValue);
  170. }
  171. case TRule_primary::kAltPrimary2: {
  172. const auto& token = node.GetAlt_primary2().GetToken1();
  173. return new TContextObjectNode(GetPos(token));
  174. }
  175. case TRule_primary::kAltPrimary3: {
  176. const auto& token = node.GetAlt_primary3().GetToken1();
  177. return new TLastArrayIndexNode(GetPos(token));
  178. }
  179. case TRule_primary::kAltPrimary4: {
  180. const auto& primary = node.GetAlt_primary4().GetBlock1();
  181. const auto input = BuildExpr(primary.GetRule_expr2());
  182. if (primary.HasBlock4()) {
  183. const auto& token = primary.GetBlock4().GetToken1();
  184. return new TIsUnknownPredicateNode(GetPos(token), input);
  185. }
  186. return input;
  187. }
  188. case TRule_primary::kAltPrimary5: {
  189. const auto& token = node.GetAlt_primary5().GetToken1();
  190. return new TVariableNode(GetPos(token), token.GetValue().substr(1));
  191. }
  192. case TRule_primary::kAltPrimary6: {
  193. const auto& token = node.GetAlt_primary6().GetToken1();
  194. return new TBooleanLiteralNode(GetPos(token), true);
  195. }
  196. case TRule_primary::kAltPrimary7: {
  197. const auto& token = node.GetAlt_primary7().GetToken1();
  198. return new TBooleanLiteralNode(GetPos(token), false);
  199. }
  200. case TRule_primary::kAltPrimary8: {
  201. const auto& token = node.GetAlt_primary8().GetToken1();
  202. return new TNullLiteralNode(GetPos(token));
  203. }
  204. case TRule_primary::kAltPrimary9: {
  205. const auto& token = node.GetAlt_primary9().GetToken1();
  206. TString value;
  207. TString error;
  208. if (!TryStringContent(token.GetValue(), value, error)) {
  209. Error(GetPos(token), error);
  210. return nullptr;
  211. }
  212. return new TStringLiteralNode(GetPos(token), value);
  213. }
  214. case TRule_primary::kAltPrimary10: {
  215. const auto& token = node.GetAlt_primary10().GetToken1();
  216. return new TFilterObjectNode(GetPos(token));
  217. }
  218. case TRule_primary::ALT_NOT_SET:
  219. Y_ABORT("Alternative for 'primary' rule is not set");
  220. }
  221. }
  222. TAstNodePtr TAstBuilder::BuildAccessorExpr(const TRule_accessor_expr& node) {
  223. TAstNodePtr input = BuildPrimary(node.GetRule_primary1());
  224. for (size_t i = 0; i < node.Block2Size(); i++) {
  225. input = BuildAccessorOp(node.GetBlock2(i).GetRule_accessor_op1(), input);
  226. }
  227. return input;
  228. }
  229. TAstNodePtr TAstBuilder::BuildPlainExpr(const TRule_plain_expr& node) {
  230. return BuildAccessorExpr(node.GetRule_accessor_expr1());
  231. }
  232. TAstNodePtr TAstBuilder::BuildLikeRegexExpr(const TRule_like_regex_expr& node, TAstNodePtr input) {
  233. const auto& regexToken = node.GetToken2();
  234. TString regex;
  235. TString error;
  236. if (!TryStringContent(regexToken.GetValue(), regex, error)) {
  237. Error(GetPos(regexToken), error);
  238. return nullptr;
  239. }
  240. ui32 parsedFlags = 0;
  241. if (node.HasBlock3()) {
  242. TString flags;
  243. const auto& flagsToken = node.GetBlock3().GetToken2();
  244. if (!TryStringContent(flagsToken.GetValue(), flags, error)) {
  245. Error(GetPos(flagsToken), error);
  246. return nullptr;
  247. }
  248. for (char flag : flags) {
  249. switch (flag) {
  250. case 'i':
  251. parsedFlags |= FLAGS_CASELESS;
  252. break;
  253. default:
  254. Error(GetPos(flagsToken), TStringBuilder() << "Unsupported regex flag '" << flag << "'");
  255. break;
  256. }
  257. }
  258. }
  259. IRePtr compiledRegex;
  260. try {
  261. compiledRegex = NDispatcher::Compile(regex, parsedFlags,
  262. NDispatcher::Has(RegexpLibId) ? RegexpLibId : TSerialization::kRe2);
  263. } catch (const NReWrapper::TCompileException& e) {
  264. Error(GetPos(regexToken), e.AsStrBuf());
  265. return nullptr;
  266. }
  267. return new TLikeRegexPredicateNode(GetPos(node.GetToken1()), input, std::move(compiledRegex));
  268. }
  269. TAstNodePtr TAstBuilder::BuildPredicateExpr(const TRule_predicate_expr& node) {
  270. switch (node.GetAltCase()) {
  271. case TRule_predicate_expr::kAltPredicateExpr1: {
  272. const auto& predicate = node.GetAlt_predicate_expr1().GetBlock1();
  273. const auto input = BuildPlainExpr(predicate.GetRule_plain_expr1());
  274. if (!predicate.HasBlock2()) {
  275. return input;
  276. }
  277. const auto& block = predicate.GetBlock2();
  278. switch (block.GetAltCase()) {
  279. case TRule_predicate_expr_TAlt1_TBlock1_TBlock2::kAlt1: {
  280. const auto& innerBlock = block.GetAlt1().GetRule_starts_with_expr1();
  281. const auto& prefix = BuildPlainExpr(innerBlock.GetRule_plain_expr3());
  282. return new TStartsWithPredicateNode(GetPos(innerBlock.GetToken1()), input, prefix);
  283. }
  284. case TRule_predicate_expr_TAlt1_TBlock1_TBlock2::kAlt2: {
  285. return BuildLikeRegexExpr(block.GetAlt2().GetRule_like_regex_expr1(), input);
  286. }
  287. case TRule_predicate_expr_TAlt1_TBlock1_TBlock2::ALT_NOT_SET:
  288. Y_ABORT("Alternative for inner block of 'predicate_expr' rule is not set");
  289. }
  290. Y_UNREACHABLE();
  291. }
  292. case TRule_predicate_expr::kAltPredicateExpr2: {
  293. const auto& predicate = node.GetAlt_predicate_expr2().GetBlock1();
  294. const auto input = BuildExpr(predicate.GetRule_expr3());
  295. return new TExistsPredicateNode(GetPos(predicate.GetToken1()), input);
  296. }
  297. case TRule_predicate_expr::ALT_NOT_SET:
  298. Y_ABORT("Alternative for 'predicate' rule is not set");
  299. }
  300. Y_UNREACHABLE();
  301. }
  302. TAstNodePtr TAstBuilder::BuildUnaryExpr(const TRule_unary_expr& node) {
  303. const auto predicateExpr = BuildPredicateExpr(node.GetRule_predicate_expr2());
  304. if (!node.HasBlock1()) {
  305. return predicateExpr;
  306. }
  307. const auto& opToken = node.GetBlock1().GetToken1();
  308. const auto& opValue = opToken.GetValue();
  309. auto operation = EUnaryOperation::Plus;
  310. if (opValue == "-") {
  311. operation = EUnaryOperation::Minus;
  312. } else if (opValue == "!") {
  313. operation = EUnaryOperation::Not;
  314. }
  315. return new TUnaryOperationNode(GetPos(opToken), operation, predicateExpr);
  316. }
  317. TAstNodePtr TAstBuilder::BuildMulExpr(const TRule_mul_expr& node) {
  318. TAstNodePtr result = BuildUnaryExpr(node.GetRule_unary_expr1());
  319. for (size_t i = 0; i < node.Block2Size(); i++) {
  320. const auto& block = node.GetBlock2(i);
  321. const auto& opToken = block.GetToken1();
  322. const auto& opValue = opToken.GetValue();
  323. auto operation = EBinaryOperation::Multiply;
  324. if (opValue == "/") {
  325. operation = EBinaryOperation::Divide;
  326. } else if (opValue == "%") {
  327. operation = EBinaryOperation::Modulo;
  328. }
  329. const auto rightOperand = BuildUnaryExpr(block.GetRule_unary_expr2());
  330. result = new TBinaryOperationNode(GetPos(opToken), operation, result, rightOperand);
  331. }
  332. return result;
  333. }
  334. TAstNodePtr TAstBuilder::BuildAddExpr(const TRule_add_expr& node) {
  335. TAstNodePtr result = BuildMulExpr(node.GetRule_mul_expr1());
  336. for (size_t i = 0; i < node.Block2Size(); i++) {
  337. const auto& block = node.GetBlock2(i);
  338. const auto& opToken = block.GetToken1();
  339. auto operation = EBinaryOperation::Add;
  340. if (opToken.GetValue() == "-") {
  341. operation = EBinaryOperation::Substract;
  342. }
  343. const auto rightOperand = BuildMulExpr(block.GetRule_mul_expr2());
  344. result = new TBinaryOperationNode(GetPos(opToken), operation, result, rightOperand);
  345. }
  346. return result;
  347. }
  348. TAstNodePtr TAstBuilder::BuildCompareExpr(const TRule_compare_expr& node) {
  349. TAstNodePtr result = BuildAddExpr(node.GetRule_add_expr1());
  350. if (node.HasBlock2()) {
  351. const auto& block = node.GetBlock2();
  352. const auto& opToken = block.GetToken1();
  353. const auto& opValue = opToken.GetValue();
  354. auto operation = EBinaryOperation::Less;
  355. if (opValue == "<=") {
  356. operation = EBinaryOperation::LessEqual;
  357. } else if (opValue == ">") {
  358. operation = EBinaryOperation::Greater;
  359. } else if (opValue == ">=") {
  360. operation = EBinaryOperation::GreaterEqual;
  361. }
  362. const auto rightOperand = BuildAddExpr(block.GetRule_add_expr2());
  363. result = new TBinaryOperationNode(GetPos(opToken), operation, result, rightOperand);
  364. }
  365. return result;
  366. }
  367. TAstNodePtr TAstBuilder::BuildEqualExpr(const TRule_equal_expr& node) {
  368. TAstNodePtr result = BuildCompareExpr(node.GetRule_compare_expr1());
  369. if (node.HasBlock2()) {
  370. const auto& block = node.GetBlock2();
  371. const auto& opToken = block.GetToken1();
  372. const auto& opValue = opToken.GetValue();
  373. auto operation = EBinaryOperation::Equal;
  374. if (opValue == "<>" || opValue == "!=") {
  375. operation = EBinaryOperation::NotEqual;
  376. }
  377. const auto rightOperand = BuildCompareExpr(block.GetRule_compare_expr2());
  378. result = new TBinaryOperationNode(GetPos(opToken), operation, result, rightOperand);
  379. }
  380. return result;
  381. }
  382. TAstNodePtr TAstBuilder::BuildAndExpr(const TRule_and_expr& node) {
  383. TAstNodePtr result = BuildEqualExpr(node.GetRule_equal_expr1());
  384. for (size_t i = 0; i < node.Block2Size(); i++) {
  385. const auto& block = node.GetBlock2(i);
  386. const auto& opToken = block.GetToken1();
  387. const auto rightOperand = BuildEqualExpr(block.GetRule_equal_expr2());
  388. result = new TBinaryOperationNode(GetPos(opToken), EBinaryOperation::And, result, rightOperand);
  389. }
  390. return result;
  391. }
  392. TAstNodePtr TAstBuilder::BuildOrExpr(const TRule_or_expr& node) {
  393. TAstNodePtr result = BuildAndExpr(node.GetRule_and_expr1());
  394. for (size_t i = 0; i < node.Block2Size(); i++) {
  395. const auto& block = node.GetBlock2(i);
  396. const auto& opToken = block.GetToken1();
  397. const auto rightOperand = BuildAndExpr(block.GetRule_and_expr2());
  398. result = new TBinaryOperationNode(GetPos(opToken), EBinaryOperation::Or, result, rightOperand);
  399. }
  400. return result;
  401. }
  402. TAstNodePtr TAstBuilder::BuildExpr(const TRule_expr& node) {
  403. return BuildOrExpr(node.GetRule_or_expr1());
  404. }
  405. TAstNodePtr TAstBuilder::BuildJsonPath(const TRule_jsonpath& node) {
  406. TPosition pos;
  407. auto mode = EJsonPathMode::Lax;
  408. if (node.HasBlock1()) {
  409. const auto& modeToken = node.GetBlock1().GetToken1();
  410. pos = GetPos(modeToken);
  411. if (modeToken.GetValue() == "strict") {
  412. mode = EJsonPathMode::Strict;
  413. }
  414. }
  415. const auto expr = BuildExpr(node.GetRule_expr2());
  416. return new TRootNode(pos, expr, mode);
  417. }
  418. TAstNodePtr TAstBuilder::Build(const TJsonPathParserAST& ast) {
  419. return BuildJsonPath(ast.GetRule_jsonpath());
  420. }
  421. namespace NYql::NJsonPath {
  422. ui32 GetReLibId() {
  423. return RegexpLibId;
  424. }
  425. }