123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500 |
- #include "ast_builder.h"
- #include "ast_nodes.h"
- #include "parse_double.h"
- #include <yql/essentials/core/issue/protos/issue_id.pb.h>
- #include <yql/essentials/minikql/jsonpath/rewrapper/proto/serialization.pb.h>
- #include <yql/essentials/ast/yql_ast_escaping.h>
- #include <util/generic/singleton.h>
- #include <util/system/compiler.h>
- #include <util/string/cast.h>
- #include <util/string/builder.h>
- #include <util/charset/utf8.h>
- #include <util/system/cpu_id.h>
- #include <cmath>
- using namespace NYql;
- using namespace NYql::NJsonPath;
- using namespace NJsonPathGenerated;
- using namespace NReWrapper;
- namespace {
- constexpr ui32 RegexpLibId = NReWrapper::TSerialization::YDB_REWRAPPER_LIB_ID;
- TPosition GetPos(const TToken& token) {
- return TPosition(token.GetColumn(), token.GetLine());
- }
- bool TryStringContent(const TString& str, TString& result, TString& error, bool onlyDoubleQuoted = true) {
- result.clear();
- error.clear();
- const bool doubleQuoted = str.StartsWith('"') && str.EndsWith('"');
- const bool singleQuoted = str.StartsWith('\'') && str.EndsWith('\'');
- if (!doubleQuoted && !singleQuoted) {
- error = "String must be quoted";
- return false;
- }
- if (singleQuoted && onlyDoubleQuoted) {
- error = "Only double quoted strings allowed";
- return false;
- }
- char quoteChar = doubleQuoted ? '"' : '\'';
- size_t readBytes = 0;
- TStringBuf atom(str);
- atom.Skip(1);
- TStringOutput sout(result);
- result.reserve(str.size());
- auto unescapeResult = UnescapeArbitraryAtom(atom, quoteChar, &sout, &readBytes);
- if (unescapeResult == EUnescapeResult::OK) {
- return true;
- } else {
- error = UnescapeResultToString(unescapeResult);
- return false;
- }
- }
- }
- TAstBuilder::TAstBuilder(TIssues& issues)
- : Issues(issues)
- {
- }
- void TAstBuilder::Error(TPosition pos, const TStringBuf message) {
- Issues.AddIssue(pos, message);
- Issues.back().SetCode(TIssuesIds::JSONPATH_PARSE_ERROR, TSeverityIds::S_ERROR);
- }
- TArrayAccessNode::TSubscript TAstBuilder::BuildArraySubscript(const TRule_array_subscript& node) {
- TAstNodePtr from = BuildExpr(node.GetRule_expr1());
- TAstNodePtr to = nullptr;
- if (node.HasBlock2()) {
- to = BuildExpr(node.GetBlock2().GetRule_expr2());
- }
- return {from, to};
- }
- TAstNodePtr TAstBuilder::BuildArrayAccessor(const TRule_array_accessor& node, TAstNodePtr input) {
- TVector<TArrayAccessNode::TSubscript> subscripts;
- subscripts.reserve(1 + node.Block3Size());
- subscripts.push_back(BuildArraySubscript(node.GetRule_array_subscript2()));
- for (size_t i = 0; i < node.Block3Size(); i++) {
- subscripts.push_back(BuildArraySubscript(node.GetBlock3(i).GetRule_array_subscript2()));
- }
- return new TArrayAccessNode(GetPos(node.GetToken1()), subscripts, input);
- }
- TAstNodePtr TAstBuilder::BuildWildcardArrayAccessor(const TRule_wildcard_array_accessor& node, TAstNodePtr input) {
- return new TWildcardArrayAccessNode(GetPos(node.GetToken1()), input);
- }
- TString TAstBuilder::BuildIdentifier(const TRule_identifier& node) {
- switch (node.GetAltCase()) {
- case TRule_identifier::kAltIdentifier1:
- return node.GetAlt_identifier1().GetToken1().GetValue();
- case TRule_identifier::kAltIdentifier2:
- return node.GetAlt_identifier2().GetRule_keyword1().GetToken1().GetValue();
- case TRule_identifier::ALT_NOT_SET:
- Y_ABORT("Alternative for 'identifier' rule is not set");
- }
- }
- TAstNodePtr TAstBuilder::BuildMemberAccessor(const TRule_member_accessor& node, TAstNodePtr input) {
- TString name;
- const auto& nameBlock = node.GetBlock2();
- switch (nameBlock.GetAltCase()) {
- case TRule_member_accessor_TBlock2::kAlt1:
- name = BuildIdentifier(nameBlock.GetAlt1().GetRule_identifier1());
- break;
- case TRule_member_accessor_TBlock2::kAlt2: {
- const auto& token = nameBlock.GetAlt2().GetToken1();
- TString error;
- if (!TryStringContent(token.GetValue(), name, error, /* onlyDoubleQuoted */ false)) {
- Error(GetPos(token), error);
- return nullptr;
- }
- break;
- }
- case TRule_member_accessor_TBlock2::ALT_NOT_SET:
- Y_ABORT("Alternative for 'member_accessor' rule is not set");
- }
- return new TMemberAccessNode(GetPos(node.GetToken1()), name, input);
- }
- TAstNodePtr TAstBuilder::BuildWildcardMemberAccessor(const TRule_wildcard_member_accessor& node, TAstNodePtr input) {
- const auto& token = node.GetToken2();
- return new TWildcardMemberAccessNode(GetPos(token), input);
- }
- TAstNodePtr TAstBuilder::BuildFilter(const TRule_filter& node, TAstNodePtr input) {
- const auto predicate = BuildExpr(node.GetRule_expr3());
- return new TFilterPredicateNode(GetPos(node.GetToken2()), predicate, input);
- }
- TAstNodePtr TAstBuilder::BuildMethod(const TRule_method& node, TAstNodePtr input) {
- const auto& token = node.GetToken2();
- const auto pos = GetPos(token);
- const auto& value = token.GetValue();
- auto type = EMethodType::Double;
- if (value == "abs") {
- type = EMethodType::Abs;
- } else if (value == "floor") {
- type = EMethodType::Floor;
- } else if (value == "ceiling") {
- type = EMethodType::Ceiling;
- } else if (value == "type") {
- type = EMethodType::Type;
- } else if (value == "size") {
- type = EMethodType::Size;
- } else if (value == "keyvalue") {
- type = EMethodType::KeyValue;
- }
- return new TMethodCallNode(pos, type, input);
- }
- TAstNodePtr TAstBuilder::BuildAccessorOp(const TRule_accessor_op& node, TAstNodePtr input) {
- switch (node.GetAltCase()) {
- case TRule_accessor_op::kAltAccessorOp1:
- return BuildMemberAccessor(node.GetAlt_accessor_op1().GetRule_member_accessor1(), input);
- case TRule_accessor_op::kAltAccessorOp2:
- return BuildWildcardMemberAccessor(node.GetAlt_accessor_op2().GetRule_wildcard_member_accessor1(), input);
- case TRule_accessor_op::kAltAccessorOp3:
- return BuildArrayAccessor(node.GetAlt_accessor_op3().GetRule_array_accessor1(), input);
- case TRule_accessor_op::kAltAccessorOp4:
- return BuildWildcardArrayAccessor(node.GetAlt_accessor_op4().GetRule_wildcard_array_accessor1(), input);
- case TRule_accessor_op::kAltAccessorOp5:
- return BuildFilter(node.GetAlt_accessor_op5().GetRule_filter1(), input);
- case TRule_accessor_op::kAltAccessorOp6:
- return BuildMethod(node.GetAlt_accessor_op6().GetRule_method1(), input);
- case TRule_accessor_op::ALT_NOT_SET:
- Y_ABORT("Alternative for 'accessor_op' rule is not set");
- }
- }
- TAstNodePtr TAstBuilder::BuildPrimary(const TRule_primary& node) {
- switch (node.GetAltCase()) {
- case TRule_primary::kAltPrimary1: {
- const auto& token = node.GetAlt_primary1().GetToken1();
- const auto& numberString = token.GetValue();
- const double parsedValue = ParseDouble(numberString);
- if (Y_UNLIKELY(std::isnan(parsedValue))) {
- Y_ABORT("Invalid number was allowed by JsonPath grammar");
- }
- if (Y_UNLIKELY(std::isinf(parsedValue))) {
- Error(GetPos(token), "Number literal is infinity");
- return nullptr;
- }
- return new TNumberLiteralNode(GetPos(token), parsedValue);
- }
- case TRule_primary::kAltPrimary2: {
- const auto& token = node.GetAlt_primary2().GetToken1();
- return new TContextObjectNode(GetPos(token));
- }
- case TRule_primary::kAltPrimary3: {
- const auto& token = node.GetAlt_primary3().GetToken1();
- return new TLastArrayIndexNode(GetPos(token));
- }
- case TRule_primary::kAltPrimary4: {
- const auto& primary = node.GetAlt_primary4().GetBlock1();
- const auto input = BuildExpr(primary.GetRule_expr2());
- if (primary.HasBlock4()) {
- const auto& token = primary.GetBlock4().GetToken1();
- return new TIsUnknownPredicateNode(GetPos(token), input);
- }
- return input;
- }
- case TRule_primary::kAltPrimary5: {
- const auto& token = node.GetAlt_primary5().GetToken1();
- return new TVariableNode(GetPos(token), token.GetValue().substr(1));
- }
- case TRule_primary::kAltPrimary6: {
- const auto& token = node.GetAlt_primary6().GetToken1();
- return new TBooleanLiteralNode(GetPos(token), true);
- }
- case TRule_primary::kAltPrimary7: {
- const auto& token = node.GetAlt_primary7().GetToken1();
- return new TBooleanLiteralNode(GetPos(token), false);
- }
- case TRule_primary::kAltPrimary8: {
- const auto& token = node.GetAlt_primary8().GetToken1();
- return new TNullLiteralNode(GetPos(token));
- }
- case TRule_primary::kAltPrimary9: {
- const auto& token = node.GetAlt_primary9().GetToken1();
- TString value;
- TString error;
- if (!TryStringContent(token.GetValue(), value, error)) {
- Error(GetPos(token), error);
- return nullptr;
- }
- return new TStringLiteralNode(GetPos(token), value);
- }
- case TRule_primary::kAltPrimary10: {
- const auto& token = node.GetAlt_primary10().GetToken1();
- return new TFilterObjectNode(GetPos(token));
- }
- case TRule_primary::ALT_NOT_SET:
- Y_ABORT("Alternative for 'primary' rule is not set");
- }
- }
- TAstNodePtr TAstBuilder::BuildAccessorExpr(const TRule_accessor_expr& node) {
- TAstNodePtr input = BuildPrimary(node.GetRule_primary1());
- for (size_t i = 0; i < node.Block2Size(); i++) {
- input = BuildAccessorOp(node.GetBlock2(i).GetRule_accessor_op1(), input);
- }
- return input;
- }
- TAstNodePtr TAstBuilder::BuildPlainExpr(const TRule_plain_expr& node) {
- return BuildAccessorExpr(node.GetRule_accessor_expr1());
- }
- TAstNodePtr TAstBuilder::BuildLikeRegexExpr(const TRule_like_regex_expr& node, TAstNodePtr input) {
- const auto& regexToken = node.GetToken2();
- TString regex;
- TString error;
- if (!TryStringContent(regexToken.GetValue(), regex, error)) {
- Error(GetPos(regexToken), error);
- return nullptr;
- }
- ui32 parsedFlags = 0;
- if (node.HasBlock3()) {
- TString flags;
- const auto& flagsToken = node.GetBlock3().GetToken2();
- if (!TryStringContent(flagsToken.GetValue(), flags, error)) {
- Error(GetPos(flagsToken), error);
- return nullptr;
- }
- for (char flag : flags) {
- switch (flag) {
- case 'i':
- parsedFlags |= FLAGS_CASELESS;
- break;
- default:
- Error(GetPos(flagsToken), TStringBuilder() << "Unsupported regex flag '" << flag << "'");
- break;
- }
- }
- }
- IRePtr compiledRegex;
- try {
- compiledRegex = NDispatcher::Compile(regex, parsedFlags,
- NDispatcher::Has(RegexpLibId) ? RegexpLibId : TSerialization::kRe2);
- } catch (const NReWrapper::TCompileException& e) {
- Error(GetPos(regexToken), e.AsStrBuf());
- return nullptr;
- }
- return new TLikeRegexPredicateNode(GetPos(node.GetToken1()), input, std::move(compiledRegex));
- }
- TAstNodePtr TAstBuilder::BuildPredicateExpr(const TRule_predicate_expr& node) {
- switch (node.GetAltCase()) {
- case TRule_predicate_expr::kAltPredicateExpr1: {
- const auto& predicate = node.GetAlt_predicate_expr1().GetBlock1();
- const auto input = BuildPlainExpr(predicate.GetRule_plain_expr1());
- if (!predicate.HasBlock2()) {
- return input;
- }
- const auto& block = predicate.GetBlock2();
- switch (block.GetAltCase()) {
- case TRule_predicate_expr_TAlt1_TBlock1_TBlock2::kAlt1: {
- const auto& innerBlock = block.GetAlt1().GetRule_starts_with_expr1();
- const auto& prefix = BuildPlainExpr(innerBlock.GetRule_plain_expr3());
- return new TStartsWithPredicateNode(GetPos(innerBlock.GetToken1()), input, prefix);
- }
- case TRule_predicate_expr_TAlt1_TBlock1_TBlock2::kAlt2: {
- return BuildLikeRegexExpr(block.GetAlt2().GetRule_like_regex_expr1(), input);
- }
- case TRule_predicate_expr_TAlt1_TBlock1_TBlock2::ALT_NOT_SET:
- Y_ABORT("Alternative for inner block of 'predicate_expr' rule is not set");
- }
- Y_UNREACHABLE();
- }
- case TRule_predicate_expr::kAltPredicateExpr2: {
- const auto& predicate = node.GetAlt_predicate_expr2().GetBlock1();
- const auto input = BuildExpr(predicate.GetRule_expr3());
- return new TExistsPredicateNode(GetPos(predicate.GetToken1()), input);
- }
- case TRule_predicate_expr::ALT_NOT_SET:
- Y_ABORT("Alternative for 'predicate' rule is not set");
- }
- Y_UNREACHABLE();
- }
- TAstNodePtr TAstBuilder::BuildUnaryExpr(const TRule_unary_expr& node) {
- const auto predicateExpr = BuildPredicateExpr(node.GetRule_predicate_expr2());
- if (!node.HasBlock1()) {
- return predicateExpr;
- }
- const auto& opToken = node.GetBlock1().GetToken1();
- const auto& opValue = opToken.GetValue();
- auto operation = EUnaryOperation::Plus;
- if (opValue == "-") {
- operation = EUnaryOperation::Minus;
- } else if (opValue == "!") {
- operation = EUnaryOperation::Not;
- }
- return new TUnaryOperationNode(GetPos(opToken), operation, predicateExpr);
- }
- TAstNodePtr TAstBuilder::BuildMulExpr(const TRule_mul_expr& node) {
- TAstNodePtr result = BuildUnaryExpr(node.GetRule_unary_expr1());
- for (size_t i = 0; i < node.Block2Size(); i++) {
- const auto& block = node.GetBlock2(i);
- const auto& opToken = block.GetToken1();
- const auto& opValue = opToken.GetValue();
- auto operation = EBinaryOperation::Multiply;
- if (opValue == "/") {
- operation = EBinaryOperation::Divide;
- } else if (opValue == "%") {
- operation = EBinaryOperation::Modulo;
- }
- const auto rightOperand = BuildUnaryExpr(block.GetRule_unary_expr2());
- result = new TBinaryOperationNode(GetPos(opToken), operation, result, rightOperand);
- }
- return result;
- }
- TAstNodePtr TAstBuilder::BuildAddExpr(const TRule_add_expr& node) {
- TAstNodePtr result = BuildMulExpr(node.GetRule_mul_expr1());
- for (size_t i = 0; i < node.Block2Size(); i++) {
- const auto& block = node.GetBlock2(i);
- const auto& opToken = block.GetToken1();
- auto operation = EBinaryOperation::Add;
- if (opToken.GetValue() == "-") {
- operation = EBinaryOperation::Substract;
- }
- const auto rightOperand = BuildMulExpr(block.GetRule_mul_expr2());
- result = new TBinaryOperationNode(GetPos(opToken), operation, result, rightOperand);
- }
- return result;
- }
- TAstNodePtr TAstBuilder::BuildCompareExpr(const TRule_compare_expr& node) {
- TAstNodePtr result = BuildAddExpr(node.GetRule_add_expr1());
- if (node.HasBlock2()) {
- const auto& block = node.GetBlock2();
- const auto& opToken = block.GetToken1();
- const auto& opValue = opToken.GetValue();
- auto operation = EBinaryOperation::Less;
- if (opValue == "<=") {
- operation = EBinaryOperation::LessEqual;
- } else if (opValue == ">") {
- operation = EBinaryOperation::Greater;
- } else if (opValue == ">=") {
- operation = EBinaryOperation::GreaterEqual;
- }
- const auto rightOperand = BuildAddExpr(block.GetRule_add_expr2());
- result = new TBinaryOperationNode(GetPos(opToken), operation, result, rightOperand);
- }
- return result;
- }
- TAstNodePtr TAstBuilder::BuildEqualExpr(const TRule_equal_expr& node) {
- TAstNodePtr result = BuildCompareExpr(node.GetRule_compare_expr1());
- if (node.HasBlock2()) {
- const auto& block = node.GetBlock2();
- const auto& opToken = block.GetToken1();
- const auto& opValue = opToken.GetValue();
- auto operation = EBinaryOperation::Equal;
- if (opValue == "<>" || opValue == "!=") {
- operation = EBinaryOperation::NotEqual;
- }
- const auto rightOperand = BuildCompareExpr(block.GetRule_compare_expr2());
- result = new TBinaryOperationNode(GetPos(opToken), operation, result, rightOperand);
- }
- return result;
- }
- TAstNodePtr TAstBuilder::BuildAndExpr(const TRule_and_expr& node) {
- TAstNodePtr result = BuildEqualExpr(node.GetRule_equal_expr1());
- for (size_t i = 0; i < node.Block2Size(); i++) {
- const auto& block = node.GetBlock2(i);
- const auto& opToken = block.GetToken1();
- const auto rightOperand = BuildEqualExpr(block.GetRule_equal_expr2());
- result = new TBinaryOperationNode(GetPos(opToken), EBinaryOperation::And, result, rightOperand);
- }
- return result;
- }
- TAstNodePtr TAstBuilder::BuildOrExpr(const TRule_or_expr& node) {
- TAstNodePtr result = BuildAndExpr(node.GetRule_and_expr1());
- for (size_t i = 0; i < node.Block2Size(); i++) {
- const auto& block = node.GetBlock2(i);
- const auto& opToken = block.GetToken1();
- const auto rightOperand = BuildAndExpr(block.GetRule_and_expr2());
- result = new TBinaryOperationNode(GetPos(opToken), EBinaryOperation::Or, result, rightOperand);
- }
- return result;
- }
- TAstNodePtr TAstBuilder::BuildExpr(const TRule_expr& node) {
- return BuildOrExpr(node.GetRule_or_expr1());
- }
- TAstNodePtr TAstBuilder::BuildJsonPath(const TRule_jsonpath& node) {
- TPosition pos;
- auto mode = EJsonPathMode::Lax;
- if (node.HasBlock1()) {
- const auto& modeToken = node.GetBlock1().GetToken1();
- pos = GetPos(modeToken);
- if (modeToken.GetValue() == "strict") {
- mode = EJsonPathMode::Strict;
- }
- }
- const auto expr = BuildExpr(node.GetRule_expr2());
- return new TRootNode(pos, expr, mode);
- }
- TAstNodePtr TAstBuilder::Build(const TJsonPathParserAST& ast) {
- return BuildJsonPath(ast.GetRule_jsonpath());
- }
- namespace NYql::NJsonPath {
- ui32 GetReLibId() {
- return RegexpLibId;
- }
- }
|