Browse Source

Extracted parser from jsonpath lib

init
commit_hash:8c6dd46ee72034f0480757612dcceb524d19a1d1
vvvv 2 months ago
parent
commit
f7f8a1dcd4

+ 1 - 1
yql/essentials/core/ya.make

@@ -69,7 +69,7 @@ PEERDIR(
     yql/essentials/core/url_lister/interface
     yql/essentials/core/url_preprocessing/interface
     yql/essentials/minikql
-    yql/essentials/minikql/jsonpath
+    yql/essentials/minikql/jsonpath/parser
     yql/essentials/core/minsketch
     yql/essentials/protos
     yql/essentials/public/udf

+ 4 - 4
yql/essentials/core/yql_expr_type_annotation.cpp

@@ -9,7 +9,7 @@
 #include <yql/essentials/public/udf/udf_data_type.h>
 #include <yql/essentials/minikql/dom/json.h>
 #include <yql/essentials/minikql/dom/yson.h>
-#include <yql/essentials/minikql/jsonpath/jsonpath.h>
+#include <yql/essentials/minikql/jsonpath/parser/parser.h>
 #include <yql/essentials/core/sql_types/simple_types.h>
 #include "yql/essentials/parser/pg_catalog/catalog.h"
 #include <yql/essentials/parser/pg_wrapper/interface/utils.h>
@@ -4396,7 +4396,7 @@ TMaybe<EDataSlot> GetSuperType(EDataSlot dataSlot1, EDataSlot dataSlot2, bool wa
     }
 
     if (IsDataTypeInterval(dataSlot1) && IsDataTypeInterval(dataSlot2)) {
-        return (dataSlot1 == EDataSlot::Interval64 || dataSlot2 == EDataSlot::Interval64) 
+        return (dataSlot1 == EDataSlot::Interval64 || dataSlot2 == EDataSlot::Interval64)
             ? EDataSlot::Interval64
             : EDataSlot::Interval;
     }
@@ -4944,7 +4944,7 @@ bool IsSqlInCollectionItemsNullable(const NNodes::TCoSqlIn& node) {
                     break;
                 }
             }
-            
+
             break;
         }
         case ETypeAnnotationKind::Dict: {
@@ -6449,7 +6449,7 @@ TExprNode::TPtr ExpandPgAggregationTraits(TPositionHandle pos, const NPg::TAggre
                 .Atom(1, ToString(aggDesc.FinalFuncId))
                 .List(2)
                     .Do([aggResultType, originalAggResultType](TExprNodeBuilder& builder) -> TExprNodeBuilder& {
-                        if (aggResultType != originalAggResultType) { 
+                        if (aggResultType != originalAggResultType) {
                             builder.List(0)
                                 .Atom(0, "type")
                                 .Atom(1, NPg::LookupType(aggResultType).Name)

+ 1 - 1
yql/essentials/minikql/jsonpath/executor.cpp

@@ -1,5 +1,5 @@
 #include "executor.h"
-#include "parse_double.h"
+#include <yql/essentials/minikql/jsonpath/parser/parse_double.h>
 
 #include <yql/essentials/core/issue/protos/issue_id.pb.h>
 #include <yql/essentials/minikql/dom/node.h>

+ 1 - 1
yql/essentials/minikql/jsonpath/executor.h

@@ -1,6 +1,6 @@
 #pragma once
 
-#include "binary.h"
+#include <yql/essentials/minikql/jsonpath/parser/binary.h>
 #include "value.h"
 
 #include <yql/essentials/public/issue/yql_issue.h>

+ 1 - 98
yql/essentials/minikql/jsonpath/jsonpath.cpp

@@ -1,111 +1,14 @@
 #include "jsonpath.h"
 
-#include "binary.h"
-#include "ast_builder.h"
+#include <yql/essentials/minikql/jsonpath/parser/binary.h>
 #include "executor.h"
-#include "type_check.h"
-#include "value.h"
-
-#include <yql/essentials/core/issue/protos/issue_id.pb.h>
-#include <yql/essentials/parser/proto_ast/gen/jsonpath/JsonPathLexer.h>
-#include <yql/essentials/parser/proto_ast/gen/jsonpath/JsonPathParser.h>
-#include <yql/essentials/parser/proto_ast/gen/jsonpath/JsonPathParser.pb.h>
-#include <yql/essentials/parser/proto_ast/antlr3/proto_ast_antlr3.h>
-
-#include <google/protobuf/message.h>
-
-#include <util/string/strip.h>
-
-#if defined(_tsan_enabled_)
-#include <util/system/mutex.h>
-#endif
 
 using namespace NYql;
 using namespace NYql::NUdf;
 using namespace NJson;
 
-namespace {
-
-#if defined(_tsan_enabled_)
-TMutex SanitizerJsonPathTranslationMutex;
-#endif
-
-class TParseErrorsCollector : public NProtoAST::IErrorCollector {
-public:
-    TParseErrorsCollector(TIssues& issues, size_t maxErrors)
-        : IErrorCollector(maxErrors)
-        , Issues(issues)
-    {
-    }
-
-private:
-    void AddError(ui32 line, ui32 column, const TString& message) override {
-        Issues.AddIssue(TPosition(column, line, "jsonpath"), StripString(message));
-        Issues.back().SetCode(TIssuesIds::JSONPATH_PARSE_ERROR, TSeverityIds::S_ERROR);
-    }
-
-    TIssues& Issues;
-};
-
-}
-
 namespace NYql::NJsonPath {
 
-const TAstNodePtr ParseJsonPathAst(const TStringBuf path, TIssues& issues, size_t maxParseErrors) {
-    if (!IsUtf(path)) {
-        issues.AddIssue(TPosition(1, 1, "jsonpath"), "JsonPath must be UTF-8 encoded string");
-        issues.back().SetCode(TIssuesIds::JSONPATH_PARSE_ERROR, TSeverityIds::S_ERROR);
-        return {};
-    }
-
-    google::protobuf::Arena arena;
-    const google::protobuf::Message* rawAst = nullptr;
-    {
-    #if defined(_tsan_enabled_)
-        TGuard<TMutex> guard(SanitizerJsonPathTranslationMutex);
-    #endif
-        NProtoAST::TProtoASTBuilder3<NALP::JsonPathParser, NALP::JsonPathLexer> builder(path, "JsonPath", &arena);
-        TParseErrorsCollector collector(issues, maxParseErrors);
-        rawAst = builder.BuildAST(collector);
-    }
-
-    if (rawAst == nullptr) {
-        return nullptr;
-    }
-
-    const google::protobuf::Descriptor* descriptor = rawAst->GetDescriptor();
-    if (descriptor && descriptor->name() != "TJsonPathParserAST") {
-        return nullptr;
-    }
-
-    const auto* protoAst = static_cast<const NJsonPathGenerated::TJsonPathParserAST*>(rawAst);
-    TAstBuilder astBuilder(issues);
-    TAstNodePtr ast = astBuilder.Build(*protoAst);
-    if (!issues.Empty()) {
-        return nullptr;
-    }
-
-    // At this point AST is guaranteed to be valid. We return it even if
-    // type checker finds some logical errors.
-    TJsonPathTypeChecker checker(issues);
-    ast->Accept(checker);
-    return ast;
-}
-
-const TJsonPathPtr PackBinaryJsonPath(const TAstNodePtr ast) {
-    TJsonPathBuilder builder;
-    ast->Accept(builder);
-    return builder.ShrinkAndGetResult();
-}
-
-const TJsonPathPtr ParseJsonPath(const TStringBuf path, TIssues& issues, size_t maxParseErrors) {
-    const auto ast = ParseJsonPathAst(path, issues, maxParseErrors);
-    if (!issues.Empty()) {
-        return {};
-    }
-    return PackBinaryJsonPath(ast);
-}
-
 TResult ExecuteJsonPath(
     const TJsonPathPtr jsonPath,
     const TValue& json,

+ 2 - 6
yql/essentials/minikql/jsonpath/jsonpath.h

@@ -2,17 +2,13 @@
 
 #include "executor.h"
 
+#include <yql/essentials/minikql/jsonpath/parser/parser.h>
+
 #include <yql/essentials/public/udf/udf_value.h>
 #include <yql/essentials/public/udf/udf_value_builder.h>
 
 namespace NYql::NJsonPath {
 
-const TAstNodePtr ParseJsonPathAst(const TStringBuf path, TIssues& issues, size_t maxParseErrors);
-
-const TJsonPathPtr PackBinaryJsonPath(const TAstNodePtr ast, TIssues& issues);
-
-const TJsonPathPtr ParseJsonPath(const TStringBuf path, TIssues& issues, size_t maxParseErrors);
-
 TVariablesMap DictToVariables(const NUdf::TUnboxedValue& dict);
 
 TResult ExecuteJsonPath(

+ 2 - 1
yql/essentials/minikql/jsonpath/ast_builder.cpp → yql/essentials/minikql/jsonpath/parser/ast_builder.cpp

@@ -293,7 +293,8 @@ TAstNodePtr TAstBuilder::BuildLikeRegexExpr(const TRule_like_regex_expr& node, T
 
     IRePtr compiledRegex;
     try {
-        compiledRegex = NDispatcher::Compile(regex, parsedFlags, RegexpLibId);
+        compiledRegex = NDispatcher::Compile(regex, parsedFlags,
+            NDispatcher::Has(RegexpLibId) ? RegexpLibId : TSerialization::kRe2);
     } catch (const NReWrapper::TCompileException& e) {
         Error(GetPos(regexToken), e.AsStrBuf());
         return nullptr;

+ 0 - 0
yql/essentials/minikql/jsonpath/ast_builder.h → yql/essentials/minikql/jsonpath/parser/ast_builder.h


+ 0 - 0
yql/essentials/minikql/jsonpath/ast_nodes.cpp → yql/essentials/minikql/jsonpath/parser/ast_nodes.cpp


+ 0 - 0
yql/essentials/minikql/jsonpath/ast_nodes.h → yql/essentials/minikql/jsonpath/parser/ast_nodes.h


Some files were not shown because too many files changed in this diff