Browse Source

Migrating from antlr3 to antlr4 (#2977)

Co-authored-by: root <p.g.orlov@tinkoff.ru>
Orlov Pavel 6 months ago
parent
commit
11433e1637

+ 1 - 0
.gitignore

@@ -43,6 +43,7 @@ __pycache__/
 .idea/
 .vscode/
 .clangd
+.antlr/
 
 # KDevelop IDE
 *.kdev4

+ 16 - 3
ydb/library/yql/parser/lexer_common/ut/hints_ut.cpp

@@ -8,9 +8,9 @@
 using namespace NSQLTranslation;
 using namespace NSQLTranslationV1;
 
-TSQLHints CollectHints(const TString& query) {
+TSQLHints CollectHints(const TString& query, bool antlr4Parser) {
     bool ansi = false;
-    auto lexer = MakeLexer(ansi);
+    auto lexer = MakeLexer(ansi, antlr4Parser);
     UNIT_ASSERT(lexer);
     TSQLHints result;
     NYql::TIssues issues;
@@ -27,7 +27,20 @@ TString SerializeHints(const TVector<TSQLHint>& hints) {
 Y_UNIT_TEST_SUITE(TLexerHintsTests) {
     Y_UNIT_TEST(Basic) {
         TString query = "/*+ some() */ SELECT /*+ foo(one) */ --+ bar(two)";
-        auto hintsWithPos = CollectHints(query);
+        auto hintsWithPos = CollectHints(query, false);
+        UNIT_ASSERT(hintsWithPos.size() == 1);
+        NYql::TPosition pos = hintsWithPos.begin()->first;
+        TVector<TSQLHint> hints = hintsWithPos.begin()->second;
+        // The single entry is keyed by row 1, column 15 -- the column where SELECT starts in the query.
+        UNIT_ASSERT_EQUAL(pos.Row, 1);
+        UNIT_ASSERT_EQUAL(pos.Column, 15);
+        // Only the hints written after SELECT (foo, bar) are expected; the leading some() hint is absent.
+        TStringBuf expected = R"raw("foo":{"one"},"bar":{"two"})raw";
+        UNIT_ASSERT_NO_DIFF(SerializeHints(hints), expected);
+    }
+    Y_UNIT_TEST(Antlr4) {
+        TString query = "/*+ some() */ SELECT /*+ foo(one) */ --+ bar(two)";
+        auto hintsWithPos = CollectHints(query, true);
         UNIT_ASSERT(hintsWithPos.size() == 1);
         NYql::TPosition pos = hintsWithPos.begin()->first;
         TVector<TSQLHint> hints = hintsWithPos.begin()->second;

+ 4 - 1
ydb/library/yql/parser/proto_ast/gen/jsonpath/ya.make

@@ -10,7 +10,6 @@ IF (CPP_PROTO)
     SET(PROTOBUF_SUFFIX_PATH .pb.h)
     SET(LEXER_PARSER_NAMESPACE NALP)
 
-
     CONFIGURE_FILE(${ARCADIA_ROOT}/ydb/library/yql/parser/proto_ast/org/antlr/codegen/templates/Cpp/Cpp.stg.in ${antlr_templates}/Cpp/Cpp.stg)
     CONFIGURE_FILE(${ARCADIA_ROOT}/ydb/library/yql/parser/proto_ast/org/antlr/codegen/templates/protobuf/protobuf.stg.in ${antlr_templates}/protobuf/protobuf.stg)
 
@@ -28,6 +27,10 @@ IF (CPP_PROTO)
 
     NO_COMPILER_WARNINGS()
 
+    ADDINCL(
+        GLOBAL contrib/libs/antlr4_cpp_runtime/src
+    )
+
     INCLUDE(${ARCADIA_ROOT}/ydb/library/yql/parser/proto_ast/org/antlr/codegen/templates/ya.make.incl)
 
     RUN_ANTLR(

+ 4 - 0
ydb/library/yql/parser/proto_ast/gen/v0/ya.make

@@ -20,6 +20,10 @@ NO_COMPILER_WARNINGS()
 
 INCLUDE(${ARCADIA_ROOT}/ydb/library/yql/parser/proto_ast/org/antlr/codegen/templates/ya.make.incl)
 
+ADDINCL(
+    GLOBAL contrib/libs/antlr4_cpp_runtime/src
+)
+
 RUN_ANTLR(
     ${sql_grammar}
     -lib .

+ 4 - 0
ydb/library/yql/parser/proto_ast/gen/v1/ya.make

@@ -28,6 +28,10 @@ ENDIF()
 
 NO_COMPILER_WARNINGS()
 
+ADDINCL(
+    GLOBAL contrib/libs/antlr4_cpp_runtime/src
+)
+
 INCLUDE(${ARCADIA_ROOT}/ydb/library/yql/parser/proto_ast/org/antlr/codegen/templates/ya.make.incl)
 
 RUN_ANTLR(

+ 4 - 0
ydb/library/yql/parser/proto_ast/gen/v1_ansi/ya.make

@@ -28,6 +28,10 @@ ENDIF()
 
 NO_COMPILER_WARNINGS()
 
+ADDINCL(
+    GLOBAL contrib/libs/antlr4_cpp_runtime/src
+)
+
 INCLUDE(${ARCADIA_ROOT}/ydb/library/yql/parser/proto_ast/org/antlr/codegen/templates/ya.make.incl)
 
 RUN_ANTLR(

+ 10 - 0
ydb/library/yql/parser/proto_ast/gen/v1_ansi_antlr4/epilogue.cmake

@@ -0,0 +1,10 @@
+set(GRAMMAR_STRING_CORE_SINGLE "~([']) | (QUOTE_SINGLE QUOTE_SINGLE)")
+set(GRAMMAR_STRING_CORE_DOUBLE "~([\"]) | (QUOTE_DOUBLE QUOTE_DOUBLE)")
+set(GRAMMAR_MULTILINE_COMMENT_CORE       "MULTILINE_COMMENT | .")
+# Render the shared grammar template into the build tree; the GRAMMAR_* values above are presumably its substitution inputs.
+configure_file(
+  ${CMAKE_SOURCE_DIR}/ydb/library/yql/sql/v1/SQLv1Antlr4.g.in
+  ${CMAKE_BINARY_DIR}/ydb/library/yql/parser/proto_ast/gen/v1_ansi_antlr4/SQLv1Antlr4.g
+)
+
+

+ 52 - 0
ydb/library/yql/parser/proto_ast/gen/v1_ansi_antlr4/ya.make

@@ -0,0 +1,52 @@
+LIBRARY()
+# Builds the ANTLR4-generated C++ lexer and parser for the ANSI variant of the SQLv1 grammar.
+PEERDIR (
+    ydb/library/yql/parser/proto_ast/gen/v1_proto_split
+)
+# The grammar, the code-generation templates and the generated sources all live under this module's build directory.
+SET(antlr_output ${ARCADIA_BUILD_ROOT}/${MODDIR})
+SET(antlr_templates ${antlr_output}/org/antlr/v4/tool/templates/codegen)
+SET(sql_grammar ${antlr_output}/SQLv1Antlr4.g)
+# NOTE(review): the values below are presumably substituted into the .stg.in templates by CONFIGURE_FILE -- confirm.
+SET(ANTLR_PACKAGE_NAME NSQLv1Generated)
+SET(PROTOBUF_HEADER_PATH ydb/library/yql/parser/proto_ast/gen/v1_proto_split)
+SET(PROTOBUF_SUFFIX_PATH .pb.main.h)
+
+SET(LEXER_PARSER_NAMESPACE NALPAnsiAntlr4)
+# Lexer fragments for this variant: a quote inside a string is written doubled; the comment core refers back to MULTILINE_COMMENT.
+SET(GRAMMAR_STRING_CORE_SINGLE "\"~([']) | (QUOTE_SINGLE QUOTE_SINGLE)\"")
+SET(GRAMMAR_STRING_CORE_DOUBLE "\"~([#DOUBLE_QUOTE#]) | (QUOTE_DOUBLE QUOTE_DOUBLE)\"")
+SET(GRAMMAR_MULTILINE_COMMENT_CORE       "\"MULTILINE_COMMENT | .\"")
+# Produce Cpp.stg and Files.stg in the build tree from their .in templates.
+CONFIGURE_FILE(${ARCADIA_ROOT}/ydb/library/yql/parser/proto_ast/org/antlr/v4/tool/templates/codegen/Cpp/Cpp.stg.in ${antlr_templates}/Cpp/Cpp.stg)
+CONFIGURE_FILE(${ARCADIA_ROOT}/ydb/library/yql/parser/proto_ast/org/antlr/v4/tool/templates/codegen/Cpp/Files.stg.in ${antlr_templates}/Cpp/Files.stg)
+# Under EXPORT_CMAKE the grammar is declared as manually generated (presumably by epilogue.cmake); otherwise it is configured here.
+IF(EXPORT_CMAKE)
+    MANUAL_GENERATION(${sql_grammar})
+ELSE()
+    CONFIGURE_FILE(${ARCADIA_ROOT}/ydb/library/yql/sql/v1/SQLv1Antlr4.g.in ${sql_grammar})
+ENDIF()
+
+NO_COMPILER_WARNINGS()
+# Add the ANTLR4 C++ runtime sources to the include path.
+ADDINCL(
+    GLOBAL contrib/libs/antlr4_cpp_runtime/src
+)
+# NOTE(review): presumably defines STG_INCLUDES, which RUN_ANTLR4 lists under OUTPUT_INCLUDES below -- confirm.
+INCLUDE(${ARCADIA_ROOT}/ydb/library/yql/parser/proto_ast/org/antlr/v4/tool/templates/codegen/ya.make.incl)
+# Run ANTLR4 on the configured grammar with -no-listener; outputs are the parser and lexer .cpp/.h files.
+RUN_ANTLR4(
+    ${sql_grammar}
+    -no-listener
+    -package NALPAnsiAntlr4
+    -lib .
+    -o ${antlr_output}
+    IN ${sql_grammar} ${antlr_templates}/Cpp/Cpp.stg ${antlr_templates}/Cpp/Files.stg
+    OUT SQLv1Antlr4Parser.cpp SQLv1Antlr4Lexer.cpp SQLv1Antlr4Parser.h SQLv1Antlr4Lexer.h
+    OUTPUT_INCLUDES
+    ${PROTOBUF_HEADER_PATH}/SQLv1Parser.pb.main.h
+    ${STG_INCLUDES}
+    CWD ${antlr_output}
+)
+
+END()

+ 9 - 0
ydb/library/yql/parser/proto_ast/gen/v1_antlr4/epilogue.cmake

@@ -0,0 +1,9 @@
+set(GRAMMAR_STRING_CORE_SINGLE "~(['\\]) | (BACKSLASH .)")
+set(GRAMMAR_STRING_CORE_DOUBLE "~([\"\\]) | (BACKSLASH .)")
+set(GRAMMAR_MULTILINE_COMMENT_CORE       ".")  # unlike the v1_ansi_antlr4 epilogue, no MULTILINE_COMMENT alternative here
+configure_file(  # render the shared grammar template into the build tree; the GRAMMAR_* values are presumably its inputs
+  ${CMAKE_SOURCE_DIR}/ydb/library/yql/sql/v1/SQLv1Antlr4.g.in
+  ${CMAKE_BINARY_DIR}/ydb/library/yql/parser/proto_ast/gen/v1_antlr4/SQLv1Antlr4.g
+)
+
+

+ 52 - 0
ydb/library/yql/parser/proto_ast/gen/v1_antlr4/ya.make

@@ -0,0 +1,52 @@
+LIBRARY()
+# Builds the ANTLR4-generated C++ lexer and parser for the default (non-ANSI) variant of the SQLv1 grammar.
+PEERDIR (
+    ydb/library/yql/parser/proto_ast/gen/v1_proto_split
+)
+# The grammar, the code-generation templates and the generated sources all live under this module's build directory.
+SET(antlr_output ${ARCADIA_BUILD_ROOT}/${MODDIR})
+SET(antlr_templates ${antlr_output}/org/antlr/v4/tool/templates/codegen)
+SET(sql_grammar ${antlr_output}/SQLv1Antlr4.g)
+# NOTE(review): the values below are presumably substituted into the .stg.in templates by CONFIGURE_FILE -- confirm.
+SET(ANTLR_PACKAGE_NAME NSQLv1Generated)
+SET(PROTOBUF_HEADER_PATH ydb/library/yql/parser/proto_ast/gen/v1_proto_split)
+SET(PROTOBUF_SUFFIX_PATH .pb.main.h)
+
+SET(LEXER_PARSER_NAMESPACE NALPDefaultAntlr4)
+# Lexer fragments for this variant: string contents exclude the quote and backslash unless backslash-escaped; comment core is any char.
+SET(GRAMMAR_STRING_CORE_SINGLE "\"~(['#BACKSLASH#]) | (BACKSLASH .)\"")
+SET(GRAMMAR_STRING_CORE_DOUBLE "\"~([#DOUBLE_QUOTE##BACKSLASH#]) | (BACKSLASH .)\"")
+SET(GRAMMAR_MULTILINE_COMMENT_CORE       "\".\"")
+# Produce Cpp.stg and Files.stg in the build tree from their .in templates.
+CONFIGURE_FILE(${ARCADIA_ROOT}/ydb/library/yql/parser/proto_ast/org/antlr/v4/tool/templates/codegen/Cpp/Cpp.stg.in ${antlr_templates}/Cpp/Cpp.stg)
+CONFIGURE_FILE(${ARCADIA_ROOT}/ydb/library/yql/parser/proto_ast/org/antlr/v4/tool/templates/codegen/Cpp/Files.stg.in ${antlr_templates}/Cpp/Files.stg)
+# Under EXPORT_CMAKE the grammar is declared as manually generated (presumably by epilogue.cmake); otherwise it is configured here.
+IF(EXPORT_CMAKE)
+    MANUAL_GENERATION(${sql_grammar})
+ELSE()
+    CONFIGURE_FILE(${ARCADIA_ROOT}/ydb/library/yql/sql/v1/SQLv1Antlr4.g.in ${sql_grammar})
+ENDIF()
+
+NO_COMPILER_WARNINGS()
+# Add the ANTLR4 C++ runtime sources to the include path.
+ADDINCL(
+    GLOBAL contrib/libs/antlr4_cpp_runtime/src
+)
+# Use the v4 template include (not the ANTLR3 one) so it matches the v4 Cpp.stg/Files.stg configured above and the v1_ansi_antlr4 module.
+INCLUDE(${ARCADIA_ROOT}/ydb/library/yql/parser/proto_ast/org/antlr/v4/tool/templates/codegen/ya.make.incl)
+# Run ANTLR4 on the configured grammar with -no-listener; outputs are the parser and lexer .cpp/.h files.
+RUN_ANTLR4(
+    ${sql_grammar}
+    -no-listener
+    -package NALPDefaultAntlr4
+    -lib .
+    -o ${antlr_output}
+    IN ${sql_grammar} ${antlr_templates}/Cpp/Cpp.stg ${antlr_templates}/Cpp/Files.stg
+    OUT SQLv1Antlr4Parser.cpp SQLv1Antlr4Lexer.cpp SQLv1Antlr4Parser.h SQLv1Antlr4Lexer.h
+    OUTPUT_INCLUDES
+    ${PROTOBUF_HEADER_PATH}/SQLv1Parser.pb.main.h
+    ${STG_INCLUDES}
+    CWD ${antlr_output}
+)
+
+END()

Some files were not shown because too many files changed in this diff