Browse Source

Move pq_read tool to ydb

hor911 1 year ago
parent
commit
f0c60cc7e1

+ 5 - 0
.mapping.json

@@ -9818,6 +9818,11 @@
   "ydb/tests/tools/kqprun/src/CMakeLists.linux-x86_64.txt":"",
   "ydb/tests/tools/kqprun/src/CMakeLists.txt":"",
   "ydb/tests/tools/kqprun/src/CMakeLists.windows-x86_64.txt":"",
+  "ydb/tests/tools/pq_read/CMakeLists.darwin-x86_64.txt":"",
+  "ydb/tests/tools/pq_read/CMakeLists.linux-aarch64.txt":"",
+  "ydb/tests/tools/pq_read/CMakeLists.linux-x86_64.txt":"",
+  "ydb/tests/tools/pq_read/CMakeLists.txt":"",
+  "ydb/tests/tools/pq_read/CMakeLists.windows-x86_64.txt":"",
   "yt/CMakeLists.txt":"",
   "yt/cpp/CMakeLists.txt":"",
   "yt/cpp/mapreduce/CMakeLists.txt":"",

+ 139 - 0
ydb/tests/fq/s3/test_s3.py

@@ -4,8 +4,10 @@
 import boto3
 import logging
 import pytest
+import time
 import ydb.public.api.protos.draft.fq_pb2 as fq
 import ydb.public.api.protos.ydb_value_pb2 as ydb
+import ydb.tests.library.common.yatest_common as yatest_common
 from ydb.tests.tools.datastreams_helpers.test_yds_base import TestYdsBase
 from ydb.tests.tools.fq_runner.kikimr_utils import yq_v1, yq_all
 
@@ -215,6 +217,143 @@ Pear,15,33'''
         query_id = client.create_query("simple", sql, type=fq.QueryContent.QueryType.ANALYTICS).result.query_id
         client.wait_query_status(query_id, fq.QueryMeta.FAILED)
 
+    @yq_v1
+    @pytest.mark.parametrize("client", [{"folder_id": "my_folder"}], indirect=True)
+    def test_checkpoints_on_join_s3_with_yds(self, kikimr, s3, client):
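+        # Prepare S3: recreate an empty bucket, upload three JSON key/value objects and register it as the "s3_dict" connection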
+        resource = boto3.resource(
+            "s3",
+            endpoint_url=s3.s3_url,
+            aws_access_key_id="key",
+            aws_secret_access_key="secret_key"
+        )
+
+        s3_client = boto3.client(
+            "s3",
+            endpoint_url=s3.s3_url,
+            aws_access_key_id="key",
+            aws_secret_access_key="secret_key"
+        )
+
+        bucket_name = "join_s3_with_yds"
+        bucket = resource.Bucket(bucket_name)
+        bucket.create(ACL='public-read')
+        bucket.objects.all().delete()
+
+        def put_kv(k, v):
+            json = '{}"key": {}, "value": "{}"{}'.format("{", k, v, "}")
+            s3_client.put_object(Body=json, Bucket=bucket_name, Key='a/b/c/{}.json'.format(k), ContentType='text/json')
+
+        put_kv(1, "one")
+        put_kv(2, "two")
+        put_kv(3, "three")
+
+        kikimr.control_plane.wait_bootstrap(1)
+        client.create_storage_connection("s3_dict", bucket_name)
+
+        # Prepare YDS: initialize the topics and create the "yds" connection
+        self.init_topics("yds_dict")
+        client.create_yds_connection(name="yds", database_id="FakeDatabaseId")
+
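+        # Run a streaming query that joins the YDS input topic with the S3 dictionary by key and writes the joined rows to the output topic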
+        sql = R'''
+            PRAGMA dq.MaxTasksPerStage="2";
+
+            $s3_dict_raw =
+                SELECT cast(Data AS json) AS data
+                FROM s3_dict.`*`
+                WITH (format=raw, SCHEMA (
+                    Data String NOT NULL
+                ));
+
+            $s3_dict =
+                SELECT
+                    cast(JSON_VALUE(data, '$.key') AS int64) AS key,
+                    cast(JSON_VALUE(data, '$.value') AS String) AS value
+                FROM $s3_dict_raw;
+
+            $parsed_yson_topic =
+                SELECT
+                    Yson::LookupInt64(yson_data, "key") AS key,
+                    Yson::LookupString(yson_data, "val") AS val
+                FROM (
+                    SELECT
+                        Yson::Parse(Data) AS yson_data
+                    FROM yds.`{input_topic}` WITH SCHEMA (Data String NOT NULL));
+
+            $joined_seq =
+                SELECT
+                    s3_dict.value AS num,
+                    yds_seq.val AS word
+                FROM $parsed_yson_topic AS yds_seq
+                    INNER JOIN $s3_dict AS s3_dict
+                        ON yds_seq.key = s3_dict.key;
+
+            INSERT INTO yds.`{output_topic}`
+            SELECT
+                Yson::SerializeText(Yson::From(TableRow()))
+            FROM $joined_seq;
+            '''\
+        .format(
+            input_topic=self.input_topic,
+            output_topic=self.output_topic,
+        )
+
+        query_id = client.create_query("simple", sql, type=fq.QueryContent.QueryType.STREAMING).result.query_id
+        client.wait_query_status(query_id, fq.QueryMeta.RUNNING)
+        kikimr.control_plane.wait_zero_checkpoint(query_id)
+
+        yds_data = [
+            '{"key" = 1; "val" = "January";}',
+            '{"key" = 2; "val" = "February";}',
+            '{"key" = 3; "val" = "March";}',
+            '{"key" = 1; "val" = "Monday";}',
+            '{"key" = 2; "val" = "Tuesday";}',
+            '{"key" = 3; "val" = "Wednesday";}',
+            '{"key" = 1; "val" = "Gold";}',
+            '{"key" = 2; "val" = "Silver";}',
+            '{"key" = 3; "val" = "Bronze";}',
+        ]
+        self.write_stream(yds_data)
+
+        expected = [
+            '{"num" = "one"; "word" = "January"}',
+            '{"num" = "two"; "word" = "February"}',
+            '{"num" = "three"; "word" = "March"}',
+            '{"num" = "one"; "word" = "Monday"}',
+            '{"num" = "two"; "word" = "Tuesday"}',
+            '{"num" = "three"; "word" = "Wednesday"}',
+            '{"num" = "one"; "word" = "Gold"}',
+            '{"num" = "two"; "word" = "Silver"}',
+            '{"num" = "three"; "word" = "Bronze"}',
+        ]
+        assert self.read_stream(len(expected)) == expected
+
+        # Poll until at least 3 checkpoints have completed; the assert fails the test once the deadline has passed
+        def wait_checkpoints(require_query_is_on=False):
+            deadline = time.time() + yatest_common.plain_or_under_sanitizer(300, 900)
+            while True:
+                completed = kikimr.control_plane.get_completed_checkpoints(query_id, require_query_is_on)
+                if completed >= 3:
+                    break
+                assert time.time() < deadline, "Completed: {}".format(completed)
+                time.sleep(yatest_common.plain_or_under_sanitizer(0.5, 2))
+
+        logging.debug("Wait checkpoints")
+        wait_checkpoints(True)
+        logging.debug("Wait checkpoints success")
+
+        kikimr.control_plane.kikimr_cluster.nodes[1].stop()
+        kikimr.control_plane.kikimr_cluster.nodes[1].start()
+        kikimr.control_plane.wait_bootstrap(1)
+
+        logging.debug("Wait checkpoints after restore")
+        wait_checkpoints(False)
+        logging.debug("Wait checkpoints after restore success")
+
+        client.abort_query(query_id)
+        client.wait_query(query_id)
+
     @yq_v1  # v2 compute with multiple nodes is not supported yet
     @pytest.mark.parametrize("client", [{"folder_id": "my_folder"}], indirect=True)
     @pytest.mark.parametrize("kikimr", [{"compute": 3}], indirect=True)

+ 1 - 0
ydb/tests/fq/s3/ya.make

@@ -16,6 +16,7 @@ PEERDIR(
 
 DEPENDS(
     contrib/python/moto/bin
+    ydb/tests/tools/pq_read
 )
 
 TEST_SRCS(

+ 1 - 0
ydb/tests/tools/CMakeLists.txt

@@ -8,3 +8,4 @@
 
 add_subdirectory(idx_test)
 add_subdirectory(kqprun)
+add_subdirectory(pq_read)

+ 1 - 1
ydb/tests/tools/datastreams_helpers/data_plane.py

@@ -49,7 +49,7 @@ def read_stream(path, messages_count, commit_after_processing=True, consumer_nam
     )
     result_file = yatest.common.output_path(result_file_name)
     cmd = [
-        yatest.common.binary_path("kikimr/yq/tools/pq_read/pq_read"),
+        yatest.common.binary_path("ydb/tests/tools/pq_read/pq_read"),
         "--endpoint", os.getenv("YDB_ENDPOINT"),
         "--database", os.getenv("YDB_DATABASE"),
         "--topic-path", path,

+ 33 - 0
ydb/tests/tools/pq_read/CMakeLists.darwin-x86_64.txt

@@ -0,0 +1,33 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+add_executable(pq_read)
+target_link_libraries(pq_read PUBLIC
+  contrib-libs-cxxsupp
+  yutil
+  library-cpp-cpuid_check
+  library-cpp-colorizer
+  library-cpp-getopt
+  cpp-threading-future
+  cpp-client-ydb_persqueue_public
+)
+target_link_options(pq_read PRIVATE
+  -Wl,-platform_version,macos,11.0,11.0
+  -fPIC
+  -fPIC
+  -framework
+  CoreFoundation
+)
+target_sources(pq_read PRIVATE
+  ${CMAKE_SOURCE_DIR}/ydb/tests/tools/pq_read/main.cpp
+)
+target_allocator(pq_read
+  system_allocator
+)
+vcs_info(pq_read)

+ 36 - 0
ydb/tests/tools/pq_read/CMakeLists.linux-aarch64.txt

@@ -0,0 +1,36 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+add_executable(pq_read)
+target_link_libraries(pq_read PUBLIC
+  contrib-libs-linux-headers
+  contrib-libs-cxxsupp
+  yutil
+  library-cpp-colorizer
+  library-cpp-getopt
+  cpp-threading-future
+  cpp-client-ydb_persqueue_public
+)
+target_link_options(pq_read PRIVATE
+  -ldl
+  -lrt
+  -Wl,--no-as-needed
+  -fPIC
+  -fPIC
+  -lpthread
+  -lrt
+  -ldl
+)
+target_sources(pq_read PRIVATE
+  ${CMAKE_SOURCE_DIR}/ydb/tests/tools/pq_read/main.cpp
+)
+target_allocator(pq_read
+  cpp-malloc-jemalloc
+)
+vcs_info(pq_read)

+ 38 - 0
ydb/tests/tools/pq_read/CMakeLists.linux-x86_64.txt

@@ -0,0 +1,38 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+add_executable(pq_read)
+target_link_libraries(pq_read PUBLIC
+  contrib-libs-linux-headers
+  contrib-libs-cxxsupp
+  yutil
+  library-cpp-cpuid_check
+  library-cpp-colorizer
+  library-cpp-getopt
+  cpp-threading-future
+  cpp-client-ydb_persqueue_public
+)
+target_link_options(pq_read PRIVATE
+  -ldl
+  -lrt
+  -Wl,--no-as-needed
+  -fPIC
+  -fPIC
+  -lpthread
+  -lrt
+  -ldl
+)
+target_sources(pq_read PRIVATE
+  ${CMAKE_SOURCE_DIR}/ydb/tests/tools/pq_read/main.cpp
+)
+target_allocator(pq_read
+  cpp-malloc-tcmalloc
+  libs-tcmalloc-no_percpu_cache
+)
+vcs_info(pq_read)

+ 17 - 0
ydb/tests/tools/pq_read/CMakeLists.txt

@@ -0,0 +1,17 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT HAVE_CUDA)
+  include(CMakeLists.linux-aarch64.txt)
+elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64")
+  include(CMakeLists.darwin-x86_64.txt)
+elseif (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64" AND NOT HAVE_CUDA)
+  include(CMakeLists.windows-x86_64.txt)
+elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT HAVE_CUDA)
+  include(CMakeLists.linux-x86_64.txt)
+endif()

+ 26 - 0
ydb/tests/tools/pq_read/CMakeLists.windows-x86_64.txt

@@ -0,0 +1,26 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+add_executable(pq_read)
+target_link_libraries(pq_read PUBLIC
+  contrib-libs-cxxsupp
+  yutil
+  library-cpp-cpuid_check
+  library-cpp-colorizer
+  library-cpp-getopt
+  cpp-threading-future
+  cpp-client-ydb_persqueue_public
+)
+target_sources(pq_read PRIVATE
+  ${CMAKE_SOURCE_DIR}/ydb/tests/tools/pq_read/main.cpp
+)
+target_allocator(pq_read
+  system_allocator
+)
+vcs_info(pq_read)

Some files were not shown because too many files changed in this diff