Browse Source

YQL-16257 Moved sql (yt_native_file & dq_file) tests, file & streaming udfs

vvvv 1 year ago
parent
commit
2081f10ef6

+ 10 - 0
.mapping.json

@@ -8383,6 +8383,11 @@
   "ydb/library/yql/udfs/common/digest/CMakeLists.linux-x86_64.txt":"",
   "ydb/library/yql/udfs/common/digest/CMakeLists.txt":"",
   "ydb/library/yql/udfs/common/digest/CMakeLists.windows-x86_64.txt":"",
+  "ydb/library/yql/udfs/common/file/CMakeLists.darwin-x86_64.txt":"",
+  "ydb/library/yql/udfs/common/file/CMakeLists.linux-aarch64.txt":"",
+  "ydb/library/yql/udfs/common/file/CMakeLists.linux-x86_64.txt":"",
+  "ydb/library/yql/udfs/common/file/CMakeLists.txt":"",
+  "ydb/library/yql/udfs/common/file/CMakeLists.windows-x86_64.txt":"",
   "ydb/library/yql/udfs/common/histogram/CMakeLists.darwin-x86_64.txt":"",
   "ydb/library/yql/udfs/common/histogram/CMakeLists.linux-aarch64.txt":"",
   "ydb/library/yql/udfs/common/histogram/CMakeLists.linux-x86_64.txt":"",
@@ -8462,6 +8467,11 @@
   "ydb/library/yql/udfs/common/stat/ut/CMakeLists.linux-x86_64.txt":"",
   "ydb/library/yql/udfs/common/stat/ut/CMakeLists.txt":"",
   "ydb/library/yql/udfs/common/stat/ut/CMakeLists.windows-x86_64.txt":"",
+  "ydb/library/yql/udfs/common/streaming/CMakeLists.darwin-x86_64.txt":"",
+  "ydb/library/yql/udfs/common/streaming/CMakeLists.linux-aarch64.txt":"",
+  "ydb/library/yql/udfs/common/streaming/CMakeLists.linux-x86_64.txt":"",
+  "ydb/library/yql/udfs/common/streaming/CMakeLists.txt":"",
+  "ydb/library/yql/udfs/common/streaming/CMakeLists.windows-x86_64.txt":"",
   "ydb/library/yql/udfs/common/string/CMakeLists.darwin-x86_64.txt":"",
   "ydb/library/yql/udfs/common/string/CMakeLists.linux-aarch64.txt":"",
   "ydb/library/yql/udfs/common/string/CMakeLists.linux-x86_64.txt":"",

+ 8 - 0
ydb/library/yql/tests/common/test_framework/conftest.py

@@ -0,0 +1,8 @@
+# Make the yql_http_file_server name available from this conftest module.
+# The import is optional: if the yql_http_file_server module cannot be
+# imported, the name is bound to None instead of failing at import time.
+try:
+    from yql_http_file_server import yql_http_file_server
+except ImportError:
+    yql_http_file_server = None
+
+# The assert below is always true and has no runtime effect; it exists only
+# so that linters see the imported name being used and do not report it
+# as an unused import.
+assert yql_http_file_server is yql_http_file_server

+ 2 - 0
ydb/library/yql/tests/common/test_framework/udfs_deps/ya.make

@@ -2,6 +2,7 @@ SET(
     UDFS
     ydb/library/yql/udfs/common/datetime2
     ydb/library/yql/udfs/common/digest
+    ydb/library/yql/udfs/common/file
     ydb/library/yql/udfs/common/hyperloglog
     ydb/library/yql/udfs/common/pire
     ydb/library/yql/udfs/common/protobuf
@@ -17,6 +18,7 @@ SET(
     ydb/library/yql/udfs/common/math
     ydb/library/yql/udfs/common/url_base
     ydb/library/yql/udfs/common/unicode_base
+    ydb/library/yql/udfs/common/streaming
     ydb/library/yql/udfs/examples/callables
     ydb/library/yql/udfs/examples/dicts
     ydb/library/yql/udfs/examples/dummylog

+ 6 - 0
ydb/library/yql/tests/common/test_framework/ya.make

@@ -5,6 +5,12 @@ PY_SRCS(
     yql_utils.py
     yql_ports.py
     yqlrun.py
+    yql_http_file_server.py
+)
+
+PY_SRCS(
+    NAMESPACE ydb_library_yql_test_framework
+    conftest.py
 )
 
 PEERDIR(

+ 136 - 0
ydb/library/yql/tests/common/test_framework/yql_http_file_server.py

@@ -0,0 +1,136 @@
+import io
+import os
+import pytest
+import threading
+import shutil
+
+import six.moves.BaseHTTPServer as BaseHTTPServer
+import six.moves.socketserver as socketserver
+
+from yql_ports import get_yql_port, release_yql_port
+
+
+# A handler instance is created for every request, so all shared state
+# (registered files, etag) is kept on the server object.
+class YqlHttpRequestHandler(BaseHTTPServer.BaseHTTPRequestHandler):
+    """Serve the files registered on the owning server for GET and HEAD.
+
+    Reads ``file_paths``, ``file_contents`` and ``etag`` from ``self.server``.
+    """
+
+    def get_requested_filename(self):
+        """Return the request path with its leading slashes removed."""
+        return self.path.lstrip('/')
+
+    def do_GET(self):
+        """Send the headers and, for a 200 response, the file body."""
+        f = self.send_head(self.get_requested_filename())
+        if f is None:
+            return
+        try:
+            shutil.copyfileobj(f, self.wfile)
+        finally:
+            f.close()
+
+    def do_HEAD(self):
+        """Send the headers only; the file is opened just to learn its size."""
+        f = self.send_head(self.get_requested_filename())
+        if f is not None:
+            f.close()
+
+    def get_file_and_size(self, filename):
+        """Look up ``filename`` and return ``(file_object, size)``.
+
+        Names registered in ``server.file_paths`` take precedence over the
+        in-memory ``server.file_contents``. Returns ``(None, 0)`` when the
+        name is in neither mapping. The caller owns the returned object and
+        must close it.
+        """
+        # Only the dictionary lookup is guarded, so errors raised while
+        # opening the file are never confused with "not registered".
+        try:
+            path = self.server.file_paths[filename]
+        except KeyError:
+            pass
+        else:
+            f = open(path, 'rb')
+            try:
+                size = os.fstat(f.fileno()).st_size
+            except Exception:
+                # do not leak the descriptor if stat fails
+                f.close()
+                raise
+            return (f, size)
+
+        try:
+            content = self.server.file_contents[filename]
+        except KeyError:
+            return (None, 0)
+
+        return (io.BytesIO(content), len(content))
+
+    def send_head(self, filename):
+        """Send the status line and headers for ``filename``.
+
+        Returns the open file object after a 200 response (the caller must
+        close it). Returns None after a 404 (name not registered) or a 304
+        (the request's If-None-Match header equals the server etag).
+        """
+        (f, size) = self.get_file_and_size(filename)
+
+        if f is None:
+            self.send_error(404, "File %s not found" % filename)
+            return None
+
+        # read once so that both checks below see the same value
+        etag = self.server.etag
+
+        if etag is not None and self.headers.get('If-None-Match', None) == etag:
+            # no body follows a 304, so release the file before responding
+            f.close()
+            self.send_response(304)
+            self.end_headers()
+            return None
+
+        self.send_response(200)
+
+        if etag is not None:
+            self.send_header("ETag", etag)
+
+        self.send_header("Content-type", 'application/octet-stream')
+        self.send_header("Content-Length", str(size))
+        self.end_headers()
+        return f
+
+
+class YqlHttpFileServer(socketserver.TCPServer, object):
+    """TCP server that serves registered files through YqlHttpRequestHandler.
+
+    The port is obtained from ``get_yql_port`` at construction time; the
+    socket is bound and the serving thread started only by ``start()``.
+    """
+
+    def __init__(self):
+        self.http_server_port = get_yql_port('YqlHttpFileServer')
+        # bind_and_activate=False: binding is deferred to start()
+        super(YqlHttpFileServer, self).__init__(('', self.http_server_port), YqlHttpRequestHandler,
+                                                bind_and_activate=False)
+        # name -> bytes served from memory
+        self.file_contents = {}
+        # name -> path of a file on disk
+        self.file_paths = {}
+        # common etag for all resources
+        self.etag = None
+        self.serve_thread = None
+
+    def start(self):
+        """Bind the socket and run ``serve_forever`` in a background thread."""
+        # must be set before server_bind(), which reads this flag
+        self.allow_reuse_address = True
+        self.server_bind()
+        self.server_activate()
+        self.serve_thread = threading.Thread(target=self.serve_forever)
+        self.serve_thread.start()
+
+    def stop(self):
+        """Stop serving, close the listening socket and release the port."""
+        if self.serve_thread is not None:
+            # shutdown() waits for serve_forever() to exit, so it is only
+            # safe to call when the serving loop was actually started
+            super(YqlHttpFileServer, self).shutdown()
+            self.serve_thread.join()
+            self.serve_thread = None
+
+        # close the socket before the port is handed back for reuse
+        self.server_close()
+
+        if self.http_server_port is not None:
+            release_yql_port(self.http_server_port)
+            self.http_server_port = None
+
+    def forget_files(self):
+        """Drop every registered file."""
+        self.register_files({}, {})
+
+    def set_etag(self, newEtag):
+        """Set the etag used for all resources (None disables etag handling)."""
+        self.etag = newEtag
+
+    def register_new_path(self, key, file_path):
+        """Register one on-disk file under ``key`` and return its URL."""
+        self.file_paths[key] = file_path
+        return self.compose_http_link(key)
+
+    def register_files(self, file_contents, file_paths):
+        """Replace the registered files and return a ``{name: url}`` dict.
+
+        Either argument may be None, which is treated as an empty mapping so
+        that later lookups and ``register_new_path`` keep working.
+        """
+        self.file_contents = {} if file_contents is None else file_contents
+        self.file_paths = {} if file_paths is None else file_paths
+
+        keys = list(self.file_contents) + list(self.file_paths)
+        return {k: self.compose_http_link(k) for k in keys}
+
+    def compose_http_link(self, filename):
+        """Return the URL under which ``filename`` is served."""
+        return self.compose_http_host() + '/' + filename
+
+    def compose_http_host(self):
+        """Return the base URL of the server; raises if no port is held."""
+        if not self.http_server_port:
+            raise Exception('http_server_port is empty. start HTTP server first')
+
+        return 'http://localhost:%d' % self.http_server_port
+
+
+@pytest.fixture(scope='module')
+def yql_http_file_server(request):
+    """Module-scoped fixture: a started YqlHttpFileServer, stopped at teardown."""
+    http_server = YqlHttpFileServer()
+    http_server.start()
+    # the finalizer is registered only once the server is actually running
+    request.addfinalizer(http_server.stop)
+    return http_server

+ 7 - 3
ydb/library/yql/tests/common/test_framework/yql_utils.py

@@ -10,7 +10,7 @@ import re
 import tempfile
 import shutil
 
-from collections import namedtuple, defaultdict
+from collections import namedtuple, defaultdict, OrderedDict
 from functools import partial
 import codecs
 import decimal
@@ -801,8 +801,8 @@ def normalize_table_yson(y):
     if isinstance(y, list):
         return [normalize_table_yson(i) for i in y]
     if isinstance(y, dict):
-        normDict = dict()
-        for k, v in six.iteritems(y):
+        normDict = OrderedDict()
+        for k, v in sorted(six.iteritems(y), key=lambda x: x[0], reverse=True):
             if k == "_other":
                 normDict[normalize_table_yson(k)] = sorted(normalize_table_yson(v))
             elif v != "Void" and v is not None and not isinstance(v, YsonEntity):
@@ -811,6 +811,10 @@ def normalize_table_yson(y):
     return y
 
 
+def dump_table_yson(res_yson):
+    """Return a normalized, sorted, pretty-printed dump of table content.
+
+    ``res_yson`` is wrapped in ``[...]`` and parsed with cyson; the parsed
+    value is passed through ``normalize_table_yson``, sorted, and dumped
+    back with cyson in "pretty" format.
+    """
+    parsed_rows = cyson.loads('[' + res_yson + ']')
+    ordered_rows = sorted(normalize_table_yson(parsed_rows))
+    return cyson.dumps(ordered_rows, format="pretty")
+
+
 def normalize_source_code_path(s):
     # remove contrib/
     s = re.sub(r'\b(contrib/)(ydb/library/yql.*)', r'\2', s)

+ 0 - 6
ydb/library/yql/tests/common/test_framework/yqlrun.py

@@ -1,5 +1,4 @@
 import os
-import pytest
 import shutil
 import yatest.common
 import yql_utils
@@ -244,11 +243,6 @@ class YQLRun(object):
         if run_sql and not self.use_sql2yql:
             cmd += '--sql '
 
-        if yql_utils.get_param('MULTIRUN'):
-            if '/* multirun can not */' in yql_program:
-                pytest.skip('multirun can not execute this')
-            cmd += '-M %s ' % yql_utils.get_param('MULTIRUN')
-
         if parameters:
             parameters_file = res_file_path('params.yson')
             with open(parameters_file, 'w') as f:

+ 10 - 3
ydb/library/yql/tests/s-expressions/common_file.py

@@ -1,3 +1,5 @@
+import codecs
+import cyson
 import os
 import re
 import pytest
@@ -22,9 +24,6 @@ def get_block_gateways_config():
 
 def yqlrun_yt_results(provider, prepare, suite, case, config):
     if (suite, case) not in yqlrun_yt_results.cache:
-        if 'ViewWithUdfProcess' in case:
-            pytest.skip('ScriptUdf')
-
         if provider not in get_supported_providers(config):
             pytest.skip('%s provider is not supported here' % provider)
 
@@ -38,6 +37,14 @@ def yqlrun_yt_results(provider, prepare, suite, case, config):
         in_tables, out_tables = get_tables(suite, config, DATA_PATH, def_attr=KSV_ATTR)
         files = get_files(suite, config, DATA_PATH)
 
+        for table in in_tables:
+            if cyson.loads(table.attr).get("type") == "document":
+                content = table.content
+            else:
+                content = table.attr
+            if 'Python' in content or 'Javascript' in content:
+                pytest.skip('ScriptUdf')
+
         yqlrun = YQLRun(prov=provider, keep_temp=True, udfs_dir=yql_binary_path('ydb/library/yql/tests/common/test_framework/udfs_deps'))
         res, tables_res = execute(
             yqlrun,

+ 48 - 0
ydb/library/yql/tests/sql/dq_file.make

@@ -0,0 +1,48 @@
+# Shared test-module definition: a Python 2 test built from test.py.
+PY2TEST()
+
+TEST_SRCS(
+    test.py
+)
+
+# Sanitizer and valgrind builds get a longer timeout and the LARGE size;
+# all other builds run as MEDIUM tests with a 600 second timeout.
+IF (SANITIZER_TYPE OR WITH_VALGRIND)
+    TIMEOUT(1800)
+    SIZE(LARGE)
+    TAG(ya:fat sb:ttl=2)
+ELSE()
+    TIMEOUT(600)
+    SIZE(MEDIUM)
+    TAG(sb:ttl=2)
+ENDIF()
+
+# Run tests and subtests in separate forks, split into 10 chunks.
+FORK_TESTS()
+FORK_SUBTESTS()
+SPLIT_FACTOR(10)
+
+# Tools and UDF libraries that must be built before the tests run.
+DEPENDS(
+    ydb/library/yql/tools/astdiff
+    ydb/library/yql/tools/dqrun
+    ydb/library/yql/tools/yqlrun
+    ydb/library/yql/tests/common/test_framework/udfs_deps
+)
+# Source-tree data made available to the tests at run time.
+DATA(
+    arcadia/ydb/library/yql/tests/sql # python files
+    arcadia/ydb/library/yql/mount
+    arcadia/ydb/library/yql/cfg/tests
+)
+# Python libraries the test code depends on.
+PEERDIR(
+    ydb/library/yql/tests/common/test_framework
+    library/python/testing/swag/lib
+)
+
+NO_CHECK_IMPORTS()
+
+REQUIREMENTS(
+    ram:32
+)
+
+# Memory-sanitizer builds are tagged ya:not_autocheck; see the ticket below.
+IF (SANITIZER_TYPE == "memory")
+    TAG(ya:not_autocheck) # YQL-15385
+ENDIF()
+
+END()
+

+ 3 - 14
ydb/library/yql/tests/sql/dq_file.py

@@ -4,13 +4,11 @@ import pytest
 import re
 import json
 import yql_utils
-from yt import yson
-from yt.yson.convert import yson_to_json
 import cyson
 
 import yatest.common
 from yql_utils import execute_sql, get_tables, get_files, get_http_files, replace_vals, get_supported_providers, \
-    KSV_ATTR, yql_binary_path, is_xfail, is_skip_forceblocks, get_param, normalize_source_code_path, normalize_table_yson, \
+    KSV_ATTR, yql_binary_path, is_xfail, is_skip_forceblocks, get_param, normalize_source_code_path, dump_table_yson, \
     get_gateway_cfg_suffix, do_custom_query_check
 from yqlrun import YQLRun
 
@@ -22,8 +20,6 @@ DQRUN_PATH = yql_binary_path('ydb/library/yql/tools/dqrun/dqrun')
 
 
 def run_test(suite, case, cfg, tmpdir, what, yql_http_file_server):
-    if get_param('MULTIRUN'):
-        pytest.skip('multirun can not execute this')
     if get_param('SQL_FLAGS'):
         if what == 'Debug' or what == 'Plan':
             pytest.skip('SKIP')
@@ -99,18 +95,11 @@ def run_test(suite, case, cfg, tmpdir, what, yql_http_file_server):
                     '%(dq_result_name)s result:\n %(dq_res_yson)s\n\n' \
                     '%(yqlrun_result_name)s result:\n %(yqlrun_res_yson)s\n' % locals()
 
-                # Compare output tables
-                def dumpJson(res_yson):
-                    return json.dumps(
-                        yson_to_json(sorted(normalize_table_yson(cyson.loads('[' + res_yson + ']')))),
-                        sort_keys=True,
-                        ensure_ascii=False)
-
                 for table in yqlrun_tables_res:
                     assert table in tables_res
 
-                    yqlrun_table_yson = dumpJson(yqlrun_tables_res[table].content)
-                    dq_table_yson = dumpJson(tables_res[table].content)
+                    yqlrun_table_yson = dump_table_yson(yqlrun_tables_res[table].content)
+                    dq_table_yson = dump_table_yson(tables_res[table].content)
 
                     assert yqlrun_table_yson == dq_table_yson, \
                         'OUT_TABLE_DIFFER: %(table)s\n' \

Some files were not shown because too many files changed in this diff