Browse Source

Implement tpc benchmarks as tests (#8125)

Whompe 6 months ago
parent
commit
a20c2251f5

+ 159 - 0
ydb/library/benchmarks/runner/run_tests/run_tests.py

@@ -0,0 +1,159 @@
+import argparse
+import subprocess
+import pathlib
+import os
+from sys import stderr
+
+
def variant(string):
    """argparse type callable: validate the TPC benchmark variant ("h" or "ds")."""
    if string not in ("h", "ds"):
        # argparse converts ValueError from a type callable into a usage error;
        # include the offending value for easier diagnosis.
        raise ValueError(f"variant must be h or ds, got {string!r}")
    return string
+
+
def paths(string):
    """argparse type callable: split a semicolon-separated list into pathlib.Path objects."""
    return [pathlib.Path(part) for part in string.split(";")]
+
+
+def parse_args():
+    # Two-stage CLI: common benchmark options are parsed first, then the
+    # remaining argv is interpreted differently depending on --is-test.
+    subparser = argparse.ArgumentParser()
+
+    subparser.add_argument('--is-test', action="store_true", default=False)
+
+    subparser.add_argument('--datasize', type=int, default=1)
+    subparser.add_argument('--variant', type=variant, default='h')
+    subparser.add_argument('--tasks', type=int, default=1)
+
+    subparser.add_argument('-o', '--output', default="./results")
+    subparser.add_argument('--clean-old', action="store_true", default=False)
+    subparser.add_argument('--query-filter', action="append", default=[])
+
+    args, argv = subparser.parse_known_args()
+
+    if args.is_test:
+        # Test mode (driven by tpc_tests.py): every tool/config location is
+        # supplied explicitly by the test harness.
+        parser = argparse.ArgumentParser()
+
+        parser.add_argument('--dqrun', type=pathlib.Path)
+        parser.add_argument('--gen-queries', type=pathlib.Path)
+        parser.add_argument('--downloaders-dir', type=pathlib.Path)
+        parser.add_argument('--udfs-dir', type=paths)
+        parser.add_argument('--fs-cfg', type=pathlib.Path)
+        parser.add_argument('--flame-graph', type=pathlib.Path)
+        parser.add_argument('--result-compare', type=pathlib.Path)
+        parser.add_argument('--gateways-cfg', type=pathlib.Path)
+        parser.add_argument('--runner-path', type=pathlib.Path)
+
+        return parser.parse_args(argv, namespace=args)
+    else:
+        # Standalone mode: derive every tool/config path from the ydb source
+        # tree root (assumes the binaries are already built in-tree).
+        parser = argparse.ArgumentParser()
+
+        parser.add_argument('--ydb-root', type=lambda path: pathlib.Path(path).resolve(), default="../../../../")
+
+        args = parser.parse_args(argv, namespace=args)
+
+        args.dqrun = args.ydb_root / "ydb" / "library" / "yql" / "tools" / "dqrun" / "dqrun"
+        args.gen_queries = args.ydb_root / "ydb" / "library" / "benchmarks" / "gen_queries" / "gen_queries"
+        args.downloaders_dir = args.ydb_root / "ydb" / "library" / "benchmarks" / "runner"
+        args.fs_cfg = args.ydb_root / "ydb" / "library" / "yql" / "tools" / "dqrun" / "examples" / "fs.conf"
+        args.flame_graph = args.ydb_root / "contrib" / "tools" / "flame-graph"
+        args.result_compare = args.ydb_root / "ydb" / "library" / "benchmarks" / "runner" / "result_compare" / "result_compare"
+        args.gateways_cfg = args.ydb_root / "ydb" / "library" / "benchmarks" / "runner" / "runner" / "test-gateways.conf"
+        args.runner_path = args.ydb_root / "ydb" / "library" / "benchmarks" / "runner" / "runner" / "runner"
+
+        udfs_prefix = args.ydb_root / "ydb" / "library" / "yql" / "udfs" / "common"
+        args.udfs_dir = [udfs_prefix / name for name in ["set", "url_base", "datetime2", "re2", "math", "unicode_base"]]
+
+        return args
+
+
class Runner:
    """Prepares query/data directories and runs one benchmark pass.

    One instance corresponds to one configuration: spilling either enabled or
    disabled, for the (variant, datasize, tasks) combination given in ``args``.
    """

    def _pragmas(self):
        # Base pragmas apply to every run; spilling pragmas are appended only
        # when spilling is enabled.  (The original conditional expression
        # grouped as `([base] + [spill]) if spilling else []`, so a
        # no-spilling run got no pragmas at all — not even MaxTasksPerStage,
        # although the queries-dir name embeds the tasks count in both modes.)
        pragmas = [
            f"dq.MaxTasksPerStage={self.args.tasks}",
            "dq.OptLLVM=ON",
        ]
        if self.enable_spilling:
            pragmas += [
                "dq.UseFinalizeByKey=true",
                "dq.EnableSpillingNodes=All",
            ]
        return pragmas

    def prepare_queries_dir(self, custom_pragmas):
        """Generate benchmark queries into self.queries_dir via gen_queries."""
        print("Preparing queries...", file=stderr)
        self.queries_dir.mkdir(parents=True, exist_ok=True)
        cmd = [str(self.args.gen_queries)]
        cmd += ["--output", f"{self.queries_dir}"]
        cmd += ["--variant", f"{self.args.variant}"]
        cmd += ["--syntax", "yql"]
        cmd += ["--dataset-size", f"{self.args.datasize}"]
        for it in custom_pragmas:
            cmd += ["--pragma", it]
        res = subprocess.run(cmd)
        if res.returncode != 0:
            raise OSError("Failed to prepare queries")

    def prepare_tpc_dir(self):
        """Download the TPC dataset via the per-variant/per-size shell script."""
        print("Preparing tpc...", file=stderr)
        cmd = [f"./download_files_{self.args.variant}_{self.args.datasize}.sh"]
        res = subprocess.run(cmd, cwd=self.args.downloaders_dir)
        if res.returncode != 0:
            raise OSError("Failed to prepare tpc")

    def __init__(self, args, enable_spilling):
        self.args = args
        self.enable_spilling = enable_spilling

        # Hoisted out of the f-string: nested same-quote f-strings require
        # Python 3.12+ (PEP 701); this form runs on older interpreters too.
        sign = "+" if self.enable_spilling else "-"
        self.queries_dir = pathlib.Path(f"queries{sign}spilling-{args.datasize}-{args.tasks}")
        if self.args.clean_old or not self.queries_dir.exists():
            self.prepare_queries_dir(self._pragmas())
        self.tpc_dir = pathlib.Path(f"{self.args.downloaders_dir}/tpc/{self.args.variant}/{self.args.datasize}")
        if self.args.clean_old or not self.tpc_dir.exists():
            self.prepare_tpc_dir()
        if not pathlib.Path("./tpc").exists():
            # Symlink the downloaded data into the working directory so the
            # generated bindings resolve relative paths.
            os.symlink(f"{self.args.downloaders_dir}/tpc", "tpc", target_is_directory=True)

        mode = "with" if self.enable_spilling else "no"
        self.result_dir = pathlib.Path(
            f"{self.args.output}/{mode}-spilling/{args.variant}-{args.datasize}-{args.tasks}"
        ).resolve()
        self.result_dir.mkdir(parents=True, exist_ok=True)

    def run(self):
        """Run the benchmark runner binary under /usr/bin/time; return the result dir."""
        cmd = ["/usr/bin/time", f"{str(self.args.runner_path)}"]
        # cmd += ["--perf"]  # toggle to collect perf profiles / flame graphs
        for it in self.args.query_filter:
            cmd += ["--include-q", it]
        cmd += ["--query-dir", f"{str(self.queries_dir)}/{self.args.variant}"]
        cmd += ["--bindings", f"{str(self.queries_dir)}/{self.args.variant}/bindings.json"]
        cmd += ["--result-dir", str(self.result_dir)]
        cmd += ["--flame-graph", str(self.args.flame_graph)]
        cmd += [f"{self.args.dqrun}", "-s"]
        cmd += ["--enable-spilling"] if self.enable_spilling else []
        cmd += ["--udfs-dir", ";".join(map(str, self.args.udfs_dir))]
        cmd += ["--fs-cfg", f"{str(self.args.fs_cfg)}"]
        cmd += ["--gateways-cfg", f"{str(self.args.gateways_cfg)}"]
        print("Running runner...", file=stderr)
        subprocess.run(cmd)

        print("Run results at: ", self.result_dir)
        return self.result_dir
+
+
def result_compare(args, to_compare):
    """Run the result-compare tool over the given result dirs, writing an HTML table."""
    print("Comparing...")
    cmd = [f"{args.result_compare}", "-v", *to_compare]
    table_path = f"{args.output}/result-{args.variant}-{args.datasize}-{args.tasks}.htm"
    with open(table_path, "w") as result_table:
        res = subprocess.run(cmd, stdout=result_table)
    if res.returncode != 0:
        raise OSError("Failed to compare result")
+
+
def main():
    """Run the benchmark twice (spilling on, then off) and compare the results."""
    args = parse_args()

    results = []
    for enable_spilling in (True, False):
        banner = "With spilling..." if enable_spilling else "No spilling..."
        print(banner, file=stderr)
        results.append(Runner(args, enable_spilling).run())

    # In test mode the comparison is done by the harness, not here.
    if not args.is_test:
        result_compare(args, results)


if __name__ == "__main__":
    main()

+ 10 - 0
ydb/library/benchmarks/runner/run_tests/ya.make

@@ -0,0 +1,10 @@
+PY3_PROGRAM()
+
+PY_SRCS(
+    MAIN run_tests.py
+)
+
+PEERDIR(
+)
+
+END()

+ 22 - 18
ydb/library/benchmarks/runner/runner/runner.py

@@ -27,7 +27,7 @@ def run(argv, out, err, timeout=30*60, hard_timeout=5):
     oldmask = signal.pthread_sigmask(signal.SIG_BLOCK, {signal.SIGCHLD})
     try:
         start_time = time_ns()
-        pid = os.posix_spawn(argv[0], argv, os.environ, setsigmask=oldmask, file_actions=(
+        pid = os.posix_spawnp(argv[0], argv, os.environ, setsigmask=oldmask, file_actions=(
             ([(os.POSIX_SPAWN_OPEN, 1, out, os.O_WRONLY | os.O_CREAT, 0o666)] if out else []) +
             ([(os.POSIX_SPAWN_OPEN, 2, err, os.O_WRONLY | os.O_CREAT, 0o666)] if err else [])
             ))
@@ -65,25 +65,25 @@ def run(argv, out, err, timeout=30*60, hard_timeout=5):
 
 
 def main():
-
     parser = argparse.ArgumentParser()
     parser.add_argument('--query-dir', type=str, default='q/scalar')
     parser.add_argument('--bindings', type=str, default='bindings.json')
-    parser.add_argument('--result-dir', type=str, default="result-{:%Y%m%dT%H%M%S}".format(datetime.datetime.now()))
+    parser.add_argument('--result-dir', type=Path, default="result-{:%Y%m%dT%H%M%S}".format(datetime.datetime.now()))
     parser.add_argument('--timeout', type=int, default=30*60)
     parser.add_argument('--perf', action='store_true')
-    parser.add_argument('--arc-path', type=str, default='{}/arcadia'.format(os.environ['HOME']))
+    parser.add_argument('--flame-graph', type=Path, default=None)
     parser.add_argument('--include-q', default=[], action='append')
     parser.add_argument('--exclude-q', default=[], action='append')
+
     args, argv = parser.parse_known_intermixed_args()
+
     qdir = args.query_dir
     bindings = args.bindings
     outdir = args.result_dir
     assert len(argv)
     querydir = Path(qdir)
-    os.makedirs(outdir + '/' + qdir, exist_ok=True)
-    with open(outdir + '/' + qdir + "/summary.tsv", "w") as outf, \
-         open(outdir + '/' + qdir + "/summary.json", "w") as outj:
+    with open(outdir / "summary.tsv", "w") as outf, \
+         open(outdir / "summary.json", "w") as outj:
         print(' '.join(argv + ['-p', qdir, '--bindings-file', bindings]), file=outf)
         print(json.dumps({
             'cmdline': argv,
@@ -92,12 +92,13 @@ def main():
             'version': 100
         }), file=outj)
         for query in sorted(querydir.glob('**/*.sql'), key=lambda x: tuple(map(lambda y: int(y) if re.match(RE_DIGITS, y) else y, re.split(RE_DIGITS, str(x))))):
-            q = str(query)
-            name = outdir + '/' + q
+            q = str(query.stem)
+            print(f"{q}", end="", flush=True)
+            name = str(outdir / q)
             if len(args.include_q):
                 include = False
                 for r in args.include_q:
-                    if re.search(r, name):
+                    if re.search(r, str(query)):
                         include = True
                         break
                 if not include:
@@ -105,13 +106,14 @@ def main():
             if len(args.exclude_q):
                 include = True
                 for r in args.exclude_q:
-                    if re.search(r, name):
+                    if re.search(r, str(query)):
                         include = False
                         break
                 if not include:
                     continue
             print(q, end='\t', file=outf)
             outname = name + '-result.yson'
+            print(".", end="", flush=True)
             exitcode, rusage, elapsed, iostat = run(
                 argv + [
                     '--result-file', outname,
@@ -120,7 +122,7 @@ def main():
                     '--err-file', name + '-err.txt',
                     '--expr-file', name + '-expr.txt',
                     '--stat', name + '-stat.yson',
-                    '-p', q
+                    '-p', str(query)
                 ],
                 name + '-stdout.txt',
                 name + '-stderr.txt',
@@ -164,25 +166,27 @@ def main():
                 }
             }), file=outj)
             outj.flush()
+            print(".", end="", flush=True)
             if args.perf:
                 exitcode, rusage, elapsed, iostat = run(
-                    ['{}/ya'.format(args.arc_path), 'tool', 'perf', 'record', '-F250', '-g', '--call-graph', 'dwarf', '-o', '{}/perf.data'.format(outdir), '--'] +
+                    ['perf', 'record', '-F250', '-g', '--call-graph', 'dwarf', '-o', '{}/perf.data'.format(outdir), '--'] +
                     argv + [
                         '--result-file', '/dev/null',
                         '--bindings-file', bindings,
                         '--plan-file', '/dev/null',
                         '--err-file', '/dev/null',
                         '--expr-file', '/dev/null',
-                        '-p', q
+                        '-p', str(query)
                     ],
                     name + '-stdout-perf.txt',
                     name + '-stderr-perf.txt',
                     timeout=args.timeout)
                 os.system('''
-                {0}/ya tool perf script -i {2}/perf.data --header |
-                {0}/contrib/tools/flame-graph/stackcollapse-perf.pl |
-                {0}/contrib/tools/flame-graph/flamegraph.pl > {1}.svg
-                '''.format(args.arc_path, name, outdir))
+                perf script -i {2}/perf.data --header |
+                {0}/stackcollapse-perf.pl |
+                {0}/flamegraph.pl > {1}.svg
+                '''.format(args.flame_graph, name, outdir))
+            print(".", flush=True)
 
 
 if __name__ == "__main__":

+ 0 - 3
ydb/library/benchmarks/runner/runner/ya.make

@@ -4,7 +4,4 @@ PY_SRCS(
     MAIN runner.py
 )
 
-PEERDIR(
-)
-
 END()

+ 80 - 0
ydb/library/benchmarks/runner/tpc_tests.py

@@ -0,0 +1,80 @@
+import yatest.common
+import pathlib
+import sys
+import os
+
+
class Runner:
    """Builds the run_tests command line from yatest-provided dependency paths."""

    # Built binaries this test depends on (resolved via yatest binary_path).
    DEPS = {
        "run_tests" : "ydb/library/benchmarks/runner/run_tests",
        "dqrun" : "ydb/library/yql/tools/dqrun",
        "gen-queries" : "ydb/library/benchmarks/gen_queries",
        "result-compare" : "ydb/library/benchmarks/runner/result_compare",
        "runner" : "ydb/library/benchmarks/runner/runner"
    }

    # Source-tree data files/directories (resolved via yatest source_path).
    DATA = {
        "fs-cfg" : "ydb/library/yql/tools/dqrun/examples/fs.conf",
        "gateways-cfg" : "ydb/library/benchmarks/runner/runner/test-gateways.conf",
        "flame-graph" : "contrib/tools/flame-graph",
        "downloaders-dir" : "ydb/library/benchmarks/runner",
    }

    UDFS = [
        "ydb/library/yql/udfs/common/set",
        "ydb/library/yql/udfs/common/url_base",
        "ydb/library/yql/udfs/common/datetime2",
        "ydb/library/yql/udfs/common/re2"
    ]

    def __init__(self):
        self.deps = {name : pathlib.Path(yatest.common.binary_path(path)) for name, path in self.DEPS.items()}
        self.udfs = [pathlib.Path(yatest.common.binary_path(path)) for path in self.UDFS]
        self.data = {name : pathlib.Path(yatest.common.source_path(path)) for name, path in self.DATA.items() if name}
        self.output = pathlib.Path(yatest.common.output_path())
        self.results_path = self.output / "results"
        self.results_path.mkdir()

        # Base command shared by every wrapped_run invocation.
        self.cmd = [str(self.deps["run_tests"]) + "/run_tests", "--is-test"]
        self.cmd += ["--dqrun", str(self.deps["dqrun"]) + "/dqrun"]
        self.cmd += ["--gen-queries", str(self.deps["gen-queries"]) + "/gen_queries"]
        self.cmd += ["--result-compare", str(self.deps["result-compare"]) + "/result_compare"]
        self.cmd += ["--downloaders-dir", str(self.data["downloaders-dir"])]
        # Use the exact flag name declared by run_tests.py; the original
        # "--runner" only worked through argparse prefix abbreviation.
        self.cmd += ["--runner-path", str(self.deps["runner"]) + "/runner"]
        self.cmd += ["--flame-graph", str(self.data["flame-graph"])]
        self.cmd += ["--udfs-dir", ";".join(map(str, self.udfs))]
        self.cmd += ["--fs-cfg", str(self.data["fs-cfg"])]
        self.cmd += ["--gateways-cfg", str(self.data["gateways-cfg"])]
        self.cmd += ["-o", str(self.results_path)]

    def _make_cmd(self, variant, datasize, tasks, query_filter):
        # Copy the base command: the original wrote `cmd = self.cmd`, which
        # aliased the list, so each wrapped_run call permanently appended its
        # arguments to the shared base command.
        cmd = list(self.cmd)
        cmd += ["--variant", f"{variant}"]
        cmd += ["--datasize", f"{datasize}"]
        cmd += ["--tasks", f"{tasks}"]
        cmd += ["--clean-old"]
        if query_filter:
            cmd += ["--query-filter", f"{query_filter}"]
        return cmd

    def wrapped_run(self, variant, datasize, tasks, query_filter):
        """Execute one run_tests invocation for the given configuration."""
        cmd = self._make_cmd(variant, datasize, tasks, query_filter)
        yatest.common.execute(cmd, stdout=sys.stdout, stderr=sys.stderr)
+
+
def upload(result_path, s3_folder):
    """Invoke upload_results.py as a subprocess to push results to YDB/S3."""
    uploader = pathlib.Path(yatest.common.source_path("ydb/library/benchmarks/runner/upload_results.py")).resolve()
    cmd = [
        "python3", str(uploader),
        "--result-path", str(result_path),
        "--s3-folder", str(s3_folder),
    ]
    yatest.common.execute(cmd, stdout=sys.stdout, stderr=sys.stderr)
+
+
def test_tpc():
    """Large-test entry point: run the TPC-H scale-1 benchmark; upload results in CI."""
    runner = Runner()
    runner.wrapped_run("h", 1, 1, None)
    result_path = runner.results_path.resolve()
    print("Results path: ", result_path, file=sys.stderr)

    # PUBLIC_DIR is only set in CI, where results are synced to S3.
    public_dir = os.environ.get("PUBLIC_DIR")
    if public_dir is not None:
        upload(result_path, pathlib.Path(public_dir).resolve())

+ 178 - 0
ydb/library/benchmarks/runner/upload_results.py

@@ -0,0 +1,178 @@
+import sys
+import ydb
+import shutil
+import json
+import pathlib
+import os
+import argparse
+import datetime
+
+
+# Nightly benchmark-results database (YDB, gRPC over TLS).
+DATABASE_ENDPOINT = "grpcs://lb.etnvsjbk7kh1jc6bbfi8.ydb.mdb.yandexcloud.net:2135"
+DATABASE_PATH = "/ru-central1/b1ggceeul2pkher8vhb6/etnvsjbk7kh1jc6bbfi8"
+
+
class RunParams:
    """Key identifying one benchmark run: spilling mode, variant, scale, tasks, query.

    Instances are used as dict keys; default identity hashing is sufficient
    because each key is constructed exactly once.
    """

    def __init__(self, is_spilling, variant, datasize, tasks, query):
        self.is_spilling = is_spilling
        self.variant = variant
        self.datasize = datasize
        self.tasks = tasks
        self.query = query

    def __repr__(self):
        # The original closed with a stray "})", leaving the repr unbalanced.
        fields = ", ".join(f"{key}: {value}" for key, value in self.__dict__.items())
        return f"RunParams({fields})"
+
+
class RunResults:
    """Measured results of one benchmark run, populated from a summary.json record."""

    def __init__(self):
        self.exitcode = None
        self.read_bytes = None
        self.write_bytes = None
        # Originally named user_time_ms, but from_json and the uploader both
        # use user_time (a timedelta) — the _ms attribute was never assigned
        # or read, so a query missing from summary.json would have crashed
        # the uploader with AttributeError.
        self.user_time = None
        self.system_time = None
        self.rss = None
        self.output_hash = None
        self.perf_file_path = None

    def from_json(self, json):
        """Fill measured fields from one parsed summary.json line."""
        self.exitcode = json["exitcode"]
        io_info = json["io"]
        self.read_bytes = io_info["read_bytes"]
        self.write_bytes = io_info["write_bytes"]
        resourse_usage = json["rusage"]
        self.user_time = datetime.timedelta(seconds=resourse_usage["utime"])
        self.system_time = datetime.timedelta(seconds=resourse_usage["stime"])
        self.rss = resourse_usage["maxrss"]

    def __repr__(self):
        # The original reported the wrong class name ("RunQueryData") and
        # ended with an unbalanced "})".
        fields = ", ".join(f"{key}: {value}" for key, value in self.__dict__.items())
        return f"{type(self).__name__}({fields})"
+
+
def pretty_print(value):
    """Render a Python value as a YQL literal for the UPSERT statement.

    Supports None, bool, datetime (naive, interpreted as UTC), timedelta,
    str, int and float; raises TypeError for anything else.
    """
    if value is None:
        return "NULL"
    # bool must be tested before int: isinstance(True, int) is true.
    if isinstance(value, bool):
        return "TRUE" if value else "FALSE"
    if isinstance(value, datetime.datetime):
        # Naive datetime treated as seconds since the Unix epoch.
        delt = value - datetime.datetime(1970, 1, 1)
        return f"Unwrap(DateTime::FromSeconds({int(delt.total_seconds())}))"
    if isinstance(value, datetime.timedelta):
        return f"DateTime::IntervalFromMicroseconds({int(value / datetime.timedelta(microseconds=1))})"
    if isinstance(value, str):
        # NOTE(review): interpolated into SQL verbatim; embedded quotes are
        # not escaped — acceptable for internally generated values only.
        return f'"{value}"'
    if isinstance(value, (int, float)):
        return str(value)

    # raise, not assert: assertions are stripped under `python -O`.
    raise TypeError(f"unrecognized type: {type(value)}")
+
+
def upload_results(result_path, s3_folder, test_start):
    """Collect per-query results under result_path and UPSERT them into YDB.

    Expected layout: {no|with}-spilling/<variant>-<datasize>-<tasks>/q<N>-*.
    Flame-graph SVGs are copied into s3_folder (synced to S3 elsewhere).
    """
    import hashlib  # function-local: only this routine hashes outputs

    results_map = {}
    for entry in result_path.glob("*/*"):
        if not entry.is_dir():
            continue
        this_result = {}
        suffix = entry.relative_to(result_path)
        # {no|with}-spilling/<variant>-<datasize>-<tasks>
        is_spilling = suffix.parts[0].split("-")[0] == "with"
        variant, datasize, tasks = suffix.parts[1].split("-")
        datasize = int(datasize)
        tasks = int(tasks)

        for file in entry.iterdir():
            if not file.is_file():
                continue
            name = file.name
            if len(file.suffixes) > 0:
                name = name.rsplit(file.suffixes[0])[0]
            if name[0] == "q":
                query_num = int(name[1:].split("-")[0])
                if query_num not in this_result:
                    this_result[query_num] = RunResults()

                if file.suffix == ".svg":
                    dst = file.relative_to(result_path)
                    this_result[query_num].perf_file_path = dst
                    # copying files to folder that will be synced with s3
                    dst = (s3_folder / dst).resolve()
                    dst.parent.mkdir(parents=True, exist_ok=True)
                    _ = shutil.copy2(str(file.resolve()), str(dst))
                # q<num>-stdout.txt
                if file.stem == f"q{query_num}-stdout":
                    with open(file, "rb") as stdout:
                        # sha256 instead of built-in hash(): str hashes are
                        # salted per process (PYTHONHASHSEED), so they could
                        # never be compared across runs.
                        this_result[query_num].output_hash = hashlib.sha256(stdout.read().strip()).hexdigest()

        summary_file = entry / "summary.json"

        with open(summary_file, "r") as res_file:
            # The first line of summary.json is the command line, not JSON.
            for line in res_file.readlines()[1:]:
                info = json.loads(line)
                query_num = int(info["q"][1:])
                this_result[query_num].from_json(info)

        for key, value in this_result.items():
            params = RunParams(is_spilling, variant, datasize, tasks, key)
            results_map[params] = value

    with ydb.Driver(
        endpoint=DATABASE_ENDPOINT,
        database=DATABASE_PATH,
        credentials=ydb.credentials_from_env_variables()
    ) as driver:
        driver.wait(timeout=5)

        session = ydb.retry_operation_sync(
            lambda: driver.table_client.session().create()
        )
        for params, results in results_map.items():
            with session.transaction() as tx:
                # pretty_print has no pathlib.Path branch, so the perf link
                # must be stringified (None stays NULL).
                perf_link = results.perf_file_path
                mapping = {
                    "BenchmarkType" : params.variant,
                    "Scale" : params.datasize,
                    "QueryNum" : params.query,
                    "WithSpilling" : params.is_spilling,
                    "Timestamp" : test_start,
                    "WasSpillingInAggregation" : None,
                    "WasSpillingInJoin" : None,
                    "WasSpillingInChannels" : None,
                    "MaxTasksPerStage" : params.tasks,
                    "PerfFileLink" : str(perf_link) if perf_link is not None else None,
                    "ExitCode" : results.exitcode,
                    "ResultHash" : results.output_hash,
                    # NOTE(review): read_bytes comes from the runner's io
                    # counters; confirm it actually measures spilled bytes.
                    "SpilledBytes" : results.read_bytes,
                    "UserTime" : results.user_time,
                    "SystemTime" : results.system_time
                }
                sql = 'UPSERT INTO `perfomance/olap/dq_spilling_nightly_runs`\n\t({columns})\nVALUES\n\t({values})'.format(
                    columns=", ".join(map(str, mapping.keys())),
                    values=", ".join(map(pretty_print, mapping.values())))
                tx.execute(sql, commit_tx=True)
+
+
def main():
    """CLI entry point: parse paths, forward CI credentials, upload results."""
    upload_time = datetime.datetime.now()

    parser = argparse.ArgumentParser()
    parser.add_argument("--result-path", type=pathlib.Path)
    parser.add_argument("--s3-folder", type=pathlib.Path)
    args = parser.parse_args()

    ci_key_var = "CI_YDB_SERVICE_ACCOUNT_KEY_FILE_CREDENTIALS"
    if ci_key_var not in os.environ:
        # NOTE(review): the message says "skipping" but this raises and
        # aborts — confirm whether a soft skip was intended instead.
        raise AttributeError("Env variable CI_YDB_SERVICE_ACCOUNT_KEY_FILE_CREDENTIALS is missing, skipping uploading")
    # The YDB SDK reads the non-CI-prefixed variable.
    os.environ["YDB_SERVICE_ACCOUNT_KEY_FILE_CREDENTIALS"] = os.environ[ci_key_var]

    upload_results(args.result_path, args.s3_folder, upload_time)


if __name__ == "__main__":
    main()

+ 48 - 0
ydb/library/benchmarks/runner/ya.make

@@ -1,4 +1,52 @@
+PY3TEST()
+
+SIZE(LARGE)
+
+TAG(
+    ya:fat
+)
+
+TEST_SRCS(
+    tpc_tests.py
+)
+
+DEPENDS(
+    ydb/library/benchmarks/runner/run_tests
+    ydb/library/yql/tools/dqrun
+    ydb/library/benchmarks/gen_queries
+    ydb/library/benchmarks/runner/result_compare
+    ydb/library/benchmarks/runner/runner
+
+    ydb/library/yql/udfs/common/set
+    ydb/library/yql/udfs/common/url_base
+    ydb/library/yql/udfs/common/datetime2
+    ydb/library/yql/udfs/common/re2
+    ydb/library/yql/udfs/common/math
+    ydb/library/yql/udfs/common/unicode_base
+)
+
+DATA_FILES(
+    ydb/library/yql/tools/dqrun/examples/fs.conf
+    ydb/library/benchmarks/runner/runner/test-gateways.conf
+    contrib/tools/flame-graph
+
+    ydb/library/benchmarks/runner/download_lib.sh
+    ydb/library/benchmarks/runner/download_tables.sh
+    ydb/library/benchmarks/runner/download_tpcds_tables.sh
+    ydb/library/benchmarks/runner/download_files_ds_1.sh
+    ydb/library/benchmarks/runner/download_files_ds_10.sh
+    ydb/library/benchmarks/runner/download_files_ds_100.sh
+    ydb/library/benchmarks/runner/download_files_h_1.sh
+    ydb/library/benchmarks/runner/download_files_h_10.sh
+    ydb/library/benchmarks/runner/download_files_h_100.sh
+
+    ydb/library/benchmarks/runner/upload_results.py
+)
+
+END()
+
 RECURSE(
+    run_tests
     runner
     result_convert
     result_compare