
Restoring authorship annotation for <deshevoy@yandex-team.ru>. Commit 2 of 2.

deshevoy · 3 years ago
commit 28148f76db

+ 1 - 1
build/platform/msvc/ya.make

@@ -13,7 +13,7 @@ ELSE()
 ENDIF()
 
 IF (CLANG_CL)
-    DECLARE_EXTERNAL_RESOURCE(MSVC_FOR_CLANG sbr:1383387533)  # Microsoft Visual C++ 2017 14.16.27023 (15.9.5) 
+    DECLARE_EXTERNAL_RESOURCE(MSVC_FOR_CLANG sbr:1383387533)  # Microsoft Visual C++ 2017 14.16.27023 (15.9.5)
 ENDIF()
 
 END()

+ 12 - 12
build/plugins/linker_script.py

@@ -1,12 +1,12 @@
-def onlinker_script(unit, *args): 
-    """ 
-        @usage: LINKER_SCRIPT(Files...) 
- 
-        Specify files to be used as a linker script 
-    """ 
-    for arg in args: 
-        if not arg.endswith(".ld") and not arg.endswith(".ld.in"): 
-            unit.message(['error', "Invalid linker script extension: {}".format(arg)]) 
-            return 
- 
-    unit.onglobal_srcs(list(args)) 
+def onlinker_script(unit, *args):
+    """
+        @usage: LINKER_SCRIPT(Files...)
+
+        Specify files to be used as a linker script
+    """
+    for arg in args:
+        if not arg.endswith(".ld") and not arg.endswith(".ld.in"):
+            unit.message(['error', "Invalid linker script extension: {}".format(arg)])
+            return
+
+    unit.onglobal_srcs(list(args))
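
Every hunk in this commit is whitespace-only: each removed line is re-added byte-for-byte identical except for stripped trailing spaces, so the semantics of every file below are untouched. For orientation, the LINKER_SCRIPT plugin restored above accepts only .ld and .ld.in files; a minimal standalone sketch of that check (the helper name is hypothetical):

    def is_linker_script(path):
        # Mirrors the plugin's suffix check: only .ld and .ld.in pass.
        return path.endswith(".ld") or path.endswith(".ld.in")

    assert is_linker_script("sections.ld")
    assert is_linker_script("sections.ld.in")
    assert not is_linker_script("script.lds")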

+ 61 - 61
build/plugins/pybuild.py

@@ -35,19 +35,19 @@ def uniq_suffix(path, unit):
         return ''
     return '.{}'.format(pathid(path)[:4])
 
-def pb2_arg(suf, path, mod, unit): 
-    return '{path}__int__{suf}={mod}{modsuf}'.format( 
-        path=stripext(to_build_root(path, unit)), 
-        suf=suf, 
-        mod=mod, 
-        modsuf=stripext(suf) 
-    ) 
+def pb2_arg(suf, path, mod, unit):
+    return '{path}__int__{suf}={mod}{modsuf}'.format(
+        path=stripext(to_build_root(path, unit)),
+        suf=suf,
+        mod=mod,
+        modsuf=stripext(suf)
+    )
 
 def proto_arg(path, mod, unit):
     return '{}.proto={}'.format(stripext(to_build_root(path, unit)), mod)
 
-def pb_cc_arg(suf, path, unit): 
-    return '{}{suf}'.format(stripext(to_build_root(path, unit)), suf=suf) 
+def pb_cc_arg(suf, path, unit):
+    return '{}{suf}'.format(stripext(to_build_root(path, unit)), suf=suf)
 
 def ev_cc_arg(path, unit):
     return '{}.ev.pb.cc'.format(stripext(to_build_root(path, unit)))
@@ -143,23 +143,23 @@ def add_python_lint_checks(unit, py_ver, files):
         unit.onadd_check(["flake8.py{}".format(py_ver), flake8_cfg] + resolved_files)
 
 
-def is_py3(unit): 
-    return unit.get("PYTHON3") == "yes" 
- 
- 
+def is_py3(unit):
+    return unit.get("PYTHON3") == "yes"
+
+
 def on_py_program(unit, *args):
     py_program(unit, is_py3(unit))
 
 
-def py_program(unit, py3): 
+def py_program(unit, py3):
     """
     Documentation: https://wiki.yandex-team.ru/devtools/commandsandvars/py_srcs/#modulpyprogramimakrospymain
     """
-    if py3: 
+    if py3:
         peers = ['library/python/runtime_py3/main']
         if unit.get('PYTHON_SQLITE3') != 'no':
             peers.append('contrib/tools/python3/src/Modules/_sqlite')
-    else: 
+    else:
         peers = ['library/python/runtime/main']
         if unit.get('PYTHON_SQLITE3') != 'no':
             peers.append('contrib/tools/python/src/Modules/_sqlite')
@@ -193,7 +193,7 @@ def onpy_srcs(unit, *args):
     # and "modname" will be used as a module name.
 
     upath = unit.path()[3:]
-    py3 = is_py3(unit) 
+    py3 = is_py3(unit)
     py_main_only = unit.get('PROCESS_PY_MAIN_ONLY')
     with_py = not unit.get('PYBUILD_NO_PY')
     with_pyc = not unit.get('PYBUILD_NO_PYC')
@@ -207,7 +207,7 @@ def onpy_srcs(unit, *args):
 
     unit_needs_main = unit.get('MODULE_TYPE') in ('PROGRAM', 'DLL')
     if unit_needs_main:
-        py_program(unit, py3) 
+        py_program(unit, py3)
 
     py_namespace_value = unit.get('PY_NAMESPACE_VALUE')
     if py_namespace_value == ".":
@@ -285,7 +285,7 @@ def onpy_srcs(unit, *args):
                 arg = next(args)
 
             if '=' in arg:
-                main_py = False 
+                main_py = False
                 path, mod = arg.split('=', 1)
             else:
                 if trim:
@@ -295,7 +295,7 @@ def onpy_srcs(unit, *args):
                     path = '{}.proto'.format(arg[:-9])
                 else:
                     path = arg
-                main_py = (path == '__main__.py' or path.endswith('/__main__.py')) 
+                main_py = (path == '__main__.py' or path.endswith('/__main__.py'))
                 if not py3 and unit_needs_main and main_py:
                     mod = '__main__'
                 else:
@@ -313,9 +313,9 @@ def onpy_srcs(unit, *args):
                     mod = ns + mod_name
 
             if main_mod:
-                py_main(unit, mod + ":main") 
+                py_main(unit, mod + ":main")
             elif py3 and unit_needs_main and main_py:
-                py_main(unit, mod) 
+                py_main(unit, mod)
 
             if py_main_only:
                 continue
@@ -406,7 +406,7 @@ def onpy_srcs(unit, *args):
                 ] + cython_directives
 
                 cython(cython_args)
-                py_register(unit, mod, py3) 
+                py_register(unit, mod, py3)
                 process_pyx(filename, path, out_suffix, noext)
 
         if files2res:
@@ -440,9 +440,9 @@ def onpy_srcs(unit, *args):
 
         res = []
 
-        if py3: 
+        if py3:
             mod_list_md5 = md5()
-            for path, mod in pys: 
+            for path, mod in pys:
                 mod_list_md5.update(mod)
                 if not (venv and is_extended_source_search_enabled(path, unit)):
                     dest = 'py/' + mod.replace('.', '/') + '.py'
@@ -463,11 +463,11 @@ def onpy_srcs(unit, *args):
                     ns_res += ['-', '{}="{}"'.format(key, namespaces)]
                 unit.onresource(ns_res)
 
-            unit.onresource_files(res) 
+            unit.onresource_files(res)
             add_python_lint_checks(unit, 3, [path for path, mod in pys] + unit.get(['_PY_EXTRA_LINT_FILES_VALUE']).split())
-        else: 
-            for path, mod in pys: 
-                root_rel_path = rootrel_arc_src(path, unit) 
+        else:
+            for path, mod in pys:
+                root_rel_path = rootrel_arc_src(path, unit)
                 if with_py:
                     key = '/py_modules/' + mod
                     res += [
@@ -480,9 +480,9 @@ def onpy_srcs(unit, *args):
                     unit.on_py_compile_bytecode([root_rel_path + '-', src, dst])
                     res += [dst + '.yapyc', '/py_code/' + mod]
 
-            unit.onresource(res) 
+            unit.onresource(res)
             add_python_lint_checks(unit, 2, [path for path, mod in pys] + unit.get(['_PY_EXTRA_LINT_FILES_VALUE']).split())
- 
+
     use_vanilla_protoc = unit.get('USE_VANILLA_PROTOC') == 'yes'
     if use_vanilla_protoc:
         cpp_runtime_path = 'contrib/libs/protobuf_std'
@@ -497,15 +497,15 @@ def onpy_srcs(unit, *args):
         if not upath.startswith(py_runtime_path) and not upath.startswith(builtin_proto_path):
             unit.onpeerdir(py_runtime_path)
 
-        unit.onpeerdir(unit.get("PY_PROTO_DEPS").split()) 
+        unit.onpeerdir(unit.get("PY_PROTO_DEPS").split())
 
         proto_paths = [path for path, mod in protos]
         unit.on_generate_py_protos_internal(proto_paths)
-        unit.onpy_srcs([ 
-            pb2_arg(py_suf, path, mod, unit) 
-            for path, mod in protos 
-            for py_suf in unit.get("PY_PROTO_SUFFIXES").split() 
-        ]) 
+        unit.onpy_srcs([
+            pb2_arg(py_suf, path, mod, unit)
+            for path, mod in protos
+            for py_suf in unit.get("PY_PROTO_SUFFIXES").split()
+        ])
 
         if optimize_proto and need_gazetteer_peerdir:
             unit.onpeerdir(['kernel/gazetteer/proto'])
@@ -531,28 +531,28 @@ def _check_test_srcs(*args):
 
 def ontest_srcs(unit, *args):
     _check_test_srcs(*args)
-    if unit.get('PY3TEST_BIN' if is_py3(unit) else 'PYTEST_BIN') != 'no': 
+    if unit.get('PY3TEST_BIN' if is_py3(unit) else 'PYTEST_BIN') != 'no':
         unit.onpy_srcs(["NAMESPACE", "__tests__"] + list(args))
 
 
-def onpy_doctests(unit, *args): 
+def onpy_doctests(unit, *args):
     """
     @usage PY_DOCTEST(Packages...)
 
     Add to the test doctests for specified Python packages
     The packages should be part of a test (listed as sources of the test or its PEERDIRs).
     """
-    if unit.get('PY3TEST_BIN' if is_py3(unit) else 'PYTEST_BIN') != 'no': 
-        unit.onresource(['-', 'PY_DOCTEST_PACKAGES="{}"'.format(' '.join(args))]) 
- 
- 
-def py_register(unit, func, py3): 
-    if py3: 
-        unit.on_py3_register([func]) 
-    else: 
-        unit.on_py_register([func]) 
- 
- 
+    if unit.get('PY3TEST_BIN' if is_py3(unit) else 'PYTEST_BIN') != 'no':
+        unit.onresource(['-', 'PY_DOCTEST_PACKAGES="{}"'.format(' '.join(args))])
+
+
+def py_register(unit, func, py3):
+    if py3:
+        unit.on_py3_register([func])
+    else:
+        unit.on_py_register([func])
+
+
 def onpy_register(unit, *args):
     """
     @usage: PY_REGISTER([package.]module_name)
@@ -569,28 +569,28 @@ def onpy_register(unit, *args):
     Documentation: https://wiki.yandex-team.ru/arcadia/python/pysrcs/#makrospyregister
     """
 
-    py3 = is_py3(unit) 
+    py3 = is_py3(unit)
 
-    for name in args: 
+    for name in args:
         assert '=' not in name, name
         py_register(unit, name, py3)
         if '.' in name:
             shortname = name.rsplit('.', 1)[1]
-            if py3: 
+            if py3:
                 unit.oncflags(['-DPyInit_{}=PyInit_{}'.format(shortname, mangle(name))])
-            else: 
+            else:
                 unit.oncflags(['-Dinit{}=init{}'.format(shortname, mangle(name))])
- 
- 
-def py_main(unit, arg): 
+
+
+def py_main(unit, arg):
     if unit.get('IGNORE_PY_MAIN'):
         return
     unit_needs_main = unit.get('MODULE_TYPE') in ('PROGRAM', 'DLL')
     if unit_needs_main:
         py_program(unit, is_py3(unit))
-    unit.onresource(['-', 'PY_MAIN={}'.format(arg)]) 
- 
- 
+    unit.onresource(['-', 'PY_MAIN={}'.format(arg)])
+
+
 def onpy_main(unit, arg):
     """
         @usage: PY_MAIN(package.module[:func])
@@ -605,7 +605,7 @@ def onpy_main(unit, arg):
     if ':' not in arg:
         arg += ':main'
 
-    py_main(unit, arg) 
+    py_main(unit, arg)
 
 
 def onpy_constructor(unit, arg):
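
The pb2_arg helper above builds the "<generated file>=<module name>" mapping that onpy_srcs feeds back into PY_SRCS for each proto suffix. A sketch of what it produces, assuming stripext drops the last extension and to_build_root makes the path build-root-relative (both helpers are defined outside this hunk):

    import os

    def stripext(path):
        # Assumed behaviour of the real helper: drop the final extension only.
        return os.path.splitext(path)[0]

    # Same format string as pb2_arg, with to_build_root elided for brevity.
    def pb2_arg_sketch(suf, path, mod):
        return '{path}__int__{suf}={mod}{modsuf}'.format(
            path=stripext(path), suf=suf, mod=mod, modsuf=stripext(suf))

    assert pb2_arg_sketch('_pb2.py', 'a/b/c.proto', 'a.b.c') == 'a/b/c__int___pb2.py=a.b.c_pb2'

The __int__ marker in that key is matched by gen_py_protos.py further down, which renames protoc output to the same form.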

+ 4 - 4
build/plugins/ytest.py

@@ -38,10 +38,10 @@ PROJECT_TIDY_CONFIG_MAP_PATH = "build/yandex_specific/config/clang_tidy/tidy_pro
 
 tidy_config_map = None
 
-def ontest_data(unit, *args): 
-    ymake.report_configure_error("TEST_DATA is removed in favour of DATA") 
- 
- 
+def ontest_data(unit, *args):
+    ymake.report_configure_error("TEST_DATA is removed in favour of DATA")
+
+
 def save_in_file(filepath, data):
     if filepath:
         with open(filepath, 'a') as file_handler:

+ 221 - 221
build/scripts/fetch_from.py

@@ -1,25 +1,25 @@
 import datetime as dt
 import errno
-import hashlib 
+import hashlib
 import json
 import logging
 import os
 import platform
-import random 
+import random
 import shutil
 import socket
-import string 
-import sys 
+import string
+import sys
 import tarfile
 import urllib2
- 
+
 import retry
- 
- 
-def make_user_agent(): 
-    return 'fetch_from: {host}'.format(host=socket.gethostname()) 
- 
- 
+
+
+def make_user_agent():
+    return 'fetch_from: {host}'.format(host=socket.gethostname())
+
+
 def add_common_arguments(parser):
     parser.add_argument('--copy-to')  # used by jbuild in fetch_resource
     parser.add_argument('--rename-to')  # used by test_node in inject_mds_resource_to_graph
@@ -30,32 +30,32 @@ def add_common_arguments(parser):
     parser.add_argument('--log-path')
     parser.add_argument('-v', '--verbose', action='store_true', default=os.environ.get('YA_VERBOSE_FETCHER'), help='increase stderr verbosity')
     parser.add_argument('outputs', nargs='*', default=[])
- 
- 
+
+
 def ensure_dir(path):
     if not (path == '' or os.path.isdir(path)):
         os.makedirs(path)
 
 
 # Reference code: library/python/fs/__init__.py
-def hardlink_or_copy(src, dst): 
+def hardlink_or_copy(src, dst):
     ensure_dir(os.path.dirname(dst))
 
-    if os.name == 'nt': 
-        shutil.copy(src, dst) 
-    else: 
-        try: 
-            os.link(src, dst) 
-        except OSError as e: 
-            if e.errno == errno.EEXIST: 
-                return 
+    if os.name == 'nt':
+        shutil.copy(src, dst)
+    else:
+        try:
+            os.link(src, dst)
+        except OSError as e:
+            if e.errno == errno.EEXIST:
+                return
             elif e.errno in (errno.EXDEV, errno.EMLINK, errno.EINVAL, errno.EACCES):
                 sys.stderr.write("Can't make hardlink (errno={}) - fallback to copy: {} -> {}\n".format(e.errno, src, dst))
-                shutil.copy(src, dst) 
-            else: 
-                raise 
- 
- 
+                shutil.copy(src, dst)
+            else:
+                raise
+
+
 def rename_or_copy_and_remove(src, dst):
     ensure_dir(os.path.dirname(dst))
 
@@ -66,30 +66,30 @@ def rename_or_copy_and_remove(src, dst):
         os.remove(src)
 
 
-class BadChecksumFetchError(Exception): 
-    pass 
- 
- 
-class IncompleteFetchError(Exception): 
-    pass 
- 
- 
-class ResourceUnpackingError(Exception): 
-    pass 
- 
- 
-class ResourceIsDirectoryError(Exception): 
-    pass 
- 
- 
-class OutputIsDirectoryError(Exception): 
-    pass 
- 
- 
-class OutputNotExistError(Exception): 
-    pass 
- 
- 
+class BadChecksumFetchError(Exception):
+    pass
+
+
+class IncompleteFetchError(Exception):
+    pass
+
+
+class ResourceUnpackingError(Exception):
+    pass
+
+
+class ResourceIsDirectoryError(Exception):
+    pass
+
+
+class OutputIsDirectoryError(Exception):
+    pass
+
+
+class OutputNotExistError(Exception):
+    pass
+
+
 def setup_logging(args, base_name):
     def makedirs(path):
         try:
@@ -109,11 +109,11 @@ def setup_logging(args, base_name):
         logging.getLogger().addHandler(logging.StreamHandler(sys.stderr))
 
 
-def is_temporary(e): 
- 
+def is_temporary(e):
+
     def is_broken(e):
         return isinstance(e, urllib2.HTTPError) and e.code in (410, 404)
- 
+
     if is_broken(e):
         return False
 
@@ -125,98 +125,98 @@ def is_temporary(e):
     return error.is_temporary_error(e)
 
 
-def uniq_string_generator(size=6, chars=string.ascii_lowercase + string.digits): 
-    return ''.join(random.choice(chars) for _ in range(size)) 
- 
- 
-def report_to_snowden(value): 
-    def inner(): 
-        body = { 
-            'namespace': 'ygg', 
-            'key': 'fetch-from-sandbox', 
-            'value': json.dumps(value), 
-        } 
- 
-        urllib2.urlopen( 
-            'https://back-snowden.qloud.yandex-team.ru/report/add', 
-            json.dumps([body, ]), 
-            timeout=5, 
-        ) 
- 
-    try: 
-        inner() 
-    except Exception as e: 
+def uniq_string_generator(size=6, chars=string.ascii_lowercase + string.digits):
+    return ''.join(random.choice(chars) for _ in range(size))
+
+
+def report_to_snowden(value):
+    def inner():
+        body = {
+            'namespace': 'ygg',
+            'key': 'fetch-from-sandbox',
+            'value': json.dumps(value),
+        }
+
+        urllib2.urlopen(
+            'https://back-snowden.qloud.yandex-team.ru/report/add',
+            json.dumps([body, ]),
+            timeout=5,
+        )
+
+    try:
+        inner()
+    except Exception as e:
         logging.warning('report_to_snowden failed: %s', e)
- 
- 
-def copy_stream(read, *writers, **kwargs): 
-    chunk_size = kwargs.get('size', 1024*1024) 
-    while True: 
-        data = read(chunk_size) 
-        if not data: 
-            break 
-        for write in writers: 
-            write(data) 
- 
- 
-def md5file(fname): 
-    res = hashlib.md5() 
-    with open(fname, 'rb') as f: 
-        copy_stream(f.read, res.update) 
-    return res.hexdigest() 
- 
- 
-def git_like_hash_with_size(filepath): 
-    """ 
-    Calculate git like hash for path 
-    """ 
-    sha = hashlib.sha1() 
- 
-    file_size = 0 
- 
-    with open(filepath, 'rb') as f: 
-        while True: 
-            block = f.read(2 ** 16) 
- 
-            if not block: 
-                break 
- 
-            file_size += len(block) 
-            sha.update(block) 
- 
-    sha.update('\0') 
-    sha.update(str(file_size)) 
- 
-    return sha.hexdigest(), file_size 
- 
- 
-def size_printer(display_name, size): 
-    sz = [0] 
-    last_stamp = [dt.datetime.now()] 
- 
-    def printer(chunk): 
-        sz[0] += len(chunk) 
-        now = dt.datetime.now() 
-        if last_stamp[0] + dt.timedelta(seconds=10) < now: 
-            if size: 
-                print >>sys.stderr, "##status##{} - [[imp]]{:.1f}%[[rst]]".format(display_name, 100.0 * sz[0] / size) 
-            last_stamp[0] = now 
- 
-    return printer 
- 
- 
+
+
+def copy_stream(read, *writers, **kwargs):
+    chunk_size = kwargs.get('size', 1024*1024)
+    while True:
+        data = read(chunk_size)
+        if not data:
+            break
+        for write in writers:
+            write(data)
+
+
+def md5file(fname):
+    res = hashlib.md5()
+    with open(fname, 'rb') as f:
+        copy_stream(f.read, res.update)
+    return res.hexdigest()
+
+
+def git_like_hash_with_size(filepath):
+    """
+    Calculate git like hash for path
+    """
+    sha = hashlib.sha1()
+
+    file_size = 0
+
+    with open(filepath, 'rb') as f:
+        while True:
+            block = f.read(2 ** 16)
+
+            if not block:
+                break
+
+            file_size += len(block)
+            sha.update(block)
+
+    sha.update('\0')
+    sha.update(str(file_size))
+
+    return sha.hexdigest(), file_size
+
+
+def size_printer(display_name, size):
+    sz = [0]
+    last_stamp = [dt.datetime.now()]
+
+    def printer(chunk):
+        sz[0] += len(chunk)
+        now = dt.datetime.now()
+        if last_stamp[0] + dt.timedelta(seconds=10) < now:
+            if size:
+                print >>sys.stderr, "##status##{} - [[imp]]{:.1f}%[[rst]]".format(display_name, 100.0 * sz[0] / size)
+            last_stamp[0] = now
+
+    return printer
+
+
 def fetch_url(url, unpack, resource_file_name, expected_md5=None, expected_sha1=None, tries=10, writers=None):
-    logging.info('Downloading from url %s name %s and expected md5 %s', url, resource_file_name, expected_md5) 
-    tmp_file_name = uniq_string_generator() 
- 
-    request = urllib2.Request(url, headers={'User-Agent': make_user_agent()}) 
+    logging.info('Downloading from url %s name %s and expected md5 %s', url, resource_file_name, expected_md5)
+    tmp_file_name = uniq_string_generator()
+
+    request = urllib2.Request(url, headers={'User-Agent': make_user_agent()})
     req = retry.retry_func(lambda: urllib2.urlopen(request, timeout=30), tries=tries, delay=5, backoff=1.57079)
-    logging.debug('Headers: %s', req.headers.headers) 
-    expected_file_size = int(req.headers['Content-Length']) 
-    real_md5 = hashlib.md5() 
-    real_sha1 = hashlib.sha1() 
- 
-    with open(tmp_file_name, 'wb') as fp: 
+    logging.debug('Headers: %s', req.headers.headers)
+    expected_file_size = int(req.headers['Content-Length'])
+    real_md5 = hashlib.md5()
+    real_sha1 = hashlib.sha1()
+
+    with open(tmp_file_name, 'wb') as fp:
         copy_stream(
             req.read,
             fp.write,
@@ -225,73 +225,73 @@ def fetch_url(url, unpack, resource_file_name, expected_md5=None, expected_sha1=
             size_printer(resource_file_name, expected_file_size),
             *([] if writers is None else writers)
         )
- 
-    real_md5 = real_md5.hexdigest() 
-    real_file_size = os.path.getsize(tmp_file_name) 
-    real_sha1.update('\0') 
-    real_sha1.update(str(real_file_size)) 
-    real_sha1 = real_sha1.hexdigest() 
- 
-    if unpack: 
-        tmp_dir = tmp_file_name + '.dir' 
-        os.makedirs(tmp_dir) 
-        with tarfile.open(tmp_file_name, mode="r|gz") as tar: 
-            tar.extractall(tmp_dir) 
-        tmp_file_name = os.path.join(tmp_dir, resource_file_name) 
-        real_md5 = md5file(tmp_file_name) 
- 
-    logging.info('File size %s (expected %s)', real_file_size, expected_file_size) 
-    logging.info('File md5 %s (expected %s)', real_md5, expected_md5) 
-    logging.info('File sha1 %s (expected %s)', real_sha1, expected_sha1) 
- 
-    if expected_md5 and real_md5 != expected_md5: 
-        report_to_snowden( 
-            { 
-                'headers': req.headers.headers, 
-                'expected_md5': expected_md5, 
-                'real_md5': real_md5 
-            } 
-        ) 
- 
-        raise BadChecksumFetchError( 
-            'Downloaded {}, but expected {} for {}'.format( 
-                real_md5, 
-                expected_md5, 
-                url, 
-            ) 
-        ) 
- 
-    if expected_sha1 and real_sha1 != expected_sha1: 
-        report_to_snowden( 
-            { 
-                'headers': req.headers.headers, 
-                'expected_sha1': expected_sha1, 
-                'real_sha1': real_sha1 
-            } 
-        ) 
- 
-        raise BadChecksumFetchError( 
-            'Downloaded {}, but expected {} for {}'.format( 
-                real_sha1, 
-                expected_sha1, 
-                url, 
-            ) 
-        ) 
- 
-    if expected_file_size != real_file_size: 
-        report_to_snowden({'headers': req.headers.headers, 'file_size': real_file_size}) 
- 
-        raise IncompleteFetchError( 
-            'Downloaded {}, but expected {} for {}'.format( 
-                real_file_size, 
-                expected_file_size, 
-                url, 
-            ) 
-        ) 
- 
-    return tmp_file_name 
- 
- 
+
+    real_md5 = real_md5.hexdigest()
+    real_file_size = os.path.getsize(tmp_file_name)
+    real_sha1.update('\0')
+    real_sha1.update(str(real_file_size))
+    real_sha1 = real_sha1.hexdigest()
+
+    if unpack:
+        tmp_dir = tmp_file_name + '.dir'
+        os.makedirs(tmp_dir)
+        with tarfile.open(tmp_file_name, mode="r|gz") as tar:
+            tar.extractall(tmp_dir)
+        tmp_file_name = os.path.join(tmp_dir, resource_file_name)
+        real_md5 = md5file(tmp_file_name)
+
+    logging.info('File size %s (expected %s)', real_file_size, expected_file_size)
+    logging.info('File md5 %s (expected %s)', real_md5, expected_md5)
+    logging.info('File sha1 %s (expected %s)', real_sha1, expected_sha1)
+
+    if expected_md5 and real_md5 != expected_md5:
+        report_to_snowden(
+            {
+                'headers': req.headers.headers,
+                'expected_md5': expected_md5,
+                'real_md5': real_md5
+            }
+        )
+
+        raise BadChecksumFetchError(
+            'Downloaded {}, but expected {} for {}'.format(
+                real_md5,
+                expected_md5,
+                url,
+            )
+        )
+
+    if expected_sha1 and real_sha1 != expected_sha1:
+        report_to_snowden(
+            {
+                'headers': req.headers.headers,
+                'expected_sha1': expected_sha1,
+                'real_sha1': real_sha1
+            }
+        )
+
+        raise BadChecksumFetchError(
+            'Downloaded {}, but expected {} for {}'.format(
+                real_sha1,
+                expected_sha1,
+                url,
+            )
+        )
+
+    if expected_file_size != real_file_size:
+        report_to_snowden({'headers': req.headers.headers, 'file_size': real_file_size})
+
+        raise IncompleteFetchError(
+            'Downloaded {}, but expected {} for {}'.format(
+                real_file_size,
+                expected_file_size,
+                url,
+            )
+        )
+
+    return tmp_file_name
+
+
 def chmod(filename, mode):
     if platform.system().lower() == 'windows':
         # https://docs.microsoft.com/en-us/windows/win32/fileio/hard-links-and-junctions:
@@ -310,13 +310,13 @@ def chmod(filename, mode):
 def process(fetched_file, file_name, args, remove=True):
     assert len(args.rename) <= len(args.outputs), (
         'too few outputs to rename', args.rename, 'into', args.outputs)
- 
+
     # Forbid changes to the loaded resource
     chmod(fetched_file, 0o444)
 
-    if not os.path.isfile(fetched_file): 
-        raise ResourceIsDirectoryError('Resource must be a file, not a directory: %s' % fetched_file) 
- 
+    if not os.path.isfile(fetched_file):
+        raise ResourceIsDirectoryError('Resource must be a file, not a directory: %s' % fetched_file)
+
     if args.copy_to:
         hardlink_or_copy(fetched_file, args.copy_to)
         if not args.outputs:
@@ -333,8 +333,8 @@ def process(fetched_file, file_name, args, remove=True):
     if args.untar_to:
         ensure_dir(args.untar_to)
         # Extract only requested files
-        try: 
-            with tarfile.open(fetched_file, mode='r:*') as tar: 
+        try:
+            with tarfile.open(fetched_file, mode='r:*') as tar:
                 inputs = set(map(os.path.normpath, args.rename + args.outputs[len(args.rename):]))
                 members = [entry for entry in tar if os.path.normpath(os.path.join(args.untar_to, entry.name)) in inputs]
                 tar.extractall(args.untar_to, members=members)
@@ -342,10 +342,10 @@ def process(fetched_file, file_name, args, remove=True):
             for root, _, files in os.walk(args.untar_to):
                 for filename in files:
                     chmod(os.path.join(root, filename), 0o444)
-        except tarfile.ReadError as e: 
-            logging.exception(e) 
-            raise ResourceUnpackingError('File {} cannot be untared'.format(fetched_file)) 
- 
+        except tarfile.ReadError as e:
+            logging.exception(e)
+            raise ResourceUnpackingError('File {} cannot be untared'.format(fetched_file))
+
     for src, dst in zip(args.rename, args.outputs):
         if src == 'RESOURCE':
             src = fetched_file
@@ -360,7 +360,7 @@ def process(fetched_file, file_name, args, remove=True):
                 rename_or_copy_and_remove(src, dst)
             else:
                 hardlink_or_copy(src, dst)
- 
+
     for path in args.outputs:
         if not os.path.exists(path):
             raise OutputNotExistError('Output does not exist: %s' % os.path.abspath(path))
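
Two helpers above are worth a closer look. copy_stream fans a single read callable out to several writers, which is how fetch_url writes the file and feeds both digests in one pass; and git_like_hash_with_size hashes the content followed by '\0' and the decimal size (note this differs from git's actual blob hash, which prefixes a "blob <size>\0" header instead). A self-contained sketch of the fan-out pattern:

    import hashlib
    import io

    def copy_stream(read, *writers, **kwargs):
        # Same logic as the helper above: pump fixed-size chunks
        # from `read` into every writer until EOF.
        chunk_size = kwargs.get('size', 1024 * 1024)
        while True:
            data = read(chunk_size)
            if not data:
                break
            for write in writers:
                write(data)

    src = io.BytesIO(b'payload')
    out = io.BytesIO()
    md5, sha1 = hashlib.md5(), hashlib.sha1()
    copy_stream(src.read, out.write, md5.update, sha1.update)
    assert out.getvalue() == b'payload'

As the verification code shows (real_sha1.update('\0'); real_sha1.update(str(real_file_size))), fetch_url checks expected_sha1 against this content-plus-size digest, not a plain SHA-1 of the file.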

+ 36 - 36
build/scripts/fetch_from_mds.py

@@ -1,50 +1,50 @@
-import os 
-import sys 
-import logging 
+import os
+import sys
+import logging
 import argparse
- 
-import fetch_from 
- 
-MDS_PREFIX = "https://storage.yandex-team.ru/get-devtools/" 
- 
- 
-def parse_args(): 
+
+import fetch_from
+
+MDS_PREFIX = "https://storage.yandex-team.ru/get-devtools/"
+
+
+def parse_args():
     parser = argparse.ArgumentParser()
     fetch_from.add_common_arguments(parser)
- 
+
     parser.add_argument('--key', required=True)
- 
-    return parser.parse_args() 
- 
- 
-def fetch(key): 
-    parts = key.split("/") 
-    if len(parts) != 3: 
-        raise ValueError("Invalid MDS key '{}'".format(key)) 
- 
-    _, sha1, file_name = parts 
- 
-    fetched_file = fetch_from.fetch_url(MDS_PREFIX + key, False, file_name, expected_sha1=sha1) 
- 
-    return fetched_file, file_name 
- 
- 
+
+    return parser.parse_args()
+
+
+def fetch(key):
+    parts = key.split("/")
+    if len(parts) != 3:
+        raise ValueError("Invalid MDS key '{}'".format(key))
+
+    _, sha1, file_name = parts
+
+    fetched_file = fetch_from.fetch_url(MDS_PREFIX + key, False, file_name, expected_sha1=sha1)
+
+    return fetched_file, file_name
+
+
 def main(args):
     fetched_file, resource_file_name = fetch(args.key)
- 
+
     fetch_from.process(fetched_file, resource_file_name, args)
- 
- 
-if __name__ == '__main__': 
+
+
+if __name__ == '__main__':
     args = parse_args()
     fetch_from.setup_logging(args, os.path.basename(__file__))
- 
-    try: 
+
+    try:
         main(args)
-    except Exception as e: 
-        logging.exception(e) 
+    except Exception as e:
+        logging.exception(e)
         print >>sys.stderr, open(args.abs_log_path).read()
-        sys.stderr.flush() 
+        sys.stderr.flush()
 
         import error
         sys.exit(error.ExitCodes.INFRASTRUCTURE_ERROR if fetch_from.is_temporary(e) else 1)
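
The MDS key format is visible in fetch above: three slash-separated parts, with the middle one used as the expected digest and the last as the output file name. A sketch with a hypothetical key value:

    MDS_PREFIX = "https://storage.yandex-team.ru/get-devtools/"
    key = "12345/0123456789abcdef0123456789abcdef01234567/archive.tgz"  # hypothetical
    parts = key.split("/")
    assert len(parts) == 3
    _, sha1, file_name = parts
    url = MDS_PREFIX + key  # fetch_url then verifies expected_sha1 after download

The digest is compared against fetch_from.fetch_url's content-plus-size hash described above, not a plain file SHA-1.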

+ 10 - 10
build/scripts/fetch_from_sandbox.py

@@ -10,9 +10,9 @@ import time
 import urllib2
 import uuid
 
-import fetch_from 
+import fetch_from
+
 
- 
 ORIGIN_SUFFIX = '?origin=fetch-from-sandbox'
 MDS_PREFIX = 'http://storage-int.mds.yandex.net/get-sandbox/'
 TEMPORARY_ERROR_CODES = (429, 500, 503, 504)
@@ -68,7 +68,7 @@ def download_by_skynet(resource_info, file_name):
     if not skynet_id:
         raise ValueError("Resource does not have skynet_id")
 
-    temp_dir = os.path.abspath(fetch_from.uniq_string_generator()) 
+    temp_dir = os.path.abspath(fetch_from.uniq_string_generator())
     os.mkdir(temp_dir)
     sky_get(skynet_id, temp_dir)
     return os.path.join(temp_dir, file_name)
@@ -132,7 +132,7 @@ def fetch_via_script(script, resource_id):
     return subprocess.check_output([script, str(resource_id)]).rstrip()
 
 
-def fetch(resource_id, custom_fetcher): 
+def fetch(resource_id, custom_fetcher):
     try:
         resource_info = get_resource_info(resource_id, touch=True, no_links=True)
     except Exception as e:
@@ -179,9 +179,9 @@ def fetch(resource_id, custom_fetcher):
             if mds_link is not None:
                 # Don't try too hard here: we will get back to MDS later on
                 yield lambda: fetch_from.fetch_url(mds_link, True, resource_file_name, expected_md5, tries=2)
-        yield lambda: fetch_from.fetch_url(proxy_link, False, resource_file_name, expected_md5) 
+        yield lambda: fetch_from.fetch_url(proxy_link, False, resource_file_name, expected_md5)
         if mds_link is not None:
-            yield lambda: fetch_from.fetch_url(mds_link, True, resource_file_name, expected_md5) 
+            yield lambda: fetch_from.fetch_url(mds_link, True, resource_file_name, expected_md5)
 
     if resource_info.get('attributes', {}).get('ttl') != 'inf':
         sys.stderr.write('WARNING: resource {} ttl is not "inf".\n'.format(resource_id))
@@ -211,7 +211,7 @@ def fetch(resource_id, custom_fetcher):
         else:
             raise Exception("No available protocol and/or server to fetch resource")
 
-    return fetched_file, resource_info['file_name'] 
+    return fetched_file, resource_info['file_name']
 
 
 def _get_resource_info_from_file(resource_file):
@@ -241,7 +241,7 @@ def _get_resource_info_from_file(resource_file):
 
 
 def main(args):
-    custom_fetcher = os.environ.get('YA_CUSTOM_FETCHER') 
+    custom_fetcher = os.environ.get('YA_CUSTOM_FETCHER')
 
     resource_info = _get_resource_info_from_file(args.resource_file)
     if resource_info:
@@ -252,8 +252,8 @@ def main(args):
         fetched_file, file_name = fetch(args.resource_id, custom_fetcher)
 
     fetch_from.process(fetched_file, file_name, args, remove=not custom_fetcher and not resource_info)
- 
- 
+
+
 if __name__ == '__main__':
     args = parse_args()
     fetch_from.setup_logging(args, os.path.basename(__file__))
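
The hunk above shows fetch yielding download candidates lazily (an opportunistic MDS attempt with a low retry count, then the proxy, then MDS again); the loop that consumes them sits outside the shown hunks. A generic sketch of that try-each-candidate pattern, offered as an assumption about the surrounding code:

    def first_successful(candidates):
        # Try each fetcher in order; re-raise the last error if all fail.
        last_exc = None
        for fetch_candidate in candidates:
            try:
                return fetch_candidate()
            except Exception as e:  # the real code also special-cases temporary errors
                last_exc = e
        if last_exc is None:
            raise RuntimeError('no fetch candidates')
        raise last_exc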

+ 31 - 31
build/scripts/gen_py3_reg.py

@@ -1,34 +1,34 @@
-import sys 
- 
-template = ''' 
+import sys
+
+template = '''
 struct PyObject;
 extern "C" int PyImport_AppendInittab(const char* name, PyObject* (*initfunc)());
 extern "C" PyObject* {1}();
- 
-namespace { 
-    struct TRegistrar { 
-        inline TRegistrar() { 
-            // TODO Collect all modules and call PyImport_ExtendInittab once 
-            PyImport_AppendInittab("{0}", {1}); 
-        } 
-    } REG; 
-} 
-''' 
- 
- 
-def mangle(name): 
-    if '.' not in name: 
-        return name 
-    return ''.join('{}{}'.format(len(s), s) for s in name.split('.')) 
- 
-if __name__ == '__main__': 
-    if len(sys.argv) != 3: 
-        print >>sys.stderr, 'Usage: <path/to/gen_py_reg.py> <python_module_name> <output_file>' 
-        print >>sys.stderr, 'Passed: ' + ' '.join(sys.argv) 
-        sys.exit(1) 
- 
-    with open(sys.argv[2], 'w') as f: 
-        modname = sys.argv[1] 
-        initname = 'PyInit_' + mangle(modname) 
-        code = template.replace('{0}', modname).replace('{1}', initname) 
-        f.write(code) 
+
+namespace {
+    struct TRegistrar {
+        inline TRegistrar() {
+            // TODO Collect all modules and call PyImport_ExtendInittab once
+            PyImport_AppendInittab("{0}", {1});
+        }
+    } REG;
+}
+'''
+
+
+def mangle(name):
+    if '.' not in name:
+        return name
+    return ''.join('{}{}'.format(len(s), s) for s in name.split('.'))
+
+if __name__ == '__main__':
+    if len(sys.argv) != 3:
+        print >>sys.stderr, 'Usage: <path/to/gen_py_reg.py> <python_module_name> <output_file>'
+        print >>sys.stderr, 'Passed: ' + ' '.join(sys.argv)
+        sys.exit(1)
+
+    with open(sys.argv[2], 'w') as f:
+        modname = sys.argv[1]
+        initname = 'PyInit_' + mangle(modname)
+        code = template.replace('{0}', modname).replace('{1}', initname)
+        f.write(code)
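
mangle above flattens a dotted module path into a valid C identifier by prefixing each component with its length, and the script fills the template with str.replace rather than str.format because the template body contains literal C++ braces. A quick check of the mangling rule:

    def mangle(name):
        # Same rule as the script: dot-free names pass through unchanged.
        if '.' not in name:
            return name
        return ''.join('{}{}'.format(len(s), s) for s in name.split('.'))

    assert mangle('mymod') == 'mymod'
    assert mangle('mypkg.mymod') == '5mypkg5mymod'
    # The generated stub thus registers PyInit_5mypkg5mymod for "mypkg.mymod".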

+ 28 - 28
build/scripts/gen_py_protos.py

@@ -4,42 +4,42 @@ import shutil
 import subprocess
 import sys
 import tempfile
-import argparse 
-import re 
+import argparse
+import re
 
 
 OUT_DIR_ARG = '--python_out='
 
-def main(): 
-    parser = argparse.ArgumentParser() 
-    parser.add_argument("--suffixes", nargs="*", default=[]) 
-    parser.add_argument("protoc_args", nargs=argparse.REMAINDER) 
-    script_args = parser.parse_args() 
- 
-    args = script_args.protoc_args 
- 
-    if args[0] == "--": 
-        args = args[1:] 
- 
+def main():
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--suffixes", nargs="*", default=[])
+    parser.add_argument("protoc_args", nargs=argparse.REMAINDER)
+    script_args = parser.parse_args()
+
+    args = script_args.protoc_args
+
+    if args[0] == "--":
+        args = args[1:]
+
     out_dir_orig = None
     out_dir_temp = None
-    plugin_out_dirs_orig = {} 
+    plugin_out_dirs_orig = {}
     for i in range(len(args)):
         if args[i].startswith(OUT_DIR_ARG):
             assert not out_dir_orig, 'Duplicate "{0}" param'.format(OUT_DIR_ARG)
             out_dir_orig = args[i][len(OUT_DIR_ARG):]
             out_dir_temp = tempfile.mkdtemp(dir=out_dir_orig)
             args[i] = OUT_DIR_ARG + out_dir_temp
-            continue 
- 
-        match = re.match(r"^(--(\w+)_out=).*", args[i]) 
-        if match: 
-            plugin_out_dir_arg = match.group(1) 
-            plugin = match.group(2) 
-            assert plugin not in plugin_out_dirs_orig, 'Duplicate "{0}" param'.format(plugin_out_dir_arg) 
-            plugin_out_dirs_orig[plugin] = args[i][len(plugin_out_dir_arg):] 
-            assert plugin_out_dirs_orig[plugin] == out_dir_orig, 'Params "{0}" and "{1}" expected to have the same value'.format(OUT_DIR_ARG, plugin_out_dir_arg) 
-            args[i] = plugin_out_dir_arg + out_dir_temp 
+            continue
+
+        match = re.match(r"^(--(\w+)_out=).*", args[i])
+        if match:
+            plugin_out_dir_arg = match.group(1)
+            plugin = match.group(2)
+            assert plugin not in plugin_out_dirs_orig, 'Duplicate "{0}" param'.format(plugin_out_dir_arg)
+            plugin_out_dirs_orig[plugin] = args[i][len(plugin_out_dir_arg):]
+            assert plugin_out_dirs_orig[plugin] == out_dir_orig, 'Params "{0}" and "{1}" expected to have the same value'.format(OUT_DIR_ARG, plugin_out_dir_arg)
+            args[i] = plugin_out_dir_arg + out_dir_temp
 
     assert out_dir_temp, 'Param "{0}" not found'.format(OUT_DIR_ARG)
 
@@ -55,13 +55,13 @@ def main():
                 os.mkdir(d_orig)
         for f in files:
             f_orig = f
-            for suf in script_args.suffixes: 
-                if f.endswith(suf): 
-                    f_orig = f[:-len(suf)] + "__int__" + suf 
+            for suf in script_args.suffixes:
+                if f.endswith(suf):
+                    f_orig = f[:-len(suf)] + "__int__" + suf
                     break
             os.rename(path.join(root_temp, f), path.join(root_orig, f_orig))
     shutil.rmtree(out_dir_temp)
 
 
 if __name__ == '__main__':
-    main() 
+    main()
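
The rename loop above is the other half of the __int__ convention used by pb2_arg in pybuild.py: protoc writes plain foo_pb2.py into a temporary directory, and this wrapper renames it to the name the build graph expects. A check of the rename expression (the suffix value is the common case, not taken from this diff):

    suffixes = ['_pb2.py']
    f = 'foo_pb2.py'
    for suf in suffixes:
        if f.endswith(suf):
            f = f[:-len(suf)] + '__int__' + suf
            break
    assert f == 'foo__int___pb2.py'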

+ 27 - 27
build/scripts/gen_tasklet_reg.py

@@ -1,16 +1,16 @@
-import argparse 
- 
+import argparse
+
 TEMPLATE = '''\
 {includes}\
 #include <tasklet/runtime/lib/{language}_wrapper.h>
 #include <tasklet/runtime/lib/registry.h>
- 
+
 static const NTasklet::TRegHelper REG(
-    "{name}", 
+    "{name}",
     new NTasklet::{wrapper}
-); 
-''' 
- 
+);
+'''
+
 WRAPPER = {
     'cpp': 'TCppWrapper<{impl}>()',
     'js': 'TJsWrapper("{impl}")',
@@ -18,34 +18,34 @@ WRAPPER = {
     'py': 'TPythonWrapper("{impl}")',
     'java': 'TJavaWrapper("{impl}", "{py_wrapper}")',
 }
- 
- 
-def parse_args(): 
-    parser = argparse.ArgumentParser() 
+
+
+def parse_args():
+    parser = argparse.ArgumentParser()
     parser.add_argument('name')
     parser.add_argument('output')
     parser.add_argument('-l', '--lang', choices=WRAPPER, required=True)
     parser.add_argument('-i', '--impl', required=True)
     parser.add_argument('-w', '--wrapper', required=False)
     parser.add_argument('includes', nargs='*')
- 
-    return parser.parse_args() 
- 
- 
-if __name__ == '__main__': 
-    args = parse_args() 
- 
+
+    return parser.parse_args()
+
+
+if __name__ == '__main__':
+    args = parse_args()
+
     includes = ''.join(
         '#include <{}>\n'.format(include)
-        for include in args.includes 
-    ) 
- 
-    code = TEMPLATE.format( 
-        includes=includes, 
+        for include in args.includes
+    )
+
+    code = TEMPLATE.format(
+        includes=includes,
         language=args.lang,
-        name=args.name, 
+        name=args.name,
         wrapper=WRAPPER[args.lang].format(impl=args.impl, py_wrapper=args.wrapper),
-    ) 
- 
+    )
+
     with open(args.output, 'w') as f:
-        f.write(code) 
+        f.write(code)
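
To make the template concrete: a hypothetical invocation gen_tasklet_reg.py MyTasklet reg.cpp -l cpp -i NMy::TImpl (no extra includes) would write roughly the following to reg.cpp:

    #include <tasklet/runtime/lib/cpp_wrapper.h>
    #include <tasklet/runtime/lib/registry.h>

    static const NTasklet::TRegHelper REG(
        "MyTasklet",
        new NTasklet::TCppWrapper<NMy::TImpl>()
    );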

Some files were not shown because too many files changed in this diff