Browse Source

Added function to rename .ctors sections into .init_array in cuda libraries

Добавил функцию, чтобы все библиотеки из CUDA-пакетов, которые статически линкуются, гарантированно имели символы в секции `.init_array`, а не в `.ctors`, как происходит сейчас.

В противном случае некоторые библиотеки (например, `nvrtc`) не работают при статической линковке.
1362e42f94015ba083431caa04d7ae436fd6bf99
toshiksvg 1 year ago
parent
commit
7c1f60188c
2 changed files with 67 additions and 33 deletions
  1. 61 30
      build/scripts/link_exe.py
  2. 6 3
      build/ymake.core.conf

+ 61 - 30
build/scripts/link_exe.py

@@ -41,6 +41,9 @@ CUDA_LIBRARIES = {
     '-lnvinfer_plugin_static': '-lnvinfer_plugin',
     '-lnvinfer_plugin_static': '-lnvinfer_plugin',
     '-lnvonnxparser_static': '-lnvonnxparser',
     '-lnvonnxparser_static': '-lnvonnxparser',
     '-lnvparsers_static': '-lnvparsers',
     '-lnvparsers_static': '-lnvparsers',
+    '-lnvrtc_static': '-lnvrtc',
+    '-lnvrtc-builtins_static': '-lnvrtc-builtins',
+    '-lnvptxcompiler_static': '',
 }
 }
 
 
 
 
@@ -97,37 +100,25 @@ class CUDAManager:
         f.write(script)
         f.write(script)
 
 
 
 
-def process_cuda_libraries(cmd, cuda_manager, build_root):
-    if not cuda_manager.has_cuda_fatbins(cmd):
-        return cmd
-
-    def tmpdir_generator(prefix):
-        for idx in itertools.count():
-            path = os.path.abspath(os.path.join(build_root, prefix + '_' + str(idx)))
-            os.makedirs(path)
-            yield path
-
-    # add custom linker script
-    to_dirpath = next(tmpdir_generator('cuda_linker_script'))
-    script_path = os.path.join(to_dirpath, 'script')
-    with open(script_path, 'w') as f:
-        cuda_manager.write_linker_script(f)
-    flags_with_linker = list(cmd) + ['-Wl,--script={}'.format(script_path)]
+def tmpdir_generator(base_path, prefix):
+    for idx in itertools.count():
+        path = os.path.abspath(os.path.join(base_path, prefix + '_' + str(idx)))
+        os.makedirs(path)
+        yield path
 
 
-    if not cuda_manager.can_prune_libs:
-        return flags_with_linker
 
 
-    tmpdir_gen = tmpdir_generator('cuda_pruned_libs')
+def process_cuda_library_by_external_tool(cmd, build_root, tool_name, callable_tool_executor, allowed_cuda_libs):
+    tmpdir_gen = tmpdir_generator(build_root, 'cuda_' + tool_name + '_libs')
 
 
-    flags_pruned = []
+    new_flags = []
     cuda_deps = set()
     cuda_deps = set()
 
 
     # Because each directory flag only affects flags that follow it,
     # Because each directory flag only affects flags that follow it,
     # for correct pruning we need to process that in reversed order
     # for correct pruning we need to process that in reversed order
-    for flag in reversed(flags_with_linker):
-        if flag in cuda_manager.fatbin_libs:
+    for flag in reversed(cmd):
+        if flag in allowed_cuda_libs:
             cuda_deps.add('lib' + flag[2:] + '.a')
             cuda_deps.add('lib' + flag[2:] + '.a')
-            flag += '_pruned'
+            flag += '_' + tool_name
         elif flag.startswith('-L') and os.path.exists(flag[2:]) and os.path.isdir(flag[2:]) and any(f in cuda_deps for f in os.listdir(flag[2:])):
         elif flag.startswith('-L') and os.path.exists(flag[2:]) and os.path.isdir(flag[2:]) and any(f in cuda_deps for f in os.listdir(flag[2:])):
             from_dirpath = flag[2:]
             from_dirpath = flag[2:]
             from_deps = list(cuda_deps & set(os.listdir(from_dirpath)))
             from_deps = list(cuda_deps & set(os.listdir(from_dirpath)))
@@ -137,19 +128,57 @@ def process_cuda_libraries(cmd, cuda_manager, build_root):
 
 
                 for f in from_deps:
                 for f in from_deps:
                     from_path = os.path.join(from_dirpath, f)
                     from_path = os.path.join(from_dirpath, f)
-                    to_path = os.path.join(to_dirpath, f[:-2] + '_pruned.a')
-                    cuda_manager.prune_lib(from_path, to_path)
+                    to_path = os.path.join(to_dirpath, f[:-2] + '_' + tool_name +'.a')
+                    callable_tool_executor(from_path, to_path)
                     cuda_deps.remove(f)
                     cuda_deps.remove(f)
 
 
                 # do not remove current directory
                 # do not remove current directory
                 # because it can contain other libraries we want link to
                 # because it can contain other libraries we want link to
-                # instead we just add new directory with pruned libs
-                flags_pruned.append('-L' + to_dirpath)
+                # instead we just add new directory with processed by tool libs
+                new_flags.append('-L' + to_dirpath)
 
 
-        flags_pruned.append(flag)
+        new_flags.append(flag)
 
 
     assert not cuda_deps, ('Unresolved CUDA deps: ' + ','.join(cuda_deps))
     assert not cuda_deps, ('Unresolved CUDA deps: ' + ','.join(cuda_deps))
-    return reversed(flags_pruned)
+    return reversed(new_flags)
+
+
+def process_cuda_libraries_by_objcopy(cmd, build_root, objcopy_exe):
+    if not objcopy_exe:
+        return cmd
+
+    def run_objcopy(from_path, to_path):
+        rename_section_command = [objcopy_exe, "--rename-section", ".ctors=.init_array", from_path, to_path]
+        subprocess.check_call(rename_section_command)
+
+    possible_libraries = set(CUDA_LIBRARIES.keys())
+    possible_libraries.update([
+        '-lcudadevrt',
+        '-lcufilt',
+        '-lculibos',
+    ])
+    possible_libraries.update([
+        lib_name + "_pruner" for lib_name in possible_libraries
+    ])
+
+    return process_cuda_library_by_external_tool(list(cmd), build_root, 'objcopy', run_objcopy, possible_libraries)
+
+
+def process_cuda_libraries_by_nvprune(cmd, cuda_manager, build_root):
+    if not cuda_manager.has_cuda_fatbins(cmd):
+        return cmd
+
+    # add custom linker script
+    to_dirpath = next(tmpdir_generator(build_root, 'cuda_linker_script'))
+    script_path = os.path.join(to_dirpath, 'script')
+    with open(script_path, 'w') as f:
+        cuda_manager.write_linker_script(f)
+    flags_with_linker = list(cmd) + ['-Wl,--script={}'.format(script_path)]
+
+    if not cuda_manager.can_prune_libs:
+        return flags_with_linker
+
+    return process_cuda_library_by_external_tool(flags_with_linker, build_root, 'pruner', cuda_manager.prune_lib, cuda_manager.fatbin_libs)
 
 
 
 
 def remove_excessive_flags(cmd):
 def remove_excessive_flags(cmd):
@@ -264,6 +293,7 @@ def parse_args():
     parser.add_option('--cuda-architectures',
     parser.add_option('--cuda-architectures',
                       help='List of supported CUDA architectures, separated by ":" (e.g. "sm_52:compute_70:lto_90a"')
                       help='List of supported CUDA architectures, separated by ":" (e.g. "sm_52:compute_70:lto_90a"')
     parser.add_option('--nvprune-exe')
     parser.add_option('--nvprune-exe')
+    parser.add_option('--objcopy-exe')
     parser.add_option('--build-root')
     parser.add_option('--build-root')
     parser.add_option('--arch')
     parser.add_option('--arch')
     parser.add_option('--linker-output')
     parser.add_option('--linker-output')
@@ -295,7 +325,8 @@ if __name__ == '__main__':
         cmd = fix_cmd_for_dynamic_cuda(cmd)
         cmd = fix_cmd_for_dynamic_cuda(cmd)
     else:
     else:
         cuda_manager = CUDAManager(opts.cuda_architectures, opts.nvprune_exe)
         cuda_manager = CUDAManager(opts.cuda_architectures, opts.nvprune_exe)
-        cmd = process_cuda_libraries(cmd, cuda_manager, opts.build_root)
+        cmd = process_cuda_libraries_by_nvprune(cmd, cuda_manager, opts.build_root)
+        cmd = process_cuda_libraries_by_objcopy(cmd, opts.build_root, opts.objcopy_exe)
     cmd = ProcessWholeArchiveOption(opts.arch, opts.whole_archive_peers, opts.whole_archive_libs).construct_cmd(cmd)
     cmd = ProcessWholeArchiveOption(opts.arch, opts.whole_archive_peers, opts.whole_archive_libs).construct_cmd(cmd)
 
 
     if opts.custom_step:
     if opts.custom_step:

+ 6 - 3
build/ymake.core.conf

@@ -1062,9 +1062,12 @@ module _LINK_UNIT: _BASE_UNIT {
         LINK_SCRIPT_EXE_FLAGS += --dynamic-cuda
         LINK_SCRIPT_EXE_FLAGS += --dynamic-cuda
     }
     }
 
 
-    when ($CUDA_ARCHITECTURES && $USE_DYNAMIC_CUDA != "yes") {
-        LINK_SCRIPT_EXE_FLAGS+=--cuda-architectures $CUDA_ARCHITECTURES
-        LINK_SCRIPT_EXE_FLAGS+=--nvprune-exe $CUDA_ROOT/bin/nvprune
+    when ($USE_DYNAMIC_CUDA != "yes") {
+        when ($CUDA_ARCHITECTURES) {
+            LINK_SCRIPT_EXE_FLAGS+=--cuda-architectures $CUDA_ARCHITECTURES
+            LINK_SCRIPT_EXE_FLAGS+=--nvprune-exe $CUDA_ROOT/bin/nvprune
+        }
+        LINK_SCRIPT_EXE_FLAGS+=--objcopy-exe $OBJCOPY_TOOL
     }
     }
 
 
     LINK_SCRIPT_EXE_FLAGS+=--build-root $(BUILD_ROOT)
     LINK_SCRIPT_EXE_FLAGS+=--build-root $(BUILD_ROOT)