link_exe.py 8.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259
  1. import itertools
  2. import os
  3. import os.path
  4. import sys
  5. import subprocess
  6. import optparse
  7. import process_command_files as pcf
  8. from process_whole_archive_option import ProcessWholeArchiveOption
  9. def get_leaks_suppressions(cmd):
  10. supp, newcmd = [], []
  11. for arg in cmd:
  12. if arg.endswith(".supp"):
  13. supp.append(arg)
  14. else:
  15. newcmd.append(arg)
  16. return supp, newcmd
# Link flags that fix_cmd_for_musl() removes from the command line
# when the script is run with --musl.
MUSL_LIBS = '-lc', '-lcrypt', '-ldl', '-lm', '-lpthread', '-lrt', '-lutil'

# Static CUDA library flag -> flag substituted for it by
# fix_cmd_for_dynamic_cuda(). The keys also serve as the list of libraries
# that prune_cuda_libraries() considers for pruning (minus -lcudart_static).
# An empty value means the flag is replaced by an empty string.
# NOTE(review): '-lmyelin_executor_static' maps to '-lnvcaffe_parser', which
# does not follow the naming pattern of the other entries -- confirm intended.
CUDA_LIBRARIES = {
    '-lcublas_static': '-lcublas',
    '-lcublasLt_static': '-lcublasLt',
    '-lcudart_static': '-lcudart',
    '-lcudnn_static': '-lcudnn',
    '-lcufft_static_nocallback': '-lcufft',
    '-lcurand_static': '-lcurand',
    '-lcusolver_static': '-lcusolver',
    '-lcusparse_static': '-lcusparse',
    '-lmyelin_compiler_static': '-lmyelin',
    '-lmyelin_executor_static': '-lnvcaffe_parser',
    '-lmyelin_pattern_library_static': '',
    '-lmyelin_pattern_runtime_static': '',
    '-lnvinfer_static': '-lnvinfer',
    '-lnvinfer_plugin_static': '-lnvinfer_plugin',
    '-lnvonnxparser_static': '-lnvonnxparser',
    '-lnvparsers_static': '-lnvparsers',
}
  36. def prune_cuda_libraries(cmd, prune_arches, nvprune_exe, build_root):
  37. def name_generator(prefix):
  38. for idx in itertools.count():
  39. yield prefix + '_' + str(idx)
  40. def compute_arch(arch):
  41. _, ver = arch.split('_', 1)
  42. return 'compute_{}'.format(ver)
  43. libs_to_prune = set(CUDA_LIBRARIES)
  44. # does not contain device code, nothing to prune
  45. libs_to_prune.remove('-lcudart_static')
  46. tmp_names_gen = name_generator('cuda_pruned_libs')
  47. arch_args = []
  48. for arch in prune_arches.split(':'):
  49. arch_args.append('-gencode')
  50. arch_args.append('arch={},code={}'.format(compute_arch(arch), arch))
  51. flags = []
  52. cuda_deps = set()
  53. for flag in reversed(cmd):
  54. if flag in libs_to_prune:
  55. cuda_deps.add('lib' + flag[2:] + '.a')
  56. flag += '_pruned'
  57. elif flag.startswith('-L') and os.path.exists(flag[2:]) and os.path.isdir(flag[2:]) and any(f in cuda_deps for f in os.listdir(flag[2:])):
  58. from_dirpath = flag[2:]
  59. from_deps = list(cuda_deps & set(os.listdir(from_dirpath)))
  60. if from_deps:
  61. to_dirpath = os.path.abspath(os.path.join(build_root, next(tmp_names_gen)))
  62. os.makedirs(to_dirpath)
  63. for f in from_deps:
  64. # prune lib
  65. from_path = os.path.join(from_dirpath, f)
  66. to_path = os.path.join(to_dirpath, f[:-2] + '_pruned.a')
  67. subprocess.check_call([nvprune_exe] + arch_args + ['--output-file', to_path, from_path])
  68. cuda_deps.remove(f)
  69. # do not remove current directory
  70. # because it can contain other libraries we want link to
  71. # instead we just add new directory with pruned libs
  72. flags.append('-L' + to_dirpath)
  73. flags.append(flag)
  74. assert not cuda_deps, ('Unresolved CUDA deps: ' + ','.join(cuda_deps))
  75. return reversed(flags)
  76. def remove_excessive_flags(cmd):
  77. flags = []
  78. for flag in cmd:
  79. if not flag.endswith('.ios.interface') and not flag.endswith('.pkg.fake'):
  80. flags.append(flag)
  81. return flags
  82. def fix_sanitize_flag(cmd, opts):
  83. """
  84. Remove -fsanitize=address flag if sanitazers are linked explicitly for linux target.
  85. """
  86. for flag in cmd:
  87. if flag.startswith('--target') and 'linux' not in flag.lower():
  88. # use toolchained sanitize libraries
  89. return cmd
  90. assert opts.clang_ver
  91. CLANG_RT = 'contrib/libs/clang' + opts.clang_ver + '-rt/lib/'
  92. sanitize_flags = {
  93. '-fsanitize=address': CLANG_RT + 'asan',
  94. '-fsanitize=memory': CLANG_RT + 'msan',
  95. '-fsanitize=leak': CLANG_RT + 'lsan',
  96. '-fsanitize=undefined': CLANG_RT + 'ubsan',
  97. '-fsanitize=thread': CLANG_RT + 'tsan',
  98. }
  99. used_sanitize_libs = []
  100. aux = []
  101. for flag in cmd:
  102. if flag.startswith('-fsanitize-coverage='):
  103. # do not link sanitizer libraries from clang
  104. aux.append('-fno-sanitize-link-runtime')
  105. if flag in sanitize_flags and any(s.startswith(sanitize_flags[flag]) for s in cmd):
  106. # exclude '-fsanitize=' if appropriate library is linked explicitly
  107. continue
  108. if any(flag.startswith(lib) for lib in sanitize_flags.values()):
  109. used_sanitize_libs.append(flag)
  110. continue
  111. aux.append(flag)
  112. # move sanitize libraries out of the repeatedly searched group of archives
  113. flags = []
  114. for flag in aux:
  115. if flag == '-Wl,--start-group':
  116. flags += ['-Wl,--whole-archive'] + used_sanitize_libs + ['-Wl,--no-whole-archive']
  117. flags.append(flag)
  118. return flags
  119. def fix_cmd_for_musl(cmd):
  120. flags = []
  121. for flag in cmd:
  122. if flag not in MUSL_LIBS:
  123. flags.append(flag)
  124. return flags
  125. def fix_cmd_for_dynamic_cuda(cmd):
  126. flags = []
  127. for flag in cmd:
  128. if flag in CUDA_LIBRARIES:
  129. flags.append(CUDA_LIBRARIES[flag])
  130. else:
  131. flags.append(flag)
  132. return flags
  133. def gen_default_suppressions(inputs, output, source_root):
  134. import collections
  135. import os
  136. supp_map = collections.defaultdict(set)
  137. for filename in inputs:
  138. sanitizer = os.path.basename(filename).split('.', 1)[0]
  139. with open(os.path.join(source_root, filename)) as src:
  140. for line in src:
  141. line = line.strip()
  142. if not line or line.startswith('#'):
  143. continue
  144. supp_map[sanitizer].add(line)
  145. with open(output, "wb") as dst:
  146. for supp_type, supps in supp_map.items():
  147. dst.write('extern "C" const char *__%s_default_suppressions() {\n' % supp_type)
  148. dst.write(' return "{}";\n'.format('\\n'.join(sorted(supps))))
  149. dst.write('}\n')
  150. def fix_blas_resolving(cmd):
  151. # Intel mkl comes as a precompiled static library and thus can not be recompiled with sanitizer runtime instrumentation.
  152. # That's why we prefer to use cblas instead of Intel mkl as a drop-in replacement under sanitizers.
  153. # But if the library has dependencies on mkl and cblas simultaneously, it will get a linking error.
  154. # Hence we assume that it's probably compiling without sanitizers and we can easily remove cblas to prevent multiple definitions of the same symbol at link time.
  155. for arg in cmd:
  156. if arg.startswith('contrib/libs') and arg.endswith('mkl-lp64.a'):
  157. return [arg for arg in cmd if not arg.endswith('libcontrib-libs-cblas.a')]
  158. return cmd
  159. def parse_args():
  160. parser = optparse.OptionParser()
  161. parser.disable_interspersed_args()
  162. parser.add_option('--musl', action='store_true')
  163. parser.add_option('--custom-step')
  164. parser.add_option('--python')
  165. parser.add_option('--source-root')
  166. parser.add_option('--clang-ver')
  167. parser.add_option('--dynamic-cuda', action='store_true')
  168. parser.add_option('--cuda-architectures',
  169. help='List of supported CUDA architectures, separated by ":" (e.g. "sm_52:compute_70:lto_90a"')
  170. parser.add_option('--nvprune-exe')
  171. parser.add_option('--build-root')
  172. parser.add_option('--arch')
  173. parser.add_option('--linker-output')
  174. parser.add_option('--whole-archive-peers', action='append')
  175. parser.add_option('--whole-archive-libs', action='append')
  176. return parser.parse_args()
# Script entry point: take the linker command that follows the script's own
# options, rewrite it step by step, run it and exit with its return code.
if __name__ == '__main__':
    opts, args = parse_args()
    # NOTE(review): skip_markers comes from process_command_files; presumably
    # it drops command-file marker arguments -- confirm against that module.
    args = pcf.skip_markers(args)

    cmd = fix_blas_resolving(args)
    cmd = remove_excessive_flags(cmd)
    if opts.musl:
        cmd = fix_cmd_for_musl(cmd)

    cmd = fix_sanitize_flag(cmd, opts)

    # These checks are substring searches over the string form of the whole
    # command list, not exact matches against individual arguments.
    if 'ld.lld' in str(cmd):
        if '-fPIE' in str(cmd) or '-fPIC' in str(cmd):
            # support explicit PIE
            pass
        else:
            cmd.append('-Wl,-no-pie')

    # --dynamic-cuda takes precedence: pruning is only attempted without it.
    if opts.dynamic_cuda:
        cmd = fix_cmd_for_dynamic_cuda(cmd)
    elif opts.cuda_architectures:
        cmd = prune_cuda_libraries(cmd, opts.cuda_architectures, opts.nvprune_exe, opts.build_root)
    cmd = ProcessWholeArchiveOption(opts.arch, opts.whole_archive_peers, opts.whole_archive_libs).construct_cmd(cmd)

    # The custom step is given args (the command before the rewrites above),
    # not cmd, and must succeed before linking continues.
    if opts.custom_step:
        assert opts.python
        subprocess.check_call([opts.python] + [opts.custom_step] + args)

    # *.supp arguments are removed from the command; if there were any, they
    # are compiled in through a generated source file appended to the command.
    supp, cmd = get_leaks_suppressions(cmd)
    if supp:
        # Relative path: the file is created in the current working directory.
        src_file = "default_suppressions.cpp"
        gen_default_suppressions(supp, src_file, opts.source_root)
        cmd += [src_file]

    # Linker stdout goes to the --linker-output file when given; stderr is
    # always passed through.
    if opts.linker_output:
        stdout = open(opts.linker_output, 'w')
    else:
        stdout = sys.stdout

    rc = subprocess.call(cmd, shell=False, stderr=sys.stderr, stdout=stdout)
    sys.exit(rc)
  209. sys.exit(rc)