compile_cuda.py 6.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197
  1. from __future__ import print_function
  2. import sys
  3. import subprocess
  4. import os
  5. import platform
  6. import collections
  7. import re
  8. import tempfile
  9. def fix_win_bin_name(name):
  10. res = os.path.normpath(name)
  11. if not os.path.splitext(name)[1]:
  12. return res + '.exe'
  13. return res
  14. def find_compiler_bindir(command):
  15. for idx, word in enumerate(command):
  16. if '--compiler-bindir' in word:
  17. return idx
  18. return None
  19. def is_clang(command):
  20. cmplr_dir_idx = find_compiler_bindir(command)
  21. return cmplr_dir_idx is not None and 'clang' in command[cmplr_dir_idx]
  22. def fix_win(command, flags):
  23. if platform.system().lower() == "windows":
  24. command[0] = fix_win_bin_name(command[0])
  25. cmplr_dir_idx = find_compiler_bindir(command)
  26. if cmplr_dir_idx is not None:
  27. key, value = command[cmplr_dir_idx].split('=')
  28. command[cmplr_dir_idx] = key + '=' + fix_win_bin_name(value)
  29. def main():
  30. try:
  31. sys.argv.remove('--y_skip_nocxxinc')
  32. skip_nocxxinc = True
  33. except ValueError:
  34. skip_nocxxinc = False
  35. spl = sys.argv.index('--cflags')
  36. cmd = 1
  37. mtime0 = None
  38. if sys.argv[1] == '--mtime':
  39. mtime0 = sys.argv[2]
  40. cmd = 3
  41. if sys.argv[cmd] == '--custom-pid':
  42. custom_pid = sys.argv[4]
  43. cmd = 5
  44. command = sys.argv[cmd:spl]
  45. cflags = sys.argv[spl + 1 :]
  46. dump_args = False
  47. if '--y_dump_args' in command:
  48. command.remove('--y_dump_args')
  49. dump_args = True
  50. fix_win(command, cflags)
  51. executable = command[0]
  52. if not os.path.exists(executable):
  53. print('{} not found'.format(executable), file=sys.stderr)
  54. sys.exit(1)
  55. if is_clang(command):
  56. # nvcc concatenates the sources for clang, and clang reports unused
  57. # things from .h files as if they they were defined in a .cpp file.
  58. cflags += ['-Wno-unused-function', '-Wno-unused-parameter']
  59. if not is_clang(command) and '-fopenmp=libomp' in cflags:
  60. cflags.append('-fopenmp')
  61. cflags.remove('-fopenmp=libomp')
  62. skip_list = [
  63. '-gline-tables-only',
  64. # clang coverage
  65. '-fprofile-instr-generate',
  66. '-fcoverage-mapping',
  67. '/Zc:inline', # disable unreferenced functions (kernel registrators) remove
  68. '-Wno-c++17-extensions',
  69. '-flto',
  70. '-faligned-allocation',
  71. '-fsized-deallocation',
  72. '-fexperimental-library',
  73. # While it might be reasonable to compile host part of .cu sources with these optimizations enabled,
  74. # nvcc passes these options down towards cicc which lacks x86_64 extensions support.
  75. '-msse2',
  76. '-msse3',
  77. '-mssse3',
  78. '-msse4.1',
  79. '-msse4.2',
  80. ]
  81. if skip_nocxxinc:
  82. skip_list.append('-nostdinc++')
  83. for flag in skip_list:
  84. while flag in cflags:
  85. cflags.remove(flag)
  86. skip_prefix_list = [
  87. '-fsanitize=',
  88. '-fsanitize-coverage=',
  89. '-fsanitize-blacklist=',
  90. '--system-header-prefix',
  91. ]
  92. new_cflags = []
  93. for flag in cflags:
  94. if all(not flag.startswith(skip_prefix) for skip_prefix in skip_prefix_list):
  95. if flag.startswith('-fopenmp-version='):
  96. new_cflags.append(
  97. '-fopenmp-version=45'
  98. ) # Clang 11 only supports OpenMP 4.5, but the default is 5.0, so we need to forcefully redefine it.
  99. else:
  100. new_cflags.append(flag)
  101. cflags = new_cflags
  102. if not is_clang(command):
  103. def good(arg):
  104. if arg.startswith('--target='):
  105. return False
  106. return True
  107. cflags = filter(good, cflags)
  108. cpp_args = []
  109. compiler_args = []
  110. # NVCC requires particular MSVC versions which may differ from the version
  111. # used to compile regular C++ code. We have a separate MSVC in Arcadia for
  112. # the CUDA builds and pass it's root in $Y_VC_Root.
  113. # The separate MSVC for CUDA may absent in Yandex Open Source builds.
  114. vc_root = os.environ.get('Y_VC_Root')
  115. cflags_queue = collections.deque(cflags)
  116. while cflags_queue:
  117. arg = cflags_queue.popleft()
  118. if arg == '-mllvm':
  119. compiler_args.append(arg)
  120. compiler_args.append(cflags_queue.popleft())
  121. continue
  122. if arg[:2].upper() in ('-I', '/I', '-B'):
  123. value = arg[2:]
  124. if not value:
  125. value = cflags_queue.popleft()
  126. if arg[1] == 'I':
  127. cpp_args.append('-I{}'.format(value))
  128. elif arg[1] == 'B': # todo: delete "B" flag check when cuda stop to use gcc
  129. pass
  130. continue
  131. match = re.match(r'[-/]D(.*)', arg)
  132. if match:
  133. define = match.group(1)
  134. # We have C++ flags configured for the regular C++ build.
  135. # There is Y_MSVC_INCLUDE define with a path to the VC header files.
  136. # We need to change the path accordingly when using a separate MSVC for CUDA.
  137. if vc_root and define.startswith('Y_MSVC_INCLUDE'):
  138. define = os.path.expandvars('Y_MSVC_INCLUDE={}/include'.format(vc_root))
  139. cpp_args.append('-D' + define.replace('\\', '/'))
  140. continue
  141. compiler_args.append(arg)
  142. command += cpp_args
  143. if compiler_args:
  144. command += ['--compiler-options', ','.join(compiler_args)]
  145. # --keep is necessary to prevent nvcc from embedding nvcc pid in generated
  146. # symbols. It makes nvcc use the original file name as the prefix in the
  147. # generated files (otherwise it also prepends tmpxft_{pid}_00000000-5), and
  148. # cicc derives the module name from its {input}.cpp1.ii file name.
  149. command += ['--keep', '--keep-dir', tempfile.mkdtemp(prefix='compile_cuda.py.')]
  150. # nvcc generates symbols like __fatbinwrap_{len}_{basename}_{hash}_{pid} where
  151. # {basename} is {input}.cpp1.ii with non-C chars translated to _, {len} is
  152. # {basename} length, {hash} is the hash of first exported symbol in
  153. # {input}.cpp1.ii if there is one, otherwise it is based on its modification
  154. # time (converted to string in the local timezone) and the current working
  155. # directory, and {pid} is a pid of nvcc process. To stabilize the names of
  156. # these symbols we need to fix mtime, timezone, cwd and pid.
  157. preload = [os.environ.get('LD_PRELOAD', ''), mtime0, custom_pid]
  158. os.environ['LD_PRELOAD'] = ' '.join(filter(None, preload))
  159. os.environ['TZ'] = 'UTC0' # POSIX fixed offset format.
  160. os.environ['TZDIR'] = '/var/empty' # Against counterfeit /usr/share/zoneinfo/$TZ.
  161. if dump_args:
  162. sys.stdout.write('\n'.join(command))
  163. else:
  164. sys.exit(subprocess.Popen(command, stdout=sys.stderr, stderr=sys.stderr, cwd='/').wait())
# Run the wrapper only when executed as a script, not when imported.
if __name__ == '__main__':
    main()