compile_cuda.py 6.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196
  1. import sys
  2. import subprocess
  3. import os
  4. import platform
  5. import collections
  6. import re
  7. import tempfile
  8. def fix_win_bin_name(name):
  9. res = os.path.normpath(name)
  10. if not os.path.splitext(name)[1]:
  11. return res + '.exe'
  12. return res
  13. def find_compiler_bindir(command):
  14. for idx, word in enumerate(command):
  15. if '--compiler-bindir' in word:
  16. return idx
  17. return None
  18. def is_clang(command):
  19. cmplr_dir_idx = find_compiler_bindir(command)
  20. return cmplr_dir_idx is not None and 'clang' in command[cmplr_dir_idx]
  21. def fix_win(command, flags):
  22. if platform.system().lower() == "windows":
  23. command[0] = fix_win_bin_name(command[0])
  24. cmplr_dir_idx = find_compiler_bindir(command)
  25. if cmplr_dir_idx is not None:
  26. key, value = command[cmplr_dir_idx].split('=')
  27. command[cmplr_dir_idx] = key + '=' + fix_win_bin_name(value)
  28. def main():
  29. try:
  30. sys.argv.remove('--y_skip_nocxxinc')
  31. skip_nocxxinc = True
  32. except ValueError:
  33. skip_nocxxinc = False
  34. spl = sys.argv.index('--cflags')
  35. cmd = 1
  36. mtime0 = None
  37. if sys.argv[1] == '--mtime':
  38. mtime0 = sys.argv[2]
  39. cmd = 3
  40. if sys.argv[cmd] == '--custom-pid':
  41. custom_pid = sys.argv[4]
  42. cmd = 5
  43. command = sys.argv[cmd:spl]
  44. cflags = sys.argv[spl + 1 :]
  45. dump_args = False
  46. if '--y_dump_args' in command:
  47. command.remove('--y_dump_args')
  48. dump_args = True
  49. fix_win(command, cflags)
  50. executable = command[0]
  51. if not os.path.exists(executable):
  52. print >> sys.stderr, '{} not found'.format(executable)
  53. sys.exit(1)
  54. if is_clang(command):
  55. # nvcc concatenates the sources for clang, and clang reports unused
  56. # things from .h files as if they they were defined in a .cpp file.
  57. cflags += ['-Wno-unused-function', '-Wno-unused-parameter']
  58. if not is_clang(command) and '-fopenmp=libomp' in cflags:
  59. cflags.append('-fopenmp')
  60. cflags.remove('-fopenmp=libomp')
  61. skip_list = [
  62. '-gline-tables-only',
  63. # clang coverage
  64. '-fprofile-instr-generate',
  65. '-fcoverage-mapping',
  66. '/Zc:inline', # disable unreferenced functions (kernel registrators) remove
  67. '-Wno-c++17-extensions',
  68. '-flto',
  69. '-faligned-allocation',
  70. '-fsized-deallocation',
  71. '-fexperimental-library',
  72. # While it might be reasonable to compile host part of .cu sources with these optimizations enabled,
  73. # nvcc passes these options down towards cicc which lacks x86_64 extensions support.
  74. '-msse2',
  75. '-msse3',
  76. '-mssse3',
  77. '-msse4.1',
  78. '-msse4.2',
  79. ]
  80. if skip_nocxxinc:
  81. skip_list.append('-nostdinc++')
  82. for flag in skip_list:
  83. while flag in cflags:
  84. cflags.remove(flag)
  85. skip_prefix_list = [
  86. '-fsanitize=',
  87. '-fsanitize-coverage=',
  88. '-fsanitize-blacklist=',
  89. '--system-header-prefix',
  90. ]
  91. new_cflags = []
  92. for flag in cflags:
  93. if all(not flag.startswith(skip_prefix) for skip_prefix in skip_prefix_list):
  94. if flag.startswith('-fopenmp-version='):
  95. new_cflags.append(
  96. '-fopenmp-version=45'
  97. ) # Clang 11 only supports OpenMP 4.5, but the default is 5.0, so we need to forcefully redefine it.
  98. else:
  99. new_cflags.append(flag)
  100. cflags = new_cflags
  101. if not is_clang(command):
  102. def good(arg):
  103. if arg.startswith('--target='):
  104. return False
  105. return True
  106. cflags = filter(good, cflags)
  107. cpp_args = []
  108. compiler_args = []
  109. # NVCC requires particular MSVC versions which may differ from the version
  110. # used to compile regular C++ code. We have a separate MSVC in Arcadia for
  111. # the CUDA builds and pass it's root in $Y_VC_Root.
  112. # The separate MSVC for CUDA may absent in Yandex Open Source builds.
  113. vc_root = os.environ.get('Y_VC_Root')
  114. cflags_queue = collections.deque(cflags)
  115. while cflags_queue:
  116. arg = cflags_queue.popleft()
  117. if arg == '-mllvm':
  118. compiler_args.append(arg)
  119. compiler_args.append(cflags_queue.popleft())
  120. continue
  121. if arg[:2].upper() in ('-I', '/I', '-B'):
  122. value = arg[2:]
  123. if not value:
  124. value = cflags_queue.popleft()
  125. if arg[1] == 'I':
  126. cpp_args.append('-I{}'.format(value))
  127. elif arg[1] == 'B': # todo: delete "B" flag check when cuda stop to use gcc
  128. pass
  129. continue
  130. match = re.match(r'[-/]D(.*)', arg)
  131. if match:
  132. define = match.group(1)
  133. # We have C++ flags configured for the regular C++ build.
  134. # There is Y_MSVC_INCLUDE define with a path to the VC header files.
  135. # We need to change the path accordingly when using a separate MSVC for CUDA.
  136. if vc_root and define.startswith('Y_MSVC_INCLUDE'):
  137. define = os.path.expandvars('Y_MSVC_INCLUDE={}/include'.format(vc_root))
  138. cpp_args.append('-D' + define.replace('\\', '/'))
  139. continue
  140. compiler_args.append(arg)
  141. command += cpp_args
  142. if compiler_args:
  143. command += ['--compiler-options', ','.join(compiler_args)]
  144. # --keep is necessary to prevent nvcc from embedding nvcc pid in generated
  145. # symbols. It makes nvcc use the original file name as the prefix in the
  146. # generated files (otherwise it also prepends tmpxft_{pid}_00000000-5), and
  147. # cicc derives the module name from its {input}.cpp1.ii file name.
  148. command += ['--keep', '--keep-dir', tempfile.mkdtemp(prefix='compile_cuda.py.')]
  149. # nvcc generates symbols like __fatbinwrap_{len}_{basename}_{hash}_{pid} where
  150. # {basename} is {input}.cpp1.ii with non-C chars translated to _, {len} is
  151. # {basename} length, {hash} is the hash of first exported symbol in
  152. # {input}.cpp1.ii if there is one, otherwise it is based on its modification
  153. # time (converted to string in the local timezone) and the current working
  154. # directory, and {pid} is a pid of nvcc process. To stabilize the names of
  155. # these symbols we need to fix mtime, timezone, cwd and pid.
  156. preload = [os.environ.get('LD_PRELOAD', ''), mtime0, custom_pid]
  157. os.environ['LD_PRELOAD'] = ' '.join(filter(None, preload))
  158. os.environ['TZ'] = 'UTC0' # POSIX fixed offset format.
  159. os.environ['TZDIR'] = '/var/empty' # Against counterfeit /usr/share/zoneinfo/$TZ.
  160. if dump_args:
  161. sys.stdout.write('\n'.join(command))
  162. else:
  163. sys.exit(subprocess.Popen(command, stdout=sys.stderr, stderr=sys.stderr, cwd='/').wait())
  164. if __name__ == '__main__':
  165. main()