compile_cuda.py 6.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190
import collections
import os
import platform
import re
import shutil
import subprocess
import sys
import tempfile
  8. def fix_win_bin_name(name):
  9. res = os.path.normpath(name)
  10. if not os.path.splitext(name)[1]:
  11. return res + '.exe'
  12. return res
  13. def find_compiler_bindir(command):
  14. for idx, word in enumerate(command):
  15. if '--compiler-bindir' in word:
  16. return idx
  17. return None
  18. def is_clang(command):
  19. cmplr_dir_idx = find_compiler_bindir(command)
  20. return cmplr_dir_idx is not None and 'clang' in command[cmplr_dir_idx]
  21. def fix_win(command, flags):
  22. if platform.system().lower() == "windows":
  23. command[0] = fix_win_bin_name(command[0])
  24. cmplr_dir_idx = find_compiler_bindir(command)
  25. if cmplr_dir_idx is not None:
  26. key, value = command[cmplr_dir_idx].split('=')
  27. command[cmplr_dir_idx] = key + '=' + fix_win_bin_name(value)
  28. def main():
  29. try:
  30. sys.argv.remove('--y_skip_nocxxinc')
  31. skip_nocxxinc = True
  32. except ValueError:
  33. skip_nocxxinc = False
  34. spl = sys.argv.index('--cflags')
  35. cmd = 1
  36. mtime0 = None
  37. if sys.argv[1] == '--mtime':
  38. mtime0 = sys.argv[2]
  39. cmd = 3
  40. command = sys.argv[cmd:spl]
  41. cflags = sys.argv[spl + 1 :]
  42. dump_args = False
  43. if '--y_dump_args' in command:
  44. command.remove('--y_dump_args')
  45. dump_args = True
  46. fix_win(command, cflags)
  47. executable = command[0]
  48. if not os.path.exists(executable):
  49. print >> sys.stderr, '{} not found'.format(executable)
  50. sys.exit(1)
  51. if is_clang(command):
  52. # nvcc concatenates the sources for clang, and clang reports unused
  53. # things from .h files as if they they were defined in a .cpp file.
  54. cflags += ['-Wno-unused-function', '-Wno-unused-parameter']
  55. if not is_clang(command) and '-fopenmp=libomp' in cflags:
  56. cflags.append('-fopenmp')
  57. cflags.remove('-fopenmp=libomp')
  58. skip_list = [
  59. '-gline-tables-only',
  60. # clang coverage
  61. '-fprofile-instr-generate',
  62. '-fcoverage-mapping',
  63. '/Zc:inline', # disable unreferenced functions (kernel registrators) remove
  64. '-Wno-c++17-extensions',
  65. '-flto',
  66. '-faligned-allocation',
  67. '-fsized-deallocation',
  68. # While it might be reasonable to compile host part of .cu sources with these optimizations enabled,
  69. # nvcc passes these options down towards cicc which lacks x86_64 extensions support.
  70. '-msse2',
  71. '-msse3',
  72. '-mssse3',
  73. '-msse4.1',
  74. '-msse4.2',
  75. ]
  76. if skip_nocxxinc:
  77. skip_list.append('-nostdinc++')
  78. for flag in skip_list:
  79. if flag in cflags:
  80. cflags.remove(flag)
  81. skip_prefix_list = [
  82. '-fsanitize=',
  83. '-fsanitize-coverage=',
  84. '-fsanitize-blacklist=',
  85. '--system-header-prefix',
  86. ]
  87. new_cflags = []
  88. for flag in cflags:
  89. if all(not flag.startswith(skip_prefix) for skip_prefix in skip_prefix_list):
  90. if flag.startswith('-fopenmp-version='):
  91. new_cflags.append(
  92. '-fopenmp-version=45'
  93. ) # Clang 11 only supports OpenMP 4.5, but the default is 5.0, so we need to forcefully redefine it.
  94. else:
  95. new_cflags.append(flag)
  96. cflags = new_cflags
  97. if not is_clang(command):
  98. def good(arg):
  99. if arg.startswith('--target='):
  100. return False
  101. return True
  102. cflags = filter(good, cflags)
  103. cpp_args = []
  104. compiler_args = []
  105. # NVCC requires particular MSVC versions which may differ from the version
  106. # used to compile regular C++ code. We have a separate MSVC in Arcadia for
  107. # the CUDA builds and pass it's root in $Y_VC_Root.
  108. # The separate MSVC for CUDA may absent in Yandex Open Source builds.
  109. vc_root = os.environ.get('Y_VC_Root')
  110. cflags_queue = collections.deque(cflags)
  111. while cflags_queue:
  112. arg = cflags_queue.popleft()
  113. if arg == '-mllvm':
  114. compiler_args.append(arg)
  115. compiler_args.append(cflags_queue.popleft())
  116. continue
  117. if arg[:2].upper() in ('-I', '/I', '-B'):
  118. value = arg[2:]
  119. if not value:
  120. value = cflags_queue.popleft()
  121. if arg[1] == 'I':
  122. cpp_args.append('-I{}'.format(value))
  123. elif arg[1] == 'B': # todo: delete "B" flag check when cuda stop to use gcc
  124. pass
  125. continue
  126. match = re.match(r'[-/]D(.*)', arg)
  127. if match:
  128. define = match.group(1)
  129. # We have C++ flags configured for the regular C++ build.
  130. # There is Y_MSVC_INCLUDE define with a path to the VC header files.
  131. # We need to change the path accordingly when using a separate MSVC for CUDA.
  132. if vc_root and define.startswith('Y_MSVC_INCLUDE'):
  133. define = os.path.expandvars('Y_MSVC_INCLUDE={}/include'.format(vc_root))
  134. cpp_args.append('-D' + define.replace('\\', '/'))
  135. continue
  136. compiler_args.append(arg)
  137. command += cpp_args
  138. if compiler_args:
  139. command += ['--compiler-options', ','.join(compiler_args)]
  140. # --keep is necessary to prevent nvcc from embedding nvcc pid in generated
  141. # symbols. It makes nvcc use the original file name as the prefix in the
  142. # generated files (otherwise it also prepends tmpxft_{pid}_00000000-5), and
  143. # cicc derives the module name from its {input}.cpp1.ii file name.
  144. command += ['--keep', '--keep-dir', tempfile.mkdtemp(prefix='compile_cuda.py.')]
  145. # nvcc generates symbols like __fatbinwrap_{len}_{basename}_{hash} where
  146. # {basename} is {input}.cpp1.ii with non-C chars translated to _, {len} is
  147. # {basename} length, and {hash} is the hash of first exported symbol in
  148. # {input}.cpp1.ii if there is one, otherwise it is based on its modification
  149. # time (converted to string in the local timezone) and the current working
  150. # directory. To stabilize the names of these symbols we need to fix mtime,
  151. # timezone, and cwd.
  152. if mtime0:
  153. os.environ['LD_PRELOAD'] = mtime0
  154. os.environ['TZ'] = 'UTC0' # POSIX fixed offset format.
  155. os.environ['TZDIR'] = '/var/empty' # Against counterfeit /usr/share/zoneinfo/$TZ.
  156. if dump_args:
  157. sys.stdout.write('\n'.join(command))
  158. else:
  159. sys.exit(subprocess.Popen(command, stdout=sys.stderr, stderr=sys.stderr, cwd='/').wait())
  160. if __name__ == '__main__':
  161. main()