Cuda.cpp 39 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013
  1. //===--- Cuda.cpp - Cuda Tool and ToolChain Implementations -----*- C++ -*-===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. #include "Cuda.h"
  9. #include "CommonArgs.h"
  10. #include "clang/Basic/Cuda.h"
  11. #include "clang/Config/config.h"
  12. #include "clang/Driver/Compilation.h"
  13. #include "clang/Driver/Distro.h"
  14. #include "clang/Driver/Driver.h"
  15. #include "clang/Driver/DriverDiagnostic.h"
  16. #include "clang/Driver/InputInfo.h"
  17. #include "clang/Driver/Options.h"
  18. #include "llvm/ADT/StringExtras.h"
  19. #include "llvm/Option/ArgList.h"
  20. #include "llvm/Support/FileSystem.h"
  21. #include "llvm/Support/FormatAdapters.h"
  22. #include "llvm/Support/FormatVariadic.h"
  23. #include "llvm/Support/Host.h"
  24. #include "llvm/Support/Path.h"
  25. #include "llvm/Support/Process.h"
  26. #include "llvm/Support/Program.h"
  27. #include "llvm/Support/TargetParser.h"
  28. #include "llvm/Support/VirtualFileSystem.h"
  29. #include <system_error>
  30. using namespace clang::driver;
  31. using namespace clang::driver::toolchains;
  32. using namespace clang::driver::tools;
  33. using namespace clang;
  34. using namespace llvm::opt;
  35. namespace {
  36. CudaVersion getCudaVersion(uint32_t raw_version) {
  37. if (raw_version < 7050)
  38. return CudaVersion::CUDA_70;
  39. if (raw_version < 8000)
  40. return CudaVersion::CUDA_75;
  41. if (raw_version < 9000)
  42. return CudaVersion::CUDA_80;
  43. if (raw_version < 9010)
  44. return CudaVersion::CUDA_90;
  45. if (raw_version < 9020)
  46. return CudaVersion::CUDA_91;
  47. if (raw_version < 10000)
  48. return CudaVersion::CUDA_92;
  49. if (raw_version < 10010)
  50. return CudaVersion::CUDA_100;
  51. if (raw_version < 10020)
  52. return CudaVersion::CUDA_101;
  53. if (raw_version < 11000)
  54. return CudaVersion::CUDA_102;
  55. if (raw_version < 11010)
  56. return CudaVersion::CUDA_110;
  57. if (raw_version < 11020)
  58. return CudaVersion::CUDA_111;
  59. if (raw_version < 11030)
  60. return CudaVersion::CUDA_112;
  61. if (raw_version < 11040)
  62. return CudaVersion::CUDA_113;
  63. if (raw_version < 11050)
  64. return CudaVersion::CUDA_114;
  65. if (raw_version < 11060)
  66. return CudaVersion::CUDA_115;
  67. if (raw_version < 11070)
  68. return CudaVersion::CUDA_116;
  69. if (raw_version < 11080)
  70. return CudaVersion::CUDA_117;
  71. if (raw_version < 11090)
  72. return CudaVersion::CUDA_118;
  73. return CudaVersion::NEW;
  74. }
  75. CudaVersion parseCudaHFile(llvm::StringRef Input) {
  76. // Helper lambda which skips the words if the line starts with them or returns
  77. // std::nullopt otherwise.
  78. auto StartsWithWords =
  79. [](llvm::StringRef Line,
  80. const SmallVector<StringRef, 3> words) -> std::optional<StringRef> {
  81. for (StringRef word : words) {
  82. if (!Line.consume_front(word))
  83. return {};
  84. Line = Line.ltrim();
  85. }
  86. return Line;
  87. };
  88. Input = Input.ltrim();
  89. while (!Input.empty()) {
  90. if (auto Line =
  91. StartsWithWords(Input.ltrim(), {"#", "define", "CUDA_VERSION"})) {
  92. uint32_t RawVersion;
  93. Line->consumeInteger(10, RawVersion);
  94. return getCudaVersion(RawVersion);
  95. }
  96. // Find next non-empty line.
  97. Input = Input.drop_front(Input.find_first_of("\n\r")).ltrim();
  98. }
  99. return CudaVersion::UNKNOWN;
  100. }
  101. } // namespace
  102. void CudaInstallationDetector::WarnIfUnsupportedVersion() {
  103. if (Version > CudaVersion::PARTIALLY_SUPPORTED) {
  104. std::string VersionString = CudaVersionToString(Version);
  105. if (!VersionString.empty())
  106. VersionString.insert(0, " ");
  107. D.Diag(diag::warn_drv_new_cuda_version)
  108. << VersionString
  109. << (CudaVersion::PARTIALLY_SUPPORTED != CudaVersion::FULLY_SUPPORTED)
  110. << CudaVersionToString(CudaVersion::PARTIALLY_SUPPORTED);
  111. } else if (Version > CudaVersion::FULLY_SUPPORTED)
  112. D.Diag(diag::warn_drv_partially_supported_cuda_version)
  113. << CudaVersionToString(Version);
  114. }
// Probe the filesystem for a usable CUDA installation. On success this records
// the installation layout (bin/, include/, nvvm/libdevice), parses the CUDA
// version out of cuda.h, builds the per-GPU-arch libdevice bitcode map, and
// sets IsValid. Candidate roots come from --cuda-path, Windows default
// locations, the directory containing a discovered 'ptxas', and common
// /usr/local paths.
CudaInstallationDetector::CudaInstallationDetector(
    const Driver &D, const llvm::Triple &HostTriple,
    const llvm::opt::ArgList &Args)
    : D(D) {
  // A candidate installation root. StrictChecking forces the libdevice
  // directory check even when -nogpulib was passed (see the ptxas note below).
  struct Candidate {
    std::string Path;
    bool StrictChecking;

    Candidate(std::string Path, bool StrictChecking = false)
        : Path(Path), StrictChecking(StrictChecking) {}
  };
  SmallVector<Candidate, 4> Candidates;

  // In decreasing order so we prefer newer versions to older versions.
  std::initializer_list<const char *> Versions = {"8.0", "7.5", "7.0"};
  auto &FS = D.getVFS();

  if (Args.hasArg(clang::driver::options::OPT_cuda_path_EQ)) {
    // An explicit --cuda-path= is the only candidate considered.
    Candidates.emplace_back(
        Args.getLastArgValue(clang::driver::options::OPT_cuda_path_EQ).str());
  } else if (HostTriple.isOSWindows()) {
    for (const char *Ver : Versions)
      Candidates.emplace_back(
          D.SysRoot + "/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v" +
          Ver);
  } else {
    if (!Args.hasArg(clang::driver::options::OPT_cuda_path_ignore_env)) {
      // Try to find ptxas binary. If the executable is located in a directory
      // called 'bin/', its parent directory might be a good guess for a valid
      // CUDA installation.
      // However, some distributions might installs 'ptxas' to /usr/bin. In that
      // case the candidate would be '/usr' which passes the following checks
      // because '/usr/include' exists as well. To avoid this case, we always
      // check for the directory potentially containing files for libdevice,
      // even if the user passes -nocudalib.
      if (llvm::ErrorOr<std::string> ptxas =
              llvm::sys::findProgramByName("ptxas")) {
        SmallString<256> ptxasAbsolutePath;
        llvm::sys::fs::real_path(*ptxas, ptxasAbsolutePath);

        StringRef ptxasDir = llvm::sys::path::parent_path(ptxasAbsolutePath);
        if (llvm::sys::path::filename(ptxasDir) == "bin")
          Candidates.emplace_back(
              std::string(llvm::sys::path::parent_path(ptxasDir)),
              /*StrictChecking=*/true);
      }
    }

    Candidates.emplace_back(D.SysRoot + "/usr/local/cuda");
    for (const char *Ver : Versions)
      Candidates.emplace_back(D.SysRoot + "/usr/local/cuda-" + Ver);

    Distro Dist(FS, llvm::Triple(llvm::sys::getProcessTriple()));
    if (Dist.IsDebian() || Dist.IsUbuntu())
      // Special case for Debian to have nvidia-cuda-toolkit work
      // out of the box. More info on http://bugs.debian.org/882505
      Candidates.emplace_back(D.SysRoot + "/usr/lib/cuda");
  }

  bool NoCudaLib = Args.hasArg(options::OPT_nogpulib);

  // Accept the first candidate that passes all the existence/version checks.
  for (const auto &Candidate : Candidates) {
    InstallPath = Candidate.Path;
    if (InstallPath.empty() || !FS.exists(InstallPath))
      continue;

    BinPath = InstallPath + "/bin";
    IncludePath = InstallPath + "/include";
    LibDevicePath = InstallPath + "/nvvm/libdevice";

    if (!(FS.exists(IncludePath) && FS.exists(BinPath)))
      continue;
    bool CheckLibDevice = (!NoCudaLib || Candidate.StrictChecking);
    if (CheckLibDevice && !FS.exists(LibDevicePath))
      continue;

    Version = CudaVersion::UNKNOWN;
    if (auto CudaHFile = FS.getBufferForFile(InstallPath + "/include/cuda.h"))
      Version = parseCudaHFile((*CudaHFile)->getBuffer());
    // As the last resort, make an educated guess between CUDA-7.0, which had
    // old-style libdevice bitcode, and an unknown recent CUDA version.
    if (Version == CudaVersion::UNKNOWN) {
      Version = FS.exists(LibDevicePath + "/libdevice.10.bc")
                    ? CudaVersion::NEW
                    : CudaVersion::CUDA_70;
    }

    if (Version >= CudaVersion::CUDA_90) {
      // CUDA-9+ uses single libdevice file for all GPU variants.
      std::string FilePath = LibDevicePath + "/libdevice.10.bc";
      if (FS.exists(FilePath)) {
        // Map every NVIDIA arch we know about to the single bitcode file.
        for (int Arch = (int)CudaArch::SM_30, E = (int)CudaArch::LAST; Arch < E;
             ++Arch) {
          CudaArch GpuArch = static_cast<CudaArch>(Arch);
          if (!IsNVIDIAGpuArch(GpuArch))
            continue;
          std::string GpuArchName(CudaArchToString(GpuArch));
          LibDeviceMap[GpuArchName] = FilePath;
        }
      }
    } else {
      // Pre-9.0: one libdevice file per compute capability; enumerate the
      // directory to find them.
      std::error_code EC;
      for (llvm::vfs::directory_iterator LI = FS.dir_begin(LibDevicePath, EC),
                                         LE;
           !EC && LI != LE; LI = LI.increment(EC)) {
        StringRef FilePath = LI->path();
        StringRef FileName = llvm::sys::path::filename(FilePath);
        // Process all bitcode filenames that look like
        // libdevice.compute_XX.YY.bc
        const StringRef LibDeviceName = "libdevice.";
        if (!(FileName.startswith(LibDeviceName) && FileName.endswith(".bc")))
          continue;
        StringRef GpuArch = FileName.slice(
            LibDeviceName.size(), FileName.find('.', LibDeviceName.size()));
        LibDeviceMap[GpuArch] = FilePath.str();
        // Insert map entries for specific devices with this compute
        // capability. NVCC's choice of the libdevice library version is
        // rather peculiar and depends on the CUDA version.
        if (GpuArch == "compute_20") {
          LibDeviceMap["sm_20"] = std::string(FilePath);
          LibDeviceMap["sm_21"] = std::string(FilePath);
          LibDeviceMap["sm_32"] = std::string(FilePath);
        } else if (GpuArch == "compute_30") {
          LibDeviceMap["sm_30"] = std::string(FilePath);
          if (Version < CudaVersion::CUDA_80) {
            // Before CUDA-8.0, sm_5x devices used the compute_30 libdevice.
            LibDeviceMap["sm_50"] = std::string(FilePath);
            LibDeviceMap["sm_52"] = std::string(FilePath);
            LibDeviceMap["sm_53"] = std::string(FilePath);
          }
          LibDeviceMap["sm_60"] = std::string(FilePath);
          LibDeviceMap["sm_61"] = std::string(FilePath);
          LibDeviceMap["sm_62"] = std::string(FilePath);
        } else if (GpuArch == "compute_35") {
          LibDeviceMap["sm_35"] = std::string(FilePath);
          LibDeviceMap["sm_37"] = std::string(FilePath);
        } else if (GpuArch == "compute_50") {
          if (Version >= CudaVersion::CUDA_80) {
            LibDeviceMap["sm_50"] = std::string(FilePath);
            LibDeviceMap["sm_52"] = std::string(FilePath);
            LibDeviceMap["sm_53"] = std::string(FilePath);
          }
        }
      }
    }

    // Check that we have found at least one libdevice that we can link in if
    // -nocudalib hasn't been specified.
    if (LibDeviceMap.empty() && !NoCudaLib)
      continue;

    IsValid = true;
    break;
  }
}
  255. void CudaInstallationDetector::AddCudaIncludeArgs(
  256. const ArgList &DriverArgs, ArgStringList &CC1Args) const {
  257. if (!DriverArgs.hasArg(options::OPT_nobuiltininc)) {
  258. // Add cuda_wrappers/* to our system include path. This lets us wrap
  259. // standard library headers.
  260. SmallString<128> P(D.ResourceDir);
  261. llvm::sys::path::append(P, "include");
  262. llvm::sys::path::append(P, "cuda_wrappers");
  263. CC1Args.push_back("-internal-isystem");
  264. CC1Args.push_back(DriverArgs.MakeArgString(P));
  265. }
  266. if (DriverArgs.hasArg(options::OPT_nogpuinc))
  267. return;
  268. if (!isValid()) {
  269. D.Diag(diag::err_drv_no_cuda_installation);
  270. return;
  271. }
  272. CC1Args.push_back("-include");
  273. CC1Args.push_back("__clang_cuda_runtime_wrapper.h");
  274. }
  275. void CudaInstallationDetector::CheckCudaVersionSupportsArch(
  276. CudaArch Arch) const {
  277. if (Arch == CudaArch::UNKNOWN || Version == CudaVersion::UNKNOWN ||
  278. ArchsWithBadVersion[(int)Arch])
  279. return;
  280. auto MinVersion = MinVersionForCudaArch(Arch);
  281. auto MaxVersion = MaxVersionForCudaArch(Arch);
  282. if (Version < MinVersion || Version > MaxVersion) {
  283. ArchsWithBadVersion[(int)Arch] = true;
  284. D.Diag(diag::err_drv_cuda_version_unsupported)
  285. << CudaArchToString(Arch) << CudaVersionToString(MinVersion)
  286. << CudaVersionToString(MaxVersion) << InstallPath
  287. << CudaVersionToString(Version);
  288. }
  289. }
  290. void CudaInstallationDetector::print(raw_ostream &OS) const {
  291. if (isValid())
  292. OS << "Found CUDA installation: " << InstallPath << ", version "
  293. << CudaVersionToString(Version) << "\n";
  294. }
namespace {
/// Debug info level for the NVPTX devices. We may need to emit a different
/// debug info level for the host and for the device itself. This type controls
/// emission of the debug info for the devices: it either disables debug info
/// emission completely, or emits debug directives only, or emits the same
/// debug info as for the host.
enum DeviceDebugInfoLevel {
  DisableDebugInfo,        /// Do not emit debug info for the devices.
  DebugDirectivesOnly,     /// Emit only debug directives.
  EmitSameDebugInfoAsHost, /// Use the same debug info level just like for the
                           /// host.
};
} // anonymous namespace
  308. /// Define debug info level for the NVPTX devices. If the debug info for both
  309. /// the host and device are disabled (-g0/-ggdb0 or no debug options at all). If
  310. /// only debug directives are requested for the both host and device
  311. /// (-gline-directvies-only), or the debug info only for the device is disabled
  312. /// (optimization is on and --cuda-noopt-device-debug was not specified), the
  313. /// debug directves only must be emitted for the device. Otherwise, use the same
  314. /// debug info level just like for the host (with the limitations of only
  315. /// supported DWARF2 standard).
  316. static DeviceDebugInfoLevel mustEmitDebugInfo(const ArgList &Args) {
  317. const Arg *A = Args.getLastArg(options::OPT_O_Group);
  318. bool IsDebugEnabled = !A || A->getOption().matches(options::OPT_O0) ||
  319. Args.hasFlag(options::OPT_cuda_noopt_device_debug,
  320. options::OPT_no_cuda_noopt_device_debug,
  321. /*Default=*/false);
  322. if (const Arg *A = Args.getLastArg(options::OPT_g_Group)) {
  323. const Option &Opt = A->getOption();
  324. if (Opt.matches(options::OPT_gN_Group)) {
  325. if (Opt.matches(options::OPT_g0) || Opt.matches(options::OPT_ggdb0))
  326. return DisableDebugInfo;
  327. if (Opt.matches(options::OPT_gline_directives_only))
  328. return DebugDirectivesOnly;
  329. }
  330. return IsDebugEnabled ? EmitSameDebugInfoAsHost : DebugDirectivesOnly;
  331. }
  332. return willEmitRemarks(Args) ? DebugDirectivesOnly : DisableDebugInfo;
  333. }
// Build the ptxas invocation that assembles a PTX input into a cubin/object
// for a single GPU architecture: picks the arch, maps -O/-g options onto
// ptxas flags, forwards -Xcuda-ptxas values, and registers the command.
void NVPTX::Assembler::ConstructJob(Compilation &C, const JobAction &JA,
                                    const InputInfo &Output,
                                    const InputInfoList &Inputs,
                                    const ArgList &Args,
                                    const char *LinkingOutput) const {
  const auto &TC =
      static_cast<const toolchains::NVPTXToolChain &>(getToolChain());
  assert(TC.getTriple().isNVPTX() && "Wrong platform");

  StringRef GPUArchName;
  // If this is a CUDA action we need to extract the device architecture
  // from the Job's associated architecture, otherwise use the -march=arch
  // option. This option may come from -Xopenmp-target flag or the default
  // value.
  if (JA.isDeviceOffloading(Action::OFK_Cuda)) {
    GPUArchName = JA.getOffloadingArch();
  } else {
    GPUArchName = Args.getLastArgValue(options::OPT_march_EQ);
    assert(!GPUArchName.empty() && "Must have an architecture passed in.");
  }

  // Obtain architecture from the action.
  CudaArch gpu_arch = StringToCudaArch(GPUArchName);
  assert(gpu_arch != CudaArch::UNKNOWN &&
         "Device action expected to have an architecture.");

  // Check that our installation's ptxas supports gpu_arch.
  if (!Args.hasArg(options::OPT_no_cuda_version_check)) {
    TC.CudaInstallation.CheckCudaVersionSupportsArch(gpu_arch);
  }

  ArgStringList CmdArgs;
  CmdArgs.push_back(TC.getTriple().isArch64Bit() ? "-m64" : "-m32");
  DeviceDebugInfoLevel DIKind = mustEmitDebugInfo(Args);
  if (DIKind == EmitSameDebugInfoAsHost) {
    // ptxas does not accept -g option if optimization is enabled, so
    // we ignore the compiler's -O* options if we want debug info.
    CmdArgs.push_back("-g");
    CmdArgs.push_back("--dont-merge-basicblocks");
    CmdArgs.push_back("--return-at-end");
  } else if (Arg *A = Args.getLastArg(options::OPT_O_Group)) {
    // Map the -O we received to -O{0,1,2,3}.
    //
    // TODO: Perhaps we should map host -O2 to ptxas -O3. -O3 is ptxas's
    // default, so it may correspond more closely to the spirit of clang -O2.

    // -O3 seems like the least-bad option when -Osomething is specified to
    // clang but it isn't handled below.
    StringRef OOpt = "3";
    if (A->getOption().matches(options::OPT_O4) ||
        A->getOption().matches(options::OPT_Ofast))
      OOpt = "3";
    else if (A->getOption().matches(options::OPT_O0))
      OOpt = "0";
    else if (A->getOption().matches(options::OPT_O)) {
      // -Os, -Oz, and -O(anything else) map to -O2, for lack of better options.
      OOpt = llvm::StringSwitch<const char *>(A->getValue())
                 .Case("1", "1")
                 .Case("2", "2")
                 .Case("3", "3")
                 .Case("s", "2")
                 .Case("z", "2")
                 .Default("2");
    }
    CmdArgs.push_back(Args.MakeArgString(llvm::Twine("-O") + OOpt));
  } else {
    // If no -O was passed, pass -O0 to ptxas -- no opt flag should correspond
    // to no optimizations, but ptxas's default is -O3.
    CmdArgs.push_back("-O0");
  }
  if (DIKind == DebugDirectivesOnly)
    CmdArgs.push_back("-lineinfo");

  // Pass -v to ptxas if it was passed to the driver.
  if (Args.hasArg(options::OPT_v))
    CmdArgs.push_back("-v");
  CmdArgs.push_back("--gpu-name");
  CmdArgs.push_back(Args.MakeArgString(CudaArchToString(gpu_arch)));
  CmdArgs.push_back("--output-file");
  std::string OutputFileName = TC.getInputFilename(Output);

  // If we are invoking `nvlink` internally we need to output a `.cubin` file.
  // FIXME: This should hopefully be removed if NVIDIA updates their tooling.
  if (!C.getInputArgs().getLastArg(options::OPT_c)) {
    SmallString<256> Filename(Output.getFilename());
    llvm::sys::path::replace_extension(Filename, "cubin");
    OutputFileName = Filename.str();
  }
  // Register the renamed output as a temp so the driver cleans it up.
  if (Output.isFilename() && OutputFileName != Output.getFilename())
    C.addTempFile(Args.MakeArgString(OutputFileName));

  CmdArgs.push_back(Args.MakeArgString(OutputFileName));
  for (const auto &II : Inputs)
    CmdArgs.push_back(Args.MakeArgString(II.getFilename()));

  // Forward any user-supplied -Xcuda-ptxas options verbatim.
  for (const auto &A : Args.getAllArgValues(options::OPT_Xcuda_ptxas))
    CmdArgs.push_back(Args.MakeArgString(A));

  bool Relocatable;
  if (JA.isOffloading(Action::OFK_OpenMP))
    // In OpenMP we need to generate relocatable code.
    Relocatable = Args.hasFlag(options::OPT_fopenmp_relocatable_target,
                               options::OPT_fnoopenmp_relocatable_target,
                               /*Default=*/true);
  else if (JA.isOffloading(Action::OFK_Cuda))
    // In CUDA we generate relocatable code by default.
    Relocatable = Args.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc,
                               /*Default=*/false);
  else
    // Otherwise, we are compiling directly and should create linkable output.
    Relocatable = true;

  if (Relocatable)
    CmdArgs.push_back("-c");

  const char *Exec;
  if (Arg *A = Args.getLastArg(options::OPT_ptxas_path_EQ))
    Exec = A->getValue();
  else
    Exec = Args.MakeArgString(TC.GetProgramPath("ptxas"));
  C.addCommand(std::make_unique<Command>(
      JA, *this,
      ResponseFileSupport{ResponseFileSupport::RF_Full, llvm::sys::WEM_UTF8,
                          "--options-file"},
      Exec, CmdArgs, Inputs, Output));
}
  448. static bool shouldIncludePTX(const ArgList &Args, const char *gpu_arch) {
  449. bool includePTX = true;
  450. for (Arg *A : Args) {
  451. if (!(A->getOption().matches(options::OPT_cuda_include_ptx_EQ) ||
  452. A->getOption().matches(options::OPT_no_cuda_include_ptx_EQ)))
  453. continue;
  454. A->claim();
  455. const StringRef ArchStr = A->getValue();
  456. if (ArchStr == "all" || ArchStr == gpu_arch) {
  457. includePTX = A->getOption().matches(options::OPT_cuda_include_ptx_EQ);
  458. continue;
  459. }
  460. }
  461. return includePTX;
  462. }
// All inputs to this linker must be from CudaDeviceActions, as we need to look
// at the Inputs' Actions in order to figure out which GPU architecture they
// correspond to.
//
// Builds the 'fatbinary' invocation that bundles per-arch cubin/PTX images
// into a single fat binary.
void NVPTX::FatBinary::ConstructJob(Compilation &C, const JobAction &JA,
                                    const InputInfo &Output,
                                    const InputInfoList &Inputs,
                                    const ArgList &Args,
                                    const char *LinkingOutput) const {
  const auto &TC =
      static_cast<const toolchains::CudaToolChain &>(getToolChain());
  assert(TC.getTriple().isNVPTX() && "Wrong platform");

  ArgStringList CmdArgs;
  // CUDA 10.0 and earlier fatbinary requires an explicit --cuda flag.
  if (TC.CudaInstallation.version() <= CudaVersion::CUDA_100)
    CmdArgs.push_back("--cuda");
  CmdArgs.push_back(TC.getTriple().isArch64Bit() ? "-64" : "-32");
  CmdArgs.push_back(Args.MakeArgString("--create"));
  CmdArgs.push_back(Args.MakeArgString(Output.getFilename()));
  if (mustEmitDebugInfo(Args) == EmitSameDebugInfoAsHost)
    CmdArgs.push_back("-g");

  // Emit one --image=profile=...,file=... option per device input, skipping
  // PTX inputs the user excluded via --no-cuda-include-ptx.
  for (const auto &II : Inputs) {
    auto *A = II.getAction();
    assert(A->getInputs().size() == 1 &&
           "Device offload action is expected to have a single input");
    const char *gpu_arch_str = A->getOffloadingArch();
    assert(gpu_arch_str &&
           "Device action expected to have associated a GPU architecture!");
    CudaArch gpu_arch = StringToCudaArch(gpu_arch_str);

    if (II.getType() == types::TY_PP_Asm &&
        !shouldIncludePTX(Args, gpu_arch_str))
      continue;
    // We need to pass an Arch of the form "sm_XX" for cubin files and
    // "compute_XX" for ptx.
    const char *Arch = (II.getType() == types::TY_PP_Asm)
                           ? CudaArchToVirtualArchString(gpu_arch)
                           : gpu_arch_str;
    CmdArgs.push_back(
        Args.MakeArgString(llvm::Twine("--image=profile=") + Arch +
                           ",file=" + getToolChain().getInputFilename(II)));
  }

  // Forward any user-supplied -Xcuda-fatbinary options verbatim.
  for (const auto &A : Args.getAllArgValues(options::OPT_Xcuda_fatbinary))
    CmdArgs.push_back(Args.MakeArgString(A));

  const char *Exec = Args.MakeArgString(TC.GetProgramPath("fatbinary"));
  C.addCommand(std::make_unique<Command>(
      JA, *this,
      ResponseFileSupport{ResponseFileSupport::RF_Full, llvm::sys::WEM_UTF8,
                          "--options-file"},
      Exec, CmdArgs, Inputs, Output));
}
// Build the 'nvlink' invocation that device-links the inputs for the arch
// given via -march. Inputs whose extension is not '.cubin' are renamed (or,
// for direct user inputs, copied) to '.cubin' so that nvlink performs device
// linking rather than RDC-mode linking.
void NVPTX::Linker::ConstructJob(Compilation &C, const JobAction &JA,
                                 const InputInfo &Output,
                                 const InputInfoList &Inputs,
                                 const ArgList &Args,
                                 const char *LinkingOutput) const {
  const auto &TC =
      static_cast<const toolchains::NVPTXToolChain &>(getToolChain());
  assert(TC.getTriple().isNVPTX() && "Wrong platform");

  ArgStringList CmdArgs;
  if (Output.isFilename()) {
    CmdArgs.push_back("-o");
    CmdArgs.push_back(Output.getFilename());
  } else {
    assert(Output.isNothing() && "Invalid output.");
  }
  if (mustEmitDebugInfo(Args) == EmitSameDebugInfoAsHost)
    CmdArgs.push_back("-g");

  if (Args.hasArg(options::OPT_v))
    CmdArgs.push_back("-v");

  StringRef GPUArch = Args.getLastArgValue(options::OPT_march_EQ);
  assert(!GPUArch.empty() && "At least one GPU Arch required for nvlink.");

  CmdArgs.push_back("-arch");
  CmdArgs.push_back(Args.MakeArgString(GPUArch));

  // Add paths specified in LIBRARY_PATH environment variable as -L options.
  addDirectoryList(Args, CmdArgs, "-L", "LIBRARY_PATH");

  // Add paths for the default clang library path.
  SmallString<256> DefaultLibPath =
      llvm::sys::path::parent_path(TC.getDriver().Dir);
  llvm::sys::path::append(DefaultLibPath, CLANG_INSTALL_LIBDIR_BASENAME);
  CmdArgs.push_back(Args.MakeArgString(Twine("-L") + DefaultLibPath));

  for (const auto &II : Inputs) {
    // nvlink cannot consume LLVM IR/bitcode; diagnose and skip such inputs.
    if (II.getType() == types::TY_LLVM_IR || II.getType() == types::TY_LTO_IR ||
        II.getType() == types::TY_LTO_BC || II.getType() == types::TY_LLVM_BC) {
      C.getDriver().Diag(diag::err_drv_no_linker_llvm_support)
          << getToolChain().getTripleString();
      continue;
    }

    // Currently, we only pass the input files to the linker, we do not pass
    // any libraries that may be valid only for the host.
    if (!II.isFilename())
      continue;

    // The 'nvlink' application performs RDC-mode linking when given a '.o'
    // file and device linking when given a '.cubin' file. We always want to
    // perform device linking, so just rename any '.o' files.
    // FIXME: This should hopefully be removed if NVIDIA updates their tooling.
    auto InputFile = getToolChain().getInputFilename(II);
    if (llvm::sys::path::extension(InputFile) != ".cubin") {
      // If there are no actions above this one then this is direct input and we
      // can copy it. Otherwise the input is internal so a `.cubin` file should
      // exist.
      if (II.getAction() && II.getAction()->getInputs().size() == 0) {
        const char *CubinF =
            Args.MakeArgString(getToolChain().getDriver().GetTemporaryPath(
                llvm::sys::path::stem(InputFile), "cubin"));
        // NOTE(review): a failed copy silently drops the input from the link
        // (best-effort behavior kept as-is).
        if (std::error_code EC =
                llvm::sys::fs::copy_file(InputFile, C.addTempFile(CubinF)))
          continue;

        CmdArgs.push_back(CubinF);
      } else {
        SmallString<256> Filename(InputFile);
        llvm::sys::path::replace_extension(Filename, "cubin");
        CmdArgs.push_back(Args.MakeArgString(Filename));
      }
    } else {
      CmdArgs.push_back(Args.MakeArgString(InputFile));
    }
  }

  C.addCommand(std::make_unique<Command>(
      JA, *this,
      ResponseFileSupport{ResponseFileSupport::RF_Full, llvm::sys::WEM_UTF8,
                          "--options-file"},
      Args.MakeArgString(getToolChain().GetProgramPath("nvlink")), CmdArgs,
      Inputs, Output));
}
// Compute the NVPTX target features (the "+ptxNN" feature selecting the PTX
// ISA version). An explicit --cuda-feature= wins; otherwise the PTX version
// is derived from the detected CUDA installation, falling back to +ptx42.
void NVPTX::getNVPTXTargetFeatures(const Driver &D, const llvm::Triple &Triple,
                                   const llvm::opt::ArgList &Args,
                                   std::vector<StringRef> &Features) {
  if (Args.hasArg(options::OPT_cuda_feature_EQ)) {
    StringRef PtxFeature =
        Args.getLastArgValue(options::OPT_cuda_feature_EQ, "+ptx42");
    Features.push_back(Args.MakeArgString(PtxFeature));
    return;
  }
  CudaInstallationDetector CudaInstallation(D, Triple, Args);

  // New CUDA versions often introduce new instructions that are only supported
  // by new PTX version, so we need to raise PTX level to enable them in NVPTX
  // back-end.
  const char *PtxFeature = nullptr;
  switch (CudaInstallation.version()) {
  // Each case maps a CUDA release to the highest PTX ISA it ships.
#define CASE_CUDA_VERSION(CUDA_VER, PTX_VER)                                   \
  case CudaVersion::CUDA_##CUDA_VER:                                           \
    PtxFeature = "+ptx" #PTX_VER;                                              \
    break;
    CASE_CUDA_VERSION(118, 78);
    CASE_CUDA_VERSION(117, 77);
    CASE_CUDA_VERSION(116, 76);
    CASE_CUDA_VERSION(115, 75);
    CASE_CUDA_VERSION(114, 74);
    CASE_CUDA_VERSION(113, 73);
    CASE_CUDA_VERSION(112, 72);
    CASE_CUDA_VERSION(111, 71);
    CASE_CUDA_VERSION(110, 70);
    CASE_CUDA_VERSION(102, 65);
    CASE_CUDA_VERSION(101, 64);
    CASE_CUDA_VERSION(100, 63);
    CASE_CUDA_VERSION(92, 61);
    CASE_CUDA_VERSION(91, 61);
    CASE_CUDA_VERSION(90, 60);
#undef CASE_CUDA_VERSION
  default:
    PtxFeature = "+ptx42";
  }
  Features.push_back(PtxFeature);
}
/// NVPTX toolchain. Our assembler is ptxas, and our linker is nvlink. This
/// operates as a stand-alone version of the NVPTX tools without the host
/// toolchain.
NVPTXToolChain::NVPTXToolChain(const Driver &D, const llvm::Triple &Triple,
                               const llvm::Triple &HostTriple,
                               const ArgList &Args)
    : ToolChain(D, Triple, Args), CudaInstallation(D, HostTriple, Args) {
  // Prefer binaries from a valid CUDA installation; the push order here sets
  // program-lookup priority, so the CUDA bin dir comes first.
  if (CudaInstallation.isValid()) {
    CudaInstallation.WarnIfUnsupportedVersion();
    getProgramPaths().push_back(std::string(CudaInstallation.getBinPath()));
  }
  // Lookup binaries into the driver directory, this is used to
  // discover the clang-offload-bundler executable.
  getProgramPaths().push_back(getDriver().Dir);
}
/// We only need the host triple to locate the CUDA binary utilities, use the
/// system's default triple if not provided.
NVPTXToolChain::NVPTXToolChain(const Driver &D, const llvm::Triple &Triple,
                               const ArgList &Args)
    : NVPTXToolChain(D, Triple,
                     llvm::Triple(llvm::sys::getDefaultTargetTriple()), Args) {}
  646. llvm::opt::DerivedArgList *
  647. NVPTXToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args,
  648. StringRef BoundArch,
  649. Action::OffloadKind DeviceOffloadKind) const {
  650. DerivedArgList *DAL =
  651. ToolChain::TranslateArgs(Args, BoundArch, DeviceOffloadKind);
  652. if (!DAL)
  653. DAL = new DerivedArgList(Args.getBaseArgs());
  654. const OptTable &Opts = getDriver().getOpts();
  655. for (Arg *A : Args)
  656. if (!llvm::is_contained(*DAL, A))
  657. DAL->append(A);
  658. if (!DAL->hasArg(options::OPT_march_EQ))
  659. DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_march_EQ),
  660. CudaArchToString(CudaArch::CudaDefault));
  661. return DAL;
  662. }
  663. bool NVPTXToolChain::supportsDebugInfoOption(const llvm::opt::Arg *A) const {
  664. const Option &O = A->getOption();
  665. return (O.matches(options::OPT_gN_Group) &&
  666. !O.matches(options::OPT_gmodules)) ||
  667. O.matches(options::OPT_g_Flag) ||
  668. O.matches(options::OPT_ggdbN_Group) || O.matches(options::OPT_ggdb) ||
  669. O.matches(options::OPT_gdwarf) || O.matches(options::OPT_gdwarf_2) ||
  670. O.matches(options::OPT_gdwarf_3) || O.matches(options::OPT_gdwarf_4) ||
  671. O.matches(options::OPT_gdwarf_5) ||
  672. O.matches(options::OPT_gcolumn_info);
  673. }
  674. void NVPTXToolChain::adjustDebugInfoKind(
  675. codegenoptions::DebugInfoKind &DebugInfoKind, const ArgList &Args) const {
  676. switch (mustEmitDebugInfo(Args)) {
  677. case DisableDebugInfo:
  678. DebugInfoKind = codegenoptions::NoDebugInfo;
  679. break;
  680. case DebugDirectivesOnly:
  681. DebugInfoKind = codegenoptions::DebugDirectivesOnly;
  682. break;
  683. case EmitSameDebugInfoAsHost:
  684. // Use same debug info level as the host.
  685. break;
  686. }
  687. }
/// CUDA toolchain. Our assembler is ptxas, and our "linker" is fatbinary,
/// which isn't properly a linker but nonetheless performs the step of stitching
/// together object files from the assembler into a single blob.
// Keeps a reference to the host toolchain so most queries (stdlib, includes,
// sanitizers, MSVC version, ...) can be delegated to it.
CudaToolChain::CudaToolChain(const Driver &D, const llvm::Triple &Triple,
                             const ToolChain &HostTC, const ArgList &Args)
    : NVPTXToolChain(D, Triple, HostTC.getTriple(), Args), HostTC(HostTC) {}
// Emit the device-side cc1 options: host options first, then CUDA/OpenMP
// device flags, the libdevice bitcode link, and (for OpenMP) the device RTL.
void CudaToolChain::addClangTargetOptions(
    const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args,
    Action::OffloadKind DeviceOffloadingKind) const {
  // Start from the host's target options; device flags are layered on top.
  HostTC.addClangTargetOptions(DriverArgs, CC1Args, DeviceOffloadingKind);

  StringRef GpuArch = DriverArgs.getLastArgValue(options::OPT_march_EQ);
  assert(!GpuArch.empty() && "Must have an explicit GPU arch.");
  assert((DeviceOffloadingKind == Action::OFK_OpenMP ||
          DeviceOffloadingKind == Action::OFK_Cuda) &&
         "Only OpenMP or CUDA offloading kinds are supported for NVIDIA GPUs.");

  if (DeviceOffloadingKind == Action::OFK_Cuda) {
    CC1Args.append(
        {"-fcuda-is-device", "-mllvm", "-enable-memcpyopt-without-libcalls"});

    if (DriverArgs.hasFlag(options::OPT_fcuda_approx_transcendentals,
                           options::OPT_fno_cuda_approx_transcendentals, false))
      CC1Args.push_back("-fcuda-approx-transcendentals");
  }

  // -nogpulib: the user asked us not to link any device library.
  if (DriverArgs.hasArg(options::OPT_nogpulib))
    return;

  // For OpenMP with -S, stop before any device-library linking as well.
  if (DeviceOffloadingKind == Action::OFK_OpenMP &&
      DriverArgs.hasArg(options::OPT_S))
    return;

  std::string LibDeviceFile = CudaInstallation.getLibDeviceFile(GpuArch);
  if (LibDeviceFile.empty()) {
    getDriver().Diag(diag::err_drv_no_cuda_libdevice) << GpuArch;
    return;
  }

  // Link NVIDIA's libdevice bitcode into the device compilation.
  CC1Args.push_back("-mlink-builtin-bitcode");
  CC1Args.push_back(DriverArgs.MakeArgString(LibDeviceFile));

  clang::CudaVersion CudaInstallationVersion = CudaInstallation.version();

  if (DriverArgs.hasFlag(options::OPT_fcuda_short_ptr,
                         options::OPT_fno_cuda_short_ptr, false))
    CC1Args.append({"-mllvm", "--nvptx-short-ptr"});

  // NOTE: UNKNOWN is the lowest enumerator, so this comparison always holds
  // and -target-sdk-version= is always forwarded (matching existing behavior).
  if (CudaInstallationVersion >= CudaVersion::UNKNOWN)
    CC1Args.push_back(
        DriverArgs.MakeArgString(Twine("-target-sdk-version=") +
                                 CudaVersionToString(CudaInstallationVersion)));

  if (DeviceOffloadingKind == Action::OFK_OpenMP) {
    // OpenMP offloading requires at least CUDA 9.2.
    if (CudaInstallationVersion < CudaVersion::CUDA_92) {
      getDriver().Diag(
          diag::err_drv_omp_offload_target_cuda_version_not_support)
          << CudaVersionToString(CudaInstallationVersion);
      return;
    }

    // Link the bitcode library late if we're using device LTO.
    if (getDriver().isUsingLTO(/* IsOffload */ true))
      return;

    addOpenMPDeviceRTL(getDriver(), DriverArgs, CC1Args, GpuArch.str(),
                       getTriple());
  }
}
  744. llvm::DenormalMode CudaToolChain::getDefaultDenormalModeForType(
  745. const llvm::opt::ArgList &DriverArgs, const JobAction &JA,
  746. const llvm::fltSemantics *FPType) const {
  747. if (JA.getOffloadingDeviceKind() == Action::OFK_Cuda) {
  748. if (FPType && FPType == &llvm::APFloat::IEEEsingle() &&
  749. DriverArgs.hasFlag(options::OPT_fgpu_flush_denormals_to_zero,
  750. options::OPT_fno_gpu_flush_denormals_to_zero, false))
  751. return llvm::DenormalMode::getPreserveSign();
  752. }
  753. assert(JA.getOffloadingDeviceKind() != Action::OFK_Host);
  754. return llvm::DenormalMode::getIEEE();
  755. }
  756. void CudaToolChain::AddCudaIncludeArgs(const ArgList &DriverArgs,
  757. ArgStringList &CC1Args) const {
  758. // Check our CUDA version if we're going to include the CUDA headers.
  759. if (!DriverArgs.hasArg(options::OPT_nogpuinc) &&
  760. !DriverArgs.hasArg(options::OPT_no_cuda_version_check)) {
  761. StringRef Arch = DriverArgs.getLastArgValue(options::OPT_march_EQ);
  762. assert(!Arch.empty() && "Must have an explicit GPU arch.");
  763. CudaInstallation.CheckCudaVersionSupportsArch(StringToCudaArch(Arch));
  764. }
  765. CudaInstallation.AddCudaIncludeArgs(DriverArgs, CC1Args);
  766. }
  767. std::string CudaToolChain::getInputFilename(const InputInfo &Input) const {
  768. // Only object files are changed, for example assembly files keep their .s
  769. // extensions. If the user requested device-only compilation don't change it.
  770. if (Input.getType() != types::TY_Object || getDriver().offloadDeviceOnly())
  771. return ToolChain::getInputFilename(Input);
  772. // Replace extension for object files with cubin because nvlink relies on
  773. // these particular file names.
  774. SmallString<256> Filename(ToolChain::getInputFilename(Input));
  775. llvm::sys::path::replace_extension(Filename, "cubin");
  776. return std::string(Filename.str());
  777. }
llvm::opt::DerivedArgList *
CudaToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args,
                             StringRef BoundArch,
                             Action::OffloadKind DeviceOffloadKind) const {
  // Let the host toolchain translate first; fall back to a fresh list over
  // the base arguments if it produced nothing.
  DerivedArgList *DAL =
      HostTC.TranslateArgs(Args, BoundArch, DeviceOffloadKind);
  if (!DAL)
    DAL = new DerivedArgList(Args.getBaseArgs());

  const OptTable &Opts = getDriver().getOpts();

  // For OpenMP device offloading, append derived arguments. Make sure
  // flags are not duplicated.
  // Also append the compute capability.
  if (DeviceOffloadKind == Action::OFK_OpenMP) {
    for (Arg *A : Args)
      if (!llvm::is_contained(*DAL, A))
        DAL->append(A);

    if (!DAL->hasArg(options::OPT_march_EQ)) {
      StringRef Arch = BoundArch;
      if (Arch.empty()) {
        // No arch was bound to this job: try to autodetect from the GPUs
        // present on the system.
        auto ArchsOrErr = getSystemGPUArchs(Args);
        if (!ArchsOrErr) {
          // Detection failed: report it and fall back to the default arch.
          std::string ErrMsg =
              llvm::formatv("{0}", llvm::fmt_consume(ArchsOrErr.takeError()));
          getDriver().Diag(diag::err_drv_undetermined_gpu_arch)
              << llvm::Triple::getArchTypeName(getArch()) << ErrMsg << "-march";
          Arch = CudaArchToString(CudaArch::CudaDefault);
        } else {
          // Use the first detected GPU's architecture.
          Arch = Args.MakeArgString(ArchsOrErr->front());
        }
      }
      DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_march_EQ), Arch);
    }

    return DAL;
  }

  // CUDA offloading: carry all arguments over, then pin -march= to the arch
  // this job was bound to, replacing any user-provided value.
  for (Arg *A : Args) {
    DAL->append(A);
  }

  if (!BoundArch.empty()) {
    DAL->eraseArg(options::OPT_march_EQ);
    DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_march_EQ),
                      BoundArch);
  }
  return DAL;
}
// Detect the NVIDIA GPUs available on the system by running the
// `nvptx-arch` tool (or the tool named via --nvptx-arch-tool=) and parsing
// its stdout, one architecture per line. Returns an error if the tool fails
// or no GPU is found.
Expected<SmallVector<std::string>>
CudaToolChain::getSystemGPUArchs(const ArgList &Args) const {
  // Detect NVIDIA GPUs available on the system.
  std::string Program;
  if (Arg *A = Args.getLastArg(options::OPT_nvptx_arch_tool_EQ))
    Program = A->getValue();
  else
    Program = GetProgramPath("nvptx-arch");

  auto StdoutOrErr = executeToolChainProgram(Program);
  if (!StdoutOrErr)
    return StdoutOrErr.takeError();

  // One architecture per output line; skip blanks (e.g. trailing newline).
  SmallVector<std::string, 1> GPUArchs;
  for (StringRef Arch : llvm::split((*StdoutOrErr)->getBuffer(), "\n"))
    if (!Arch.empty())
      GPUArchs.push_back(Arch.str());

  if (GPUArchs.empty())
    return llvm::createStringError(std::error_code(),
                                   "No NVIDIA GPU detected in the system");

  return std::move(GPUArchs);
}
// Device assembly is performed by ptxas, wrapped by NVPTX::Assembler.
Tool *NVPTXToolChain::buildAssembler() const {
  return new tools::NVPTX::Assembler(*this);
}
// Stand-alone NVPTX linking uses nvlink, wrapped by NVPTX::Linker.
Tool *NVPTXToolChain::buildLinker() const {
  return new tools::NVPTX::Linker(*this);
}
// The CUDA toolchain shares the NVPTX assembler (ptxas) wrapper.
Tool *CudaToolChain::buildAssembler() const {
  return new tools::NVPTX::Assembler(*this);
}
// For CUDA the "linker" is fatbinary: it bundles the per-arch device objects
// into a single blob rather than performing a real link.
Tool *CudaToolChain::buildLinker() const {
  return new tools::NVPTX::FatBinary(*this);
}
// Warning options mirror the host compilation's settings.
void CudaToolChain::addClangWarningOptions(ArgStringList &CC1Args) const {
  HostTC.addClangWarningOptions(CC1Args);
}
// The device compilation uses the same C++ standard library as the host.
ToolChain::CXXStdlibType
CudaToolChain::GetCXXStdlibType(const ArgList &Args) const {
  return HostTC.GetCXXStdlibType(Args);
}
  861. void CudaToolChain::AddClangSystemIncludeArgs(const ArgList &DriverArgs,
  862. ArgStringList &CC1Args) const {
  863. HostTC.AddClangSystemIncludeArgs(DriverArgs, CC1Args);
  864. if (!DriverArgs.hasArg(options::OPT_nogpuinc) && CudaInstallation.isValid())
  865. CC1Args.append(
  866. {"-internal-isystem",
  867. DriverArgs.MakeArgString(CudaInstallation.getIncludePath())});
  868. }
// C++ standard library include paths come straight from the host toolchain.
void CudaToolChain::AddClangCXXStdlibIncludeArgs(const ArgList &Args,
                                                 ArgStringList &CC1Args) const {
  HostTC.AddClangCXXStdlibIncludeArgs(Args, CC1Args);
}
// IAMCU include handling is delegated to the host toolchain.
void CudaToolChain::AddIAMCUIncludeArgs(const ArgList &Args,
                                        ArgStringList &CC1Args) const {
  HostTC.AddIAMCUIncludeArgs(Args, CC1Args);
}
SanitizerMask CudaToolChain::getSupportedSanitizers() const {
  // The CudaToolChain only supports sanitizers in the sense that it allows
  // sanitizer arguments on the command line if they are supported by the host
  // toolchain. The CudaToolChain will actually ignore any command line
  // arguments for any of these "supported" sanitizers. That means that no
  // sanitization of device code is actually supported at this time.
  //
  // This behavior is necessary because the host and device toolchains
  // invocations often share the command line, so the device toolchain must
  // tolerate flags meant only for the host toolchain.
  return HostTC.getSupportedSanitizers();
}
// The MSVC compatibility version is whatever the host toolchain computes.
VersionTuple CudaToolChain::computeMSVCVersion(const Driver *D,
                                               const ArgList &Args) const {
  return HostTC.computeMSVCVersion(D, Args);
}