HIPAMD.cpp 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421
  1. //===--- HIPAMD.cpp - HIP Tool and ToolChain Implementations ----*- C++ -*-===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. #include "HIPAMD.h"
  9. #include "AMDGPU.h"
  10. #include "CommonArgs.h"
  11. #include "HIPUtility.h"
  12. #include "clang/Basic/Cuda.h"
  13. #include "clang/Basic/TargetID.h"
  14. #include "clang/Driver/Compilation.h"
  15. #include "clang/Driver/Driver.h"
  16. #include "clang/Driver/DriverDiagnostic.h"
  17. #include "clang/Driver/InputInfo.h"
  18. #include "clang/Driver/Options.h"
  19. #include "clang/Driver/SanitizerArgs.h"
  20. #include "llvm/Support/Alignment.h"
  21. #include "llvm/Support/FileSystem.h"
  22. #include "llvm/Support/Path.h"
  23. #include "llvm/Support/TargetParser.h"
  24. using namespace clang::driver;
  25. using namespace clang::driver::toolchains;
  26. using namespace clang::driver::tools;
  27. using namespace clang;
  28. using namespace llvm::opt;
  // Path of the platform's null device: "nul" when _WIN32 or _WIN64 is defined,
  // "/dev/null" otherwise.
  #if !defined(_WIN32) && !defined(_WIN64)
  #define NULL_FILE "/dev/null"
  #else
  #define NULL_FILE "nul"
  #endif
  34. static bool shouldSkipSanitizeOption(const ToolChain &TC,
  35. const llvm::opt::ArgList &DriverArgs,
  36. StringRef TargetID,
  37. const llvm::opt::Arg *A) {
  38. // For actions without targetID, do nothing.
  39. if (TargetID.empty())
  40. return false;
  41. Option O = A->getOption();
  42. if (!O.matches(options::OPT_fsanitize_EQ))
  43. return false;
  44. if (!DriverArgs.hasFlag(options::OPT_fgpu_sanitize,
  45. options::OPT_fno_gpu_sanitize, true))
  46. return true;
  47. auto &Diags = TC.getDriver().getDiags();
  48. // For simplicity, we only allow -fsanitize=address
  49. SanitizerMask K = parseSanitizerValue(A->getValue(), /*AllowGroups=*/false);
  50. if (K != SanitizerKind::Address)
  51. return true;
  52. llvm::StringMap<bool> FeatureMap;
  53. auto OptionalGpuArch = parseTargetID(TC.getTriple(), TargetID, &FeatureMap);
  54. assert(OptionalGpuArch && "Invalid Target ID");
  55. (void)OptionalGpuArch;
  56. auto Loc = FeatureMap.find("xnack");
  57. if (Loc == FeatureMap.end() || !Loc->second) {
  58. Diags.Report(
  59. clang::diag::warn_drv_unsupported_option_for_offload_arch_req_feature)
  60. << A->getAsString(DriverArgs) << TargetID << "xnack+";
  61. return true;
  62. }
  63. return false;
  64. }
  65. void AMDGCN::Linker::constructLlvmLinkCommand(Compilation &C,
  66. const JobAction &JA,
  67. const InputInfoList &Inputs,
  68. const InputInfo &Output,
  69. const llvm::opt::ArgList &Args) const {
  70. // Construct llvm-link command.
  71. // The output from llvm-link is a bitcode file.
  72. ArgStringList LlvmLinkArgs;
  73. assert(!Inputs.empty() && "Must have at least one input.");
  74. LlvmLinkArgs.append({"-o", Output.getFilename()});
  75. for (auto Input : Inputs)
  76. LlvmLinkArgs.push_back(Input.getFilename());
  77. // Look for archive of bundled bitcode in arguments, and add temporary files
  78. // for the extracted archive of bitcode to inputs.
  79. auto TargetID = Args.getLastArgValue(options::OPT_mcpu_EQ);
  80. AddStaticDeviceLibsLinking(C, *this, JA, Inputs, Args, LlvmLinkArgs, "amdgcn",
  81. TargetID,
  82. /*IsBitCodeSDL=*/true,
  83. /*PostClangLink=*/false);
  84. const char *LlvmLink =
  85. Args.MakeArgString(getToolChain().GetProgramPath("llvm-link"));
  86. C.addCommand(std::make_unique<Command>(JA, *this, ResponseFileSupport::None(),
  87. LlvmLink, LlvmLinkArgs, Inputs,
  88. Output));
  89. }
  90. void AMDGCN::Linker::constructLldCommand(Compilation &C, const JobAction &JA,
  91. const InputInfoList &Inputs,
  92. const InputInfo &Output,
  93. const llvm::opt::ArgList &Args) const {
  94. // Construct lld command.
  95. // The output from ld.lld is an HSA code object file.
  96. ArgStringList LldArgs{"-flavor",
  97. "gnu",
  98. "-m",
  99. "elf64_amdgpu",
  100. "--no-undefined",
  101. "-shared",
  102. "-plugin-opt=-amdgpu-internalize-symbols"};
  103. auto &TC = getToolChain();
  104. auto &D = TC.getDriver();
  105. assert(!Inputs.empty() && "Must have at least one input.");
  106. bool IsThinLTO = D.getLTOMode(/*IsOffload=*/true) == LTOK_Thin;
  107. addLTOOptions(TC, Args, LldArgs, Output, Inputs[0], IsThinLTO);
  108. // Extract all the -m options
  109. std::vector<llvm::StringRef> Features;
  110. amdgpu::getAMDGPUTargetFeatures(D, TC.getTriple(), Args, Features);
  111. // Add features to mattr such as cumode
  112. std::string MAttrString = "-plugin-opt=-mattr=";
  113. for (auto OneFeature : unifyTargetFeatures(Features)) {
  114. MAttrString.append(Args.MakeArgString(OneFeature));
  115. if (OneFeature != Features.back())
  116. MAttrString.append(",");
  117. }
  118. if (!Features.empty())
  119. LldArgs.push_back(Args.MakeArgString(MAttrString));
  120. // ToDo: Remove this option after AMDGPU backend supports ISA-level linking.
  121. // Since AMDGPU backend currently does not support ISA-level linking, all
  122. // called functions need to be imported.
  123. if (IsThinLTO)
  124. LldArgs.push_back(Args.MakeArgString("-plugin-opt=-force-import-all"));
  125. for (const Arg *A : Args.filtered(options::OPT_mllvm)) {
  126. LldArgs.push_back(
  127. Args.MakeArgString(Twine("-plugin-opt=") + A->getValue(0)));
  128. }
  129. if (C.getDriver().isSaveTempsEnabled())
  130. LldArgs.push_back("-save-temps");
  131. addLinkerCompressDebugSectionsOption(TC, Args, LldArgs);
  132. for (auto *Arg : Args.filtered(options::OPT_Xoffload_linker))
  133. LldArgs.push_back(Arg->getValue(1));
  134. LldArgs.append({"-o", Output.getFilename()});
  135. for (auto Input : Inputs)
  136. LldArgs.push_back(Input.getFilename());
  137. // Look for archive of bundled bitcode in arguments, and add temporary files
  138. // for the extracted archive of bitcode to inputs.
  139. auto TargetID = Args.getLastArgValue(options::OPT_mcpu_EQ);
  140. AddStaticDeviceLibsLinking(C, *this, JA, Inputs, Args, LldArgs, "amdgcn",
  141. TargetID,
  142. /*IsBitCodeSDL=*/true,
  143. /*PostClangLink=*/false);
  144. const char *Lld = Args.MakeArgString(getToolChain().GetProgramPath("lld"));
  145. C.addCommand(std::make_unique<Command>(JA, *this, ResponseFileSupport::None(),
  146. Lld, LldArgs, Inputs, Output));
  147. }
  148. // For amdgcn the inputs of the linker job are device bitcode and output is
  149. // either an object file or bitcode (-emit-llvm). It calls llvm-link, opt,
  150. // llc, then lld steps.
  151. void AMDGCN::Linker::ConstructJob(Compilation &C, const JobAction &JA,
  152. const InputInfo &Output,
  153. const InputInfoList &Inputs,
  154. const ArgList &Args,
  155. const char *LinkingOutput) const {
  156. if (Inputs.size() > 0 &&
  157. Inputs[0].getType() == types::TY_Image &&
  158. JA.getType() == types::TY_Object)
  159. return HIP::constructGenerateObjFileFromHIPFatBinary(C, Output, Inputs,
  160. Args, JA, *this);
  161. if (JA.getType() == types::TY_HIP_FATBIN)
  162. return HIP::constructHIPFatbinCommand(C, JA, Output.getFilename(), Inputs,
  163. Args, *this);
  164. if (JA.getType() == types::TY_LLVM_BC)
  165. return constructLlvmLinkCommand(C, JA, Inputs, Output, Args);
  166. return constructLldCommand(C, JA, Inputs, Output, Args);
  167. }
  168. HIPAMDToolChain::HIPAMDToolChain(const Driver &D, const llvm::Triple &Triple,
  169. const ToolChain &HostTC, const ArgList &Args)
  170. : ROCMToolChain(D, Triple, Args), HostTC(HostTC) {
  171. // Lookup binaries into the driver directory, this is used to
  172. // discover the clang-offload-bundler executable.
  173. getProgramPaths().push_back(getDriver().Dir);
  174. // Diagnose unsupported sanitizer options only once.
  175. if (!Args.hasFlag(options::OPT_fgpu_sanitize, options::OPT_fno_gpu_sanitize,
  176. true))
  177. return;
  178. for (auto *A : Args.filtered(options::OPT_fsanitize_EQ)) {
  179. SanitizerMask K = parseSanitizerValue(A->getValue(), /*AllowGroups=*/false);
  180. if (K != SanitizerKind::Address)
  181. D.getDiags().Report(clang::diag::warn_drv_unsupported_option_for_target)
  182. << A->getAsString(Args) << getTriple().str();
  183. }
  184. }
  185. void HIPAMDToolChain::addClangTargetOptions(
  186. const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args,
  187. Action::OffloadKind DeviceOffloadingKind) const {
  188. HostTC.addClangTargetOptions(DriverArgs, CC1Args, DeviceOffloadingKind);
  189. assert(DeviceOffloadingKind == Action::OFK_HIP &&
  190. "Only HIP offloading kinds are supported for GPUs.");
  191. CC1Args.push_back("-fcuda-is-device");
  192. if (DriverArgs.hasFlag(options::OPT_fcuda_approx_transcendentals,
  193. options::OPT_fno_cuda_approx_transcendentals, false))
  194. CC1Args.push_back("-fcuda-approx-transcendentals");
  195. if (!DriverArgs.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc,
  196. false))
  197. CC1Args.append({"-mllvm", "-amdgpu-internalize-symbols"});
  198. StringRef MaxThreadsPerBlock =
  199. DriverArgs.getLastArgValue(options::OPT_gpu_max_threads_per_block_EQ);
  200. if (!MaxThreadsPerBlock.empty()) {
  201. std::string ArgStr =
  202. (Twine("--gpu-max-threads-per-block=") + MaxThreadsPerBlock).str();
  203. CC1Args.push_back(DriverArgs.MakeArgStringRef(ArgStr));
  204. }
  205. CC1Args.push_back("-fcuda-allow-variadic-functions");
  206. // Default to "hidden" visibility, as object level linking will not be
  207. // supported for the foreseeable future.
  208. if (!DriverArgs.hasArg(options::OPT_fvisibility_EQ,
  209. options::OPT_fvisibility_ms_compat)) {
  210. CC1Args.append({"-fvisibility=hidden"});
  211. CC1Args.push_back("-fapply-global-visibility-to-externs");
  212. }
  213. for (auto BCFile : getDeviceLibs(DriverArgs)) {
  214. CC1Args.push_back(BCFile.ShouldInternalize ? "-mlink-builtin-bitcode"
  215. : "-mlink-bitcode-file");
  216. CC1Args.push_back(DriverArgs.MakeArgString(BCFile.Path));
  217. }
  218. }
  219. llvm::opt::DerivedArgList *
  220. HIPAMDToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args,
  221. StringRef BoundArch,
  222. Action::OffloadKind DeviceOffloadKind) const {
  223. DerivedArgList *DAL =
  224. HostTC.TranslateArgs(Args, BoundArch, DeviceOffloadKind);
  225. if (!DAL)
  226. DAL = new DerivedArgList(Args.getBaseArgs());
  227. const OptTable &Opts = getDriver().getOpts();
  228. for (Arg *A : Args) {
  229. if (!shouldSkipArgument(A) &&
  230. !shouldSkipSanitizeOption(*this, Args, BoundArch, A))
  231. DAL->append(A);
  232. }
  233. if (!BoundArch.empty()) {
  234. DAL->eraseArg(options::OPT_mcpu_EQ);
  235. DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_mcpu_EQ), BoundArch);
  236. checkTargetID(*DAL);
  237. }
  238. return DAL;
  239. }
  240. Tool *HIPAMDToolChain::buildLinker() const {
  241. assert(getTriple().getArch() == llvm::Triple::amdgcn);
  242. return new tools::AMDGCN::Linker(*this);
  243. }
  244. void HIPAMDToolChain::addClangWarningOptions(ArgStringList &CC1Args) const {
  245. HostTC.addClangWarningOptions(CC1Args);
  246. }
  247. ToolChain::CXXStdlibType
  248. HIPAMDToolChain::GetCXXStdlibType(const ArgList &Args) const {
  249. return HostTC.GetCXXStdlibType(Args);
  250. }
  251. void HIPAMDToolChain::AddClangSystemIncludeArgs(const ArgList &DriverArgs,
  252. ArgStringList &CC1Args) const {
  253. HostTC.AddClangSystemIncludeArgs(DriverArgs, CC1Args);
  254. }
  255. void HIPAMDToolChain::AddClangCXXStdlibIncludeArgs(
  256. const ArgList &Args, ArgStringList &CC1Args) const {
  257. HostTC.AddClangCXXStdlibIncludeArgs(Args, CC1Args);
  258. }
  259. void HIPAMDToolChain::AddIAMCUIncludeArgs(const ArgList &Args,
  260. ArgStringList &CC1Args) const {
  261. HostTC.AddIAMCUIncludeArgs(Args, CC1Args);
  262. }
  263. void HIPAMDToolChain::AddHIPIncludeArgs(const ArgList &DriverArgs,
  264. ArgStringList &CC1Args) const {
  265. RocmInstallation.AddHIPIncludeArgs(DriverArgs, CC1Args);
  266. }
  267. SanitizerMask HIPAMDToolChain::getSupportedSanitizers() const {
  268. // The HIPAMDToolChain only supports sanitizers in the sense that it allows
  269. // sanitizer arguments on the command line if they are supported by the host
  270. // toolchain. The HIPAMDToolChain will actually ignore any command line
  271. // arguments for any of these "supported" sanitizers. That means that no
  272. // sanitization of device code is actually supported at this time.
  273. //
  274. // This behavior is necessary because the host and device toolchains
  275. // invocations often share the command line, so the device toolchain must
  276. // tolerate flags meant only for the host toolchain.
  277. return HostTC.getSupportedSanitizers();
  278. }
  279. VersionTuple HIPAMDToolChain::computeMSVCVersion(const Driver *D,
  280. const ArgList &Args) const {
  281. return HostTC.computeMSVCVersion(D, Args);
  282. }
  283. llvm::SmallVector<ToolChain::BitCodeLibraryInfo, 12>
  284. HIPAMDToolChain::getDeviceLibs(const llvm::opt::ArgList &DriverArgs) const {
  285. llvm::SmallVector<BitCodeLibraryInfo, 12> BCLibs;
  286. if (DriverArgs.hasArg(options::OPT_nogpulib))
  287. return {};
  288. ArgStringList LibraryPaths;
  289. // Find in --hip-device-lib-path and HIP_LIBRARY_PATH.
  290. for (StringRef Path : RocmInstallation.getRocmDeviceLibPathArg())
  291. LibraryPaths.push_back(DriverArgs.MakeArgString(Path));
  292. addDirectoryList(DriverArgs, LibraryPaths, "", "HIP_DEVICE_LIB_PATH");
  293. // Maintain compatability with --hip-device-lib.
  294. auto BCLibArgs = DriverArgs.getAllArgValues(options::OPT_hip_device_lib_EQ);
  295. if (!BCLibArgs.empty()) {
  296. llvm::for_each(BCLibArgs, [&](StringRef BCName) {
  297. StringRef FullName;
  298. for (StringRef LibraryPath : LibraryPaths) {
  299. SmallString<128> Path(LibraryPath);
  300. llvm::sys::path::append(Path, BCName);
  301. FullName = Path;
  302. if (llvm::sys::fs::exists(FullName)) {
  303. BCLibs.push_back(FullName);
  304. return;
  305. }
  306. }
  307. getDriver().Diag(diag::err_drv_no_such_file) << BCName;
  308. });
  309. } else {
  310. if (!RocmInstallation.hasDeviceLibrary()) {
  311. getDriver().Diag(diag::err_drv_no_rocm_device_lib) << 0;
  312. return {};
  313. }
  314. StringRef GpuArch = getGPUArch(DriverArgs);
  315. assert(!GpuArch.empty() && "Must have an explicit GPU arch.");
  316. // If --hip-device-lib is not set, add the default bitcode libraries.
  317. if (DriverArgs.hasFlag(options::OPT_fgpu_sanitize,
  318. options::OPT_fno_gpu_sanitize, true) &&
  319. getSanitizerArgs(DriverArgs).needsAsanRt()) {
  320. auto AsanRTL = RocmInstallation.getAsanRTLPath();
  321. if (AsanRTL.empty()) {
  322. unsigned DiagID = getDriver().getDiags().getCustomDiagID(
  323. DiagnosticsEngine::Error,
  324. "AMDGPU address sanitizer runtime library (asanrtl) is not found. "
  325. "Please install ROCm device library which supports address "
  326. "sanitizer");
  327. getDriver().Diag(DiagID);
  328. return {};
  329. } else
  330. BCLibs.emplace_back(AsanRTL, /*ShouldInternalize=*/false);
  331. }
  332. // Add the HIP specific bitcode library.
  333. BCLibs.push_back(RocmInstallation.getHIPPath());
  334. // Add common device libraries like ocml etc.
  335. for (StringRef N : getCommonDeviceLibNames(DriverArgs, GpuArch.str()))
  336. BCLibs.emplace_back(N);
  337. // Add instrument lib.
  338. auto InstLib =
  339. DriverArgs.getLastArgValue(options::OPT_gpu_instrument_lib_EQ);
  340. if (InstLib.empty())
  341. return BCLibs;
  342. if (llvm::sys::fs::exists(InstLib))
  343. BCLibs.push_back(InstLib);
  344. else
  345. getDriver().Diag(diag::err_drv_no_such_file) << InstLib;
  346. }
  347. return BCLibs;
  348. }
  349. void HIPAMDToolChain::checkTargetID(
  350. const llvm::opt::ArgList &DriverArgs) const {
  351. auto PTID = getParsedTargetID(DriverArgs);
  352. if (PTID.OptionalTargetID && !PTID.OptionalGPUArch) {
  353. getDriver().Diag(clang::diag::err_drv_bad_target_id)
  354. << *PTID.OptionalTargetID;
  355. }
  356. }