AMDGPU.cpp 34 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915
  1. //===--- AMDGPU.cpp - AMDGPU ToolChain Implementations ----------*- C++ -*-===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. #include "AMDGPU.h"
  9. #include "CommonArgs.h"
  10. #include "clang/Basic/TargetID.h"
  11. #include "clang/Config/config.h"
  12. #include "clang/Driver/Compilation.h"
  13. #include "clang/Driver/Distro.h"
  14. #include "clang/Driver/DriverDiagnostic.h"
  15. #include "clang/Driver/InputInfo.h"
  16. #include "clang/Driver/Options.h"
  17. #include "llvm/Option/ArgList.h"
  18. #include "llvm/Support/Error.h"
  19. #include "llvm/Support/Host.h"
  20. #include "llvm/Support/LineIterator.h"
  21. #include "llvm/Support/Path.h"
  22. #include "llvm/Support/Process.h"
  23. #include "llvm/Support/VirtualFileSystem.h"
  24. #include <optional>
  25. #include <system_error>
  26. using namespace clang::driver;
  27. using namespace clang::driver::tools;
  28. using namespace clang::driver::toolchains;
  29. using namespace clang;
  30. using namespace llvm::opt;
  31. // Look for sub-directory starts with PackageName under ROCm candidate path.
  32. // If there is one and only one matching sub-directory found, append the
  33. // sub-directory to Path. If there is no matching sub-directory or there are
  34. // more than one matching sub-directories, diagnose them. Returns the full
  35. // path of the package if there is only one matching sub-directory, otherwise
  36. // returns an empty string.
  37. llvm::SmallString<0>
  38. RocmInstallationDetector::findSPACKPackage(const Candidate &Cand,
  39. StringRef PackageName) {
  40. if (!Cand.isSPACK())
  41. return {};
  42. std::error_code EC;
  43. std::string Prefix = Twine(PackageName + "-" + Cand.SPACKReleaseStr).str();
  44. llvm::SmallVector<llvm::SmallString<0>> SubDirs;
  45. for (llvm::vfs::directory_iterator File = D.getVFS().dir_begin(Cand.Path, EC),
  46. FileEnd;
  47. File != FileEnd && !EC; File.increment(EC)) {
  48. llvm::StringRef FileName = llvm::sys::path::filename(File->path());
  49. if (FileName.startswith(Prefix)) {
  50. SubDirs.push_back(FileName);
  51. if (SubDirs.size() > 1)
  52. break;
  53. }
  54. }
  55. if (SubDirs.size() == 1) {
  56. auto PackagePath = Cand.Path;
  57. llvm::sys::path::append(PackagePath, SubDirs[0]);
  58. return PackagePath;
  59. }
  60. if (SubDirs.size() == 0 && Verbose) {
  61. llvm::errs() << "SPACK package " << Prefix << " not found at " << Cand.Path
  62. << '\n';
  63. return {};
  64. }
  65. if (SubDirs.size() > 1 && Verbose) {
  66. llvm::errs() << "Cannot use SPACK package " << Prefix << " at " << Cand.Path
  67. << " due to multiple installations for the same version\n";
  68. }
  69. return {};
  70. }
  71. void RocmInstallationDetector::scanLibDevicePath(llvm::StringRef Path) {
  72. assert(!Path.empty());
  73. const StringRef Suffix(".bc");
  74. const StringRef Suffix2(".amdgcn.bc");
  75. std::error_code EC;
  76. for (llvm::vfs::directory_iterator LI = D.getVFS().dir_begin(Path, EC), LE;
  77. !EC && LI != LE; LI = LI.increment(EC)) {
  78. StringRef FilePath = LI->path();
  79. StringRef FileName = llvm::sys::path::filename(FilePath);
  80. if (!FileName.endswith(Suffix))
  81. continue;
  82. StringRef BaseName;
  83. if (FileName.endswith(Suffix2))
  84. BaseName = FileName.drop_back(Suffix2.size());
  85. else if (FileName.endswith(Suffix))
  86. BaseName = FileName.drop_back(Suffix.size());
  87. const StringRef ABIVersionPrefix = "oclc_abi_version_";
  88. if (BaseName == "ocml") {
  89. OCML = FilePath;
  90. } else if (BaseName == "ockl") {
  91. OCKL = FilePath;
  92. } else if (BaseName == "opencl") {
  93. OpenCL = FilePath;
  94. } else if (BaseName == "hip") {
  95. HIP = FilePath;
  96. } else if (BaseName == "asanrtl") {
  97. AsanRTL = FilePath;
  98. } else if (BaseName == "oclc_finite_only_off") {
  99. FiniteOnly.Off = FilePath;
  100. } else if (BaseName == "oclc_finite_only_on") {
  101. FiniteOnly.On = FilePath;
  102. } else if (BaseName == "oclc_daz_opt_on") {
  103. DenormalsAreZero.On = FilePath;
  104. } else if (BaseName == "oclc_daz_opt_off") {
  105. DenormalsAreZero.Off = FilePath;
  106. } else if (BaseName == "oclc_correctly_rounded_sqrt_on") {
  107. CorrectlyRoundedSqrt.On = FilePath;
  108. } else if (BaseName == "oclc_correctly_rounded_sqrt_off") {
  109. CorrectlyRoundedSqrt.Off = FilePath;
  110. } else if (BaseName == "oclc_unsafe_math_on") {
  111. UnsafeMath.On = FilePath;
  112. } else if (BaseName == "oclc_unsafe_math_off") {
  113. UnsafeMath.Off = FilePath;
  114. } else if (BaseName == "oclc_wavefrontsize64_on") {
  115. WavefrontSize64.On = FilePath;
  116. } else if (BaseName == "oclc_wavefrontsize64_off") {
  117. WavefrontSize64.Off = FilePath;
  118. } else if (BaseName.startswith(ABIVersionPrefix)) {
  119. unsigned ABIVersionNumber;
  120. if (BaseName.drop_front(ABIVersionPrefix.size())
  121. .getAsInteger(/*Redex=*/0, ABIVersionNumber))
  122. continue;
  123. ABIVersionMap[ABIVersionNumber] = FilePath.str();
  124. } else {
  125. // Process all bitcode filenames that look like
  126. // ocl_isa_version_XXX.amdgcn.bc
  127. const StringRef DeviceLibPrefix = "oclc_isa_version_";
  128. if (!BaseName.startswith(DeviceLibPrefix))
  129. continue;
  130. StringRef IsaVersionNumber =
  131. BaseName.drop_front(DeviceLibPrefix.size());
  132. llvm::Twine GfxName = Twine("gfx") + IsaVersionNumber;
  133. SmallString<8> Tmp;
  134. LibDeviceMap.insert(
  135. std::make_pair(GfxName.toStringRef(Tmp), FilePath.str()));
  136. }
  137. }
  138. }
  139. // Parse and extract version numbers from `.hipVersion`. Return `true` if
  140. // the parsing fails.
  141. bool RocmInstallationDetector::parseHIPVersionFile(llvm::StringRef V) {
  142. SmallVector<StringRef, 4> VersionParts;
  143. V.split(VersionParts, '\n');
  144. unsigned Major = ~0U;
  145. unsigned Minor = ~0U;
  146. for (auto Part : VersionParts) {
  147. auto Splits = Part.rtrim().split('=');
  148. if (Splits.first == "HIP_VERSION_MAJOR") {
  149. if (Splits.second.getAsInteger(0, Major))
  150. return true;
  151. } else if (Splits.first == "HIP_VERSION_MINOR") {
  152. if (Splits.second.getAsInteger(0, Minor))
  153. return true;
  154. } else if (Splits.first == "HIP_VERSION_PATCH")
  155. VersionPatch = Splits.second.str();
  156. }
  157. if (Major == ~0U || Minor == ~0U)
  158. return true;
  159. VersionMajorMinor = llvm::VersionTuple(Major, Minor);
  160. DetectedVersion =
  161. (Twine(Major) + "." + Twine(Minor) + "." + VersionPatch).str();
  162. return false;
  163. }
  164. /// \returns a list of candidate directories for ROCm installation, which is
  165. /// cached and populated only once.
  166. const SmallVectorImpl<RocmInstallationDetector::Candidate> &
  167. RocmInstallationDetector::getInstallationPathCandidates() {
  168. // Return the cached candidate list if it has already been populated.
  169. if (!ROCmSearchDirs.empty())
  170. return ROCmSearchDirs;
  171. auto DoPrintROCmSearchDirs = [&]() {
  172. if (PrintROCmSearchDirs)
  173. for (auto Cand : ROCmSearchDirs) {
  174. llvm::errs() << "ROCm installation search path";
  175. if (Cand.isSPACK())
  176. llvm::errs() << " (Spack " << Cand.SPACKReleaseStr << ")";
  177. llvm::errs() << ": " << Cand.Path << '\n';
  178. }
  179. };
  180. // For candidate specified by --rocm-path we do not do strict check, i.e.,
  181. // checking existence of HIP version file and device library files.
  182. if (!RocmPathArg.empty()) {
  183. ROCmSearchDirs.emplace_back(RocmPathArg.str());
  184. DoPrintROCmSearchDirs();
  185. return ROCmSearchDirs;
  186. } else if (std::optional<std::string> RocmPathEnv =
  187. llvm::sys::Process::GetEnv("ROCM_PATH")) {
  188. if (!RocmPathEnv->empty()) {
  189. ROCmSearchDirs.emplace_back(std::move(*RocmPathEnv));
  190. DoPrintROCmSearchDirs();
  191. return ROCmSearchDirs;
  192. }
  193. }
  194. // Try to find relative to the compiler binary.
  195. const char *InstallDir = D.getInstalledDir();
  196. // Check both a normal Unix prefix position of the clang binary, as well as
  197. // the Windows-esque layout the ROCm packages use with the host architecture
  198. // subdirectory of bin.
  199. auto DeduceROCmPath = [](StringRef ClangPath) {
  200. // Strip off directory (usually bin)
  201. StringRef ParentDir = llvm::sys::path::parent_path(ClangPath);
  202. StringRef ParentName = llvm::sys::path::filename(ParentDir);
  203. // Some builds use bin/{host arch}, so go up again.
  204. if (ParentName == "bin") {
  205. ParentDir = llvm::sys::path::parent_path(ParentDir);
  206. ParentName = llvm::sys::path::filename(ParentDir);
  207. }
  208. // Detect ROCm packages built with SPACK.
  209. // clang is installed at
  210. // <rocm_root>/llvm-amdgpu-<rocm_release_string>-<hash>/bin directory.
  211. // We only consider the parent directory of llvm-amdgpu package as ROCm
  212. // installation candidate for SPACK.
  213. if (ParentName.startswith("llvm-amdgpu-")) {
  214. auto SPACKPostfix =
  215. ParentName.drop_front(strlen("llvm-amdgpu-")).split('-');
  216. auto SPACKReleaseStr = SPACKPostfix.first;
  217. if (!SPACKReleaseStr.empty()) {
  218. ParentDir = llvm::sys::path::parent_path(ParentDir);
  219. return Candidate(ParentDir.str(), /*StrictChecking=*/true,
  220. SPACKReleaseStr);
  221. }
  222. }
  223. // Some versions of the rocm llvm package install to /opt/rocm/llvm/bin
  224. // Some versions of the aomp package install to /opt/rocm/aomp/bin
  225. if (ParentName == "llvm" || ParentName.startswith("aomp"))
  226. ParentDir = llvm::sys::path::parent_path(ParentDir);
  227. return Candidate(ParentDir.str(), /*StrictChecking=*/true);
  228. };
  229. // Deduce ROCm path by the path used to invoke clang. Do not resolve symbolic
  230. // link of clang itself.
  231. ROCmSearchDirs.emplace_back(DeduceROCmPath(InstallDir));
  232. // Deduce ROCm path by the real path of the invoked clang, resolving symbolic
  233. // link of clang itself.
  234. llvm::SmallString<256> RealClangPath;
  235. llvm::sys::fs::real_path(D.getClangProgramPath(), RealClangPath);
  236. auto ParentPath = llvm::sys::path::parent_path(RealClangPath);
  237. if (ParentPath != InstallDir)
  238. ROCmSearchDirs.emplace_back(DeduceROCmPath(ParentPath));
  239. // Device library may be installed in clang or resource directory.
  240. auto ClangRoot = llvm::sys::path::parent_path(InstallDir);
  241. auto RealClangRoot = llvm::sys::path::parent_path(ParentPath);
  242. ROCmSearchDirs.emplace_back(ClangRoot.str(), /*StrictChecking=*/true);
  243. if (RealClangRoot != ClangRoot)
  244. ROCmSearchDirs.emplace_back(RealClangRoot.str(), /*StrictChecking=*/true);
  245. ROCmSearchDirs.emplace_back(D.ResourceDir,
  246. /*StrictChecking=*/true);
  247. ROCmSearchDirs.emplace_back(D.SysRoot + "/opt/rocm",
  248. /*StrictChecking=*/true);
  249. // Find the latest /opt/rocm-{release} directory.
  250. std::error_code EC;
  251. std::string LatestROCm;
  252. llvm::VersionTuple LatestVer;
  253. // Get ROCm version from ROCm directory name.
  254. auto GetROCmVersion = [](StringRef DirName) {
  255. llvm::VersionTuple V;
  256. std::string VerStr = DirName.drop_front(strlen("rocm-")).str();
  257. // The ROCm directory name follows the format of
  258. // rocm-{major}.{minor}.{subMinor}[-{build}]
  259. std::replace(VerStr.begin(), VerStr.end(), '-', '.');
  260. V.tryParse(VerStr);
  261. return V;
  262. };
  263. for (llvm::vfs::directory_iterator
  264. File = D.getVFS().dir_begin(D.SysRoot + "/opt", EC),
  265. FileEnd;
  266. File != FileEnd && !EC; File.increment(EC)) {
  267. llvm::StringRef FileName = llvm::sys::path::filename(File->path());
  268. if (!FileName.startswith("rocm-"))
  269. continue;
  270. if (LatestROCm.empty()) {
  271. LatestROCm = FileName.str();
  272. LatestVer = GetROCmVersion(LatestROCm);
  273. continue;
  274. }
  275. auto Ver = GetROCmVersion(FileName);
  276. if (LatestVer < Ver) {
  277. LatestROCm = FileName.str();
  278. LatestVer = Ver;
  279. }
  280. }
  281. if (!LatestROCm.empty())
  282. ROCmSearchDirs.emplace_back(D.SysRoot + "/opt/" + LatestROCm,
  283. /*StrictChecking=*/true);
  284. Distro Dist(D.getVFS(), llvm::Triple(llvm::sys::getProcessTriple()));
  285. if (Dist.IsDebian() || Dist.IsRedhat()) {
  286. ROCmSearchDirs.emplace_back(D.SysRoot + "/usr/local",
  287. /*StrictChecking=*/true);
  288. ROCmSearchDirs.emplace_back(D.SysRoot + "/usr",
  289. /*StrictChecking=*/true);
  290. }
  291. DoPrintROCmSearchDirs();
  292. return ROCmSearchDirs;
  293. }
  294. RocmInstallationDetector::RocmInstallationDetector(
  295. const Driver &D, const llvm::Triple &HostTriple,
  296. const llvm::opt::ArgList &Args, bool DetectHIPRuntime, bool DetectDeviceLib)
  297. : D(D) {
  298. Verbose = Args.hasArg(options::OPT_v);
  299. RocmPathArg = Args.getLastArgValue(clang::driver::options::OPT_rocm_path_EQ);
  300. PrintROCmSearchDirs =
  301. Args.hasArg(clang::driver::options::OPT_print_rocm_search_dirs);
  302. RocmDeviceLibPathArg =
  303. Args.getAllArgValues(clang::driver::options::OPT_rocm_device_lib_path_EQ);
  304. HIPPathArg = Args.getLastArgValue(clang::driver::options::OPT_hip_path_EQ);
  305. if (auto *A = Args.getLastArg(clang::driver::options::OPT_hip_version_EQ)) {
  306. HIPVersionArg = A->getValue();
  307. unsigned Major = ~0U;
  308. unsigned Minor = ~0U;
  309. SmallVector<StringRef, 3> Parts;
  310. HIPVersionArg.split(Parts, '.');
  311. if (Parts.size())
  312. Parts[0].getAsInteger(0, Major);
  313. if (Parts.size() > 1)
  314. Parts[1].getAsInteger(0, Minor);
  315. if (Parts.size() > 2)
  316. VersionPatch = Parts[2].str();
  317. if (VersionPatch.empty())
  318. VersionPatch = "0";
  319. if (Major != ~0U && Minor == ~0U)
  320. Minor = 0;
  321. if (Major == ~0U || Minor == ~0U)
  322. D.Diag(diag::err_drv_invalid_value)
  323. << A->getAsString(Args) << HIPVersionArg;
  324. VersionMajorMinor = llvm::VersionTuple(Major, Minor);
  325. DetectedVersion =
  326. (Twine(Major) + "." + Twine(Minor) + "." + VersionPatch).str();
  327. } else {
  328. VersionPatch = DefaultVersionPatch;
  329. VersionMajorMinor =
  330. llvm::VersionTuple(DefaultVersionMajor, DefaultVersionMinor);
  331. DetectedVersion = (Twine(DefaultVersionMajor) + "." +
  332. Twine(DefaultVersionMinor) + "." + VersionPatch)
  333. .str();
  334. }
  335. if (DetectHIPRuntime)
  336. detectHIPRuntime();
  337. if (DetectDeviceLib)
  338. detectDeviceLibrary();
  339. }
  340. void RocmInstallationDetector::detectDeviceLibrary() {
  341. assert(LibDevicePath.empty());
  342. if (!RocmDeviceLibPathArg.empty())
  343. LibDevicePath = RocmDeviceLibPathArg[RocmDeviceLibPathArg.size() - 1];
  344. else if (std::optional<std::string> LibPathEnv =
  345. llvm::sys::Process::GetEnv("HIP_DEVICE_LIB_PATH"))
  346. LibDevicePath = std::move(*LibPathEnv);
  347. auto &FS = D.getVFS();
  348. if (!LibDevicePath.empty()) {
  349. // Maintain compatability with HIP flag/envvar pointing directly at the
  350. // bitcode library directory. This points directly at the library path instead
  351. // of the rocm root installation.
  352. if (!FS.exists(LibDevicePath))
  353. return;
  354. scanLibDevicePath(LibDevicePath);
  355. HasDeviceLibrary = allGenericLibsValid() && !LibDeviceMap.empty();
  356. return;
  357. }
  358. // Check device library exists at the given path.
  359. auto CheckDeviceLib = [&](StringRef Path, bool StrictChecking) {
  360. bool CheckLibDevice = (!NoBuiltinLibs || StrictChecking);
  361. if (CheckLibDevice && !FS.exists(Path))
  362. return false;
  363. scanLibDevicePath(Path);
  364. if (!NoBuiltinLibs) {
  365. // Check that the required non-target libraries are all available.
  366. if (!allGenericLibsValid())
  367. return false;
  368. // Check that we have found at least one libdevice that we can link in
  369. // if -nobuiltinlib hasn't been specified.
  370. if (LibDeviceMap.empty())
  371. return false;
  372. }
  373. return true;
  374. };
  375. // Find device libraries in <LLVM_DIR>/lib/clang/<ver>/lib/amdgcn/bitcode
  376. LibDevicePath = D.ResourceDir;
  377. llvm::sys::path::append(LibDevicePath, CLANG_INSTALL_LIBDIR_BASENAME,
  378. "amdgcn", "bitcode");
  379. HasDeviceLibrary = CheckDeviceLib(LibDevicePath, true);
  380. if (HasDeviceLibrary)
  381. return;
  382. // Find device libraries in a legacy ROCm directory structure
  383. // ${ROCM_ROOT}/amdgcn/bitcode/*
  384. auto &ROCmDirs = getInstallationPathCandidates();
  385. for (const auto &Candidate : ROCmDirs) {
  386. LibDevicePath = Candidate.Path;
  387. llvm::sys::path::append(LibDevicePath, "amdgcn", "bitcode");
  388. HasDeviceLibrary = CheckDeviceLib(LibDevicePath, Candidate.StrictChecking);
  389. if (HasDeviceLibrary)
  390. return;
  391. }
  392. }
  393. void RocmInstallationDetector::detectHIPRuntime() {
  394. SmallVector<Candidate, 4> HIPSearchDirs;
  395. if (!HIPPathArg.empty())
  396. HIPSearchDirs.emplace_back(HIPPathArg.str(), /*StrictChecking=*/true);
  397. else
  398. HIPSearchDirs.append(getInstallationPathCandidates());
  399. auto &FS = D.getVFS();
  400. for (const auto &Candidate : HIPSearchDirs) {
  401. InstallPath = Candidate.Path;
  402. if (InstallPath.empty() || !FS.exists(InstallPath))
  403. continue;
  404. // HIP runtime built by SPACK is installed to
  405. // <rocm_root>/hip-<rocm_release_string>-<hash> directory.
  406. auto SPACKPath = findSPACKPackage(Candidate, "hip");
  407. InstallPath = SPACKPath.empty() ? InstallPath : SPACKPath;
  408. BinPath = InstallPath;
  409. llvm::sys::path::append(BinPath, "bin");
  410. IncludePath = InstallPath;
  411. llvm::sys::path::append(IncludePath, "include");
  412. LibPath = InstallPath;
  413. llvm::sys::path::append(LibPath, "lib");
  414. SharePath = InstallPath;
  415. llvm::sys::path::append(SharePath, "share");
  416. // If HIP version file can be found and parsed, use HIP version from there.
  417. for (const auto &VersionFilePath :
  418. {std::string(SharePath) + "/hip/version",
  419. std::string(BinPath) + "/.hipVersion"}) {
  420. llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> VersionFile =
  421. FS.getBufferForFile(VersionFilePath);
  422. if (!VersionFile)
  423. continue;
  424. if (HIPVersionArg.empty() && VersionFile)
  425. if (parseHIPVersionFile((*VersionFile)->getBuffer()))
  426. continue;
  427. HasHIPRuntime = true;
  428. return;
  429. }
  430. // Otherwise, if -rocm-path is specified (no strict checking), use the
  431. // default HIP version or specified by --hip-version.
  432. if (!Candidate.StrictChecking) {
  433. HasHIPRuntime = true;
  434. return;
  435. }
  436. }
  437. HasHIPRuntime = false;
  438. }
  439. void RocmInstallationDetector::print(raw_ostream &OS) const {
  440. if (hasHIPRuntime())
  441. OS << "Found HIP installation: " << InstallPath << ", version "
  442. << DetectedVersion << '\n';
  443. }
  444. void RocmInstallationDetector::AddHIPIncludeArgs(const ArgList &DriverArgs,
  445. ArgStringList &CC1Args) const {
  446. bool UsesRuntimeWrapper = VersionMajorMinor > llvm::VersionTuple(3, 5) &&
  447. !DriverArgs.hasArg(options::OPT_nohipwrapperinc);
  448. if (!DriverArgs.hasArg(options::OPT_nobuiltininc)) {
  449. // HIP header includes standard library wrapper headers under clang
  450. // cuda_wrappers directory. Since these wrapper headers include_next
  451. // standard C++ headers, whereas libc++ headers include_next other clang
  452. // headers. The include paths have to follow this order:
  453. // - wrapper include path
  454. // - standard C++ include path
  455. // - other clang include path
  456. // Since standard C++ and other clang include paths are added in other
  457. // places after this function, here we only need to make sure wrapper
  458. // include path is added.
  459. //
  460. // ROCm 3.5 does not fully support the wrapper headers. Therefore it needs
  461. // a workaround.
  462. SmallString<128> P(D.ResourceDir);
  463. if (UsesRuntimeWrapper)
  464. llvm::sys::path::append(P, "include", "cuda_wrappers");
  465. CC1Args.push_back("-internal-isystem");
  466. CC1Args.push_back(DriverArgs.MakeArgString(P));
  467. }
  468. if (DriverArgs.hasArg(options::OPT_nogpuinc))
  469. return;
  470. if (!hasHIPRuntime()) {
  471. D.Diag(diag::err_drv_no_hip_runtime);
  472. return;
  473. }
  474. CC1Args.push_back("-idirafter");
  475. CC1Args.push_back(DriverArgs.MakeArgString(getIncludePath()));
  476. if (UsesRuntimeWrapper)
  477. CC1Args.append({"-include", "__clang_hip_runtime_wrapper.h"});
  478. }
  479. void amdgpu::Linker::ConstructJob(Compilation &C, const JobAction &JA,
  480. const InputInfo &Output,
  481. const InputInfoList &Inputs,
  482. const ArgList &Args,
  483. const char *LinkingOutput) const {
  484. std::string Linker = getToolChain().GetProgramPath(getShortName());
  485. ArgStringList CmdArgs;
  486. addLinkerCompressDebugSectionsOption(getToolChain(), Args, CmdArgs);
  487. AddLinkerInputs(getToolChain(), Inputs, Args, CmdArgs, JA);
  488. CmdArgs.push_back("-shared");
  489. CmdArgs.push_back("-o");
  490. CmdArgs.push_back(Output.getFilename());
  491. C.addCommand(std::make_unique<Command>(
  492. JA, *this, ResponseFileSupport::AtFileCurCP(), Args.MakeArgString(Linker),
  493. CmdArgs, Inputs, Output));
  494. }
  495. void amdgpu::getAMDGPUTargetFeatures(const Driver &D,
  496. const llvm::Triple &Triple,
  497. const llvm::opt::ArgList &Args,
  498. std::vector<StringRef> &Features) {
  499. // Add target ID features to -target-feature options. No diagnostics should
  500. // be emitted here since invalid target ID is diagnosed at other places.
  501. StringRef TargetID = Args.getLastArgValue(options::OPT_mcpu_EQ);
  502. if (!TargetID.empty()) {
  503. llvm::StringMap<bool> FeatureMap;
  504. auto OptionalGpuArch = parseTargetID(Triple, TargetID, &FeatureMap);
  505. if (OptionalGpuArch) {
  506. StringRef GpuArch = *OptionalGpuArch;
  507. // Iterate through all possible target ID features for the given GPU.
  508. // If it is mapped to true, add +feature.
  509. // If it is mapped to false, add -feature.
  510. // If it is not in the map (default), do not add it
  511. for (auto &&Feature : getAllPossibleTargetIDFeatures(Triple, GpuArch)) {
  512. auto Pos = FeatureMap.find(Feature);
  513. if (Pos == FeatureMap.end())
  514. continue;
  515. Features.push_back(Args.MakeArgStringRef(
  516. (Twine(Pos->second ? "+" : "-") + Feature).str()));
  517. }
  518. }
  519. }
  520. if (Args.hasFlag(options::OPT_mwavefrontsize64,
  521. options::OPT_mno_wavefrontsize64, false))
  522. Features.push_back("+wavefrontsize64");
  523. handleTargetFeaturesGroup(
  524. Args, Features, options::OPT_m_amdgpu_Features_Group);
  525. }
  526. /// AMDGPU Toolchain
  527. AMDGPUToolChain::AMDGPUToolChain(const Driver &D, const llvm::Triple &Triple,
  528. const ArgList &Args)
  529. : Generic_ELF(D, Triple, Args),
  530. OptionsDefault(
  531. {{options::OPT_O, "3"}, {options::OPT_cl_std_EQ, "CL1.2"}}) {
  532. // Check code object version options. Emit warnings for legacy options
  533. // and errors for the last invalid code object version options.
  534. // It is done here to avoid repeated warning or error messages for
  535. // each tool invocation.
  536. checkAMDGPUCodeObjectVersion(D, Args);
  537. }
  538. Tool *AMDGPUToolChain::buildLinker() const {
  539. return new tools::amdgpu::Linker(*this);
  540. }
  541. DerivedArgList *
  542. AMDGPUToolChain::TranslateArgs(const DerivedArgList &Args, StringRef BoundArch,
  543. Action::OffloadKind DeviceOffloadKind) const {
  544. DerivedArgList *DAL =
  545. Generic_ELF::TranslateArgs(Args, BoundArch, DeviceOffloadKind);
  546. const OptTable &Opts = getDriver().getOpts();
  547. if (!DAL)
  548. DAL = new DerivedArgList(Args.getBaseArgs());
  549. for (Arg *A : Args) {
  550. if (!shouldSkipArgument(A))
  551. DAL->append(A);
  552. }
  553. checkTargetID(*DAL);
  554. if (!Args.getLastArgValue(options::OPT_x).equals("cl"))
  555. return DAL;
  556. // Phase 1 (.cl -> .bc)
  557. if (Args.hasArg(options::OPT_c) && Args.hasArg(options::OPT_emit_llvm)) {
  558. DAL->AddFlagArg(nullptr, Opts.getOption(getTriple().isArch64Bit()
  559. ? options::OPT_m64
  560. : options::OPT_m32));
  561. // Have to check OPT_O4, OPT_O0 & OPT_Ofast separately
  562. // as they defined that way in Options.td
  563. if (!Args.hasArg(options::OPT_O, options::OPT_O0, options::OPT_O4,
  564. options::OPT_Ofast))
  565. DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_O),
  566. getOptionDefault(options::OPT_O));
  567. }
  568. return DAL;
  569. }
  570. bool AMDGPUToolChain::getDefaultDenormsAreZeroForTarget(
  571. llvm::AMDGPU::GPUKind Kind) {
  572. // Assume nothing without a specific target.
  573. if (Kind == llvm::AMDGPU::GK_NONE)
  574. return false;
  575. const unsigned ArchAttr = llvm::AMDGPU::getArchAttrAMDGCN(Kind);
  576. // Default to enabling f32 denormals by default on subtargets where fma is
  577. // fast with denormals
  578. const bool BothDenormAndFMAFast =
  579. (ArchAttr & llvm::AMDGPU::FEATURE_FAST_FMA_F32) &&
  580. (ArchAttr & llvm::AMDGPU::FEATURE_FAST_DENORMAL_F32);
  581. return !BothDenormAndFMAFast;
  582. }
  583. llvm::DenormalMode AMDGPUToolChain::getDefaultDenormalModeForType(
  584. const llvm::opt::ArgList &DriverArgs, const JobAction &JA,
  585. const llvm::fltSemantics *FPType) const {
  586. // Denormals should always be enabled for f16 and f64.
  587. if (!FPType || FPType != &llvm::APFloat::IEEEsingle())
  588. return llvm::DenormalMode::getIEEE();
  589. if (JA.getOffloadingDeviceKind() == Action::OFK_HIP ||
  590. JA.getOffloadingDeviceKind() == Action::OFK_Cuda) {
  591. auto Arch = getProcessorFromTargetID(getTriple(), JA.getOffloadingArch());
  592. auto Kind = llvm::AMDGPU::parseArchAMDGCN(Arch);
  593. if (FPType && FPType == &llvm::APFloat::IEEEsingle() &&
  594. DriverArgs.hasFlag(options::OPT_fgpu_flush_denormals_to_zero,
  595. options::OPT_fno_gpu_flush_denormals_to_zero,
  596. getDefaultDenormsAreZeroForTarget(Kind)))
  597. return llvm::DenormalMode::getPreserveSign();
  598. return llvm::DenormalMode::getIEEE();
  599. }
  600. const StringRef GpuArch = getGPUArch(DriverArgs);
  601. auto Kind = llvm::AMDGPU::parseArchAMDGCN(GpuArch);
  602. // TODO: There are way too many flags that change this. Do we need to check
  603. // them all?
  604. bool DAZ = DriverArgs.hasArg(options::OPT_cl_denorms_are_zero) ||
  605. getDefaultDenormsAreZeroForTarget(Kind);
  606. // Outputs are flushed to zero (FTZ), preserving sign. Denormal inputs are
  607. // also implicit treated as zero (DAZ).
  608. return DAZ ? llvm::DenormalMode::getPreserveSign() :
  609. llvm::DenormalMode::getIEEE();
  610. }
  611. bool AMDGPUToolChain::isWave64(const llvm::opt::ArgList &DriverArgs,
  612. llvm::AMDGPU::GPUKind Kind) {
  613. const unsigned ArchAttr = llvm::AMDGPU::getArchAttrAMDGCN(Kind);
  614. bool HasWave32 = (ArchAttr & llvm::AMDGPU::FEATURE_WAVE32);
  615. return !HasWave32 || DriverArgs.hasFlag(
  616. options::OPT_mwavefrontsize64, options::OPT_mno_wavefrontsize64, false);
  617. }
  618. /// ROCM Toolchain
  619. ROCMToolChain::ROCMToolChain(const Driver &D, const llvm::Triple &Triple,
  620. const ArgList &Args)
  621. : AMDGPUToolChain(D, Triple, Args) {
  622. RocmInstallation.detectDeviceLibrary();
  623. }
  624. void AMDGPUToolChain::addClangTargetOptions(
  625. const llvm::opt::ArgList &DriverArgs,
  626. llvm::opt::ArgStringList &CC1Args,
  627. Action::OffloadKind DeviceOffloadingKind) const {
  628. // Default to "hidden" visibility, as object level linking will not be
  629. // supported for the foreseeable future.
  630. if (!DriverArgs.hasArg(options::OPT_fvisibility_EQ,
  631. options::OPT_fvisibility_ms_compat)) {
  632. CC1Args.push_back("-fvisibility=hidden");
  633. CC1Args.push_back("-fapply-global-visibility-to-externs");
  634. }
  635. }
  636. StringRef
  637. AMDGPUToolChain::getGPUArch(const llvm::opt::ArgList &DriverArgs) const {
  638. return getProcessorFromTargetID(
  639. getTriple(), DriverArgs.getLastArgValue(options::OPT_mcpu_EQ));
  640. }
  641. AMDGPUToolChain::ParsedTargetIDType
  642. AMDGPUToolChain::getParsedTargetID(const llvm::opt::ArgList &DriverArgs) const {
  643. StringRef TargetID = DriverArgs.getLastArgValue(options::OPT_mcpu_EQ);
  644. if (TargetID.empty())
  645. return {std::nullopt, std::nullopt, std::nullopt};
  646. llvm::StringMap<bool> FeatureMap;
  647. auto OptionalGpuArch = parseTargetID(getTriple(), TargetID, &FeatureMap);
  648. if (!OptionalGpuArch)
  649. return {TargetID.str(), std::nullopt, std::nullopt};
  650. return {TargetID.str(), OptionalGpuArch->str(), FeatureMap};
  651. }
  652. void AMDGPUToolChain::checkTargetID(
  653. const llvm::opt::ArgList &DriverArgs) const {
  654. auto PTID = getParsedTargetID(DriverArgs);
  655. if (PTID.OptionalTargetID && !PTID.OptionalGPUArch) {
  656. getDriver().Diag(clang::diag::err_drv_bad_target_id)
  657. << *PTID.OptionalTargetID;
  658. }
  659. }
  660. Expected<SmallVector<std::string>>
  661. AMDGPUToolChain::getSystemGPUArchs(const ArgList &Args) const {
  662. // Detect AMD GPUs availible on the system.
  663. std::string Program;
  664. if (Arg *A = Args.getLastArg(options::OPT_amdgpu_arch_tool_EQ))
  665. Program = A->getValue();
  666. else
  667. Program = GetProgramPath("amdgpu-arch");
  668. auto StdoutOrErr = executeToolChainProgram(Program);
  669. if (!StdoutOrErr)
  670. return StdoutOrErr.takeError();
  671. SmallVector<std::string, 1> GPUArchs;
  672. for (StringRef Arch : llvm::split((*StdoutOrErr)->getBuffer(), "\n"))
  673. if (!Arch.empty())
  674. GPUArchs.push_back(Arch.str());
  675. if (GPUArchs.empty())
  676. return llvm::createStringError(std::error_code(),
  677. "No AMD GPU detected in the system");
  678. return std::move(GPUArchs);
  679. }
  680. void ROCMToolChain::addClangTargetOptions(
  681. const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args,
  682. Action::OffloadKind DeviceOffloadingKind) const {
  683. AMDGPUToolChain::addClangTargetOptions(DriverArgs, CC1Args,
  684. DeviceOffloadingKind);
  685. // For the OpenCL case where there is no offload target, accept -nostdlib to
  686. // disable bitcode linking.
  687. if (DeviceOffloadingKind == Action::OFK_None &&
  688. DriverArgs.hasArg(options::OPT_nostdlib))
  689. return;
  690. if (DriverArgs.hasArg(options::OPT_nogpulib))
  691. return;
  692. // Get the device name and canonicalize it
  693. const StringRef GpuArch = getGPUArch(DriverArgs);
  694. auto Kind = llvm::AMDGPU::parseArchAMDGCN(GpuArch);
  695. const StringRef CanonArch = llvm::AMDGPU::getArchNameAMDGCN(Kind);
  696. StringRef LibDeviceFile = RocmInstallation.getLibDeviceFile(CanonArch);
  697. auto ABIVer = DeviceLibABIVersion::fromCodeObjectVersion(
  698. getAMDGPUCodeObjectVersion(getDriver(), DriverArgs));
  699. if (!RocmInstallation.checkCommonBitcodeLibs(CanonArch, LibDeviceFile,
  700. ABIVer))
  701. return;
  702. bool Wave64 = isWave64(DriverArgs, Kind);
  703. // TODO: There are way too many flags that change this. Do we need to check
  704. // them all?
  705. bool DAZ = DriverArgs.hasArg(options::OPT_cl_denorms_are_zero) ||
  706. getDefaultDenormsAreZeroForTarget(Kind);
  707. bool FiniteOnly = DriverArgs.hasArg(options::OPT_cl_finite_math_only);
  708. bool UnsafeMathOpt =
  709. DriverArgs.hasArg(options::OPT_cl_unsafe_math_optimizations);
  710. bool FastRelaxedMath = DriverArgs.hasArg(options::OPT_cl_fast_relaxed_math);
  711. bool CorrectSqrt =
  712. DriverArgs.hasArg(options::OPT_cl_fp32_correctly_rounded_divide_sqrt);
  713. // Add the OpenCL specific bitcode library.
  714. llvm::SmallVector<std::string, 12> BCLibs;
  715. BCLibs.push_back(RocmInstallation.getOpenCLPath().str());
  716. // Add the generic set of libraries.
  717. BCLibs.append(RocmInstallation.getCommonBitcodeLibs(
  718. DriverArgs, LibDeviceFile, Wave64, DAZ, FiniteOnly, UnsafeMathOpt,
  719. FastRelaxedMath, CorrectSqrt, ABIVer, false));
  720. for (StringRef BCFile : BCLibs) {
  721. CC1Args.push_back("-mlink-builtin-bitcode");
  722. CC1Args.push_back(DriverArgs.MakeArgString(BCFile));
  723. }
  724. }
  725. bool RocmInstallationDetector::checkCommonBitcodeLibs(
  726. StringRef GPUArch, StringRef LibDeviceFile,
  727. DeviceLibABIVersion ABIVer) const {
  728. if (!hasDeviceLibrary()) {
  729. D.Diag(diag::err_drv_no_rocm_device_lib) << 0;
  730. return false;
  731. }
  732. if (LibDeviceFile.empty()) {
  733. D.Diag(diag::err_drv_no_rocm_device_lib) << 1 << GPUArch;
  734. return false;
  735. }
  736. if (ABIVer.requiresLibrary() && getABIVersionPath(ABIVer).empty()) {
  737. D.Diag(diag::err_drv_no_rocm_device_lib) << 2 << ABIVer.toString();
  738. return false;
  739. }
  740. return true;
  741. }
  742. llvm::SmallVector<std::string, 12>
  743. RocmInstallationDetector::getCommonBitcodeLibs(
  744. const llvm::opt::ArgList &DriverArgs, StringRef LibDeviceFile, bool Wave64,
  745. bool DAZ, bool FiniteOnly, bool UnsafeMathOpt, bool FastRelaxedMath,
  746. bool CorrectSqrt, DeviceLibABIVersion ABIVer, bool isOpenMP = false) const {
  747. llvm::SmallVector<std::string, 12> BCLibs;
  748. auto AddBCLib = [&](StringRef BCFile) { BCLibs.push_back(BCFile.str()); };
  749. AddBCLib(getOCMLPath());
  750. AddBCLib(getOCKLPath());
  751. AddBCLib(getDenormalsAreZeroPath(DAZ));
  752. AddBCLib(getUnsafeMathPath(UnsafeMathOpt || FastRelaxedMath));
  753. AddBCLib(getFiniteOnlyPath(FiniteOnly || FastRelaxedMath));
  754. AddBCLib(getCorrectlyRoundedSqrtPath(CorrectSqrt));
  755. AddBCLib(getWavefrontSize64Path(Wave64));
  756. AddBCLib(LibDeviceFile);
  757. auto ABIVerPath = getABIVersionPath(ABIVer);
  758. if (!ABIVerPath.empty())
  759. AddBCLib(ABIVerPath);
  760. return BCLibs;
  761. }
  762. bool AMDGPUToolChain::shouldSkipArgument(const llvm::opt::Arg *A) const {
  763. Option O = A->getOption();
  764. if (O.matches(options::OPT_fPIE) || O.matches(options::OPT_fpie))
  765. return true;
  766. return false;
  767. }
  768. llvm::SmallVector<std::string, 12>
  769. ROCMToolChain::getCommonDeviceLibNames(const llvm::opt::ArgList &DriverArgs,
  770. const std::string &GPUArch,
  771. bool isOpenMP) const {
  772. auto Kind = llvm::AMDGPU::parseArchAMDGCN(GPUArch);
  773. const StringRef CanonArch = llvm::AMDGPU::getArchNameAMDGCN(Kind);
  774. StringRef LibDeviceFile = RocmInstallation.getLibDeviceFile(CanonArch);
  775. auto ABIVer = DeviceLibABIVersion::fromCodeObjectVersion(
  776. getAMDGPUCodeObjectVersion(getDriver(), DriverArgs));
  777. if (!RocmInstallation.checkCommonBitcodeLibs(CanonArch, LibDeviceFile,
  778. ABIVer))
  779. return {};
  780. // If --hip-device-lib is not set, add the default bitcode libraries.
  781. // TODO: There are way too many flags that change this. Do we need to check
  782. // them all?
  783. bool DAZ = DriverArgs.hasFlag(options::OPT_fgpu_flush_denormals_to_zero,
  784. options::OPT_fno_gpu_flush_denormals_to_zero,
  785. getDefaultDenormsAreZeroForTarget(Kind));
  786. bool FiniteOnly = DriverArgs.hasFlag(
  787. options::OPT_ffinite_math_only, options::OPT_fno_finite_math_only, false);
  788. bool UnsafeMathOpt =
  789. DriverArgs.hasFlag(options::OPT_funsafe_math_optimizations,
  790. options::OPT_fno_unsafe_math_optimizations, false);
  791. bool FastRelaxedMath = DriverArgs.hasFlag(options::OPT_ffast_math,
  792. options::OPT_fno_fast_math, false);
  793. bool CorrectSqrt = DriverArgs.hasFlag(
  794. options::OPT_fhip_fp32_correctly_rounded_divide_sqrt,
  795. options::OPT_fno_hip_fp32_correctly_rounded_divide_sqrt, true);
  796. bool Wave64 = isWave64(DriverArgs, Kind);
  797. return RocmInstallation.getCommonBitcodeLibs(
  798. DriverArgs, LibDeviceFile, Wave64, DAZ, FiniteOnly, UnsafeMathOpt,
  799. FastRelaxedMath, CorrectSqrt, ABIVer, isOpenMP);
  800. }