AMDGPU.cpp 34 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931
  1. //===--- AMDGPU.cpp - AMDGPU ToolChain Implementations ----------*- C++ -*-===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. #include "AMDGPU.h"
  9. #include "CommonArgs.h"
  10. #include "clang/Basic/TargetID.h"
  11. #include "clang/Driver/Compilation.h"
  12. #include "clang/Driver/DriverDiagnostic.h"
  13. #include "clang/Driver/InputInfo.h"
  14. #include "clang/Driver/Options.h"
  15. #include "llvm/Option/ArgList.h"
  16. #include "llvm/Support/Error.h"
  17. #include "llvm/Support/FileUtilities.h"
  18. #include "llvm/Support/LineIterator.h"
  19. #include "llvm/Support/Path.h"
  20. #include "llvm/Support/VirtualFileSystem.h"
  21. #include <system_error>
  22. #define AMDGPU_ARCH_PROGRAM_NAME "amdgpu-arch"
  23. using namespace clang::driver;
  24. using namespace clang::driver::tools;
  25. using namespace clang::driver::toolchains;
  26. using namespace clang;
  27. using namespace llvm::opt;
  28. // Look for sub-directory starts with PackageName under ROCm candidate path.
  29. // If there is one and only one matching sub-directory found, append the
  30. // sub-directory to Path. If there is no matching sub-directory or there are
  31. // more than one matching sub-directories, diagnose them. Returns the full
  32. // path of the package if there is only one matching sub-directory, otherwise
  33. // returns an empty string.
  34. llvm::SmallString<0>
  35. RocmInstallationDetector::findSPACKPackage(const Candidate &Cand,
  36. StringRef PackageName) {
  37. if (!Cand.isSPACK())
  38. return {};
  39. std::error_code EC;
  40. std::string Prefix = Twine(PackageName + "-" + Cand.SPACKReleaseStr).str();
  41. llvm::SmallVector<llvm::SmallString<0>> SubDirs;
  42. for (llvm::vfs::directory_iterator File = D.getVFS().dir_begin(Cand.Path, EC),
  43. FileEnd;
  44. File != FileEnd && !EC; File.increment(EC)) {
  45. llvm::StringRef FileName = llvm::sys::path::filename(File->path());
  46. if (FileName.startswith(Prefix)) {
  47. SubDirs.push_back(FileName);
  48. if (SubDirs.size() > 1)
  49. break;
  50. }
  51. }
  52. if (SubDirs.size() == 1) {
  53. auto PackagePath = Cand.Path;
  54. llvm::sys::path::append(PackagePath, SubDirs[0]);
  55. return PackagePath;
  56. }
  57. if (SubDirs.size() == 0 && Verbose) {
  58. llvm::errs() << "SPACK package " << Prefix << " not found at " << Cand.Path
  59. << '\n';
  60. return {};
  61. }
  62. if (SubDirs.size() > 1 && Verbose) {
  63. llvm::errs() << "Cannot use SPACK package " << Prefix << " at " << Cand.Path
  64. << " due to multiple installations for the same version\n";
  65. }
  66. return {};
  67. }
  68. void RocmInstallationDetector::scanLibDevicePath(llvm::StringRef Path) {
  69. assert(!Path.empty());
  70. const StringRef Suffix(".bc");
  71. const StringRef Suffix2(".amdgcn.bc");
  72. std::error_code EC;
  73. for (llvm::vfs::directory_iterator LI = D.getVFS().dir_begin(Path, EC), LE;
  74. !EC && LI != LE; LI = LI.increment(EC)) {
  75. StringRef FilePath = LI->path();
  76. StringRef FileName = llvm::sys::path::filename(FilePath);
  77. if (!FileName.endswith(Suffix))
  78. continue;
  79. StringRef BaseName;
  80. if (FileName.endswith(Suffix2))
  81. BaseName = FileName.drop_back(Suffix2.size());
  82. else if (FileName.endswith(Suffix))
  83. BaseName = FileName.drop_back(Suffix.size());
  84. if (BaseName == "ocml") {
  85. OCML = FilePath;
  86. } else if (BaseName == "ockl") {
  87. OCKL = FilePath;
  88. } else if (BaseName == "opencl") {
  89. OpenCL = FilePath;
  90. } else if (BaseName == "hip") {
  91. HIP = FilePath;
  92. } else if (BaseName == "asanrtl") {
  93. AsanRTL = FilePath;
  94. } else if (BaseName == "oclc_finite_only_off") {
  95. FiniteOnly.Off = FilePath;
  96. } else if (BaseName == "oclc_finite_only_on") {
  97. FiniteOnly.On = FilePath;
  98. } else if (BaseName == "oclc_daz_opt_on") {
  99. DenormalsAreZero.On = FilePath;
  100. } else if (BaseName == "oclc_daz_opt_off") {
  101. DenormalsAreZero.Off = FilePath;
  102. } else if (BaseName == "oclc_correctly_rounded_sqrt_on") {
  103. CorrectlyRoundedSqrt.On = FilePath;
  104. } else if (BaseName == "oclc_correctly_rounded_sqrt_off") {
  105. CorrectlyRoundedSqrt.Off = FilePath;
  106. } else if (BaseName == "oclc_unsafe_math_on") {
  107. UnsafeMath.On = FilePath;
  108. } else if (BaseName == "oclc_unsafe_math_off") {
  109. UnsafeMath.Off = FilePath;
  110. } else if (BaseName == "oclc_wavefrontsize64_on") {
  111. WavefrontSize64.On = FilePath;
  112. } else if (BaseName == "oclc_wavefrontsize64_off") {
  113. WavefrontSize64.Off = FilePath;
  114. } else {
  115. // Process all bitcode filenames that look like
  116. // ocl_isa_version_XXX.amdgcn.bc
  117. const StringRef DeviceLibPrefix = "oclc_isa_version_";
  118. if (!BaseName.startswith(DeviceLibPrefix))
  119. continue;
  120. StringRef IsaVersionNumber =
  121. BaseName.drop_front(DeviceLibPrefix.size());
  122. llvm::Twine GfxName = Twine("gfx") + IsaVersionNumber;
  123. SmallString<8> Tmp;
  124. LibDeviceMap.insert(
  125. std::make_pair(GfxName.toStringRef(Tmp), FilePath.str()));
  126. }
  127. }
  128. }
  129. // Parse and extract version numbers from `.hipVersion`. Return `true` if
  130. // the parsing fails.
  131. bool RocmInstallationDetector::parseHIPVersionFile(llvm::StringRef V) {
  132. SmallVector<StringRef, 4> VersionParts;
  133. V.split(VersionParts, '\n');
  134. unsigned Major = ~0U;
  135. unsigned Minor = ~0U;
  136. for (auto Part : VersionParts) {
  137. auto Splits = Part.rtrim().split('=');
  138. if (Splits.first == "HIP_VERSION_MAJOR") {
  139. if (Splits.second.getAsInteger(0, Major))
  140. return true;
  141. } else if (Splits.first == "HIP_VERSION_MINOR") {
  142. if (Splits.second.getAsInteger(0, Minor))
  143. return true;
  144. } else if (Splits.first == "HIP_VERSION_PATCH")
  145. VersionPatch = Splits.second.str();
  146. }
  147. if (Major == ~0U || Minor == ~0U)
  148. return true;
  149. VersionMajorMinor = llvm::VersionTuple(Major, Minor);
  150. DetectedVersion =
  151. (Twine(Major) + "." + Twine(Minor) + "." + VersionPatch).str();
  152. return false;
  153. }
  154. /// \returns a list of candidate directories for ROCm installation, which is
  155. /// cached and populated only once.
  156. const SmallVectorImpl<RocmInstallationDetector::Candidate> &
  157. RocmInstallationDetector::getInstallationPathCandidates() {
  158. // Return the cached candidate list if it has already been populated.
  159. if (!ROCmSearchDirs.empty())
  160. return ROCmSearchDirs;
  161. auto DoPrintROCmSearchDirs = [&]() {
  162. if (PrintROCmSearchDirs)
  163. for (auto Cand : ROCmSearchDirs) {
  164. llvm::errs() << "ROCm installation search path";
  165. if (Cand.isSPACK())
  166. llvm::errs() << " (Spack " << Cand.SPACKReleaseStr << ")";
  167. llvm::errs() << ": " << Cand.Path << '\n';
  168. }
  169. };
  170. // For candidate specified by --rocm-path we do not do strict check, i.e.,
  171. // checking existence of HIP version file and device library files.
  172. if (!RocmPathArg.empty()) {
  173. ROCmSearchDirs.emplace_back(RocmPathArg.str());
  174. DoPrintROCmSearchDirs();
  175. return ROCmSearchDirs;
  176. } else if (const char *RocmPathEnv = ::getenv("ROCM_PATH")) {
  177. if (!StringRef(RocmPathEnv).empty()) {
  178. ROCmSearchDirs.emplace_back(RocmPathEnv);
  179. DoPrintROCmSearchDirs();
  180. return ROCmSearchDirs;
  181. }
  182. }
  183. // Try to find relative to the compiler binary.
  184. const char *InstallDir = D.getInstalledDir();
  185. // Check both a normal Unix prefix position of the clang binary, as well as
  186. // the Windows-esque layout the ROCm packages use with the host architecture
  187. // subdirectory of bin.
  188. auto DeduceROCmPath = [](StringRef ClangPath) {
  189. // Strip off directory (usually bin)
  190. StringRef ParentDir = llvm::sys::path::parent_path(ClangPath);
  191. StringRef ParentName = llvm::sys::path::filename(ParentDir);
  192. // Some builds use bin/{host arch}, so go up again.
  193. if (ParentName == "bin") {
  194. ParentDir = llvm::sys::path::parent_path(ParentDir);
  195. ParentName = llvm::sys::path::filename(ParentDir);
  196. }
  197. // Detect ROCm packages built with SPACK.
  198. // clang is installed at
  199. // <rocm_root>/llvm-amdgpu-<rocm_release_string>-<hash>/bin directory.
  200. // We only consider the parent directory of llvm-amdgpu package as ROCm
  201. // installation candidate for SPACK.
  202. if (ParentName.startswith("llvm-amdgpu-")) {
  203. auto SPACKPostfix =
  204. ParentName.drop_front(strlen("llvm-amdgpu-")).split('-');
  205. auto SPACKReleaseStr = SPACKPostfix.first;
  206. if (!SPACKReleaseStr.empty()) {
  207. ParentDir = llvm::sys::path::parent_path(ParentDir);
  208. return Candidate(ParentDir.str(), /*StrictChecking=*/true,
  209. SPACKReleaseStr);
  210. }
  211. }
  212. // Some versions of the rocm llvm package install to /opt/rocm/llvm/bin
  213. // Some versions of the aomp package install to /opt/rocm/aomp/bin
  214. if (ParentName == "llvm" || ParentName.startswith("aomp"))
  215. ParentDir = llvm::sys::path::parent_path(ParentDir);
  216. return Candidate(ParentDir.str(), /*StrictChecking=*/true);
  217. };
  218. // Deduce ROCm path by the path used to invoke clang. Do not resolve symbolic
  219. // link of clang itself.
  220. ROCmSearchDirs.emplace_back(DeduceROCmPath(InstallDir));
  221. // Deduce ROCm path by the real path of the invoked clang, resolving symbolic
  222. // link of clang itself.
  223. llvm::SmallString<256> RealClangPath;
  224. llvm::sys::fs::real_path(D.getClangProgramPath(), RealClangPath);
  225. auto ParentPath = llvm::sys::path::parent_path(RealClangPath);
  226. if (ParentPath != InstallDir)
  227. ROCmSearchDirs.emplace_back(DeduceROCmPath(ParentPath));
  228. // Device library may be installed in clang or resource directory.
  229. auto ClangRoot = llvm::sys::path::parent_path(InstallDir);
  230. auto RealClangRoot = llvm::sys::path::parent_path(ParentPath);
  231. ROCmSearchDirs.emplace_back(ClangRoot.str(), /*StrictChecking=*/true);
  232. if (RealClangRoot != ClangRoot)
  233. ROCmSearchDirs.emplace_back(RealClangRoot.str(), /*StrictChecking=*/true);
  234. ROCmSearchDirs.emplace_back(D.ResourceDir,
  235. /*StrictChecking=*/true);
  236. ROCmSearchDirs.emplace_back(D.SysRoot + "/opt/rocm",
  237. /*StrictChecking=*/true);
  238. // Find the latest /opt/rocm-{release} directory.
  239. std::error_code EC;
  240. std::string LatestROCm;
  241. llvm::VersionTuple LatestVer;
  242. // Get ROCm version from ROCm directory name.
  243. auto GetROCmVersion = [](StringRef DirName) {
  244. llvm::VersionTuple V;
  245. std::string VerStr = DirName.drop_front(strlen("rocm-")).str();
  246. // The ROCm directory name follows the format of
  247. // rocm-{major}.{minor}.{subMinor}[-{build}]
  248. std::replace(VerStr.begin(), VerStr.end(), '-', '.');
  249. V.tryParse(VerStr);
  250. return V;
  251. };
  252. for (llvm::vfs::directory_iterator
  253. File = D.getVFS().dir_begin(D.SysRoot + "/opt", EC),
  254. FileEnd;
  255. File != FileEnd && !EC; File.increment(EC)) {
  256. llvm::StringRef FileName = llvm::sys::path::filename(File->path());
  257. if (!FileName.startswith("rocm-"))
  258. continue;
  259. if (LatestROCm.empty()) {
  260. LatestROCm = FileName.str();
  261. LatestVer = GetROCmVersion(LatestROCm);
  262. continue;
  263. }
  264. auto Ver = GetROCmVersion(FileName);
  265. if (LatestVer < Ver) {
  266. LatestROCm = FileName.str();
  267. LatestVer = Ver;
  268. }
  269. }
  270. if (!LatestROCm.empty())
  271. ROCmSearchDirs.emplace_back(D.SysRoot + "/opt/" + LatestROCm,
  272. /*StrictChecking=*/true);
  273. DoPrintROCmSearchDirs();
  274. return ROCmSearchDirs;
  275. }
  276. RocmInstallationDetector::RocmInstallationDetector(
  277. const Driver &D, const llvm::Triple &HostTriple,
  278. const llvm::opt::ArgList &Args, bool DetectHIPRuntime, bool DetectDeviceLib)
  279. : D(D) {
  280. Verbose = Args.hasArg(options::OPT_v);
  281. RocmPathArg = Args.getLastArgValue(clang::driver::options::OPT_rocm_path_EQ);
  282. PrintROCmSearchDirs =
  283. Args.hasArg(clang::driver::options::OPT_print_rocm_search_dirs);
  284. RocmDeviceLibPathArg =
  285. Args.getAllArgValues(clang::driver::options::OPT_rocm_device_lib_path_EQ);
  286. HIPPathArg = Args.getLastArgValue(clang::driver::options::OPT_hip_path_EQ);
  287. if (auto *A = Args.getLastArg(clang::driver::options::OPT_hip_version_EQ)) {
  288. HIPVersionArg = A->getValue();
  289. unsigned Major = ~0U;
  290. unsigned Minor = ~0U;
  291. SmallVector<StringRef, 3> Parts;
  292. HIPVersionArg.split(Parts, '.');
  293. if (Parts.size())
  294. Parts[0].getAsInteger(0, Major);
  295. if (Parts.size() > 1)
  296. Parts[1].getAsInteger(0, Minor);
  297. if (Parts.size() > 2)
  298. VersionPatch = Parts[2].str();
  299. if (VersionPatch.empty())
  300. VersionPatch = "0";
  301. if (Major != ~0U && Minor == ~0U)
  302. Minor = 0;
  303. if (Major == ~0U || Minor == ~0U)
  304. D.Diag(diag::err_drv_invalid_value)
  305. << A->getAsString(Args) << HIPVersionArg;
  306. VersionMajorMinor = llvm::VersionTuple(Major, Minor);
  307. DetectedVersion =
  308. (Twine(Major) + "." + Twine(Minor) + "." + VersionPatch).str();
  309. } else {
  310. VersionPatch = DefaultVersionPatch;
  311. VersionMajorMinor =
  312. llvm::VersionTuple(DefaultVersionMajor, DefaultVersionMinor);
  313. DetectedVersion = (Twine(DefaultVersionMajor) + "." +
  314. Twine(DefaultVersionMinor) + "." + VersionPatch)
  315. .str();
  316. }
  317. if (DetectHIPRuntime)
  318. detectHIPRuntime();
  319. if (DetectDeviceLib)
  320. detectDeviceLibrary();
  321. }
  322. void RocmInstallationDetector::detectDeviceLibrary() {
  323. assert(LibDevicePath.empty());
  324. if (!RocmDeviceLibPathArg.empty())
  325. LibDevicePath = RocmDeviceLibPathArg[RocmDeviceLibPathArg.size() - 1];
  326. else if (const char *LibPathEnv = ::getenv("HIP_DEVICE_LIB_PATH"))
  327. LibDevicePath = LibPathEnv;
  328. auto &FS = D.getVFS();
  329. if (!LibDevicePath.empty()) {
  330. // Maintain compatability with HIP flag/envvar pointing directly at the
  331. // bitcode library directory. This points directly at the library path instead
  332. // of the rocm root installation.
  333. if (!FS.exists(LibDevicePath))
  334. return;
  335. scanLibDevicePath(LibDevicePath);
  336. HasDeviceLibrary = allGenericLibsValid() && !LibDeviceMap.empty();
  337. return;
  338. }
  339. // The install path situation in old versions of ROCm is a real mess, and
  340. // use a different install layout. Multiple copies of the device libraries
  341. // exist for each frontend project, and differ depending on which build
  342. // system produced the packages. Standalone OpenCL builds also have a
  343. // different directory structure from the ROCm OpenCL package.
  344. auto &ROCmDirs = getInstallationPathCandidates();
  345. for (const auto &Candidate : ROCmDirs) {
  346. auto CandidatePath = Candidate.Path;
  347. // Check device library exists at the given path.
  348. auto CheckDeviceLib = [&](StringRef Path) {
  349. bool CheckLibDevice = (!NoBuiltinLibs || Candidate.StrictChecking);
  350. if (CheckLibDevice && !FS.exists(Path))
  351. return false;
  352. scanLibDevicePath(Path);
  353. if (!NoBuiltinLibs) {
  354. // Check that the required non-target libraries are all available.
  355. if (!allGenericLibsValid())
  356. return false;
  357. // Check that we have found at least one libdevice that we can link in
  358. // if -nobuiltinlib hasn't been specified.
  359. if (LibDeviceMap.empty())
  360. return false;
  361. }
  362. return true;
  363. };
  364. // The possible structures are:
  365. // - ${ROCM_ROOT}/amdgcn/bitcode/*
  366. // - ${ROCM_ROOT}/lib/*
  367. // - ${ROCM_ROOT}/lib/bitcode/*
  368. // so try to detect these layouts.
  369. static constexpr std::array<const char *, 2> SubDirsList[] = {
  370. {"amdgcn", "bitcode"},
  371. {"lib", ""},
  372. {"lib", "bitcode"},
  373. };
  374. // Make a path by appending sub-directories to InstallPath.
  375. auto MakePath = [&](const llvm::ArrayRef<const char *> &SubDirs) {
  376. auto Path = CandidatePath;
  377. for (auto SubDir : SubDirs)
  378. llvm::sys::path::append(Path, SubDir);
  379. return Path;
  380. };
  381. for (auto SubDirs : SubDirsList) {
  382. LibDevicePath = MakePath(SubDirs);
  383. HasDeviceLibrary = CheckDeviceLib(LibDevicePath);
  384. if (HasDeviceLibrary)
  385. return;
  386. }
  387. }
  388. }
  389. void RocmInstallationDetector::detectHIPRuntime() {
  390. SmallVector<Candidate, 4> HIPSearchDirs;
  391. if (!HIPPathArg.empty())
  392. HIPSearchDirs.emplace_back(HIPPathArg.str(), /*StrictChecking=*/true);
  393. else
  394. HIPSearchDirs.append(getInstallationPathCandidates());
  395. auto &FS = D.getVFS();
  396. for (const auto &Candidate : HIPSearchDirs) {
  397. InstallPath = Candidate.Path;
  398. if (InstallPath.empty() || !FS.exists(InstallPath))
  399. continue;
  400. // HIP runtime built by SPACK is installed to
  401. // <rocm_root>/hip-<rocm_release_string>-<hash> directory.
  402. auto SPACKPath = findSPACKPackage(Candidate, "hip");
  403. InstallPath = SPACKPath.empty() ? InstallPath : SPACKPath;
  404. BinPath = InstallPath;
  405. llvm::sys::path::append(BinPath, "bin");
  406. IncludePath = InstallPath;
  407. llvm::sys::path::append(IncludePath, "include");
  408. LibPath = InstallPath;
  409. llvm::sys::path::append(LibPath, "lib");
  410. llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> VersionFile =
  411. FS.getBufferForFile(BinPath + "/.hipVersion");
  412. if (!VersionFile && Candidate.StrictChecking)
  413. continue;
  414. if (HIPVersionArg.empty() && VersionFile)
  415. if (parseHIPVersionFile((*VersionFile)->getBuffer()))
  416. continue;
  417. HasHIPRuntime = true;
  418. return;
  419. }
  420. HasHIPRuntime = false;
  421. }
  422. void RocmInstallationDetector::print(raw_ostream &OS) const {
  423. if (hasHIPRuntime())
  424. OS << "Found HIP installation: " << InstallPath << ", version "
  425. << DetectedVersion << '\n';
  426. }
  427. void RocmInstallationDetector::AddHIPIncludeArgs(const ArgList &DriverArgs,
  428. ArgStringList &CC1Args) const {
  429. bool UsesRuntimeWrapper = VersionMajorMinor > llvm::VersionTuple(3, 5) &&
  430. !DriverArgs.hasArg(options::OPT_nohipwrapperinc);
  431. if (!DriverArgs.hasArg(options::OPT_nobuiltininc)) {
  432. // HIP header includes standard library wrapper headers under clang
  433. // cuda_wrappers directory. Since these wrapper headers include_next
  434. // standard C++ headers, whereas libc++ headers include_next other clang
  435. // headers. The include paths have to follow this order:
  436. // - wrapper include path
  437. // - standard C++ include path
  438. // - other clang include path
  439. // Since standard C++ and other clang include paths are added in other
  440. // places after this function, here we only need to make sure wrapper
  441. // include path is added.
  442. //
  443. // ROCm 3.5 does not fully support the wrapper headers. Therefore it needs
  444. // a workaround.
  445. SmallString<128> P(D.ResourceDir);
  446. if (UsesRuntimeWrapper)
  447. llvm::sys::path::append(P, "include", "cuda_wrappers");
  448. CC1Args.push_back("-internal-isystem");
  449. CC1Args.push_back(DriverArgs.MakeArgString(P));
  450. }
  451. if (DriverArgs.hasArg(options::OPT_nogpuinc))
  452. return;
  453. if (!hasHIPRuntime()) {
  454. D.Diag(diag::err_drv_no_hip_runtime);
  455. return;
  456. }
  457. CC1Args.push_back("-idirafter");
  458. CC1Args.push_back(DriverArgs.MakeArgString(getIncludePath()));
  459. if (UsesRuntimeWrapper)
  460. CC1Args.append({"-include", "__clang_hip_runtime_wrapper.h"});
  461. }
  462. void amdgpu::Linker::ConstructJob(Compilation &C, const JobAction &JA,
  463. const InputInfo &Output,
  464. const InputInfoList &Inputs,
  465. const ArgList &Args,
  466. const char *LinkingOutput) const {
  467. std::string Linker = getToolChain().GetProgramPath(getShortName());
  468. ArgStringList CmdArgs;
  469. addLinkerCompressDebugSectionsOption(getToolChain(), Args, CmdArgs);
  470. AddLinkerInputs(getToolChain(), Inputs, Args, CmdArgs, JA);
  471. CmdArgs.push_back("-shared");
  472. CmdArgs.push_back("-o");
  473. CmdArgs.push_back(Output.getFilename());
  474. C.addCommand(std::make_unique<Command>(
  475. JA, *this, ResponseFileSupport::AtFileCurCP(), Args.MakeArgString(Linker),
  476. CmdArgs, Inputs, Output));
  477. }
  478. void amdgpu::getAMDGPUTargetFeatures(const Driver &D,
  479. const llvm::Triple &Triple,
  480. const llvm::opt::ArgList &Args,
  481. std::vector<StringRef> &Features) {
  482. // Add target ID features to -target-feature options. No diagnostics should
  483. // be emitted here since invalid target ID is diagnosed at other places.
  484. StringRef TargetID = Args.getLastArgValue(options::OPT_mcpu_EQ);
  485. if (!TargetID.empty()) {
  486. llvm::StringMap<bool> FeatureMap;
  487. auto OptionalGpuArch = parseTargetID(Triple, TargetID, &FeatureMap);
  488. if (OptionalGpuArch) {
  489. StringRef GpuArch = OptionalGpuArch.getValue();
  490. // Iterate through all possible target ID features for the given GPU.
  491. // If it is mapped to true, add +feature.
  492. // If it is mapped to false, add -feature.
  493. // If it is not in the map (default), do not add it
  494. for (auto &&Feature : getAllPossibleTargetIDFeatures(Triple, GpuArch)) {
  495. auto Pos = FeatureMap.find(Feature);
  496. if (Pos == FeatureMap.end())
  497. continue;
  498. Features.push_back(Args.MakeArgStringRef(
  499. (Twine(Pos->second ? "+" : "-") + Feature).str()));
  500. }
  501. }
  502. }
  503. if (Args.hasFlag(options::OPT_mwavefrontsize64,
  504. options::OPT_mno_wavefrontsize64, false))
  505. Features.push_back("+wavefrontsize64");
  506. handleTargetFeaturesGroup(
  507. Args, Features, options::OPT_m_amdgpu_Features_Group);
  508. }
  509. /// AMDGPU Toolchain
  510. AMDGPUToolChain::AMDGPUToolChain(const Driver &D, const llvm::Triple &Triple,
  511. const ArgList &Args)
  512. : Generic_ELF(D, Triple, Args),
  513. OptionsDefault(
  514. {{options::OPT_O, "3"}, {options::OPT_cl_std_EQ, "CL1.2"}}) {
  515. // Check code object version options. Emit warnings for legacy options
  516. // and errors for the last invalid code object version options.
  517. // It is done here to avoid repeated warning or error messages for
  518. // each tool invocation.
  519. checkAMDGPUCodeObjectVersion(D, Args);
  520. }
  521. Tool *AMDGPUToolChain::buildLinker() const {
  522. return new tools::amdgpu::Linker(*this);
  523. }
  524. DerivedArgList *
  525. AMDGPUToolChain::TranslateArgs(const DerivedArgList &Args, StringRef BoundArch,
  526. Action::OffloadKind DeviceOffloadKind) const {
  527. DerivedArgList *DAL =
  528. Generic_ELF::TranslateArgs(Args, BoundArch, DeviceOffloadKind);
  529. const OptTable &Opts = getDriver().getOpts();
  530. if (!DAL)
  531. DAL = new DerivedArgList(Args.getBaseArgs());
  532. for (Arg *A : Args) {
  533. if (!shouldSkipArgument(A))
  534. DAL->append(A);
  535. }
  536. checkTargetID(*DAL);
  537. if (!Args.getLastArgValue(options::OPT_x).equals("cl"))
  538. return DAL;
  539. // Phase 1 (.cl -> .bc)
  540. if (Args.hasArg(options::OPT_c) && Args.hasArg(options::OPT_emit_llvm)) {
  541. DAL->AddFlagArg(nullptr, Opts.getOption(getTriple().isArch64Bit()
  542. ? options::OPT_m64
  543. : options::OPT_m32));
  544. // Have to check OPT_O4, OPT_O0 & OPT_Ofast separately
  545. // as they defined that way in Options.td
  546. if (!Args.hasArg(options::OPT_O, options::OPT_O0, options::OPT_O4,
  547. options::OPT_Ofast))
  548. DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_O),
  549. getOptionDefault(options::OPT_O));
  550. }
  551. return DAL;
  552. }
  553. bool AMDGPUToolChain::getDefaultDenormsAreZeroForTarget(
  554. llvm::AMDGPU::GPUKind Kind) {
  555. // Assume nothing without a specific target.
  556. if (Kind == llvm::AMDGPU::GK_NONE)
  557. return false;
  558. const unsigned ArchAttr = llvm::AMDGPU::getArchAttrAMDGCN(Kind);
  559. // Default to enabling f32 denormals by default on subtargets where fma is
  560. // fast with denormals
  561. const bool BothDenormAndFMAFast =
  562. (ArchAttr & llvm::AMDGPU::FEATURE_FAST_FMA_F32) &&
  563. (ArchAttr & llvm::AMDGPU::FEATURE_FAST_DENORMAL_F32);
  564. return !BothDenormAndFMAFast;
  565. }
  566. llvm::DenormalMode AMDGPUToolChain::getDefaultDenormalModeForType(
  567. const llvm::opt::ArgList &DriverArgs, const JobAction &JA,
  568. const llvm::fltSemantics *FPType) const {
  569. // Denormals should always be enabled for f16 and f64.
  570. if (!FPType || FPType != &llvm::APFloat::IEEEsingle())
  571. return llvm::DenormalMode::getIEEE();
  572. if (JA.getOffloadingDeviceKind() == Action::OFK_HIP ||
  573. JA.getOffloadingDeviceKind() == Action::OFK_Cuda) {
  574. auto Arch = getProcessorFromTargetID(getTriple(), JA.getOffloadingArch());
  575. auto Kind = llvm::AMDGPU::parseArchAMDGCN(Arch);
  576. if (FPType && FPType == &llvm::APFloat::IEEEsingle() &&
  577. DriverArgs.hasFlag(options::OPT_fgpu_flush_denormals_to_zero,
  578. options::OPT_fno_gpu_flush_denormals_to_zero,
  579. getDefaultDenormsAreZeroForTarget(Kind)))
  580. return llvm::DenormalMode::getPreserveSign();
  581. return llvm::DenormalMode::getIEEE();
  582. }
  583. const StringRef GpuArch = getGPUArch(DriverArgs);
  584. auto Kind = llvm::AMDGPU::parseArchAMDGCN(GpuArch);
  585. // TODO: There are way too many flags that change this. Do we need to check
  586. // them all?
  587. bool DAZ = DriverArgs.hasArg(options::OPT_cl_denorms_are_zero) ||
  588. getDefaultDenormsAreZeroForTarget(Kind);
  589. // Outputs are flushed to zero (FTZ), preserving sign. Denormal inputs are
  590. // also implicit treated as zero (DAZ).
  591. return DAZ ? llvm::DenormalMode::getPreserveSign() :
  592. llvm::DenormalMode::getIEEE();
  593. }
  594. bool AMDGPUToolChain::isWave64(const llvm::opt::ArgList &DriverArgs,
  595. llvm::AMDGPU::GPUKind Kind) {
  596. const unsigned ArchAttr = llvm::AMDGPU::getArchAttrAMDGCN(Kind);
  597. bool HasWave32 = (ArchAttr & llvm::AMDGPU::FEATURE_WAVE32);
  598. return !HasWave32 || DriverArgs.hasFlag(
  599. options::OPT_mwavefrontsize64, options::OPT_mno_wavefrontsize64, false);
  600. }
  601. /// ROCM Toolchain
  602. ROCMToolChain::ROCMToolChain(const Driver &D, const llvm::Triple &Triple,
  603. const ArgList &Args)
  604. : AMDGPUToolChain(D, Triple, Args) {
  605. RocmInstallation.detectDeviceLibrary();
  606. }
  607. void AMDGPUToolChain::addClangTargetOptions(
  608. const llvm::opt::ArgList &DriverArgs,
  609. llvm::opt::ArgStringList &CC1Args,
  610. Action::OffloadKind DeviceOffloadingKind) const {
  611. // Default to "hidden" visibility, as object level linking will not be
  612. // supported for the foreseeable future.
  613. if (!DriverArgs.hasArg(options::OPT_fvisibility_EQ,
  614. options::OPT_fvisibility_ms_compat)) {
  615. CC1Args.push_back("-fvisibility");
  616. CC1Args.push_back("hidden");
  617. CC1Args.push_back("-fapply-global-visibility-to-externs");
  618. }
  619. }
  620. StringRef
  621. AMDGPUToolChain::getGPUArch(const llvm::opt::ArgList &DriverArgs) const {
  622. return getProcessorFromTargetID(
  623. getTriple(), DriverArgs.getLastArgValue(options::OPT_mcpu_EQ));
  624. }
  625. AMDGPUToolChain::ParsedTargetIDType
  626. AMDGPUToolChain::getParsedTargetID(const llvm::opt::ArgList &DriverArgs) const {
  627. StringRef TargetID = DriverArgs.getLastArgValue(options::OPT_mcpu_EQ);
  628. if (TargetID.empty())
  629. return {None, None, None};
  630. llvm::StringMap<bool> FeatureMap;
  631. auto OptionalGpuArch = parseTargetID(getTriple(), TargetID, &FeatureMap);
  632. if (!OptionalGpuArch)
  633. return {TargetID.str(), None, None};
  634. return {TargetID.str(), OptionalGpuArch.getValue().str(), FeatureMap};
  635. }
  636. void AMDGPUToolChain::checkTargetID(
  637. const llvm::opt::ArgList &DriverArgs) const {
  638. auto PTID = getParsedTargetID(DriverArgs);
  639. if (PTID.OptionalTargetID && !PTID.OptionalGPUArch) {
  640. getDriver().Diag(clang::diag::err_drv_bad_target_id)
  641. << PTID.OptionalTargetID.getValue();
  642. }
  643. }
  644. llvm::Error
  645. AMDGPUToolChain::detectSystemGPUs(const ArgList &Args,
  646. SmallVector<std::string, 1> &GPUArchs) const {
  647. std::string Program;
  648. if (Arg *A = Args.getLastArg(options::OPT_amdgpu_arch_tool_EQ))
  649. Program = A->getValue();
  650. else
  651. Program = GetProgramPath(AMDGPU_ARCH_PROGRAM_NAME);
  652. llvm::SmallString<64> OutputFile;
  653. llvm::sys::fs::createTemporaryFile("print-system-gpus", "" /* No Suffix */,
  654. OutputFile);
  655. llvm::FileRemover OutputRemover(OutputFile.c_str());
  656. llvm::Optional<llvm::StringRef> Redirects[] = {
  657. {""},
  658. OutputFile.str(),
  659. {""},
  660. };
  661. std::string ErrorMessage;
  662. if (int Result = llvm::sys::ExecuteAndWait(
  663. Program, {}, {}, Redirects, /* SecondsToWait */ 0,
  664. /*MemoryLimit*/ 0, &ErrorMessage)) {
  665. if (Result > 0) {
  666. ErrorMessage = "Exited with error code " + std::to_string(Result);
  667. } else if (Result == -1) {
  668. ErrorMessage = "Execute failed: " + ErrorMessage;
  669. } else {
  670. ErrorMessage = "Crashed: " + ErrorMessage;
  671. }
  672. return llvm::createStringError(std::error_code(),
  673. Program + ": " + ErrorMessage);
  674. }
  675. llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> OutputBuf =
  676. llvm::MemoryBuffer::getFile(OutputFile.c_str());
  677. if (!OutputBuf) {
  678. return llvm::createStringError(OutputBuf.getError(),
  679. "Failed to read stdout of " + Program +
  680. ": " + OutputBuf.getError().message());
  681. }
  682. for (llvm::line_iterator LineIt(**OutputBuf); !LineIt.is_at_end(); ++LineIt) {
  683. GPUArchs.push_back(LineIt->str());
  684. }
  685. return llvm::Error::success();
  686. }
  687. llvm::Error AMDGPUToolChain::getSystemGPUArch(const ArgList &Args,
  688. std::string &GPUArch) const {
  689. // detect the AMDGPU installed in system
  690. SmallVector<std::string, 1> GPUArchs;
  691. auto Err = detectSystemGPUs(Args, GPUArchs);
  692. if (Err) {
  693. return Err;
  694. }
  695. if (GPUArchs.empty()) {
  696. return llvm::createStringError(std::error_code(),
  697. "No AMD GPU detected in the system");
  698. }
  699. GPUArch = GPUArchs[0];
  700. if (GPUArchs.size() > 1) {
  701. bool AllSame = llvm::all_of(GPUArchs, [&](const StringRef &GPUArch) {
  702. return GPUArch == GPUArchs.front();
  703. });
  704. if (!AllSame)
  705. return llvm::createStringError(
  706. std::error_code(), "Multiple AMD GPUs found with different archs");
  707. }
  708. return llvm::Error::success();
  709. }
  710. void ROCMToolChain::addClangTargetOptions(
  711. const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args,
  712. Action::OffloadKind DeviceOffloadingKind) const {
  713. AMDGPUToolChain::addClangTargetOptions(DriverArgs, CC1Args,
  714. DeviceOffloadingKind);
  715. // For the OpenCL case where there is no offload target, accept -nostdlib to
  716. // disable bitcode linking.
  717. if (DeviceOffloadingKind == Action::OFK_None &&
  718. DriverArgs.hasArg(options::OPT_nostdlib))
  719. return;
  720. if (DriverArgs.hasArg(options::OPT_nogpulib))
  721. return;
  722. if (!RocmInstallation.hasDeviceLibrary()) {
  723. getDriver().Diag(diag::err_drv_no_rocm_device_lib) << 0;
  724. return;
  725. }
  726. // Get the device name and canonicalize it
  727. const StringRef GpuArch = getGPUArch(DriverArgs);
  728. auto Kind = llvm::AMDGPU::parseArchAMDGCN(GpuArch);
  729. const StringRef CanonArch = llvm::AMDGPU::getArchNameAMDGCN(Kind);
  730. std::string LibDeviceFile = RocmInstallation.getLibDeviceFile(CanonArch);
  731. if (LibDeviceFile.empty()) {
  732. getDriver().Diag(diag::err_drv_no_rocm_device_lib) << 1 << GpuArch;
  733. return;
  734. }
  735. bool Wave64 = isWave64(DriverArgs, Kind);
  736. // TODO: There are way too many flags that change this. Do we need to check
  737. // them all?
  738. bool DAZ = DriverArgs.hasArg(options::OPT_cl_denorms_are_zero) ||
  739. getDefaultDenormsAreZeroForTarget(Kind);
  740. bool FiniteOnly = DriverArgs.hasArg(options::OPT_cl_finite_math_only);
  741. bool UnsafeMathOpt =
  742. DriverArgs.hasArg(options::OPT_cl_unsafe_math_optimizations);
  743. bool FastRelaxedMath = DriverArgs.hasArg(options::OPT_cl_fast_relaxed_math);
  744. bool CorrectSqrt =
  745. DriverArgs.hasArg(options::OPT_cl_fp32_correctly_rounded_divide_sqrt);
  746. // Add the OpenCL specific bitcode library.
  747. llvm::SmallVector<std::string, 12> BCLibs;
  748. BCLibs.push_back(RocmInstallation.getOpenCLPath().str());
  749. // Add the generic set of libraries.
  750. BCLibs.append(RocmInstallation.getCommonBitcodeLibs(
  751. DriverArgs, LibDeviceFile, Wave64, DAZ, FiniteOnly, UnsafeMathOpt,
  752. FastRelaxedMath, CorrectSqrt));
  753. llvm::for_each(BCLibs, [&](StringRef BCFile) {
  754. CC1Args.push_back("-mlink-builtin-bitcode");
  755. CC1Args.push_back(DriverArgs.MakeArgString(BCFile));
  756. });
  757. }
  758. llvm::SmallVector<std::string, 12>
  759. RocmInstallationDetector::getCommonBitcodeLibs(
  760. const llvm::opt::ArgList &DriverArgs, StringRef LibDeviceFile, bool Wave64,
  761. bool DAZ, bool FiniteOnly, bool UnsafeMathOpt, bool FastRelaxedMath,
  762. bool CorrectSqrt) const {
  763. llvm::SmallVector<std::string, 12> BCLibs;
  764. auto AddBCLib = [&](StringRef BCFile) { BCLibs.push_back(BCFile.str()); };
  765. AddBCLib(getOCMLPath());
  766. AddBCLib(getOCKLPath());
  767. AddBCLib(getDenormalsAreZeroPath(DAZ));
  768. AddBCLib(getUnsafeMathPath(UnsafeMathOpt || FastRelaxedMath));
  769. AddBCLib(getFiniteOnlyPath(FiniteOnly || FastRelaxedMath));
  770. AddBCLib(getCorrectlyRoundedSqrtPath(CorrectSqrt));
  771. AddBCLib(getWavefrontSize64Path(Wave64));
  772. AddBCLib(LibDeviceFile);
  773. return BCLibs;
  774. }
  775. bool AMDGPUToolChain::shouldSkipArgument(const llvm::opt::Arg *A) const {
  776. Option O = A->getOption();
  777. if (O.matches(options::OPT_fPIE) || O.matches(options::OPT_fpie))
  778. return true;
  779. return false;
  780. }
  781. llvm::SmallVector<std::string, 12>
  782. ROCMToolChain::getCommonDeviceLibNames(const llvm::opt::ArgList &DriverArgs,
  783. const std::string &GPUArch) const {
  784. auto Kind = llvm::AMDGPU::parseArchAMDGCN(GPUArch);
  785. const StringRef CanonArch = llvm::AMDGPU::getArchNameAMDGCN(Kind);
  786. std::string LibDeviceFile = RocmInstallation.getLibDeviceFile(CanonArch);
  787. if (LibDeviceFile.empty()) {
  788. getDriver().Diag(diag::err_drv_no_rocm_device_lib) << 1 << GPUArch;
  789. return {};
  790. }
  791. // If --hip-device-lib is not set, add the default bitcode libraries.
  792. // TODO: There are way too many flags that change this. Do we need to check
  793. // them all?
  794. bool DAZ = DriverArgs.hasFlag(options::OPT_fgpu_flush_denormals_to_zero,
  795. options::OPT_fno_gpu_flush_denormals_to_zero,
  796. getDefaultDenormsAreZeroForTarget(Kind));
  797. bool FiniteOnly = DriverArgs.hasFlag(
  798. options::OPT_ffinite_math_only, options::OPT_fno_finite_math_only, false);
  799. bool UnsafeMathOpt =
  800. DriverArgs.hasFlag(options::OPT_funsafe_math_optimizations,
  801. options::OPT_fno_unsafe_math_optimizations, false);
  802. bool FastRelaxedMath = DriverArgs.hasFlag(options::OPT_ffast_math,
  803. options::OPT_fno_fast_math, false);
  804. bool CorrectSqrt = DriverArgs.hasFlag(
  805. options::OPT_fhip_fp32_correctly_rounded_divide_sqrt,
  806. options::OPT_fno_hip_fp32_correctly_rounded_divide_sqrt);
  807. bool Wave64 = isWave64(DriverArgs, Kind);
  808. return RocmInstallation.getCommonBitcodeLibs(
  809. DriverArgs, LibDeviceFile, Wave64, DAZ, FiniteOnly, UnsafeMathOpt,
  810. FastRelaxedMath, CorrectSqrt);
  811. }