NVPTXAsmPrinter.cpp

  1. //===-- NVPTXAsmPrinter.cpp - NVPTX LLVM assembly writer ------------------===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. // This file contains a printer that converts from our internal representation
  10. // of machine-dependent LLVM code to NVPTX assembly language.
  11. //
  12. //===----------------------------------------------------------------------===//
  13. #include "NVPTXAsmPrinter.h"
  14. #include "MCTargetDesc/NVPTXBaseInfo.h"
  15. #include "MCTargetDesc/NVPTXInstPrinter.h"
  16. #include "MCTargetDesc/NVPTXMCAsmInfo.h"
  17. #include "MCTargetDesc/NVPTXTargetStreamer.h"
  18. #include "NVPTX.h"
  19. #include "NVPTXMCExpr.h"
  20. #include "NVPTXMachineFunctionInfo.h"
  21. #include "NVPTXRegisterInfo.h"
  22. #include "NVPTXSubtarget.h"
  23. #include "NVPTXTargetMachine.h"
  24. #include "NVPTXUtilities.h"
  25. #include "TargetInfo/NVPTXTargetInfo.h"
  26. #include "cl_common_defines.h"
  27. #include "llvm/ADT/APFloat.h"
  28. #include "llvm/ADT/APInt.h"
  29. #include "llvm/ADT/DenseMap.h"
  30. #include "llvm/ADT/DenseSet.h"
  31. #include "llvm/ADT/SmallString.h"
  32. #include "llvm/ADT/SmallVector.h"
  33. #include "llvm/ADT/StringExtras.h"
  34. #include "llvm/ADT/StringRef.h"
  35. #include "llvm/ADT/Triple.h"
  36. #include "llvm/ADT/Twine.h"
  37. #include "llvm/Analysis/ConstantFolding.h"
  38. #include "llvm/CodeGen/Analysis.h"
  39. #include "llvm/CodeGen/MachineBasicBlock.h"
  40. #include "llvm/CodeGen/MachineFrameInfo.h"
  41. #include "llvm/CodeGen/MachineFunction.h"
  42. #include "llvm/CodeGen/MachineInstr.h"
  43. #include "llvm/CodeGen/MachineLoopInfo.h"
  44. #include "llvm/CodeGen/MachineModuleInfo.h"
  45. #include "llvm/CodeGen/MachineOperand.h"
  46. #include "llvm/CodeGen/MachineRegisterInfo.h"
  47. #include "llvm/CodeGen/TargetLowering.h"
  48. #include "llvm/CodeGen/TargetRegisterInfo.h"
  49. #include "llvm/CodeGen/ValueTypes.h"
  50. #include "llvm/IR/Attributes.h"
  51. #include "llvm/IR/BasicBlock.h"
  52. #include "llvm/IR/Constant.h"
  53. #include "llvm/IR/Constants.h"
  54. #include "llvm/IR/DataLayout.h"
  55. #include "llvm/IR/DebugInfo.h"
  56. #include "llvm/IR/DebugInfoMetadata.h"
  57. #include "llvm/IR/DebugLoc.h"
  58. #include "llvm/IR/DerivedTypes.h"
  59. #include "llvm/IR/Function.h"
  60. #include "llvm/IR/GlobalValue.h"
  61. #include "llvm/IR/GlobalVariable.h"
  62. #include "llvm/IR/Instruction.h"
  63. #include "llvm/IR/LLVMContext.h"
  64. #include "llvm/IR/Module.h"
  65. #include "llvm/IR/Operator.h"
  66. #include "llvm/IR/Type.h"
  67. #include "llvm/IR/User.h"
  68. #include "llvm/MC/MCExpr.h"
  69. #include "llvm/MC/MCInst.h"
  70. #include "llvm/MC/MCInstrDesc.h"
  71. #include "llvm/MC/MCStreamer.h"
  72. #include "llvm/MC/MCSymbol.h"
  73. #include "llvm/MC/TargetRegistry.h"
  74. #include "llvm/Support/Casting.h"
  75. #include "llvm/Support/CommandLine.h"
  76. #include "llvm/Support/ErrorHandling.h"
  77. #include "llvm/Support/MachineValueType.h"
  78. #include "llvm/Support/Path.h"
  79. #include "llvm/Support/raw_ostream.h"
  80. #include "llvm/Target/TargetLoweringObjectFile.h"
  81. #include "llvm/Target/TargetMachine.h"
  82. #include "llvm/Transforms/Utils/UnrollLoop.h"
  83. #include <cassert>
  84. #include <cstdint>
  85. #include <cstring>
  86. #include <new>
  87. #include <string>
  88. #include <utility>
  89. #include <vector>
  90. using namespace llvm;
  91. #define DEPOTNAME "__local_depot"
  92. /// DiscoverDependentGlobals - Return a set of GlobalVariables on which \p V
  93. /// depends.
  94. static void
  95. DiscoverDependentGlobals(const Value *V,
  96. DenseSet<const GlobalVariable *> &Globals) {
  97. if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
  98. Globals.insert(GV);
  99. else {
  100. if (const User *U = dyn_cast<User>(V)) {
  101. for (unsigned i = 0, e = U->getNumOperands(); i != e; ++i) {
  102. DiscoverDependentGlobals(U->getOperand(i), Globals);
  103. }
  104. }
  105. }
  106. }
  107. /// VisitGlobalVariableForEmission - Add \p GV to the list of GlobalVariable
  108. /// instances to be emitted, but only after any dependents have been added
  109. /// first.
  110. static void
  111. VisitGlobalVariableForEmission(const GlobalVariable *GV,
  112. SmallVectorImpl<const GlobalVariable *> &Order,
  113. DenseSet<const GlobalVariable *> &Visited,
  114. DenseSet<const GlobalVariable *> &Visiting) {
  115. // Have we already visited this one?
  116. if (Visited.count(GV))
  117. return;
  118. // Do we have a circular dependency?
  119. if (!Visiting.insert(GV).second)
  120. report_fatal_error("Circular dependency found in global variable set");
  121. // Make sure we visit all dependents first
  122. DenseSet<const GlobalVariable *> Others;
  123. for (unsigned i = 0, e = GV->getNumOperands(); i != e; ++i)
  124. DiscoverDependentGlobals(GV->getOperand(i), Others);
  125. for (const GlobalVariable *GV : Others)
  126. VisitGlobalVariableForEmission(GV, Order, Visited, Visiting);
  127. // Now we can visit ourself
  128. Order.push_back(GV);
  129. Visited.insert(GV);
  130. Visiting.erase(GV);
  131. }
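  // Illustrative ordering example (hypothetical globals @a and @b): if @a's
  // initializer contains a pointer to @b, DiscoverDependentGlobals(@a) finds
  // @b, so the recursion above pushes @b onto Order before @a. Since ptxas
  // does not accept forward references, this def-before-use order is what
  // emitGlobals() below relies on when printing module-level variables.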
  132. void NVPTXAsmPrinter::emitInstruction(const MachineInstr *MI) {
  133. MCInst Inst;
  134. lowerToMCInst(MI, Inst);
  135. EmitToStreamer(*OutStreamer, Inst);
  136. }
  137. // Handle symbol backtracking for targets that do not support image handles
  138. bool NVPTXAsmPrinter::lowerImageHandleOperand(const MachineInstr *MI,
  139. unsigned OpNo, MCOperand &MCOp) {
  140. const MachineOperand &MO = MI->getOperand(OpNo);
  141. const MCInstrDesc &MCID = MI->getDesc();
  142. if (MCID.TSFlags & NVPTXII::IsTexFlag) {
  143. // This is a texture fetch, so operand 4 is a texref and operand 5 is
  144. // a samplerref
  145. if (OpNo == 4 && MO.isImm()) {
  146. lowerImageHandleSymbol(MO.getImm(), MCOp);
  147. return true;
  148. }
  149. if (OpNo == 5 && MO.isImm() && !(MCID.TSFlags & NVPTXII::IsTexModeUnifiedFlag)) {
  150. lowerImageHandleSymbol(MO.getImm(), MCOp);
  151. return true;
  152. }
  153. return false;
  154. } else if (MCID.TSFlags & NVPTXII::IsSuldMask) {
  155. unsigned VecSize =
  156. 1 << (((MCID.TSFlags & NVPTXII::IsSuldMask) >> NVPTXII::IsSuldShift) - 1);
  157. // For a surface load of vector size N, the Nth operand will be the surfref
  158. if (OpNo == VecSize && MO.isImm()) {
  159. lowerImageHandleSymbol(MO.getImm(), MCOp);
  160. return true;
  161. }
  162. return false;
  163. } else if (MCID.TSFlags & NVPTXII::IsSustFlag) {
  164. // This is a surface store, so operand 0 is a surfref
  165. if (OpNo == 0 && MO.isImm()) {
  166. lowerImageHandleSymbol(MO.getImm(), MCOp);
  167. return true;
  168. }
  169. return false;
  170. } else if (MCID.TSFlags & NVPTXII::IsSurfTexQueryFlag) {
  171. // This is a query, so operand 1 is a surfref/texref
  172. if (OpNo == 1 && MO.isImm()) {
  173. lowerImageHandleSymbol(MO.getImm(), MCOp);
  174. return true;
  175. }
  176. return false;
  177. }
  178. return false;
  179. }
  180. void NVPTXAsmPrinter::lowerImageHandleSymbol(unsigned Index, MCOperand &MCOp) {
  181. // Ewwww
  182. LLVMTargetMachine &TM = const_cast<LLVMTargetMachine&>(MF->getTarget());
  183. NVPTXTargetMachine &nvTM = static_cast<NVPTXTargetMachine&>(TM);
  184. const NVPTXMachineFunctionInfo *MFI = MF->getInfo<NVPTXMachineFunctionInfo>();
  185. const char *Sym = MFI->getImageHandleSymbol(Index);
  186. std::string *SymNamePtr =
  187. nvTM.getManagedStrPool()->getManagedString(Sym);
  188. MCOp = GetSymbolRef(OutContext.getOrCreateSymbol(StringRef(*SymNamePtr)));
  189. }
  190. void NVPTXAsmPrinter::lowerToMCInst(const MachineInstr *MI, MCInst &OutMI) {
  191. OutMI.setOpcode(MI->getOpcode());
  192. // Special: Do not mangle symbol operand of CALL_PROTOTYPE
  193. if (MI->getOpcode() == NVPTX::CALL_PROTOTYPE) {
  194. const MachineOperand &MO = MI->getOperand(0);
  195. OutMI.addOperand(GetSymbolRef(
  196. OutContext.getOrCreateSymbol(Twine(MO.getSymbolName()))));
  197. return;
  198. }
  199. const NVPTXSubtarget &STI = MI->getMF()->getSubtarget<NVPTXSubtarget>();
  200. for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
  201. const MachineOperand &MO = MI->getOperand(i);
  202. MCOperand MCOp;
  203. if (!STI.hasImageHandles()) {
  204. if (lowerImageHandleOperand(MI, i, MCOp)) {
  205. OutMI.addOperand(MCOp);
  206. continue;
  207. }
  208. }
  209. if (lowerOperand(MO, MCOp))
  210. OutMI.addOperand(MCOp);
  211. }
  212. }
  213. bool NVPTXAsmPrinter::lowerOperand(const MachineOperand &MO,
  214. MCOperand &MCOp) {
  215. switch (MO.getType()) {
  216. default: llvm_unreachable("unknown operand type");
  217. case MachineOperand::MO_Register:
  218. MCOp = MCOperand::createReg(encodeVirtualRegister(MO.getReg()));
  219. break;
  220. case MachineOperand::MO_Immediate:
  221. MCOp = MCOperand::createImm(MO.getImm());
  222. break;
  223. case MachineOperand::MO_MachineBasicBlock:
  224. MCOp = MCOperand::createExpr(MCSymbolRefExpr::create(
  225. MO.getMBB()->getSymbol(), OutContext));
  226. break;
  227. case MachineOperand::MO_ExternalSymbol:
  228. MCOp = GetSymbolRef(GetExternalSymbolSymbol(MO.getSymbolName()));
  229. break;
  230. case MachineOperand::MO_GlobalAddress:
  231. MCOp = GetSymbolRef(getSymbol(MO.getGlobal()));
  232. break;
  233. case MachineOperand::MO_FPImmediate: {
  234. const ConstantFP *Cnt = MO.getFPImm();
  235. const APFloat &Val = Cnt->getValueAPF();
  236. switch (Cnt->getType()->getTypeID()) {
  237. default: report_fatal_error("Unsupported FP type"); break;
  238. case Type::HalfTyID:
  239. MCOp = MCOperand::createExpr(
  240. NVPTXFloatMCExpr::createConstantFPHalf(Val, OutContext));
  241. break;
  242. case Type::FloatTyID:
  243. MCOp = MCOperand::createExpr(
  244. NVPTXFloatMCExpr::createConstantFPSingle(Val, OutContext));
  245. break;
  246. case Type::DoubleTyID:
  247. MCOp = MCOperand::createExpr(
  248. NVPTXFloatMCExpr::createConstantFPDouble(Val, OutContext));
  249. break;
  250. }
  251. break;
  252. }
  253. }
  254. return true;
  255. }
  256. unsigned NVPTXAsmPrinter::encodeVirtualRegister(unsigned Reg) {
  257. if (Register::isVirtualRegister(Reg)) {
  258. const TargetRegisterClass *RC = MRI->getRegClass(Reg);
  259. DenseMap<unsigned, unsigned> &RegMap = VRegMapping[RC];
  260. unsigned RegNum = RegMap[Reg];
  261. // Encode the register class in the upper 4 bits
  262. // Must be kept in sync with NVPTXInstPrinter::printRegName
  263. unsigned Ret = 0;
  264. if (RC == &NVPTX::Int1RegsRegClass) {
  265. Ret = (1 << 28);
  266. } else if (RC == &NVPTX::Int16RegsRegClass) {
  267. Ret = (2 << 28);
  268. } else if (RC == &NVPTX::Int32RegsRegClass) {
  269. Ret = (3 << 28);
  270. } else if (RC == &NVPTX::Int64RegsRegClass) {
  271. Ret = (4 << 28);
  272. } else if (RC == &NVPTX::Float32RegsRegClass) {
  273. Ret = (5 << 28);
  274. } else if (RC == &NVPTX::Float64RegsRegClass) {
  275. Ret = (6 << 28);
  276. } else if (RC == &NVPTX::Float16RegsRegClass) {
  277. Ret = (7 << 28);
  278. } else if (RC == &NVPTX::Float16x2RegsRegClass) {
  279. Ret = (8 << 28);
  280. } else {
  281. report_fatal_error("Bad register class");
  282. }
  283. // Insert the vreg number
  284. Ret |= (RegNum & 0x0FFFFFFF);
  285. return Ret;
  286. } else {
  287. // Some special-use registers are actually physical registers.
  288. // Encode this as the register class ID of 0 and the real register ID.
  289. return Reg & 0x0FFFFFFF;
  290. }
  291. }
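  // Worked encoding example (illustrative): a virtual register mapped to
  // number 5 in NVPTX::Int32RegsRegClass is encoded as (3 << 28) | 5, i.e.
  // 0x30000005. NVPTXInstPrinter::printRegName recovers the class from the
  // top four bits and the index from the low 28 bits, printing a name such
  // as "%r5".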
  292. MCOperand NVPTXAsmPrinter::GetSymbolRef(const MCSymbol *Symbol) {
  293. const MCExpr *Expr;
  294. Expr = MCSymbolRefExpr::create(Symbol, MCSymbolRefExpr::VK_None,
  295. OutContext);
  296. return MCOperand::createExpr(Expr);
  297. }
  298. void NVPTXAsmPrinter::printReturnValStr(const Function *F, raw_ostream &O) {
  299. const DataLayout &DL = getDataLayout();
  300. const NVPTXSubtarget &STI = TM.getSubtarget<NVPTXSubtarget>(*F);
  301. const TargetLowering *TLI = STI.getTargetLowering();
  302. Type *Ty = F->getReturnType();
  303. bool isABI = (STI.getSmVersion() >= 20);
  304. if (Ty->getTypeID() == Type::VoidTyID)
  305. return;
  306. O << " (";
  307. if (isABI) {
  308. if (Ty->isFloatingPointTy() || (Ty->isIntegerTy() && !Ty->isIntegerTy(128))) {
  309. unsigned size = 0;
  310. if (auto *ITy = dyn_cast<IntegerType>(Ty)) {
  311. size = ITy->getBitWidth();
  312. } else {
  313. assert(Ty->isFloatingPointTy() && "Floating point type expected here");
  314. size = Ty->getPrimitiveSizeInBits();
  315. }
  316. // PTX ABI requires all scalar return values to be at least 32
  317. // bits in size. fp16 normally uses .b16 as its storage type in
  318. // PTX, so its size must be adjusted here, too.
  319. if (size < 32)
  320. size = 32;
  321. O << ".param .b" << size << " func_retval0";
  322. } else if (isa<PointerType>(Ty)) {
  323. O << ".param .b" << TLI->getPointerTy(DL).getSizeInBits()
  324. << " func_retval0";
  325. } else if (Ty->isAggregateType() || Ty->isVectorTy() || Ty->isIntegerTy(128)) {
  326. unsigned totalsz = DL.getTypeAllocSize(Ty);
  327. unsigned retAlignment = 0;
  328. if (!getAlign(*F, 0, retAlignment))
  329. retAlignment = DL.getABITypeAlignment(Ty);
  330. O << ".param .align " << retAlignment << " .b8 func_retval0[" << totalsz
  331. << "]";
  332. } else
  333. llvm_unreachable("Unknown return type");
  334. } else {
  335. SmallVector<EVT, 16> vtparts;
  336. ComputeValueVTs(*TLI, DL, Ty, vtparts);
  337. unsigned idx = 0;
  338. for (unsigned i = 0, e = vtparts.size(); i != e; ++i) {
  339. unsigned elems = 1;
  340. EVT elemtype = vtparts[i];
  341. if (vtparts[i].isVector()) {
  342. elems = vtparts[i].getVectorNumElements();
  343. elemtype = vtparts[i].getVectorElementType();
  344. }
  345. for (unsigned j = 0, je = elems; j != je; ++j) {
  346. unsigned sz = elemtype.getSizeInBits();
  347. if (elemtype.isInteger() && (sz < 32))
  348. sz = 32;
  349. O << ".reg .b" << sz << " func_retval" << idx;
  350. if (j < je - 1)
  351. O << ", ";
  352. ++idx;
  353. }
  354. if (i < e - 1)
  355. O << ", ";
  356. }
  357. }
  358. O << ") ";
  359. }
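  // Illustrative ABI-path output (hypothetical signatures): a function
  // returning float prints " (.param .b32 func_retval0) ", and an i8 return
  // is widened to ".param .b32" as well, per the 32-bit minimum noted above.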
  360. void NVPTXAsmPrinter::printReturnValStr(const MachineFunction &MF,
  361. raw_ostream &O) {
  362. const Function &F = MF.getFunction();
  363. printReturnValStr(&F, O);
  364. }
  365. // Return true if MBB is the header of a loop marked with
  366. // llvm.loop.unroll.disable.
  367. // TODO: consider "#pragma unroll 1" which is equivalent to "#pragma nounroll".
  368. bool NVPTXAsmPrinter::isLoopHeaderOfNoUnroll(
  369. const MachineBasicBlock &MBB) const {
  370. MachineLoopInfo &LI = getAnalysis<MachineLoopInfo>();
  371. // We insert .pragma "nounroll" only at the loop header.
  372. if (!LI.isLoopHeader(&MBB))
  373. return false;
  374. // llvm.loop.unroll.disable is marked on the back edges of a loop. Therefore,
  375. // we iterate through each back edge of the loop with header MBB, and check
  376. // whether its metadata contains llvm.loop.unroll.disable.
  377. for (const MachineBasicBlock *PMBB : MBB.predecessors()) {
  378. if (LI.getLoopFor(PMBB) != LI.getLoopFor(&MBB)) {
  379. // Edges from other loops to MBB are not back edges.
  380. continue;
  381. }
  382. if (const BasicBlock *PBB = PMBB->getBasicBlock()) {
  383. if (MDNode *LoopID =
  384. PBB->getTerminator()->getMetadata(LLVMContext::MD_loop)) {
  385. if (GetUnrollMetadata(LoopID, "llvm.loop.unroll.disable"))
  386. return true;
  387. }
  388. }
  389. }
  390. return false;
  391. }
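  // Illustrative IR that triggers the check above (assumed, typical
  // front-end output): the latch terminator carries
  //   br i1 %c, label %header, label %exit, !llvm.loop !0
  //   !0 = distinct !{!0, !1}
  //   !1 = !{!"llvm.loop.unroll.disable"}
  // GetUnrollMetadata(!0, "llvm.loop.unroll.disable") then succeeds, and
  // emitBasicBlockStart() below prints '.pragma "nounroll";' at the header.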
  392. void NVPTXAsmPrinter::emitBasicBlockStart(const MachineBasicBlock &MBB) {
  393. AsmPrinter::emitBasicBlockStart(MBB);
  394. if (isLoopHeaderOfNoUnroll(MBB))
  395. OutStreamer->emitRawText(StringRef("\t.pragma \"nounroll\";\n"));
  396. }
  397. void NVPTXAsmPrinter::emitFunctionEntryLabel() {
  398. SmallString<128> Str;
  399. raw_svector_ostream O(Str);
  400. if (!GlobalsEmitted) {
  401. emitGlobals(*MF->getFunction().getParent());
  402. GlobalsEmitted = true;
  403. }
  404. // Set up
  405. MRI = &MF->getRegInfo();
  406. F = &MF->getFunction();
  407. emitLinkageDirective(F, O);
  408. if (isKernelFunction(*F))
  409. O << ".entry ";
  410. else {
  411. O << ".func ";
  412. printReturnValStr(*MF, O);
  413. }
  414. CurrentFnSym->print(O, MAI);
  415. emitFunctionParamList(*MF, O);
  416. if (isKernelFunction(*F))
  417. emitKernelFunctionDirectives(*F, O);
  418. OutStreamer->emitRawText(O.str());
  419. VRegMapping.clear();
  420. // Emit open brace for function body.
  421. OutStreamer->emitRawText(StringRef("{\n"));
  422. setAndEmitFunctionVirtualRegisters(*MF);
  423. // Emit initial .loc debug directive for correct relocation symbol data.
  424. if (MMI && MMI->hasDebugInfo())
  425. emitInitialRawDwarfLocDirective(*MF);
  426. }
  427. bool NVPTXAsmPrinter::runOnMachineFunction(MachineFunction &F) {
  428. bool Result = AsmPrinter::runOnMachineFunction(F);
  429. // Emit closing brace for the body of function F.
  430. // The closing brace must be emitted here because we need to emit additional
  431. // debug labels/data after the last basic block.
  432. // We need to emit the closing brace here because there is no hook that runs
  433. // after emission of the function body has finished.
  434. OutStreamer->emitRawText(StringRef("}\n"));
  435. return Result;
  436. }
  437. void NVPTXAsmPrinter::emitFunctionBodyStart() {
  438. SmallString<128> Str;
  439. raw_svector_ostream O(Str);
  440. emitDemotedVars(&MF->getFunction(), O);
  441. OutStreamer->emitRawText(O.str());
  442. }
  443. void NVPTXAsmPrinter::emitFunctionBodyEnd() {
  444. VRegMapping.clear();
  445. }
  446. const MCSymbol *NVPTXAsmPrinter::getFunctionFrameSymbol() const {
  447. SmallString<128> Str;
  448. raw_svector_ostream(Str) << DEPOTNAME << getFunctionNumber();
  449. return OutContext.getOrCreateSymbol(Str);
  450. }
  451. void NVPTXAsmPrinter::emitImplicitDef(const MachineInstr *MI) const {
  452. Register RegNo = MI->getOperand(0).getReg();
  453. if (Register::isVirtualRegister(RegNo)) {
  454. OutStreamer->AddComment(Twine("implicit-def: ") +
  455. getVirtualRegisterName(RegNo));
  456. } else {
  457. const NVPTXSubtarget &STI = MI->getMF()->getSubtarget<NVPTXSubtarget>();
  458. OutStreamer->AddComment(Twine("implicit-def: ") +
  459. STI.getRegisterInfo()->getName(RegNo));
  460. }
  461. OutStreamer->AddBlankLine();
  462. }
  463. void NVPTXAsmPrinter::emitKernelFunctionDirectives(const Function &F,
  464. raw_ostream &O) const {
  465. // If the NVVM IR has some of reqntid* specified, then output
  466. // the reqntid directive, and set the unspecified ones to 1.
  467. // If none of reqntid* is specified, don't output the reqntid directive.
  468. unsigned reqntidx, reqntidy, reqntidz;
  469. bool specified = false;
  470. if (!getReqNTIDx(F, reqntidx))
  471. reqntidx = 1;
  472. else
  473. specified = true;
  474. if (!getReqNTIDy(F, reqntidy))
  475. reqntidy = 1;
  476. else
  477. specified = true;
  478. if (!getReqNTIDz(F, reqntidz))
  479. reqntidz = 1;
  480. else
  481. specified = true;
  482. if (specified)
  483. O << ".reqntid " << reqntidx << ", " << reqntidy << ", " << reqntidz
  484. << "\n";
  485. // If the NVVM IR has some of maxntid* specified, then output
  486. // the maxntid directive, and set the unspecified ones to 1.
  487. // If none of maxntid* is specified, don't output the maxntid directive.
  488. unsigned maxntidx, maxntidy, maxntidz;
  489. specified = false;
  490. if (!getMaxNTIDx(F, maxntidx))
  491. maxntidx = 1;
  492. else
  493. specified = true;
  494. if (!getMaxNTIDy(F, maxntidy))
  495. maxntidy = 1;
  496. else
  497. specified = true;
  498. if (!getMaxNTIDz(F, maxntidz))
  499. maxntidz = 1;
  500. else
  501. specified = true;
  502. if (specified)
  503. O << ".maxntid " << maxntidx << ", " << maxntidy << ", " << maxntidz
  504. << "\n";
  505. unsigned mincta;
  506. if (getMinCTASm(F, mincta))
  507. O << ".minnctapersm " << mincta << "\n";
  508. unsigned maxnreg;
  509. if (getMaxNReg(F, maxnreg))
  510. O << ".maxnreg " << maxnreg << "\n";
  511. }
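  // Illustrative directive output (assuming the kernel's annotations request
  // maxntidx = 256 and a minimum of 2 CTAs per SM):
  //   .maxntid 256, 1, 1
  //   .minnctapersm 2
  // Unspecified maxntid/reqntid components default to 1, as described above.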
  512. std::string
  513. NVPTXAsmPrinter::getVirtualRegisterName(unsigned Reg) const {
  514. const TargetRegisterClass *RC = MRI->getRegClass(Reg);
  515. std::string Name;
  516. raw_string_ostream NameStr(Name);
  517. VRegRCMap::const_iterator I = VRegMapping.find(RC);
  518. assert(I != VRegMapping.end() && "Bad register class");
  519. const DenseMap<unsigned, unsigned> &RegMap = I->second;
  520. VRegMap::const_iterator VI = RegMap.find(Reg);
  521. assert(VI != RegMap.end() && "Bad virtual register");
  522. unsigned MappedVR = VI->second;
  523. NameStr << getNVPTXRegClassStr(RC) << MappedVR;
  524. NameStr.flush();
  525. return Name;
  526. }
  527. void NVPTXAsmPrinter::emitVirtualRegister(unsigned int vr,
  528. raw_ostream &O) {
  529. O << getVirtualRegisterName(vr);
  530. }
  531. void NVPTXAsmPrinter::emitDeclaration(const Function *F, raw_ostream &O) {
  532. emitLinkageDirective(F, O);
  533. if (isKernelFunction(*F))
  534. O << ".entry ";
  535. else
  536. O << ".func ";
  537. printReturnValStr(F, O);
  538. getSymbol(F)->print(O, MAI);
  539. O << "\n";
  540. emitFunctionParamList(F, O);
  541. O << ";\n";
  542. }
  543. static bool usedInGlobalVarDef(const Constant *C) {
  544. if (!C)
  545. return false;
  546. if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(C)) {
  547. return GV->getName() != "llvm.used";
  548. }
  549. for (const User *U : C->users())
  550. if (const Constant *C = dyn_cast<Constant>(U))
  551. if (usedInGlobalVarDef(C))
  552. return true;
  553. return false;
  554. }
  555. static bool usedInOneFunc(const User *U, Function const *&oneFunc) {
  556. if (const GlobalVariable *othergv = dyn_cast<GlobalVariable>(U)) {
  557. if (othergv->getName() == "llvm.used")
  558. return true;
  559. }
  560. if (const Instruction *instr = dyn_cast<Instruction>(U)) {
  561. if (instr->getParent() && instr->getParent()->getParent()) {
  562. const Function *curFunc = instr->getParent()->getParent();
  563. if (oneFunc && (curFunc != oneFunc))
  564. return false;
  565. oneFunc = curFunc;
  566. return true;
  567. } else
  568. return false;
  569. }
  570. for (const User *UU : U->users())
  571. if (!usedInOneFunc(UU, oneFunc))
  572. return false;
  573. return true;
  574. }
  575. /* Find out if a global variable can be demoted to local scope.
  576. * Currently, this is valid for CUDA shared variables, which have local
  577. * scope and global lifetime. So the conditions to check are :
  578. * 1. Is the global variable in shared address space?
  579. * 2. Does it have internal linkage?
  580. * 3. Is the global variable referenced only in one function?
  581. */
  582. static bool canDemoteGlobalVar(const GlobalVariable *gv, Function const *&f) {
  583. if (!gv->hasInternalLinkage())
  584. return false;
  585. PointerType *Pty = gv->getType();
  586. if (Pty->getAddressSpace() != ADDRESS_SPACE_SHARED)
  587. return false;
  588. const Function *oneFunc = nullptr;
  589. bool flag = usedInOneFunc(gv, oneFunc);
  590. if (!flag)
  591. return false;
  592. if (!oneFunc)
  593. return false;
  594. f = oneFunc;
  595. return true;
  596. }
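  // Illustrative case (hypothetical variable): an internal-linkage
  // addrspace(3) global, e.g. a CUDA __shared__ array referenced by exactly
  // one kernel, satisfies all three conditions. printModuleLevelGV() then
  // records it in localDecls instead of emitting it at module scope, and
  // emitDemotedVars() re-emits it inside that kernel's body.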
  597. static bool useFuncSeen(const Constant *C,
  598. DenseMap<const Function *, bool> &seenMap) {
  599. for (const User *U : C->users()) {
  600. if (const Constant *cu = dyn_cast<Constant>(U)) {
  601. if (useFuncSeen(cu, seenMap))
  602. return true;
  603. } else if (const Instruction *I = dyn_cast<Instruction>(U)) {
  604. const BasicBlock *bb = I->getParent();
  605. if (!bb)
  606. continue;
  607. const Function *caller = bb->getParent();
  608. if (!caller)
  609. continue;
  610. if (seenMap.find(caller) != seenMap.end())
  611. return true;
  612. }
  613. }
  614. return false;
  615. }
  616. void NVPTXAsmPrinter::emitDeclarations(const Module &M, raw_ostream &O) {
  617. DenseMap<const Function *, bool> seenMap;
  618. for (const Function &F : M) {
  619. if (F.getAttributes().hasFnAttr("nvptx-libcall-callee")) {
  620. emitDeclaration(&F, O);
  621. continue;
  622. }
  623. if (F.isDeclaration()) {
  624. if (F.use_empty())
  625. continue;
  626. if (F.getIntrinsicID())
  627. continue;
  628. emitDeclaration(&F, O);
  629. continue;
  630. }
  631. for (const User *U : F.users()) {
  632. if (const Constant *C = dyn_cast<Constant>(U)) {
  633. if (usedInGlobalVarDef(C)) {
  634. // The use is in the initialization of a global variable
  635. // that is a function pointer, so print a declaration
  636. // for the original function
  637. emitDeclaration(&F, O);
  638. break;
  639. }
  640. // Emit a declaration of this function if the function that
  641. // uses this constant expr has already been seen.
  642. if (useFuncSeen(C, seenMap)) {
  643. emitDeclaration(&F, O);
  644. break;
  645. }
  646. }
  647. if (!isa<Instruction>(U))
  648. continue;
  649. const Instruction *instr = cast<Instruction>(U);
  650. const BasicBlock *bb = instr->getParent();
  651. if (!bb)
  652. continue;
  653. const Function *caller = bb->getParent();
  654. if (!caller)
  655. continue;
  656. // If a caller has already been seen, then the caller appears in the
  657. // module before the callee, so print out a declaration for the
  658. // callee.
  659. if (seenMap.find(caller) != seenMap.end()) {
  660. emitDeclaration(&F, O);
  661. break;
  662. }
  663. }
  664. seenMap[&F] = true;
  665. }
  666. }
  667. static bool isEmptyXXStructor(GlobalVariable *GV) {
  668. if (!GV) return true;
  669. const ConstantArray *InitList = dyn_cast<ConstantArray>(GV->getInitializer());
  670. if (!InitList) return true; // Not an array; we don't know how to parse.
  671. return InitList->getNumOperands() == 0;
  672. }
  673. void NVPTXAsmPrinter::emitStartOfAsmFile(Module &M) {
  674. // Construct a default subtarget from the TargetMachine defaults. The
  675. // rest of NVPTX does not cope well with changing subtargets per function,
  676. // so the default TargetMachine will have all of the options.
  677. const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM);
  678. const auto* STI = static_cast<const NVPTXSubtarget*>(NTM.getSubtargetImpl());
  679. SmallString<128> Str1;
  680. raw_svector_ostream OS1(Str1);
  681. // Emit header before any dwarf directives are emitted below.
  682. emitHeader(M, OS1, *STI);
  683. OutStreamer->emitRawText(OS1.str());
  684. }
  685. bool NVPTXAsmPrinter::doInitialization(Module &M) {
  686. if (M.alias_size()) {
  687. report_fatal_error("Module has aliases, which NVPTX does not support.");
  688. return true; // error
  689. }
  690. if (!isEmptyXXStructor(M.getNamedGlobal("llvm.global_ctors"))) {
  691. report_fatal_error(
  692. "Module has a nontrivial global ctor, which NVPTX does not support.");
  693. return true; // error
  694. }
  695. if (!isEmptyXXStructor(M.getNamedGlobal("llvm.global_dtors"))) {
  696. report_fatal_error(
  697. "Module has a nontrivial global dtor, which NVPTX does not support.");
  698. return true; // error
  699. }
  700. // We need to call the parent's one explicitly.
  701. bool Result = AsmPrinter::doInitialization(M);
  702. GlobalsEmitted = false;
  703. return Result;
  704. }
  705. void NVPTXAsmPrinter::emitGlobals(const Module &M) {
  706. SmallString<128> Str2;
  707. raw_svector_ostream OS2(Str2);
  708. emitDeclarations(M, OS2);
  709. // As ptxas does not support forward references of globals, we need to first
  710. // sort the list of module-level globals in def-use order. We visit each
  711. // global variable in order, and ensure that we emit it *after* its dependent
  712. // globals. We use a little extra memory maintaining both a set and a list to
  713. // have fast searches while maintaining a strict ordering.
  714. SmallVector<const GlobalVariable *, 8> Globals;
  715. DenseSet<const GlobalVariable *> GVVisited;
  716. DenseSet<const GlobalVariable *> GVVisiting;
  717. // Visit each global variable, in order
  718. for (const GlobalVariable &I : M.globals())
  719. VisitGlobalVariableForEmission(&I, Globals, GVVisited, GVVisiting);
  720. assert(GVVisited.size() == M.getGlobalList().size() &&
  721. "Missed a global variable");
  722. assert(GVVisiting.size() == 0 && "Did not fully process a global variable");
  723. // Print out module-level global variables in proper order
  724. for (unsigned i = 0, e = Globals.size(); i != e; ++i)
  725. printModuleLevelGV(Globals[i], OS2);
  726. OS2 << '\n';
  727. OutStreamer->emitRawText(OS2.str());
  728. }
  729. void NVPTXAsmPrinter::emitHeader(Module &M, raw_ostream &O,
  730. const NVPTXSubtarget &STI) {
  731. O << "//\n";
  732. O << "// Generated by LLVM NVPTX Back-End\n";
  733. O << "//\n";
  734. O << "\n";
  735. unsigned PTXVersion = STI.getPTXVersion();
  736. O << ".version " << (PTXVersion / 10) << "." << (PTXVersion % 10) << "\n";
  737. O << ".target ";
  738. O << STI.getTargetName();
  739. const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM);
  740. if (NTM.getDrvInterface() == NVPTX::NVCL)
  741. O << ", texmode_independent";
  742. bool HasFullDebugInfo = false;
  743. for (DICompileUnit *CU : M.debug_compile_units()) {
  744. switch(CU->getEmissionKind()) {
  745. case DICompileUnit::NoDebug:
  746. case DICompileUnit::DebugDirectivesOnly:
  747. break;
  748. case DICompileUnit::LineTablesOnly:
  749. case DICompileUnit::FullDebug:
  750. HasFullDebugInfo = true;
  751. break;
  752. }
  753. if (HasFullDebugInfo)
  754. break;
  755. }
  756. if (MMI && MMI->hasDebugInfo() && HasFullDebugInfo)
  757. O << ", debug";
  758. O << "\n";
  759. O << ".address_size ";
  760. if (NTM.is64Bit())
  761. O << "64";
  762. else
  763. O << "32";
  764. O << "\n";
  765. O << "\n";
  766. }
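  // Example header (illustrative, assuming PTXVersion == 70, an sm_70 target,
  // the CUDA driver interface, 64-bit pointers, and no debug info):
  //   .version 7.0
  //   .target sm_70
  //   .address_size 64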
  767. bool NVPTXAsmPrinter::doFinalization(Module &M) {
  768. bool HasDebugInfo = MMI && MMI->hasDebugInfo();
  769. // If we did not emit any functions, then the global declarations have not
  770. // yet been emitted.
  771. if (!GlobalsEmitted) {
  772. emitGlobals(M);
  773. GlobalsEmitted = true;
  774. }
  775. // call doFinalization
  776. bool ret = AsmPrinter::doFinalization(M);
  777. clearAnnotationCache(&M);
  778. // Close the last emitted section
  779. if (HasDebugInfo) {
  780. static_cast<NVPTXTargetStreamer *>(OutStreamer->getTargetStreamer())
  781. ->closeLastSection();
  782. // Emit empty .debug_loc section for better support of the empty files.
  783. OutStreamer->emitRawText("\t.section\t.debug_loc\t{\t}");
  784. }
  785. // Output last DWARF .file directives, if any.
  786. static_cast<NVPTXTargetStreamer *>(OutStreamer->getTargetStreamer())
  787. ->outputDwarfFileDirectives();
  788. return ret;
  789. //bool Result = AsmPrinter::doFinalization(M);
  790. // Instead of calling the parents doFinalization, we may
  791. // clone parents doFinalization and customize here.
  792. // Currently, we would have to conditionally compile out the EmitGlobals()
  793. // call in the parent's doFinalization, which is too intrusive.
  794. //
  795. // Same for the doInitialization.
  796. //return Result;
  797. }
  798. // This function emits appropriate linkage directives for
  799. // functions and global variables.
  800. //
  801. // extern function declaration -> .extern
  802. // extern function definition -> .visible
  803. // external global variable with init -> .visible
  804. // external without init -> .extern
  805. // appending -> not allowed, assert.
  806. // for any linkage other than
  807. // internal, private, linker_private,
  808. // linker_private_weak, linker_private_weak_def_auto,
  809. // we emit -> .weak.
  810. void NVPTXAsmPrinter::emitLinkageDirective(const GlobalValue *V,
  811. raw_ostream &O) {
  812. if (static_cast<NVPTXTargetMachine &>(TM).getDrvInterface() == NVPTX::CUDA) {
  813. if (V->hasExternalLinkage()) {
  814. if (isa<GlobalVariable>(V)) {
  815. const GlobalVariable *GVar = cast<GlobalVariable>(V);
  816. if (GVar) {
  817. if (GVar->hasInitializer())
  818. O << ".visible ";
  819. else
  820. O << ".extern ";
  821. }
  822. } else if (V->isDeclaration())
  823. O << ".extern ";
  824. else
  825. O << ".visible ";
  826. } else if (V->hasAppendingLinkage()) {
  827. std::string msg;
  828. msg.append("Error: ");
  829. msg.append("Symbol ");
  830. if (V->hasName())
  831. msg.append(std::string(V->getName()));
  832. msg.append(" has unsupported appending linkage type");
  833. llvm_unreachable(msg.c_str());
  834. } else if (!V->hasInternalLinkage() &&
  835. !V->hasPrivateLinkage()) {
  836. O << ".weak ";
  837. }
  838. }
  839. }
  840. void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar,
  841. raw_ostream &O,
  842. bool processDemoted) {
  843. // Skip metadata
  844. if (GVar->hasSection()) {
  845. if (GVar->getSection() == "llvm.metadata")
  846. return;
  847. }
  848. // Skip LLVM intrinsic global variables
  849. if (GVar->getName().startswith("llvm.") ||
  850. GVar->getName().startswith("nvvm."))
  851. return;
  852. const DataLayout &DL = getDataLayout();
  853. // GlobalVariables are always constant pointers themselves.
  854. PointerType *PTy = GVar->getType();
  855. Type *ETy = GVar->getValueType();
  856. if (GVar->hasExternalLinkage()) {
  857. if (GVar->hasInitializer())
  858. O << ".visible ";
  859. else
  860. O << ".extern ";
  861. } else if (GVar->hasLinkOnceLinkage() || GVar->hasWeakLinkage() ||
  862. GVar->hasAvailableExternallyLinkage() ||
  863. GVar->hasCommonLinkage()) {
  864. O << ".weak ";
  865. }
  866. if (isTexture(*GVar)) {
  867. O << ".global .texref " << getTextureName(*GVar) << ";\n";
  868. return;
  869. }
  870. if (isSurface(*GVar)) {
  871. O << ".global .surfref " << getSurfaceName(*GVar) << ";\n";
  872. return;
  873. }
  874. if (GVar->isDeclaration()) {
  875. // (extern) declarations, no definition or initializer
  876. // Currently the only known declaration is for an automatic __local
  877. // (.shared) promoted to global.
  878. emitPTXGlobalVariable(GVar, O);
  879. O << ";\n";
  880. return;
  881. }
  882. if (isSampler(*GVar)) {
  883. O << ".global .samplerref " << getSamplerName(*GVar);
  884. const Constant *Initializer = nullptr;
  885. if (GVar->hasInitializer())
  886. Initializer = GVar->getInitializer();
  887. const ConstantInt *CI = nullptr;
  888. if (Initializer)
  889. CI = dyn_cast<ConstantInt>(Initializer);
  890. if (CI) {
  891. unsigned sample = CI->getZExtValue();
  892. O << " = { ";
  893. for (int i = 0,
  894. addr = ((sample & __CLK_ADDRESS_MASK) >> __CLK_ADDRESS_BASE);
  895. i < 3; i++) {
  896. O << "addr_mode_" << i << " = ";
  897. switch (addr) {
  898. case 0:
  899. O << "wrap";
  900. break;
  901. case 1:
  902. O << "clamp_to_border";
  903. break;
  904. case 2:
  905. O << "clamp_to_edge";
  906. break;
  907. case 3:
  908. O << "wrap";
  909. break;
  910. case 4:
  911. O << "mirror";
  912. break;
  913. }
  914. O << ", ";
  915. }
  916. O << "filter_mode = ";
  917. switch ((sample & __CLK_FILTER_MASK) >> __CLK_FILTER_BASE) {
  918. case 0:
  919. O << "nearest";
  920. break;
  921. case 1:
  922. O << "linear";
  923. break;
  924. case 2:
  925. llvm_unreachable("Anisotropic filtering is not supported");
  926. default:
  927. O << "nearest";
  928. break;
  929. }
  930. if (!((sample & __CLK_NORMALIZED_MASK) >> __CLK_NORMALIZED_BASE)) {
  931. O << ", force_unnormalized_coords = 1";
  932. }
  933. O << " }";
  934. }
  935. O << ";\n";
  936. return;
  937. }
  938. if (GVar->hasPrivateLinkage()) {
  939. if (strncmp(GVar->getName().data(), "unrollpragma", 12) == 0)
  940. return;
  941. // FIXME - need better way (e.g. Metadata) to avoid generating this global
  942. if (strncmp(GVar->getName().data(), "filename", 8) == 0)
  943. return;
  944. if (GVar->use_empty())
  945. return;
  946. }
  947. const Function *demotedFunc = nullptr;
  948. if (!processDemoted && canDemoteGlobalVar(GVar, demotedFunc)) {
  949. O << "// " << GVar->getName() << " has been demoted\n";
  950. if (localDecls.find(demotedFunc) != localDecls.end())
  951. localDecls[demotedFunc].push_back(GVar);
  952. else {
  953. std::vector<const GlobalVariable *> temp;
  954. temp.push_back(GVar);
  955. localDecls[demotedFunc] = temp;
  956. }
  957. return;
  958. }
  959. O << ".";
  960. emitPTXAddressSpace(PTy->getAddressSpace(), O);
  961. if (isManaged(*GVar)) {
  962. O << " .attribute(.managed)";
  963. }
  964. if (MaybeAlign A = GVar->getAlign())
  965. O << " .align " << A->value();
  966. else
  967. O << " .align " << (int)DL.getPrefTypeAlignment(ETy);
  968. if (ETy->isFloatingPointTy() || ETy->isPointerTy() ||
  969. (ETy->isIntegerTy() && ETy->getScalarSizeInBits() <= 64)) {
  970. O << " .";
  971. // Special case: ABI requires that we use .u8 for predicates
  972. if (ETy->isIntegerTy(1))
  973. O << "u8";
  974. else
  975. O << getPTXFundamentalTypeStr(ETy, false);
  976. O << " ";
  977. getSymbol(GVar)->print(O, MAI);
  978. // PTX allows variable initialization only for constant and global state
  979. // spaces.
  980. if (GVar->hasInitializer()) {
  981. if ((PTy->getAddressSpace() == ADDRESS_SPACE_GLOBAL) ||
  982. (PTy->getAddressSpace() == ADDRESS_SPACE_CONST)) {
  983. const Constant *Initializer = GVar->getInitializer();
  984. // 'undef' is treated as if no value were specified.
  985. if (!Initializer->isNullValue() && !isa<UndefValue>(Initializer)) {
  986. O << " = ";
  987. printScalarConstant(Initializer, O);
  988. }
  989. } else {
  990. // The frontend adds zero-initializer to device and constant variables
  991. // that don't have an initial value, and UndefValue to shared
  992. // variables, so skip warning for this case.
  993. if (!GVar->getInitializer()->isNullValue() &&
  994. !isa<UndefValue>(GVar->getInitializer())) {
  995. report_fatal_error("initial value of '" + GVar->getName() +
  996. "' is not allowed in addrspace(" +
  997. Twine(PTy->getAddressSpace()) + ")");
  998. }
  999. }
  1000. }
  1001. } else {
  1002. unsigned int ElementSize = 0;
  1003. // Although PTX has direct support for struct and array types, and LLVM IR
  1004. // is very similar to PTX in this respect, LLVM CodeGen does not emit such
  1005. // high-level field accesses for this target. Structs, arrays
  1006. // and vectors are lowered into arrays of bytes.
  1007. switch (ETy->getTypeID()) {
  1008. case Type::IntegerTyID: // Integers larger than 64 bits
  1009. case Type::StructTyID:
  1010. case Type::ArrayTyID:
  1011. case Type::FixedVectorTyID:
  1012. ElementSize = DL.getTypeStoreSize(ETy);
  1013. // PTX allows variable initialization only for constant and
  1014. // global state spaces.
  1015. if (((PTy->getAddressSpace() == ADDRESS_SPACE_GLOBAL) ||
  1016. (PTy->getAddressSpace() == ADDRESS_SPACE_CONST)) &&
  1017. GVar->hasInitializer()) {
  1018. const Constant *Initializer = GVar->getInitializer();
  1019. if (!isa<UndefValue>(Initializer) && !Initializer->isNullValue()) {
  1020. AggBuffer aggBuffer(ElementSize, O, *this);
  1021. bufferAggregateConstant(Initializer, &aggBuffer);
  1022. if (aggBuffer.numSymbols) {
  1023. if (static_cast<const NVPTXTargetMachine &>(TM).is64Bit()) {
  1024. O << " .u64 ";
  1025. getSymbol(GVar)->print(O, MAI);
  1026. O << "[";
  1027. O << ElementSize / 8;
  1028. } else {
  1029. O << " .u32 ";
  1030. getSymbol(GVar)->print(O, MAI);
  1031. O << "[";
  1032. O << ElementSize / 4;
  1033. }
  1034. O << "]";
  1035. } else {
  1036. O << " .b8 ";
  1037. getSymbol(GVar)->print(O, MAI);
  1038. O << "[";
  1039. O << ElementSize;
  1040. O << "]";
  1041. }
  1042. O << " = {";
  1043. aggBuffer.print();
  1044. O << "}";
  1045. } else {
  1046. O << " .b8 ";
  1047. getSymbol(GVar)->print(O, MAI);
  1048. if (ElementSize) {
  1049. O << "[";
  1050. O << ElementSize;
  1051. O << "]";
  1052. }
  1053. }
  1054. } else {
  1055. O << " .b8 ";
  1056. getSymbol(GVar)->print(O, MAI);
  1057. if (ElementSize) {
  1058. O << "[";
  1059. O << ElementSize;
  1060. O << "]";
  1061. }
  1062. }
  1063. break;
  1064. default:
  1065. llvm_unreachable("type not supported yet");
  1066. }
  1067. }
  1068. O << ";\n";
  1069. }
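  // Illustrative emission (hypothetical variable): for
  //   @gv = addrspace(1) global i32 42, align 4
  // the scalar path above prints
  //   .visible .global .align 4 .u32 gv = 42;
  // while an i1 global would use .u8, per the predicate special case.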
  1070. void NVPTXAsmPrinter::emitDemotedVars(const Function *f, raw_ostream &O) {
  1071. if (localDecls.find(f) == localDecls.end())
  1072. return;
  1073. std::vector<const GlobalVariable *> &gvars = localDecls[f];
  1074. for (const GlobalVariable *GV : gvars) {
  1075. O << "\t// demoted variable\n\t";
  1076. printModuleLevelGV(GV, O, true);
  1077. }
  1078. }
  1079. void NVPTXAsmPrinter::emitPTXAddressSpace(unsigned int AddressSpace,
  1080. raw_ostream &O) const {
  1081. switch (AddressSpace) {
  1082. case ADDRESS_SPACE_LOCAL:
  1083. O << "local";
  1084. break;
  1085. case ADDRESS_SPACE_GLOBAL:
  1086. O << "global";
  1087. break;
  1088. case ADDRESS_SPACE_CONST:
  1089. O << "const";
  1090. break;
  1091. case ADDRESS_SPACE_SHARED:
  1092. O << "shared";
  1093. break;
  1094. default:
  1095. report_fatal_error("Bad address space found while emitting PTX: " +
  1096. llvm::Twine(AddressSpace));
  1097. break;
  1098. }
  1099. }
  1100. std::string
  1101. NVPTXAsmPrinter::getPTXFundamentalTypeStr(Type *Ty, bool useB4PTR) const {
  1102. switch (Ty->getTypeID()) {
  1103. case Type::IntegerTyID: {
  1104. unsigned NumBits = cast<IntegerType>(Ty)->getBitWidth();
  1105. if (NumBits == 1)
  1106. return "pred";
  1107. else if (NumBits <= 64) {
  1108. std::string name = "u";
  1109. return name + utostr(NumBits);
  1110. } else {
  1111. llvm_unreachable("Integer too large");
  1112. break;
  1113. }
  1114. break;
  1115. }
  1116. case Type::HalfTyID:
  1117. // fp16 is stored as .b16 for compatibility with pre-sm_53 PTX assembly.
  1118. return "b16";
  1119. case Type::FloatTyID:
  1120. return "f32";
  1121. case Type::DoubleTyID:
  1122. return "f64";
  1123. case Type::PointerTyID:
  1124. if (static_cast<const NVPTXTargetMachine &>(TM).is64Bit())
  1125. if (useB4PTR)
  1126. return "b64";
  1127. else
  1128. return "u64";
  1129. else if (useB4PTR)
  1130. return "b32";
  1131. else
  1132. return "u32";
  1133. default:
  1134. break;
  1135. }
  1136. llvm_unreachable("unexpected type");
  1137. }
  1138. void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable *GVar,
  1139. raw_ostream &O) {
  1140. const DataLayout &DL = getDataLayout();
  1141. // GlobalVariables are always constant pointers themselves.
  1142. Type *ETy = GVar->getValueType();
  1143. O << ".";
  1144. emitPTXAddressSpace(GVar->getType()->getAddressSpace(), O);
  1145. if (MaybeAlign A = GVar->getAlign())
  1146. O << " .align " << A->value();
  1147. else
  1148. O << " .align " << (int)DL.getPrefTypeAlignment(ETy);
  1149. // Special case for i128
  1150. if (ETy->isIntegerTy(128)) {
  1151. O << " .b8 ";
  1152. getSymbol(GVar)->print(O, MAI);
  1153. O << "[16]";
  1154. return;
  1155. }
  1156. if (ETy->isFloatingPointTy() || ETy->isIntOrPtrTy()) {
  1157. O << " .";
  1158. O << getPTXFundamentalTypeStr(ETy);
  1159. O << " ";
  1160. getSymbol(GVar)->print(O, MAI);
  1161. return;
  1162. }
  1163. int64_t ElementSize = 0;
  1164. // Although PTX has direct support for struct and array types, and LLVM IR
  1165. // is very similar to PTX in this respect, LLVM CodeGen does not emit such
  1166. // high-level field accesses for this target. Structs and arrays are lowered
  1167. // into arrays of bytes.
  1168. switch (ETy->getTypeID()) {
  1169. case Type::StructTyID:
  1170. case Type::ArrayTyID:
  1171. case Type::FixedVectorTyID:
  1172. ElementSize = DL.getTypeStoreSize(ETy);
  1173. O << " .b8 ";
  1174. getSymbol(GVar)->print(O, MAI);
  1175. O << "[";
  1176. if (ElementSize) {
  1177. O << ElementSize;
  1178. }
  1179. O << "]";
  1180. break;
  1181. default:
  1182. llvm_unreachable("type not supported yet");
  1183. }
  1184. }
  1185. static unsigned int getOpenCLAlignment(const DataLayout &DL, Type *Ty) {
  1186. if (Ty->isSingleValueType())
  1187. return DL.getPrefTypeAlignment(Ty);
  1188. auto *ATy = dyn_cast<ArrayType>(Ty);
  1189. if (ATy)
  1190. return getOpenCLAlignment(DL, ATy->getElementType());
  1191. auto *STy = dyn_cast<StructType>(Ty);
  1192. if (STy) {
  1193. unsigned int alignStruct = 1;
  1194. // Go through each element of the struct and find the
  1195. // largest alignment.
  1196. for (unsigned i = 0, e = STy->getNumElements(); i != e; i++) {
  1197. Type *ETy = STy->getElementType(i);
  1198. unsigned int align = getOpenCLAlignment(DL, ETy);
  1199. if (align > alignStruct)
  1200. alignStruct = align;
  1201. }
  1202. return alignStruct;
  1203. }
  1204. auto *FTy = dyn_cast<FunctionType>(Ty);
  1205. if (FTy)
  1206. return DL.getPointerPrefAlignment().value();
  1207. return DL.getPrefTypeAlignment(Ty);
  1208. }
  1209. void NVPTXAsmPrinter::printParamName(Function::const_arg_iterator I,
  1210. int paramIndex, raw_ostream &O) {
  1211. getSymbol(I->getParent())->print(O, MAI);
  1212. O << "_param_" << paramIndex;
  1213. }
  1214. void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
  1215. const DataLayout &DL = getDataLayout();
  1216. const AttributeList &PAL = F->getAttributes();
  1217. const NVPTXSubtarget &STI = TM.getSubtarget<NVPTXSubtarget>(*F);
  1218. const TargetLowering *TLI = STI.getTargetLowering();
  1219. Function::const_arg_iterator I, E;
  1220. unsigned paramIndex = 0;
  1221. bool first = true;
  1222. bool isKernelFunc = isKernelFunction(*F);
  1223. bool isABI = (STI.getSmVersion() >= 20);
  1224. bool hasImageHandles = STI.hasImageHandles();
  1225. MVT thePointerTy = TLI->getPointerTy(DL);
  1226. if (F->arg_empty()) {
  1227. O << "()\n";
  1228. return;
  1229. }
  1230. O << "(\n";
  1231. for (I = F->arg_begin(), E = F->arg_end(); I != E; ++I, paramIndex++) {
  1232. Type *Ty = I->getType();
  1233. if (!first)
  1234. O << ",\n";
  1235. first = false;
  1236. // Handle image/sampler parameters
  1237. if (isKernelFunction(*F)) {
  1238. if (isSampler(*I) || isImage(*I)) {
  1239. if (isImage(*I)) {
  1240. std::string sname = std::string(I->getName());
  1241. if (isImageWriteOnly(*I) || isImageReadWrite(*I)) {
  1242. if (hasImageHandles)
  1243. O << "\t.param .u64 .ptr .surfref ";
  1244. else
  1245. O << "\t.param .surfref ";
  1246. CurrentFnSym->print(O, MAI);
  1247. O << "_param_" << paramIndex;
  1248. }
  1249. else { // Default image is read_only
  1250. if (hasImageHandles)
  1251. O << "\t.param .u64 .ptr .texref ";
  1252. else
  1253. O << "\t.param .texref ";
  1254. CurrentFnSym->print(O, MAI);
  1255. O << "_param_" << paramIndex;
  1256. }
  1257. } else {
  1258. if (hasImageHandles)
  1259. O << "\t.param .u64 .ptr .samplerref ";
  1260. else
  1261. O << "\t.param .samplerref ";
  1262. CurrentFnSym->print(O, MAI);
  1263. O << "_param_" << paramIndex;
  1264. }
  1265. continue;
  1266. }
  1267. }
  1268. if (!PAL.hasParamAttr(paramIndex, Attribute::ByVal)) {
  1269. if (Ty->isAggregateType() || Ty->isVectorTy() || Ty->isIntegerTy(128)) {
  1270. // Just print .param .align <a> .b8 .param[size];
  1271. // <a> = PAL.getparamalignment
  1272. // size = typeallocsize of element type
  1273. const Align align = DL.getValueOrABITypeAlignment(
  1274. PAL.getParamAlignment(paramIndex), Ty);
  1275. unsigned sz = DL.getTypeAllocSize(Ty);
  1276. O << "\t.param .align " << align.value() << " .b8 ";
  1277. printParamName(I, paramIndex, O);
  1278. O << "[" << sz << "]";
  1279. continue;
  1280. }
  1281. // Just a scalar
  1282. auto *PTy = dyn_cast<PointerType>(Ty);
  1283. if (isKernelFunc) {
  1284. if (PTy) {
  1285. // Special handling for pointer arguments to kernel
  1286. O << "\t.param .u" << thePointerTy.getSizeInBits() << " ";
  1287. if (static_cast<NVPTXTargetMachine &>(TM).getDrvInterface() !=
  1288. NVPTX::CUDA) {
  1289. Type *ETy = PTy->getPointerElementType();
  1290. int addrSpace = PTy->getAddressSpace();
  1291. switch (addrSpace) {
  1292. default:
  1293. O << ".ptr ";
  1294. break;
  1295. case ADDRESS_SPACE_CONST:
  1296. O << ".ptr .const ";
  1297. break;
  1298. case ADDRESS_SPACE_SHARED:
  1299. O << ".ptr .shared ";
  1300. break;
  1301. case ADDRESS_SPACE_GLOBAL:
  1302. O << ".ptr .global ";
  1303. break;
  1304. }
  1305. O << ".align " << (int)getOpenCLAlignment(DL, ETy) << " ";
  1306. }
  1307. printParamName(I, paramIndex, O);
  1308. continue;
  1309. }
  1310. // non-pointer scalar to kernel func
  1311. O << "\t.param .";
  1312. // Special case: predicate operands become .u8 types
  1313. if (Ty->isIntegerTy(1))
  1314. O << "u8";
  1315. else
  1316. O << getPTXFundamentalTypeStr(Ty);
  1317. O << " ";
  1318. printParamName(I, paramIndex, O);
  1319. continue;
  1320. }
      // Non-kernel function, just print .param .b<size> for ABI
      // and .reg .b<size> for non-ABI
      unsigned sz = 0;
      if (isa<IntegerType>(Ty)) {
        sz = cast<IntegerType>(Ty)->getBitWidth();
        if (sz < 32)
          sz = 32;
      } else if (isa<PointerType>(Ty))
        sz = thePointerTy.getSizeInBits();
      else if (Ty->isHalfTy())
        // PTX ABI requires all scalar parameters to be at least 32
        // bits in size. fp16 normally uses .b16 as its storage type
        // in PTX, so its size must be adjusted here, too.
        sz = 32;
      else
        sz = Ty->getPrimitiveSizeInBits();
      if (isABI)
        O << "\t.param .b" << sz << " ";
      else
        O << "\t.reg .b" << sz << " ";
      printParamName(I, paramIndex, O);
      continue;
    }

    // The parameter has the byval attribute, so it must be a pointer.
    auto *PTy = dyn_cast<PointerType>(Ty);
    assert(PTy && "Param with byval attribute should be a pointer type");
    Type *ETy = PTy->getPointerElementType();
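    // Under the sm_20+ ABI (or for kernels) the byval aggregate is passed as a
    // .param byte array; the legacy non-ABI path below scalarizes it into
    // individual .reg declarations instead.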
    if (isABI || isKernelFunc) {
      // Just print .param .align <a> .b8 .param[size];
      // <a>  = PAL.getparamalignment
      // size = typeallocsize of element type
      Align align =
          DL.getValueOrABITypeAlignment(PAL.getParamAlignment(paramIndex), ETy);
      // Work around a bug in ptxas. When PTX code takes address of
      // byval parameter with alignment < 4, ptxas generates code to
      // spill argument into memory. Alas on sm_50+ ptxas generates
      // SASS code that fails with misaligned access. To work around
      // the problem, make sure that we align byval parameters by at
      // least 4. Matching change must be made in LowerCall() where we
      // prepare parameters for the call.
      //
      // TODO: this will need to be undone when we get to support multi-TU
      // device-side compilation as it breaks ABI compatibility with nvcc.
      // Hopefully ptxas bug is fixed by then.
      if (!isKernelFunc && align < Align(4))
        align = Align(4);
      unsigned sz = DL.getTypeAllocSize(ETy);
      O << "\t.param .align " << align.value() << " .b8 ";
      printParamName(I, paramIndex, O);
      O << "[" << sz << "]";
      continue;
    } else {
      // Split the ETy into constituent parts and
      // print .param .b<size> <name> for each part.
      // Further, if a part is vector, print the above for
      // each vector element.
      SmallVector<EVT, 16> vtparts;
      ComputeValueVTs(*TLI, DL, ETy, vtparts);
      for (unsigned i = 0, e = vtparts.size(); i != e; ++i) {
        unsigned elems = 1;
        EVT elemtype = vtparts[i];
        if (vtparts[i].isVector()) {
          elems = vtparts[i].getVectorNumElements();
          elemtype = vtparts[i].getVectorElementType();
        }

        for (unsigned j = 0, je = elems; j != je; ++j) {
          unsigned sz = elemtype.getSizeInBits();
          if (elemtype.isInteger() && (sz < 32))
            sz = 32;
          O << "\t.reg .b" << sz << " ";
          printParamName(I, paramIndex, O);
          if (j < je - 1)
            O << ",\n";
          ++paramIndex;
        }
        if (i < e - 1)
          O << ",\n";
      }
      --paramIndex;
      continue;
    }
  }

  O << "\n)\n";
}

void NVPTXAsmPrinter::emitFunctionParamList(const MachineFunction &MF,
                                            raw_ostream &O) {
  const Function &F = MF.getFunction();
  emitFunctionParamList(&F, O);
}

void NVPTXAsmPrinter::setAndEmitFunctionVirtualRegisters(
    const MachineFunction &MF) {
  SmallString<128> Str;
  raw_svector_ostream O(Str);

  // Map the global virtual register number to a register class specific
  // virtual register number starting from 1 with that class.
  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
  // unsigned numRegClasses = TRI->getNumRegClasses();

  // Emit the Fake Stack Object
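  // The "depot" is a single .local byte array that backs all of the
  // function's frame objects; for a function with 16 bytes of stack this
  // emits roughly (illustrative, assuming a 64-bit target):
  //   .local .align 8 .b8 __local_depot0[16];
  //   .reg .b64 %SP;
  //   .reg .b64 %SPL;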
  const MachineFrameInfo &MFI = MF.getFrameInfo();
  int NumBytes = (int)MFI.getStackSize();
  if (NumBytes) {
    O << "\t.local .align " << MFI.getMaxAlign().value() << " .b8 \t"
      << DEPOTNAME << getFunctionNumber() << "[" << NumBytes << "];\n";
    if (static_cast<const NVPTXTargetMachine &>(MF.getTarget()).is64Bit()) {
      O << "\t.reg .b64 \t%SP;\n";
      O << "\t.reg .b64 \t%SPL;\n";
    } else {
      O << "\t.reg .b32 \t%SP;\n";
      O << "\t.reg .b32 \t%SPL;\n";
    }
  }

  // Go through all virtual registers to establish the mapping between the
  // global virtual register number and the per class virtual register number.
  // We use the per class virtual register number in the ptx output.
  unsigned int numVRs = MRI->getNumVirtRegs();
  for (unsigned i = 0; i < numVRs; i++) {
    Register vr = Register::index2VirtReg(i);
    const TargetRegisterClass *RC = MRI->getRegClass(vr);
    DenseMap<unsigned, unsigned> &regmap = VRegMapping[RC];
    int n = regmap.size();
    regmap.insert(std::make_pair(vr, n + 1));
  }

  // Emit register declarations
  // @TODO: Extract out the real register usage
  // O << "\t.reg .pred %p<" << NVPTXNumRegisters << ">;\n";
  // O << "\t.reg .s16 %rc<" << NVPTXNumRegisters << ">;\n";
  // O << "\t.reg .s16 %rs<" << NVPTXNumRegisters << ">;\n";
  // O << "\t.reg .s32 %r<" << NVPTXNumRegisters << ">;\n";
  // O << "\t.reg .s64 %rd<" << NVPTXNumRegisters << ">;\n";
  // O << "\t.reg .f32 %f<" << NVPTXNumRegisters << ">;\n";
  // O << "\t.reg .f64 %fd<" << NVPTXNumRegisters << ">;\n";

  // Emit declaration of the virtual registers or 'physical' registers for
  // each register class
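  // For a typical function this produces declarations such as (illustrative):
  //   .reg .pred %p<3>;
  //   .reg .b32 %r<8>;
  //   .reg .f32 %f<5>;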
  for (unsigned i = 0; i < TRI->getNumRegClasses(); i++) {
    const TargetRegisterClass *RC = TRI->getRegClass(i);
    DenseMap<unsigned, unsigned> &regmap = VRegMapping[RC];
    std::string rcname = getNVPTXRegClassName(RC);
    std::string rcStr = getNVPTXRegClassStr(RC);
    int n = regmap.size();

    // Only declare those registers that may be used.
    if (n) {
      O << "\t.reg " << rcname << " \t" << rcStr << "<" << (n + 1) << ">;\n";
    }
  }

  OutStreamer->emitRawText(O.str());
}
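
// Print a floating-point constant in PTX's hex form; for example 1.0f is
// printed as "0f3F800000" and the double 1.0 as "0d3FF0000000000000".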
void NVPTXAsmPrinter::printFPConstant(const ConstantFP *Fp, raw_ostream &O) {
  APFloat APF = APFloat(Fp->getValueAPF()); // make a copy
  bool ignored;
  unsigned int numHex;
  const char *lead;

  if (Fp->getType()->getTypeID() == Type::FloatTyID) {
    numHex = 8;
    lead = "0f";
    APF.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven, &ignored);
  } else if (Fp->getType()->getTypeID() == Type::DoubleTyID) {
    numHex = 16;
    lead = "0d";
    APF.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven, &ignored);
  } else
    llvm_unreachable("unsupported fp type");

  APInt API = APF.bitcastToAPInt();
  O << lead << format_hex_no_prefix(API.getZExtValue(), numHex, /*Upper=*/true);
}

void NVPTXAsmPrinter::printScalarConstant(const Constant *CPV, raw_ostream &O) {
  if (const ConstantInt *CI = dyn_cast<ConstantInt>(CPV)) {
    O << CI->getValue();
    return;
  }

  if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CPV)) {
    printFPConstant(CFP, O);
    return;
  }

  if (isa<ConstantPointerNull>(CPV)) {
    O << "0";
    return;
  }

  if (const GlobalValue *GVar = dyn_cast<GlobalValue>(CPV)) {
    bool IsNonGenericPointer = false;
    if (GVar->getType()->getAddressSpace() != 0) {
      IsNonGenericPointer = true;
    }
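    // When generic addressing is being emitted, symbols in the default
    // address space are wrapped as generic(<name>) below; function symbols
    // and non-generic pointers are printed unwrapped.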
    if (EmitGeneric && !isa<Function>(CPV) && !IsNonGenericPointer) {
      O << "generic(";
      getSymbol(GVar)->print(O, MAI);
      O << ")";
    } else {
      getSymbol(GVar)->print(O, MAI);
    }
    return;
  }

  if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
    const Value *v = Cexpr->stripPointerCasts();
    PointerType *PTy = dyn_cast<PointerType>(Cexpr->getType());
    bool IsNonGenericPointer = false;
    if (PTy && PTy->getAddressSpace() != 0) {
      IsNonGenericPointer = true;
    }
    if (const GlobalValue *GVar = dyn_cast<GlobalValue>(v)) {
      if (EmitGeneric && !isa<Function>(v) && !IsNonGenericPointer) {
        O << "generic(";
        getSymbol(GVar)->print(O, MAI);
        O << ")";
      } else {
        getSymbol(GVar)->print(O, MAI);
      }
      return;
    } else {
      lowerConstant(CPV)->print(O, MAI);
      return;
    }
  }

  llvm_unreachable("Not scalar type found in printScalarConstant()");
}
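
// Serialize a constant into the AggBuffer as little-endian bytes. A non-zero
// Bytes argument gives the number of bytes this element must occupy in the
// buffer (e.g. to account for struct padding); zero means "use the type's
// alloc size".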
void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes,
                                   AggBuffer *AggBuffer) {
  const DataLayout &DL = getDataLayout();
  int AllocSize = DL.getTypeAllocSize(CPV->getType());
  if (isa<UndefValue>(CPV) || CPV->isNullValue()) {
    // Non-zero Bytes indicates that we need to zero-fill everything. Otherwise,
    // only the space allocated by CPV.
    AggBuffer->addZeros(Bytes ? Bytes : AllocSize);
    return;
  }

  // Helper for filling AggBuffer with APInts.
  auto AddIntToBuffer = [AggBuffer, Bytes](const APInt &Val) {
    size_t NumBytes = (Val.getBitWidth() + 7) / 8;
    SmallVector<unsigned char, 16> Buf(NumBytes);
    for (unsigned I = 0; I < NumBytes; ++I) {
      Buf[I] = Val.extractBitsAsZExtValue(8, I * 8);
    }
    AggBuffer->addBytes(Buf.data(), NumBytes, Bytes);
  };

  switch (CPV->getType()->getTypeID()) {
  case Type::IntegerTyID:
    if (const auto *CI = dyn_cast<ConstantInt>(CPV)) {
      AddIntToBuffer(CI->getValue());
      break;
    }
    if (const auto *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
      if (const auto *CI =
              dyn_cast<ConstantInt>(ConstantFoldConstant(Cexpr, DL))) {
        AddIntToBuffer(CI->getValue());
        break;
      }
      if (Cexpr->getOpcode() == Instruction::PtrToInt) {
        Value *V = Cexpr->getOperand(0)->stripPointerCasts();
        AggBuffer->addSymbol(V, Cexpr->getOperand(0));
        AggBuffer->addZeros(AllocSize);
        break;
      }
    }
    llvm_unreachable("unsupported integer const type");
    break;

  case Type::HalfTyID:
  case Type::FloatTyID:
  case Type::DoubleTyID:
    AddIntToBuffer(cast<ConstantFP>(CPV)->getValueAPF().bitcastToAPInt());
    break;

  case Type::PointerTyID: {
    if (const GlobalValue *GVar = dyn_cast<GlobalValue>(CPV)) {
      AggBuffer->addSymbol(GVar, GVar);
    } else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
      const Value *v = Cexpr->stripPointerCasts();
      AggBuffer->addSymbol(v, Cexpr);
    }
    AggBuffer->addZeros(AllocSize);
    break;
  }

  case Type::ArrayTyID:
  case Type::FixedVectorTyID:
  case Type::StructTyID: {
    if (isa<ConstantAggregate>(CPV) || isa<ConstantDataSequential>(CPV)) {
      bufferAggregateConstant(CPV, AggBuffer);
      if (Bytes > AllocSize)
        AggBuffer->addZeros(Bytes - AllocSize);
    } else if (isa<ConstantAggregateZero>(CPV))
      AggBuffer->addZeros(Bytes);
    else
      llvm_unreachable("Unexpected Constant type");
    break;
  }

  default:
    llvm_unreachable("unsupported type");
  }
}

void NVPTXAsmPrinter::bufferAggregateConstant(const Constant *CPV,
                                              AggBuffer *aggBuffer) {
  const DataLayout &DL = getDataLayout();
  int Bytes;

  // Integers of arbitrary width
  if (const ConstantInt *CI = dyn_cast<ConstantInt>(CPV)) {
    APInt Val = CI->getValue();
    for (unsigned I = 0, E = DL.getTypeAllocSize(CPV->getType()); I < E; ++I) {
      uint8_t Byte = Val.getLoBits(8).getZExtValue();
      aggBuffer->addBytes(&Byte, 1, 1);
      Val.lshrInPlace(8);
    }
    return;
  }

  // Old constants
  if (isa<ConstantArray>(CPV) || isa<ConstantVector>(CPV)) {
    if (CPV->getNumOperands())
      for (unsigned i = 0, e = CPV->getNumOperands(); i != e; ++i)
        bufferLEByte(cast<Constant>(CPV->getOperand(i)), 0, aggBuffer);
    return;
  }

  if (const ConstantDataSequential *CDS =
          dyn_cast<ConstantDataSequential>(CPV)) {
    if (CDS->getNumElements())
      for (unsigned i = 0; i < CDS->getNumElements(); ++i)
        bufferLEByte(cast<Constant>(CDS->getElementAsConstant(i)), 0,
                     aggBuffer);
    return;
  }

  if (isa<ConstantStruct>(CPV)) {
    if (CPV->getNumOperands()) {
      StructType *ST = cast<StructType>(CPV->getType());
      for (unsigned i = 0, e = CPV->getNumOperands(); i != e; ++i) {
        if (i == (e - 1))
          Bytes = DL.getStructLayout(ST)->getElementOffset(0) +
                  DL.getTypeAllocSize(ST) -
                  DL.getStructLayout(ST)->getElementOffset(i);
        else
          Bytes = DL.getStructLayout(ST)->getElementOffset(i + 1) -
                  DL.getStructLayout(ST)->getElementOffset(i);
        bufferLEByte(cast<Constant>(CPV->getOperand(i)), Bytes, aggBuffer);
      }
    }
    return;
  }
  llvm_unreachable("unsupported constant type in printAggregateConstant()");
}

/// lowerConstantForGV - Return an MCExpr for the given Constant. This is mostly
/// a copy from AsmPrinter::lowerConstant, except customized to only handle
/// expressions that are representable in PTX and create
/// NVPTXGenericMCSymbolRefExpr nodes for addrspacecast instructions.
const MCExpr *
NVPTXAsmPrinter::lowerConstantForGV(const Constant *CV,
                                    bool ProcessingGeneric) {
  MCContext &Ctx = OutContext;

  if (CV->isNullValue() || isa<UndefValue>(CV))
    return MCConstantExpr::create(0, Ctx);

  if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV))
    return MCConstantExpr::create(CI->getZExtValue(), Ctx);

  if (const GlobalValue *GV = dyn_cast<GlobalValue>(CV)) {
    const MCSymbolRefExpr *Expr = MCSymbolRefExpr::create(getSymbol(GV), Ctx);
    if (ProcessingGeneric) {
      return NVPTXGenericMCSymbolRefExpr::create(Expr, Ctx);
    } else {
      return Expr;
    }
  }

  const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV);
  if (!CE) {
    llvm_unreachable("Unknown constant value to lower!");
  }

  switch (CE->getOpcode()) {
  default: {
    // If the code isn't optimized, there may be outstanding folding
    // opportunities. Attempt to fold the expression using DataLayout as a
    // last resort before giving up.
    Constant *C = ConstantFoldConstant(CE, getDataLayout());
    if (C != CE)
      return lowerConstantForGV(C, ProcessingGeneric);

    // Otherwise report the problem to the user.
    std::string S;
    raw_string_ostream OS(S);
    OS << "Unsupported expression in static initializer: ";
    CE->printAsOperand(OS, /*PrintType=*/false,
                       !MF ? nullptr : MF->getFunction().getParent());
    report_fatal_error(Twine(OS.str()));
  }

  case Instruction::AddrSpaceCast: {
    // Strip the addrspacecast and pass along the operand
    PointerType *DstTy = cast<PointerType>(CE->getType());
    if (DstTy->getAddressSpace() == 0) {
      return lowerConstantForGV(cast<const Constant>(CE->getOperand(0)), true);
    }
    std::string S;
    raw_string_ostream OS(S);
    OS << "Unsupported expression in static initializer: ";
    CE->printAsOperand(OS, /*PrintType=*/false,
                       !MF ? nullptr : MF->getFunction().getParent());
    report_fatal_error(Twine(OS.str()));
  }

  case Instruction::GetElementPtr: {
    const DataLayout &DL = getDataLayout();

    // Generate a symbolic expression for the byte address
    APInt OffsetAI(DL.getPointerTypeSizeInBits(CE->getType()), 0);
    cast<GEPOperator>(CE)->accumulateConstantOffset(DL, OffsetAI);

    const MCExpr *Base = lowerConstantForGV(CE->getOperand(0),
                                            ProcessingGeneric);
    if (!OffsetAI)
      return Base;

    int64_t Offset = OffsetAI.getSExtValue();
    return MCBinaryExpr::createAdd(Base, MCConstantExpr::create(Offset, Ctx),
                                   Ctx);
  }

  case Instruction::Trunc:
    // We emit the value and depend on the assembler to truncate the generated
    // expression properly. This is important for differences between
    // blockaddress labels. Since the two labels are in the same function, it
    // is reasonable to treat their delta as a 32-bit value.
    LLVM_FALLTHROUGH;
  case Instruction::BitCast:
    return lowerConstantForGV(CE->getOperand(0), ProcessingGeneric);

  case Instruction::IntToPtr: {
    const DataLayout &DL = getDataLayout();

    // Handle casts to pointers by changing them into casts to the appropriate
    // integer type. This promotes constant folding and simplifies this code.
    Constant *Op = CE->getOperand(0);
    Op = ConstantExpr::getIntegerCast(Op, DL.getIntPtrType(CV->getType()),
                                      false /*ZExt*/);
    return lowerConstantForGV(Op, ProcessingGeneric);
  }

  case Instruction::PtrToInt: {
    const DataLayout &DL = getDataLayout();

    // Support only foldable casts to/from pointers that can be eliminated by
    // changing the pointer to the appropriately sized integer type.
    Constant *Op = CE->getOperand(0);
    Type *Ty = CE->getType();

    const MCExpr *OpExpr = lowerConstantForGV(Op, ProcessingGeneric);

    // We can emit the pointer value into this slot if the slot is an
    // integer slot equal to the size of the pointer.
    if (DL.getTypeAllocSize(Ty) == DL.getTypeAllocSize(Op->getType()))
      return OpExpr;

    // Otherwise the pointer is smaller than the resultant integer, mask off
    // the high bits so we are sure to get a proper truncation if the input is
    // a constant expr.
    unsigned InBits = DL.getTypeAllocSizeInBits(Op->getType());
    const MCExpr *MaskExpr = MCConstantExpr::create(~0ULL >> (64-InBits), Ctx);
    return MCBinaryExpr::createAnd(OpExpr, MaskExpr, Ctx);
  }

  // The MC library also has a right-shift operator, but it isn't consistently
  // signed or unsigned between different targets.
  case Instruction::Add: {
    const MCExpr *LHS = lowerConstantForGV(CE->getOperand(0), ProcessingGeneric);
    const MCExpr *RHS = lowerConstantForGV(CE->getOperand(1), ProcessingGeneric);
    switch (CE->getOpcode()) {
    default: llvm_unreachable("Unknown binary operator constant cast expr");
    case Instruction::Add: return MCBinaryExpr::createAdd(LHS, RHS, Ctx);
    }
  }
  }
}

// Copy of MCExpr::print customized for NVPTX
void NVPTXAsmPrinter::printMCExpr(const MCExpr &Expr, raw_ostream &OS) {
  switch (Expr.getKind()) {
  case MCExpr::Target:
    return cast<MCTargetExpr>(&Expr)->printImpl(OS, MAI);
  case MCExpr::Constant:
    OS << cast<MCConstantExpr>(Expr).getValue();
    return;

  case MCExpr::SymbolRef: {
    const MCSymbolRefExpr &SRE = cast<MCSymbolRefExpr>(Expr);
    const MCSymbol &Sym = SRE.getSymbol();
    Sym.print(OS, MAI);
    return;
  }

  case MCExpr::Unary: {
    const MCUnaryExpr &UE = cast<MCUnaryExpr>(Expr);
    switch (UE.getOpcode()) {
    case MCUnaryExpr::LNot:  OS << '!'; break;
    case MCUnaryExpr::Minus: OS << '-'; break;
    case MCUnaryExpr::Not:   OS << '~'; break;
    case MCUnaryExpr::Plus:  OS << '+'; break;
    }
    printMCExpr(*UE.getSubExpr(), OS);
    return;
  }

  case MCExpr::Binary: {
    const MCBinaryExpr &BE = cast<MCBinaryExpr>(Expr);

    // Only print parens around the LHS if it is non-trivial.
    if (isa<MCConstantExpr>(BE.getLHS()) || isa<MCSymbolRefExpr>(BE.getLHS()) ||
        isa<NVPTXGenericMCSymbolRefExpr>(BE.getLHS())) {
      printMCExpr(*BE.getLHS(), OS);
    } else {
      OS << '(';
      printMCExpr(*BE.getLHS(), OS);
      OS << ')';
    }

    switch (BE.getOpcode()) {
    case MCBinaryExpr::Add:
      // Print "X-42" instead of "X+-42".
      if (const MCConstantExpr *RHSC = dyn_cast<MCConstantExpr>(BE.getRHS())) {
        if (RHSC->getValue() < 0) {
          OS << RHSC->getValue();
          return;
        }
      }

      OS << '+';
      break;
    default: llvm_unreachable("Unhandled binary operator");
    }

    // Only print parens around the RHS if it is non-trivial.
    if (isa<MCConstantExpr>(BE.getRHS()) || isa<MCSymbolRefExpr>(BE.getRHS())) {
      printMCExpr(*BE.getRHS(), OS);
    } else {
      OS << '(';
      printMCExpr(*BE.getRHS(), OS);
      OS << ')';
    }
    return;
  }
  }

  llvm_unreachable("Invalid expression kind!");
}

/// PrintAsmOperand - Print out an operand for an inline asm expression.
///
bool NVPTXAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
                                      const char *ExtraCode, raw_ostream &O) {
  if (ExtraCode && ExtraCode[0]) {
    if (ExtraCode[1] != 0)
      return true; // Unknown modifier.

    switch (ExtraCode[0]) {
    default:
      // See if this is a generic print operand
      return AsmPrinter::PrintAsmOperand(MI, OpNo, ExtraCode, O);
    case 'r':
      break;
    }
  }

  printOperand(MI, OpNo, O);

  return false;
}

bool NVPTXAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
                                            unsigned OpNo,
                                            const char *ExtraCode,
                                            raw_ostream &O) {
  if (ExtraCode && ExtraCode[0])
    return true; // Unknown modifier

  O << '[';
  printMemOperand(MI, OpNo, O);
  O << ']';

  return false;
}

void NVPTXAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
                                   raw_ostream &O) {
  const MachineOperand &MO = MI->getOperand(opNum);
  switch (MO.getType()) {
  case MachineOperand::MO_Register:
    if (Register::isPhysicalRegister(MO.getReg())) {
      if (MO.getReg() == NVPTX::VRDepot)
        O << DEPOTNAME << getFunctionNumber();
      else
        O << NVPTXInstPrinter::getRegisterName(MO.getReg());
    } else {
      emitVirtualRegister(MO.getReg(), O);
    }
    break;

  case MachineOperand::MO_Immediate:
    O << MO.getImm();
    break;

  case MachineOperand::MO_FPImmediate:
    printFPConstant(MO.getFPImm(), O);
    break;

  case MachineOperand::MO_GlobalAddress:
    PrintSymbolOperand(MO, O);
    break;

  case MachineOperand::MO_MachineBasicBlock:
    MO.getMBB()->getSymbol()->print(O, MAI);
    break;

  default:
    llvm_unreachable("Operand type not supported.");
  }
}
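
// Print a memory operand as "<base>+<offset>" (or "<base>, <offset>" when the
// "add" modifier is used); a zero immediate offset is omitted entirely.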
void NVPTXAsmPrinter::printMemOperand(const MachineInstr *MI, int opNum,
                                      raw_ostream &O, const char *Modifier) {
  printOperand(MI, opNum, O);

  if (Modifier && strcmp(Modifier, "add") == 0) {
    O << ", ";
    printOperand(MI, opNum + 1, O);
  } else {
    if (MI->getOperand(opNum + 1).isImm() &&
        MI->getOperand(opNum + 1).getImm() == 0)
      return; // don't print ',0' or '+0'
    O << "+";
    printOperand(MI, opNum + 1, O);
  }
}

// Force static initialization.
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeNVPTXAsmPrinter() {
  RegisterAsmPrinter<NVPTXAsmPrinter> X(getTheNVPTXTarget32());
  RegisterAsmPrinter<NVPTXAsmPrinter> Y(getTheNVPTXTarget64());
}