  1. //===-- NVPTXAsmPrinter.cpp - NVPTX LLVM assembly writer ------------------===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. // This file contains a printer that converts from our internal representation
  10. // of machine-dependent LLVM code to NVPTX assembly language.
  11. //
  12. //===----------------------------------------------------------------------===//
  13. #include "NVPTXAsmPrinter.h"
  14. #include "MCTargetDesc/NVPTXBaseInfo.h"
  15. #include "MCTargetDesc/NVPTXInstPrinter.h"
  16. #include "MCTargetDesc/NVPTXMCAsmInfo.h"
  17. #include "MCTargetDesc/NVPTXTargetStreamer.h"
  18. #include "NVPTX.h"
  19. #include "NVPTXMCExpr.h"
  20. #include "NVPTXMachineFunctionInfo.h"
  21. #include "NVPTXRegisterInfo.h"
  22. #include "NVPTXSubtarget.h"
  23. #include "NVPTXTargetMachine.h"
  24. #include "NVPTXUtilities.h"
  25. #include "TargetInfo/NVPTXTargetInfo.h"
  26. #include "cl_common_defines.h"
  27. #include "llvm/ADT/APFloat.h"
  28. #include "llvm/ADT/APInt.h"
  29. #include "llvm/ADT/DenseMap.h"
  30. #include "llvm/ADT/DenseSet.h"
  31. #include "llvm/ADT/SmallString.h"
  32. #include "llvm/ADT/SmallVector.h"
  33. #include "llvm/ADT/StringExtras.h"
  34. #include "llvm/ADT/StringRef.h"
  35. #include "llvm/ADT/Triple.h"
  36. #include "llvm/ADT/Twine.h"
  37. #include "llvm/Analysis/ConstantFolding.h"
  38. #include "llvm/CodeGen/Analysis.h"
  39. #include "llvm/CodeGen/MachineBasicBlock.h"
  40. #include "llvm/CodeGen/MachineFrameInfo.h"
  41. #include "llvm/CodeGen/MachineFunction.h"
  42. #include "llvm/CodeGen/MachineInstr.h"
  43. #include "llvm/CodeGen/MachineLoopInfo.h"
  44. #include "llvm/CodeGen/MachineModuleInfo.h"
  45. #include "llvm/CodeGen/MachineOperand.h"
  46. #include "llvm/CodeGen/MachineRegisterInfo.h"
  47. #include "llvm/CodeGen/TargetRegisterInfo.h"
  48. #include "llvm/CodeGen/ValueTypes.h"
  49. #include "llvm/IR/Attributes.h"
  50. #include "llvm/IR/BasicBlock.h"
  51. #include "llvm/IR/Constant.h"
  52. #include "llvm/IR/Constants.h"
  53. #include "llvm/IR/DataLayout.h"
  54. #include "llvm/IR/DebugInfo.h"
  55. #include "llvm/IR/DebugInfoMetadata.h"
  56. #include "llvm/IR/DebugLoc.h"
  57. #include "llvm/IR/DerivedTypes.h"
  58. #include "llvm/IR/Function.h"
  59. #include "llvm/IR/GlobalValue.h"
  60. #include "llvm/IR/GlobalVariable.h"
  61. #include "llvm/IR/Instruction.h"
  62. #include "llvm/IR/LLVMContext.h"
  63. #include "llvm/IR/Module.h"
  64. #include "llvm/IR/Operator.h"
  65. #include "llvm/IR/Type.h"
  66. #include "llvm/IR/User.h"
  67. #include "llvm/MC/MCExpr.h"
  68. #include "llvm/MC/MCInst.h"
  69. #include "llvm/MC/MCInstrDesc.h"
  70. #include "llvm/MC/MCStreamer.h"
  71. #include "llvm/MC/MCSymbol.h"
  72. #include "llvm/MC/TargetRegistry.h"
  73. #include "llvm/Support/Casting.h"
  74. #include "llvm/Support/CommandLine.h"
  75. #include "llvm/Support/Endian.h"
  76. #include "llvm/Support/ErrorHandling.h"
  77. #include "llvm/Support/MachineValueType.h"
  78. #include "llvm/Support/NativeFormatting.h"
  79. #include "llvm/Support/Path.h"
  80. #include "llvm/Support/raw_ostream.h"
  81. #include "llvm/Target/TargetLoweringObjectFile.h"
  82. #include "llvm/Target/TargetMachine.h"
  83. #include "llvm/Transforms/Utils/UnrollLoop.h"
  84. #include <cassert>
  85. #include <cstdint>
  86. #include <cstring>
  87. #include <new>
  88. #include <string>
  89. #include <utility>
  90. #include <vector>
  91. using namespace llvm;
  92. #define DEPOTNAME "__local_depot"
  93. /// DiscoverDependentGlobals - Return a set of GlobalVariables on which \p V
  94. /// depends.
  95. static void
  96. DiscoverDependentGlobals(const Value *V,
  97. DenseSet<const GlobalVariable *> &Globals) {
  98. if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
  99. Globals.insert(GV);
  100. else {
  101. if (const User *U = dyn_cast<User>(V)) {
  102. for (unsigned i = 0, e = U->getNumOperands(); i != e; ++i) {
  103. DiscoverDependentGlobals(U->getOperand(i), Globals);
  104. }
  105. }
  106. }
  107. }
  108. /// VisitGlobalVariableForEmission - Add \p GV to the list of GlobalVariable
  109. /// instances to be emitted, but only after any dependents have been added
  110. /// first.
  111. static void
  112. VisitGlobalVariableForEmission(const GlobalVariable *GV,
  113. SmallVectorImpl<const GlobalVariable *> &Order,
  114. DenseSet<const GlobalVariable *> &Visited,
  115. DenseSet<const GlobalVariable *> &Visiting) {
  116. // Have we already visited this one?
  117. if (Visited.count(GV))
  118. return;
  119. // Do we have a circular dependency?
  120. if (!Visiting.insert(GV).second)
  121. report_fatal_error("Circular dependency found in global variable set");
  122. // Make sure we visit all dependents first
  123. DenseSet<const GlobalVariable *> Others;
  124. for (unsigned i = 0, e = GV->getNumOperands(); i != e; ++i)
  125. DiscoverDependentGlobals(GV->getOperand(i), Others);
  126. for (const GlobalVariable *GV : Others)
  127. VisitGlobalVariableForEmission(GV, Order, Visited, Visiting);
  128. // Now we can visit ourself
  129. Order.push_back(GV);
  130. Visited.insert(GV);
  131. Visiting.erase(GV);
  132. }
  133. void NVPTXAsmPrinter::emitInstruction(const MachineInstr *MI) {
  134. NVPTX_MC::verifyInstructionPredicates(MI->getOpcode(),
  135. getSubtargetInfo().getFeatureBits());
  136. MCInst Inst;
  137. lowerToMCInst(MI, Inst);
  138. EmitToStreamer(*OutStreamer, Inst);
  139. }
  140. // Handle symbol backtracking for targets that do not support image handles
  141. bool NVPTXAsmPrinter::lowerImageHandleOperand(const MachineInstr *MI,
  142. unsigned OpNo, MCOperand &MCOp) {
  143. const MachineOperand &MO = MI->getOperand(OpNo);
  144. const MCInstrDesc &MCID = MI->getDesc();
  145. if (MCID.TSFlags & NVPTXII::IsTexFlag) {
  146. // This is a texture fetch, so operand 4 is a texref and operand 5 is
  147. // a samplerref
  148. if (OpNo == 4 && MO.isImm()) {
  149. lowerImageHandleSymbol(MO.getImm(), MCOp);
  150. return true;
  151. }
  152. if (OpNo == 5 && MO.isImm() && !(MCID.TSFlags & NVPTXII::IsTexModeUnifiedFlag)) {
  153. lowerImageHandleSymbol(MO.getImm(), MCOp);
  154. return true;
  155. }
  156. return false;
  157. } else if (MCID.TSFlags & NVPTXII::IsSuldMask) {
  158. unsigned VecSize =
  159. 1 << (((MCID.TSFlags & NVPTXII::IsSuldMask) >> NVPTXII::IsSuldShift) - 1);
  160. // For a surface load of vector size N, the Nth operand will be the surfref
  161. if (OpNo == VecSize && MO.isImm()) {
  162. lowerImageHandleSymbol(MO.getImm(), MCOp);
  163. return true;
  164. }
  165. return false;
  166. } else if (MCID.TSFlags & NVPTXII::IsSustFlag) {
  167. // This is a surface store, so operand 0 is a surfref
  168. if (OpNo == 0 && MO.isImm()) {
  169. lowerImageHandleSymbol(MO.getImm(), MCOp);
  170. return true;
  171. }
  172. return false;
  173. } else if (MCID.TSFlags & NVPTXII::IsSurfTexQueryFlag) {
  174. // This is a query, so operand 1 is a surfref/texref
  175. if (OpNo == 1 && MO.isImm()) {
  176. lowerImageHandleSymbol(MO.getImm(), MCOp);
  177. return true;
  178. }
  179. return false;
  180. }
  181. return false;
  182. }
  183. void NVPTXAsmPrinter::lowerImageHandleSymbol(unsigned Index, MCOperand &MCOp) {
  184. // Ewwww
  185. LLVMTargetMachine &TM = const_cast<LLVMTargetMachine&>(MF->getTarget());
  186. NVPTXTargetMachine &nvTM = static_cast<NVPTXTargetMachine&>(TM);
  187. const NVPTXMachineFunctionInfo *MFI = MF->getInfo<NVPTXMachineFunctionInfo>();
  188. const char *Sym = MFI->getImageHandleSymbol(Index);
  189. StringRef SymName = nvTM.getStrPool().save(Sym);
  190. MCOp = GetSymbolRef(OutContext.getOrCreateSymbol(SymName));
  191. }
  192. void NVPTXAsmPrinter::lowerToMCInst(const MachineInstr *MI, MCInst &OutMI) {
  193. OutMI.setOpcode(MI->getOpcode());
  194. // Special: Do not mangle symbol operand of CALL_PROTOTYPE
  195. if (MI->getOpcode() == NVPTX::CALL_PROTOTYPE) {
  196. const MachineOperand &MO = MI->getOperand(0);
  197. OutMI.addOperand(GetSymbolRef(
  198. OutContext.getOrCreateSymbol(Twine(MO.getSymbolName()))));
  199. return;
  200. }
  201. const NVPTXSubtarget &STI = MI->getMF()->getSubtarget<NVPTXSubtarget>();
  202. for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
  203. const MachineOperand &MO = MI->getOperand(i);
  204. MCOperand MCOp;
  205. if (!STI.hasImageHandles()) {
  206. if (lowerImageHandleOperand(MI, i, MCOp)) {
  207. OutMI.addOperand(MCOp);
  208. continue;
  209. }
  210. }
  211. if (lowerOperand(MO, MCOp))
  212. OutMI.addOperand(MCOp);
  213. }
  214. }
  215. bool NVPTXAsmPrinter::lowerOperand(const MachineOperand &MO,
  216. MCOperand &MCOp) {
  217. switch (MO.getType()) {
  218. default: llvm_unreachable("unknown operand type");
  219. case MachineOperand::MO_Register:
  220. MCOp = MCOperand::createReg(encodeVirtualRegister(MO.getReg()));
  221. break;
  222. case MachineOperand::MO_Immediate:
  223. MCOp = MCOperand::createImm(MO.getImm());
  224. break;
  225. case MachineOperand::MO_MachineBasicBlock:
  226. MCOp = MCOperand::createExpr(MCSymbolRefExpr::create(
  227. MO.getMBB()->getSymbol(), OutContext));
  228. break;
  229. case MachineOperand::MO_ExternalSymbol:
  230. MCOp = GetSymbolRef(GetExternalSymbolSymbol(MO.getSymbolName()));
  231. break;
  232. case MachineOperand::MO_GlobalAddress:
  233. MCOp = GetSymbolRef(getSymbol(MO.getGlobal()));
  234. break;
  235. case MachineOperand::MO_FPImmediate: {
  236. const ConstantFP *Cnt = MO.getFPImm();
  237. const APFloat &Val = Cnt->getValueAPF();
  238. switch (Cnt->getType()->getTypeID()) {
  239. default: report_fatal_error("Unsupported FP type"); break;
  240. case Type::HalfTyID:
  241. MCOp = MCOperand::createExpr(
  242. NVPTXFloatMCExpr::createConstantFPHalf(Val, OutContext));
  243. break;
  244. case Type::FloatTyID:
  245. MCOp = MCOperand::createExpr(
  246. NVPTXFloatMCExpr::createConstantFPSingle(Val, OutContext));
  247. break;
  248. case Type::DoubleTyID:
  249. MCOp = MCOperand::createExpr(
  250. NVPTXFloatMCExpr::createConstantFPDouble(Val, OutContext));
  251. break;
  252. }
  253. break;
  254. }
  255. }
  256. return true;
  257. }
  258. unsigned NVPTXAsmPrinter::encodeVirtualRegister(unsigned Reg) {
  259. if (Register::isVirtualRegister(Reg)) {
  260. const TargetRegisterClass *RC = MRI->getRegClass(Reg);
  261. DenseMap<unsigned, unsigned> &RegMap = VRegMapping[RC];
  262. unsigned RegNum = RegMap[Reg];
  263. // Encode the register class in the upper 4 bits
  264. // Must be kept in sync with NVPTXInstPrinter::printRegName
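  // For example, a virtual register mapped to number 5 in Int32RegsRegClass
  // is encoded as (3 << 28) | 5.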
  265. unsigned Ret = 0;
  266. if (RC == &NVPTX::Int1RegsRegClass) {
  267. Ret = (1 << 28);
  268. } else if (RC == &NVPTX::Int16RegsRegClass) {
  269. Ret = (2 << 28);
  270. } else if (RC == &NVPTX::Int32RegsRegClass) {
  271. Ret = (3 << 28);
  272. } else if (RC == &NVPTX::Int64RegsRegClass) {
  273. Ret = (4 << 28);
  274. } else if (RC == &NVPTX::Float32RegsRegClass) {
  275. Ret = (5 << 28);
  276. } else if (RC == &NVPTX::Float64RegsRegClass) {
  277. Ret = (6 << 28);
  278. } else if (RC == &NVPTX::Float16RegsRegClass) {
  279. Ret = (7 << 28);
  280. } else if (RC == &NVPTX::Float16x2RegsRegClass) {
  281. Ret = (8 << 28);
  282. } else {
  283. report_fatal_error("Bad register class");
  284. }
  285. // Insert the vreg number
  286. Ret |= (RegNum & 0x0FFFFFFF);
  287. return Ret;
  288. } else {
  289. // Some special-use registers are actually physical registers.
  290. // Encode this as the register class ID of 0 and the real register ID.
  291. return Reg & 0x0FFFFFFF;
  292. }
  293. }
  294. MCOperand NVPTXAsmPrinter::GetSymbolRef(const MCSymbol *Symbol) {
  295. const MCExpr *Expr;
  296. Expr = MCSymbolRefExpr::create(Symbol, MCSymbolRefExpr::VK_None,
  297. OutContext);
  298. return MCOperand::createExpr(Expr);
  299. }
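  // Print the PTX declaration of F's return value, e.g. ".param .b32 func_retval0";
  // nothing is printed for a void return type.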
  300. void NVPTXAsmPrinter::printReturnValStr(const Function *F, raw_ostream &O) {
  301. const DataLayout &DL = getDataLayout();
  302. const NVPTXSubtarget &STI = TM.getSubtarget<NVPTXSubtarget>(*F);
  303. const auto *TLI = cast<NVPTXTargetLowering>(STI.getTargetLowering());
  304. Type *Ty = F->getReturnType();
  305. bool isABI = (STI.getSmVersion() >= 20);
  306. if (Ty->getTypeID() == Type::VoidTyID)
  307. return;
  308. O << " (";
  309. if (isABI) {
  310. if (Ty->isFloatingPointTy() || (Ty->isIntegerTy() && !Ty->isIntegerTy(128))) {
  311. unsigned size = 0;
  312. if (auto *ITy = dyn_cast<IntegerType>(Ty)) {
  313. size = ITy->getBitWidth();
  314. } else {
  315. assert(Ty->isFloatingPointTy() && "Floating point type expected here");
  316. size = Ty->getPrimitiveSizeInBits();
  317. }
  318. // PTX ABI requires all scalar return values to be at least 32
  319. // bits in size. fp16 normally uses .b16 as its storage type in
  320. // PTX, so its size must be adjusted here, too.
  321. size = promoteScalarArgumentSize(size);
  322. O << ".param .b" << size << " func_retval0";
  323. } else if (isa<PointerType>(Ty)) {
  324. O << ".param .b" << TLI->getPointerTy(DL).getSizeInBits()
  325. << " func_retval0";
  326. } else if (Ty->isAggregateType() || Ty->isVectorTy() || Ty->isIntegerTy(128)) {
  327. unsigned totalsz = DL.getTypeAllocSize(Ty);
  328. unsigned retAlignment = 0;
  329. if (!getAlign(*F, 0, retAlignment))
  330. retAlignment = TLI->getFunctionParamOptimizedAlign(F, Ty, DL).value();
  331. O << ".param .align " << retAlignment << " .b8 func_retval0[" << totalsz
  332. << "]";
  333. } else
  334. llvm_unreachable("Unknown return type");
  335. } else {
  336. SmallVector<EVT, 16> vtparts;
  337. ComputeValueVTs(*TLI, DL, Ty, vtparts);
  338. unsigned idx = 0;
  339. for (unsigned i = 0, e = vtparts.size(); i != e; ++i) {
  340. unsigned elems = 1;
  341. EVT elemtype = vtparts[i];
  342. if (vtparts[i].isVector()) {
  343. elems = vtparts[i].getVectorNumElements();
  344. elemtype = vtparts[i].getVectorElementType();
  345. }
  346. for (unsigned j = 0, je = elems; j != je; ++j) {
  347. unsigned sz = elemtype.getSizeInBits();
  348. if (elemtype.isInteger())
  349. sz = promoteScalarArgumentSize(sz);
  350. O << ".reg .b" << sz << " func_retval" << idx;
  351. if (j < je - 1)
  352. O << ", ";
  353. ++idx;
  354. }
  355. if (i < e - 1)
  356. O << ", ";
  357. }
  358. }
  359. O << ") ";
  360. }
  361. void NVPTXAsmPrinter::printReturnValStr(const MachineFunction &MF,
  362. raw_ostream &O) {
  363. const Function &F = MF.getFunction();
  364. printReturnValStr(&F, O);
  365. }
  366. // Return true if MBB is the header of a loop marked with
  367. // llvm.loop.unroll.disable or llvm.loop.unroll.count=1.
  368. bool NVPTXAsmPrinter::isLoopHeaderOfNoUnroll(
  369. const MachineBasicBlock &MBB) const {
  370. MachineLoopInfo &LI = getAnalysis<MachineLoopInfo>();
  371. // We insert .pragma "nounroll" only at the loop header.
  372. if (!LI.isLoopHeader(&MBB))
  373. return false;
  374. // llvm.loop.unroll.disable is marked on the back edges of a loop. Therefore,
  375. // we iterate through each back edge of the loop with header MBB, and check
  376. // whether its metadata contains llvm.loop.unroll.disable.
  377. for (const MachineBasicBlock *PMBB : MBB.predecessors()) {
  378. if (LI.getLoopFor(PMBB) != LI.getLoopFor(&MBB)) {
  379. // Edges from other loops to MBB are not back edges.
  380. continue;
  381. }
  382. if (const BasicBlock *PBB = PMBB->getBasicBlock()) {
  383. if (MDNode *LoopID =
  384. PBB->getTerminator()->getMetadata(LLVMContext::MD_loop)) {
  385. if (GetUnrollMetadata(LoopID, "llvm.loop.unroll.disable"))
  386. return true;
  387. if (MDNode *UnrollCountMD =
  388. GetUnrollMetadata(LoopID, "llvm.loop.unroll.count")) {
  389. if (mdconst::extract<ConstantInt>(UnrollCountMD->getOperand(1))
  390. ->getZExtValue() == 1)
  391. return true;
  392. }
  393. }
  394. }
  395. }
  396. return false;
  397. }
  398. void NVPTXAsmPrinter::emitBasicBlockStart(const MachineBasicBlock &MBB) {
  399. AsmPrinter::emitBasicBlockStart(MBB);
  400. if (isLoopHeaderOfNoUnroll(MBB))
  401. OutStreamer->emitRawText(StringRef("\t.pragma \"nounroll\";\n"));
  402. }
  403. void NVPTXAsmPrinter::emitFunctionEntryLabel() {
  404. SmallString<128> Str;
  405. raw_svector_ostream O(Str);
  406. if (!GlobalsEmitted) {
  407. emitGlobals(*MF->getFunction().getParent());
  408. GlobalsEmitted = true;
  409. }
  410. // Set up
  411. MRI = &MF->getRegInfo();
  412. F = &MF->getFunction();
  413. emitLinkageDirective(F, O);
  414. if (isKernelFunction(*F))
  415. O << ".entry ";
  416. else {
  417. O << ".func ";
  418. printReturnValStr(*MF, O);
  419. }
  420. CurrentFnSym->print(O, MAI);
  421. emitFunctionParamList(*MF, O);
  422. if (isKernelFunction(*F))
  423. emitKernelFunctionDirectives(*F, O);
  424. if (shouldEmitPTXNoReturn(F, TM))
  425. O << ".noreturn";
  426. OutStreamer->emitRawText(O.str());
  427. VRegMapping.clear();
  428. // Emit open brace for function body.
  429. OutStreamer->emitRawText(StringRef("{\n"));
  430. setAndEmitFunctionVirtualRegisters(*MF);
  431. // Emit initial .loc debug directive for correct relocation symbol data.
  432. if (MMI && MMI->hasDebugInfo())
  433. emitInitialRawDwarfLocDirective(*MF);
  434. }
  435. bool NVPTXAsmPrinter::runOnMachineFunction(MachineFunction &F) {
  436. bool Result = AsmPrinter::runOnMachineFunction(F);
  437. // Emit closing brace for the body of function F.
  438. // The closing brace must be emitted here because we need to emit additional
  439. // debug labels/data after the last basic block.
  440. // We need to emit the closing brace here because there is no other hook that
  441. // runs after the function body has finished being emitted.
  442. OutStreamer->emitRawText(StringRef("}\n"));
  443. return Result;
  444. }
  445. void NVPTXAsmPrinter::emitFunctionBodyStart() {
  446. SmallString<128> Str;
  447. raw_svector_ostream O(Str);
  448. emitDemotedVars(&MF->getFunction(), O);
  449. OutStreamer->emitRawText(O.str());
  450. }
  451. void NVPTXAsmPrinter::emitFunctionBodyEnd() {
  452. VRegMapping.clear();
  453. }
  454. const MCSymbol *NVPTXAsmPrinter::getFunctionFrameSymbol() const {
  455. SmallString<128> Str;
  456. raw_svector_ostream(Str) << DEPOTNAME << getFunctionNumber();
  457. return OutContext.getOrCreateSymbol(Str);
  458. }
  459. void NVPTXAsmPrinter::emitImplicitDef(const MachineInstr *MI) const {
  460. Register RegNo = MI->getOperand(0).getReg();
  461. if (RegNo.isVirtual()) {
  462. OutStreamer->AddComment(Twine("implicit-def: ") +
  463. getVirtualRegisterName(RegNo));
  464. } else {
  465. const NVPTXSubtarget &STI = MI->getMF()->getSubtarget<NVPTXSubtarget>();
  466. OutStreamer->AddComment(Twine("implicit-def: ") +
  467. STI.getRegisterInfo()->getName(RegNo));
  468. }
  469. OutStreamer->addBlankLine();
  470. }
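  // Emit kernel-only PTX directives (.reqntid, .maxntid, .minnctapersm, .maxnreg)
  // derived from the NVVM annotations attached to F.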
  471. void NVPTXAsmPrinter::emitKernelFunctionDirectives(const Function &F,
  472. raw_ostream &O) const {
  473. // If the NVVM IR has some of reqntid* specified, then output
  474. // the reqntid directive, and set the unspecified ones to 1.
  475. // If none of reqntid* is specified, don't output the reqntid directive.
  476. unsigned reqntidx, reqntidy, reqntidz;
  477. bool specified = false;
  478. if (!getReqNTIDx(F, reqntidx))
  479. reqntidx = 1;
  480. else
  481. specified = true;
  482. if (!getReqNTIDy(F, reqntidy))
  483. reqntidy = 1;
  484. else
  485. specified = true;
  486. if (!getReqNTIDz(F, reqntidz))
  487. reqntidz = 1;
  488. else
  489. specified = true;
  490. if (specified)
  491. O << ".reqntid " << reqntidx << ", " << reqntidy << ", " << reqntidz
  492. << "\n";
  493. // If the NVVM IR has some of maxntid* specified, then output
  494. // the maxntid directive, and set the unspecified ones to 1.
  495. // If none of maxntid* is specified, don't output the maxntid directive.
  496. unsigned maxntidx, maxntidy, maxntidz;
  497. specified = false;
  498. if (!getMaxNTIDx(F, maxntidx))
  499. maxntidx = 1;
  500. else
  501. specified = true;
  502. if (!getMaxNTIDy(F, maxntidy))
  503. maxntidy = 1;
  504. else
  505. specified = true;
  506. if (!getMaxNTIDz(F, maxntidz))
  507. maxntidz = 1;
  508. else
  509. specified = true;
  510. if (specified)
  511. O << ".maxntid " << maxntidx << ", " << maxntidy << ", " << maxntidz
  512. << "\n";
  513. unsigned mincta;
  514. if (getMinCTASm(F, mincta))
  515. O << ".minnctapersm " << mincta << "\n";
  516. unsigned maxnreg;
  517. if (getMaxNReg(F, maxnreg))
  518. O << ".maxnreg " << maxnreg << "\n";
  519. }
  520. std::string
  521. NVPTXAsmPrinter::getVirtualRegisterName(unsigned Reg) const {
  522. const TargetRegisterClass *RC = MRI->getRegClass(Reg);
  523. std::string Name;
  524. raw_string_ostream NameStr(Name);
  525. VRegRCMap::const_iterator I = VRegMapping.find(RC);
  526. assert(I != VRegMapping.end() && "Bad register class");
  527. const DenseMap<unsigned, unsigned> &RegMap = I->second;
  528. VRegMap::const_iterator VI = RegMap.find(Reg);
  529. assert(VI != RegMap.end() && "Bad virtual register");
  530. unsigned MappedVR = VI->second;
  531. NameStr << getNVPTXRegClassStr(RC) << MappedVR;
  532. NameStr.flush();
  533. return Name;
  534. }
  535. void NVPTXAsmPrinter::emitVirtualRegister(unsigned int vr,
  536. raw_ostream &O) {
  537. O << getVirtualRegisterName(vr);
  538. }
  539. void NVPTXAsmPrinter::emitDeclaration(const Function *F, raw_ostream &O) {
  540. emitLinkageDirective(F, O);
  541. if (isKernelFunction(*F))
  542. O << ".entry ";
  543. else
  544. O << ".func ";
  545. printReturnValStr(F, O);
  546. getSymbol(F)->print(O, MAI);
  547. O << "\n";
  548. emitFunctionParamList(F, O);
  549. if (shouldEmitPTXNoReturn(F, TM))
  550. O << ".noreturn";
  551. O << ";\n";
  552. }
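  // Return true if the constant C is used, possibly through a chain of other
  // constants, by the definition of a global variable other than llvm.used.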
  553. static bool usedInGlobalVarDef(const Constant *C) {
  554. if (!C)
  555. return false;
  556. if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(C)) {
  557. return GV->getName() != "llvm.used";
  558. }
  559. for (const User *U : C->users())
  560. if (const Constant *C = dyn_cast<Constant>(U))
  561. if (usedInGlobalVarDef(C))
  562. return true;
  563. return false;
  564. }
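  // Return true if every instruction that (transitively) uses U lives in a single
  // function, which is recorded in oneFunc; uses from llvm.used are ignored.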
  565. static bool usedInOneFunc(const User *U, Function const *&oneFunc) {
  566. if (const GlobalVariable *othergv = dyn_cast<GlobalVariable>(U)) {
  567. if (othergv->getName() == "llvm.used")
  568. return true;
  569. }
  570. if (const Instruction *instr = dyn_cast<Instruction>(U)) {
  571. if (instr->getParent() && instr->getParent()->getParent()) {
  572. const Function *curFunc = instr->getParent()->getParent();
  573. if (oneFunc && (curFunc != oneFunc))
  574. return false;
  575. oneFunc = curFunc;
  576. return true;
  577. } else
  578. return false;
  579. }
  580. for (const User *UU : U->users())
  581. if (!usedInOneFunc(UU, oneFunc))
  582. return false;
  583. return true;
  584. }
  585. /* Find out if a global variable can be demoted to local scope.
  586. * Currently, this is valid for CUDA shared variables, which have local
  587. * scope and global lifetime. So the conditions to check are :
  588. * 1. Is the global variable in shared address space?
  589. * 2. Does it have internal linkage?
  590. * 3. Is the global variable referenced only in one function?
  591. */
  592. static bool canDemoteGlobalVar(const GlobalVariable *gv, Function const *&f) {
  593. if (!gv->hasInternalLinkage())
  594. return false;
  595. PointerType *Pty = gv->getType();
  596. if (Pty->getAddressSpace() != ADDRESS_SPACE_SHARED)
  597. return false;
  598. const Function *oneFunc = nullptr;
  599. bool flag = usedInOneFunc(gv, oneFunc);
  600. if (!flag)
  601. return false;
  602. if (!oneFunc)
  603. return false;
  604. f = oneFunc;
  605. return true;
  606. }
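  // Return true if the constant C is used, directly or via other constants, from a
  // function that has already been recorded in seenMap.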
  607. static bool useFuncSeen(const Constant *C,
  608. DenseMap<const Function *, bool> &seenMap) {
  609. for (const User *U : C->users()) {
  610. if (const Constant *cu = dyn_cast<Constant>(U)) {
  611. if (useFuncSeen(cu, seenMap))
  612. return true;
  613. } else if (const Instruction *I = dyn_cast<Instruction>(U)) {
  614. const BasicBlock *bb = I->getParent();
  615. if (!bb)
  616. continue;
  617. const Function *caller = bb->getParent();
  618. if (!caller)
  619. continue;
  620. if (seenMap.find(caller) != seenMap.end())
  621. return true;
  622. }
  623. }
  624. return false;
  625. }
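  // Emit declarations for functions that are used before the point where their
  // definition will be emitted: external/libcall declarations, functions referenced
  // from global initializers, and callees defined later than their callers.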
  626. void NVPTXAsmPrinter::emitDeclarations(const Module &M, raw_ostream &O) {
  627. DenseMap<const Function *, bool> seenMap;
  628. for (const Function &F : M) {
  629. if (F.getAttributes().hasFnAttr("nvptx-libcall-callee")) {
  630. emitDeclaration(&F, O);
  631. continue;
  632. }
  633. if (F.isDeclaration()) {
  634. if (F.use_empty())
  635. continue;
  636. if (F.getIntrinsicID())
  637. continue;
  638. emitDeclaration(&F, O);
  639. continue;
  640. }
  641. for (const User *U : F.users()) {
  642. if (const Constant *C = dyn_cast<Constant>(U)) {
  643. if (usedInGlobalVarDef(C)) {
  644. // The use is in the initialization of a global variable
  645. // that is a function pointer, so print a declaration
  646. // for the original function
  647. emitDeclaration(&F, O);
  648. break;
  649. }
  650. // Emit a declaration of this function if the function that
  651. // uses this constant expr has already been seen.
  652. if (useFuncSeen(C, seenMap)) {
  653. emitDeclaration(&F, O);
  654. break;
  655. }
  656. }
  657. if (!isa<Instruction>(U))
  658. continue;
  659. const Instruction *instr = cast<Instruction>(U);
  660. const BasicBlock *bb = instr->getParent();
  661. if (!bb)
  662. continue;
  663. const Function *caller = bb->getParent();
  664. if (!caller)
  665. continue;
  666. // If a caller has already been seen, then the caller is
  667. // appearing in the module before the callee, so print out
  668. // a declaration for the callee.
  669. if (seenMap.find(caller) != seenMap.end()) {
  670. emitDeclaration(&F, O);
  671. break;
  672. }
  673. }
  674. seenMap[&F] = true;
  675. }
  676. }
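  // Return true if GV is absent, is not a parseable ctor/dtor array, or is an
  // empty llvm.global_ctors / llvm.global_dtors list.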
  677. static bool isEmptyXXStructor(GlobalVariable *GV) {
  678. if (!GV) return true;
  679. const ConstantArray *InitList = dyn_cast<ConstantArray>(GV->getInitializer());
  680. if (!InitList) return true; // Not an array; we don't know how to parse.
  681. return InitList->getNumOperands() == 0;
  682. }
  683. void NVPTXAsmPrinter::emitStartOfAsmFile(Module &M) {
  684. // Construct a default subtarget off of the TargetMachine defaults. The
  685. // rest of NVPTX isn't friendly to change subtargets per function and
  686. // so the default TargetMachine will have all of the options.
  687. const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM);
  688. const auto* STI = static_cast<const NVPTXSubtarget*>(NTM.getSubtargetImpl());
  689. SmallString<128> Str1;
  690. raw_svector_ostream OS1(Str1);
  691. // Emit header before any dwarf directives are emitted below.
  692. emitHeader(M, OS1, *STI);
  693. OutStreamer->emitRawText(OS1.str());
  694. }
  695. bool NVPTXAsmPrinter::doInitialization(Module &M) {
  696. if (M.alias_size()) {
  697. report_fatal_error("Module has aliases, which NVPTX does not support.");
  698. return true; // error
  699. }
  700. if (!isEmptyXXStructor(M.getNamedGlobal("llvm.global_ctors"))) {
  701. report_fatal_error(
  702. "Module has a nontrivial global ctor, which NVPTX does not support.");
  703. return true; // error
  704. }
  705. if (!isEmptyXXStructor(M.getNamedGlobal("llvm.global_dtors"))) {
  706. report_fatal_error(
  707. "Module has a nontrivial global dtor, which NVPTX does not support.");
  708. return true; // error
  709. }
  710. // We need to call the parent's one explicitly.
  711. bool Result = AsmPrinter::doInitialization(M);
  712. GlobalsEmitted = false;
  713. return Result;
  714. }
  715. void NVPTXAsmPrinter::emitGlobals(const Module &M) {
  716. SmallString<128> Str2;
  717. raw_svector_ostream OS2(Str2);
  718. emitDeclarations(M, OS2);
  719. // As ptxas does not support forward references of globals, we need to first
  720. // sort the list of module-level globals in def-use order. We visit each
  721. // global variable in order, and ensure that we emit it *after* its dependent
  722. // globals. We use a little extra memory maintaining both a set and a list to
  723. // have fast searches while maintaining a strict ordering.
  724. SmallVector<const GlobalVariable *, 8> Globals;
  725. DenseSet<const GlobalVariable *> GVVisited;
  726. DenseSet<const GlobalVariable *> GVVisiting;
  727. // Visit each global variable, in order
  728. for (const GlobalVariable &I : M.globals())
  729. VisitGlobalVariableForEmission(&I, Globals, GVVisited, GVVisiting);
  730. assert(GVVisited.size() == M.getGlobalList().size() &&
  731. "Missed a global variable");
  732. assert(GVVisiting.size() == 0 && "Did not fully process a global variable");
  733. const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM);
  734. const NVPTXSubtarget &STI =
  735. *static_cast<const NVPTXSubtarget *>(NTM.getSubtargetImpl());
  736. // Print out module-level global variables in proper order
  737. for (unsigned i = 0, e = Globals.size(); i != e; ++i)
  738. printModuleLevelGV(Globals[i], OS2, /*processDemoted=*/false, STI);
  739. OS2 << '\n';
  740. OutStreamer->emitRawText(OS2.str());
  741. }
  742. void NVPTXAsmPrinter::emitHeader(Module &M, raw_ostream &O,
  743. const NVPTXSubtarget &STI) {
  744. O << "//\n";
  745. O << "// Generated by LLVM NVPTX Back-End\n";
  746. O << "//\n";
  747. O << "\n";
  748. unsigned PTXVersion = STI.getPTXVersion();
  749. O << ".version " << (PTXVersion / 10) << "." << (PTXVersion % 10) << "\n";
  750. O << ".target ";
  751. O << STI.getTargetName();
  752. const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM);
  753. if (NTM.getDrvInterface() == NVPTX::NVCL)
  754. O << ", texmode_independent";
  755. bool HasFullDebugInfo = false;
  756. for (DICompileUnit *CU : M.debug_compile_units()) {
  757. switch(CU->getEmissionKind()) {
  758. case DICompileUnit::NoDebug:
  759. case DICompileUnit::DebugDirectivesOnly:
  760. break;
  761. case DICompileUnit::LineTablesOnly:
  762. case DICompileUnit::FullDebug:
  763. HasFullDebugInfo = true;
  764. break;
  765. }
  766. if (HasFullDebugInfo)
  767. break;
  768. }
  769. if (MMI && MMI->hasDebugInfo() && HasFullDebugInfo)
  770. O << ", debug";
  771. O << "\n";
  772. O << ".address_size ";
  773. if (NTM.is64Bit())
  774. O << "64";
  775. else
  776. O << "32";
  777. O << "\n";
  778. O << "\n";
  779. }
  780. bool NVPTXAsmPrinter::doFinalization(Module &M) {
  781. bool HasDebugInfo = MMI && MMI->hasDebugInfo();
  782. // If we did not emit any functions, then the global declarations have not
  783. // yet been emitted.
  784. if (!GlobalsEmitted) {
  785. emitGlobals(M);
  786. GlobalsEmitted = true;
  787. }
  788. // call doFinalization
  789. bool ret = AsmPrinter::doFinalization(M);
  790. clearAnnotationCache(&M);
  791. auto *TS =
  792. static_cast<NVPTXTargetStreamer *>(OutStreamer->getTargetStreamer());
  793. // Close the last emitted section
  794. if (HasDebugInfo) {
  795. TS->closeLastSection();
  796. // Emit an empty .debug_loc section for better support of empty files.
  797. OutStreamer->emitRawText("\t.section\t.debug_loc\t{\t}");
  798. }
  799. // Output last DWARF .file directives, if any.
  800. TS->outputDwarfFileDirectives();
  801. return ret;
  802. }
  803. // This function emits appropriate linkage directives for
  804. // functions and global variables.
  805. //
  806. // extern function declaration -> .extern
  807. // extern function definition -> .visible
  808. // external global variable with init -> .visible
  809. // external without init -> .extern
  810. // appending -> not allowed, assert.
  811. // for any linkage other than
  812. // internal, private, linker_private,
  813. // linker_private_weak, linker_private_weak_def_auto,
  814. // we emit -> .weak.
  815. void NVPTXAsmPrinter::emitLinkageDirective(const GlobalValue *V,
  816. raw_ostream &O) {
  817. if (static_cast<NVPTXTargetMachine &>(TM).getDrvInterface() == NVPTX::CUDA) {
  818. if (V->hasExternalLinkage()) {
  819. if (isa<GlobalVariable>(V)) {
  820. const GlobalVariable *GVar = cast<GlobalVariable>(V);
  821. if (GVar) {
  822. if (GVar->hasInitializer())
  823. O << ".visible ";
  824. else
  825. O << ".extern ";
  826. }
  827. } else if (V->isDeclaration())
  828. O << ".extern ";
  829. else
  830. O << ".visible ";
  831. } else if (V->hasAppendingLinkage()) {
  832. std::string msg;
  833. msg.append("Error: ");
  834. msg.append("Symbol ");
  835. if (V->hasName())
  836. msg.append(std::string(V->getName()));
  837. msg.append(" has unsupported appending linkage type");
  838. llvm_unreachable(msg.c_str());
  839. } else if (!V->hasInternalLinkage() &&
  840. !V->hasPrivateLinkage()) {
  841. O << ".weak ";
  842. }
  843. }
  844. }
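  // Emit the PTX definition of a module-level global: linkage, state space,
  // alignment, type/size and, where the state space allows it, the initializer.
  // Shared variables referenced from only one function are demoted and emitted
  // inside that function by emitDemotedVars() instead.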
  845. void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar,
  846. raw_ostream &O, bool processDemoted,
  847. const NVPTXSubtarget &STI) {
  848. // Skip metadata
  849. if (GVar->hasSection()) {
  850. if (GVar->getSection() == "llvm.metadata")
  851. return;
  852. }
  853. // Skip LLVM intrinsic global variables
  854. if (GVar->getName().startswith("llvm.") ||
  855. GVar->getName().startswith("nvvm."))
  856. return;
  857. const DataLayout &DL = getDataLayout();
  858. // GlobalVariables are always constant pointers themselves.
  859. PointerType *PTy = GVar->getType();
  860. Type *ETy = GVar->getValueType();
  861. if (GVar->hasExternalLinkage()) {
  862. if (GVar->hasInitializer())
  863. O << ".visible ";
  864. else
  865. O << ".extern ";
  866. } else if (GVar->hasLinkOnceLinkage() || GVar->hasWeakLinkage() ||
  867. GVar->hasAvailableExternallyLinkage() ||
  868. GVar->hasCommonLinkage()) {
  869. O << ".weak ";
  870. }
  871. if (isTexture(*GVar)) {
  872. O << ".global .texref " << getTextureName(*GVar) << ";\n";
  873. return;
  874. }
  875. if (isSurface(*GVar)) {
  876. O << ".global .surfref " << getSurfaceName(*GVar) << ";\n";
  877. return;
  878. }
  879. if (GVar->isDeclaration()) {
  880. // (extern) declarations, no definition or initializer
  881. // Currently the only known declaration is for an automatic __local
  882. // (.shared) promoted to global.
  883. emitPTXGlobalVariable(GVar, O, STI);
  884. O << ";\n";
  885. return;
  886. }
  887. if (isSampler(*GVar)) {
  888. O << ".global .samplerref " << getSamplerName(*GVar);
  889. const Constant *Initializer = nullptr;
  890. if (GVar->hasInitializer())
  891. Initializer = GVar->getInitializer();
  892. const ConstantInt *CI = nullptr;
  893. if (Initializer)
  894. CI = dyn_cast<ConstantInt>(Initializer);
  895. if (CI) {
  896. unsigned sample = CI->getZExtValue();
  897. O << " = { ";
  898. for (int i = 0,
  899. addr = ((sample & __CLK_ADDRESS_MASK) >> __CLK_ADDRESS_BASE);
  900. i < 3; i++) {
  901. O << "addr_mode_" << i << " = ";
  902. switch (addr) {
  903. case 0:
  904. O << "wrap";
  905. break;
  906. case 1:
  907. O << "clamp_to_border";
  908. break;
  909. case 2:
  910. O << "clamp_to_edge";
  911. break;
  912. case 3:
  913. O << "wrap";
  914. break;
  915. case 4:
  916. O << "mirror";
  917. break;
  918. }
  919. O << ", ";
  920. }
  921. O << "filter_mode = ";
  922. switch ((sample & __CLK_FILTER_MASK) >> __CLK_FILTER_BASE) {
  923. case 0:
  924. O << "nearest";
  925. break;
  926. case 1:
  927. O << "linear";
  928. break;
  929. case 2:
  930. llvm_unreachable("Anisotropic filtering is not supported");
  931. default:
  932. O << "nearest";
  933. break;
  934. }
  935. if (!((sample & __CLK_NORMALIZED_MASK) >> __CLK_NORMALIZED_BASE)) {
  936. O << ", force_unnormalized_coords = 1";
  937. }
  938. O << " }";
  939. }
  940. O << ";\n";
  941. return;
  942. }
  943. if (GVar->hasPrivateLinkage()) {
  944. if (strncmp(GVar->getName().data(), "unrollpragma", 12) == 0)
  945. return;
  946. // FIXME - need better way (e.g. Metadata) to avoid generating this global
  947. if (strncmp(GVar->getName().data(), "filename", 8) == 0)
  948. return;
  949. if (GVar->use_empty())
  950. return;
  951. }
  952. const Function *demotedFunc = nullptr;
  953. if (!processDemoted && canDemoteGlobalVar(GVar, demotedFunc)) {
  954. O << "// " << GVar->getName() << " has been demoted\n";
  955. if (localDecls.find(demotedFunc) != localDecls.end())
  956. localDecls[demotedFunc].push_back(GVar);
  957. else {
  958. std::vector<const GlobalVariable *> temp;
  959. temp.push_back(GVar);
  960. localDecls[demotedFunc] = temp;
  961. }
  962. return;
  963. }
  964. O << ".";
  965. emitPTXAddressSpace(PTy->getAddressSpace(), O);
  966. if (isManaged(*GVar)) {
  967. if (STI.getPTXVersion() < 40 || STI.getSmVersion() < 30) {
  968. report_fatal_error(
  969. ".attribute(.managed) requires PTX version >= 4.0 and sm_30");
  970. }
  971. O << " .attribute(.managed)";
  972. }
  973. if (MaybeAlign A = GVar->getAlign())
  974. O << " .align " << A->value();
  975. else
  976. O << " .align " << (int)DL.getPrefTypeAlign(ETy).value();
  977. if (ETy->isFloatingPointTy() || ETy->isPointerTy() ||
  978. (ETy->isIntegerTy() && ETy->getScalarSizeInBits() <= 64)) {
  979. O << " .";
  980. // Special case: ABI requires that we use .u8 for predicates
  981. if (ETy->isIntegerTy(1))
  982. O << "u8";
  983. else
  984. O << getPTXFundamentalTypeStr(ETy, false);
  985. O << " ";
  986. getSymbol(GVar)->print(O, MAI);
  987. // PTX allows variable initialization only for constant and global state
  988. // spaces.
  989. if (GVar->hasInitializer()) {
  990. if ((PTy->getAddressSpace() == ADDRESS_SPACE_GLOBAL) ||
  991. (PTy->getAddressSpace() == ADDRESS_SPACE_CONST)) {
  992. const Constant *Initializer = GVar->getInitializer();
  993. // 'undef' is treated as if no value was specified.
  994. if (!Initializer->isNullValue() && !isa<UndefValue>(Initializer)) {
  995. O << " = ";
  996. printScalarConstant(Initializer, O);
  997. }
  998. } else {
  999. // The frontend adds zero-initializer to device and constant variables
  1000. // that don't have an initial value, and UndefValue to shared
  1001. // variables, so skip warning for this case.
  1002. if (!GVar->getInitializer()->isNullValue() &&
  1003. !isa<UndefValue>(GVar->getInitializer())) {
  1004. report_fatal_error("initial value of '" + GVar->getName() +
  1005. "' is not allowed in addrspace(" +
  1006. Twine(PTy->getAddressSpace()) + ")");
  1007. }
  1008. }
  1009. }
  1010. } else {
  1011. unsigned int ElementSize = 0;
  1012. // Although PTX has direct support for struct and array types, the LLVM
  1013. // CodeGen does not make use of these high-level field accesses.
  1014. // Structs, arrays and vectors are therefore lowered into arrays of
  1015. // bytes.
  1016. switch (ETy->getTypeID()) {
  1017. case Type::IntegerTyID: // Integers larger than 64 bits
  1018. case Type::StructTyID:
  1019. case Type::ArrayTyID:
  1020. case Type::FixedVectorTyID:
  1021. ElementSize = DL.getTypeStoreSize(ETy);
  1022. // PTX allows variable initialization only for constant and
  1023. // global state spaces.
  1024. if (((PTy->getAddressSpace() == ADDRESS_SPACE_GLOBAL) ||
  1025. (PTy->getAddressSpace() == ADDRESS_SPACE_CONST)) &&
  1026. GVar->hasInitializer()) {
  1027. const Constant *Initializer = GVar->getInitializer();
  1028. if (!isa<UndefValue>(Initializer) && !Initializer->isNullValue()) {
  1029. AggBuffer aggBuffer(ElementSize, *this);
  1030. bufferAggregateConstant(Initializer, &aggBuffer);
  1031. if (aggBuffer.numSymbols()) {
  1032. unsigned int ptrSize = MAI->getCodePointerSize();
  1033. if (ElementSize % ptrSize ||
  1034. !aggBuffer.allSymbolsAligned(ptrSize)) {
  1035. // Print in bytes and use the mask() operator for pointers.
  1036. if (!STI.hasMaskOperator())
  1037. report_fatal_error(
  1038. "initialized packed aggregate with pointers '" +
  1039. GVar->getName() +
  1040. "' requires at least PTX ISA version 7.1");
  1041. O << " .u8 ";
  1042. getSymbol(GVar)->print(O, MAI);
  1043. O << "[" << ElementSize << "] = {";
  1044. aggBuffer.printBytes(O);
  1045. O << "}";
  1046. } else {
  1047. O << " .u" << ptrSize * 8 << " ";
  1048. getSymbol(GVar)->print(O, MAI);
  1049. O << "[" << ElementSize / ptrSize << "] = {";
  1050. aggBuffer.printWords(O);
  1051. O << "}";
  1052. }
  1053. } else {
  1054. O << " .b8 ";
  1055. getSymbol(GVar)->print(O, MAI);
  1056. O << "[" << ElementSize << "] = {";
  1057. aggBuffer.printBytes(O);
  1058. O << "}";
  1059. }
  1060. } else {
  1061. O << " .b8 ";
  1062. getSymbol(GVar)->print(O, MAI);
  1063. if (ElementSize) {
  1064. O << "[";
  1065. O << ElementSize;
  1066. O << "]";
  1067. }
  1068. }
  1069. } else {
  1070. O << " .b8 ";
  1071. getSymbol(GVar)->print(O, MAI);
  1072. if (ElementSize) {
  1073. O << "[";
  1074. O << ElementSize;
  1075. O << "]";
  1076. }
  1077. }
  1078. break;
  1079. default:
  1080. llvm_unreachable("type not supported yet");
  1081. }
  1082. }
  1083. O << ";\n";
  1084. }
  1085. void NVPTXAsmPrinter::AggBuffer::printSymbol(unsigned nSym, raw_ostream &os) {
  1086. const Value *v = Symbols[nSym];
  1087. const Value *v0 = SymbolsBeforeStripping[nSym];
  1088. if (const GlobalValue *GVar = dyn_cast<GlobalValue>(v)) {
  1089. MCSymbol *Name = AP.getSymbol(GVar);
  1090. PointerType *PTy = dyn_cast<PointerType>(v0->getType());
  1091. // Is v0 a generic pointer?
  1092. bool isGenericPointer = PTy && PTy->getAddressSpace() == 0;
  1093. if (EmitGeneric && isGenericPointer && !isa<Function>(v)) {
  1094. os << "generic(";
  1095. Name->print(os, AP.MAI);
  1096. os << ")";
  1097. } else {
  1098. Name->print(os, AP.MAI);
  1099. }
  1100. } else if (const ConstantExpr *CExpr = dyn_cast<ConstantExpr>(v0)) {
  1101. const MCExpr *Expr = AP.lowerConstantForGV(cast<Constant>(CExpr), false);
  1102. AP.printMCExpr(*Expr, os);
  1103. } else
  1104. llvm_unreachable("symbol type unknown");
  1105. }
  1106. void NVPTXAsmPrinter::AggBuffer::printBytes(raw_ostream &os) {
  1107. unsigned int ptrSize = AP.MAI->getCodePointerSize();
  1108. symbolPosInBuffer.push_back(size);
  1109. unsigned int nSym = 0;
  1110. unsigned int nextSymbolPos = symbolPosInBuffer[nSym];
  1111. for (unsigned int pos = 0; pos < size;) {
  1112. if (pos)
  1113. os << ", ";
  1114. if (pos != nextSymbolPos) {
  1115. os << (unsigned int)buffer[pos];
  1116. ++pos;
  1117. continue;
  1118. }
  1119. // Generate a per-byte mask() operator for the symbol, which looks like:
  1120. // .global .u8 addr[] = {0xFF(foo), 0xFF00(foo), 0xFF0000(foo), ...};
  1121. // See https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#initializers
  1122. std::string symText;
  1123. llvm::raw_string_ostream oss(symText);
  1124. printSymbol(nSym, oss);
  1125. for (unsigned i = 0; i < ptrSize; ++i) {
  1126. if (i)
  1127. os << ", ";
  1128. llvm::write_hex(os, 0xFFULL << i * 8, HexPrintStyle::PrefixUpper);
  1129. os << "(" << symText << ")";
  1130. }
  1131. pos += ptrSize;
  1132. nextSymbolPos = symbolPosInBuffer[++nSym];
  1133. assert(nextSymbolPos >= pos);
  1134. }
  1135. }
  1136. void NVPTXAsmPrinter::AggBuffer::printWords(raw_ostream &os) {
  1137. unsigned int ptrSize = AP.MAI->getCodePointerSize();
  1138. symbolPosInBuffer.push_back(size);
  1139. unsigned int nSym = 0;
  1140. unsigned int nextSymbolPos = symbolPosInBuffer[nSym];
  1141. assert(nextSymbolPos % ptrSize == 0);
  1142. for (unsigned int pos = 0; pos < size; pos += ptrSize) {
  1143. if (pos)
  1144. os << ", ";
  1145. if (pos == nextSymbolPos) {
  1146. printSymbol(nSym, os);
  1147. nextSymbolPos = symbolPosInBuffer[++nSym];
  1148. assert(nextSymbolPos % ptrSize == 0);
  1149. assert(nextSymbolPos >= pos + ptrSize);
  1150. } else if (ptrSize == 4)
  1151. os << support::endian::read32le(&buffer[pos]);
  1152. else
  1153. os << support::endian::read64le(&buffer[pos]);
  1154. }
  1155. }
  1156. void NVPTXAsmPrinter::emitDemotedVars(const Function *f, raw_ostream &O) {
  1157. if (localDecls.find(f) == localDecls.end())
  1158. return;
  1159. std::vector<const GlobalVariable *> &gvars = localDecls[f];
  1160. const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM);
  1161. const NVPTXSubtarget &STI =
  1162. *static_cast<const NVPTXSubtarget *>(NTM.getSubtargetImpl());
  1163. for (const GlobalVariable *GV : gvars) {
  1164. O << "\t// demoted variable\n\t";
  1165. printModuleLevelGV(GV, O, /*processDemoted=*/true, STI);
  1166. }
  1167. }
  1168. void NVPTXAsmPrinter::emitPTXAddressSpace(unsigned int AddressSpace,
  1169. raw_ostream &O) const {
  1170. switch (AddressSpace) {
  1171. case ADDRESS_SPACE_LOCAL:
  1172. O << "local";
  1173. break;
  1174. case ADDRESS_SPACE_GLOBAL:
  1175. O << "global";
  1176. break;
  1177. case ADDRESS_SPACE_CONST:
  1178. O << "const";
  1179. break;
  1180. case ADDRESS_SPACE_SHARED:
  1181. O << "shared";
  1182. break;
  1183. default:
  1184. report_fatal_error("Bad address space found while emitting PTX: " +
  1185. llvm::Twine(AddressSpace));
  1186. break;
  1187. }
  1188. }
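  // Map a scalar LLVM type to its PTX fundamental type string, e.g. i1 -> "pred",
  // i32 -> "u32", float -> "f32"; pointers become .u or .b of pointer width
  // depending on useB4PTR.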
  1189. std::string
  1190. NVPTXAsmPrinter::getPTXFundamentalTypeStr(Type *Ty, bool useB4PTR) const {
  1191. switch (Ty->getTypeID()) {
  1192. case Type::IntegerTyID: {
  1193. unsigned NumBits = cast<IntegerType>(Ty)->getBitWidth();
  1194. if (NumBits == 1)
  1195. return "pred";
  1196. else if (NumBits <= 64) {
  1197. std::string name = "u";
  1198. return name + utostr(NumBits);
  1199. } else {
  1200. llvm_unreachable("Integer too large");
  1201. break;
  1202. }
  1203. break;
  1204. }
  1205. case Type::HalfTyID:
  1206. // fp16 is stored as .b16 for compatibility with pre-sm_53 PTX assembly.
  1207. return "b16";
  1208. case Type::FloatTyID:
  1209. return "f32";
  1210. case Type::DoubleTyID:
  1211. return "f64";
  1212. case Type::PointerTyID: {
  1213. unsigned PtrSize = TM.getPointerSizeInBits(Ty->getPointerAddressSpace());
  1214. assert((PtrSize == 64 || PtrSize == 32) && "Unexpected pointer size");
  1215. if (PtrSize == 64)
  1216. if (useB4PTR)
  1217. return "b64";
  1218. else
  1219. return "u64";
  1220. else if (useB4PTR)
  1221. return "b32";
  1222. else
  1223. return "u32";
  1224. }
  1225. default:
  1226. break;
  1227. }
  1228. llvm_unreachable("unexpected type");
  1229. }
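  // Emit the PTX declaration (state space, alignment, type and size) for GVar
  // without any initializer.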
  1230. void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable *GVar,
  1231. raw_ostream &O,
  1232. const NVPTXSubtarget &STI) {
  1233. const DataLayout &DL = getDataLayout();
  1234. // GlobalVariables are always constant pointers themselves.
  1235. Type *ETy = GVar->getValueType();
  1236. O << ".";
  1237. emitPTXAddressSpace(GVar->getType()->getAddressSpace(), O);
  1238. if (isManaged(*GVar)) {
  1239. if (STI.getPTXVersion() < 40 || STI.getSmVersion() < 30) {
  1240. report_fatal_error(
  1241. ".attribute(.managed) requires PTX version >= 4.0 and sm_30");
  1242. }
  1243. O << " .attribute(.managed)";
  1244. }
  1245. if (MaybeAlign A = GVar->getAlign())
  1246. O << " .align " << A->value();
  1247. else
  1248. O << " .align " << (int)DL.getPrefTypeAlign(ETy).value();
  1249. // Special case for i128
  1250. if (ETy->isIntegerTy(128)) {
  1251. O << " .b8 ";
  1252. getSymbol(GVar)->print(O, MAI);
  1253. O << "[16]";
  1254. return;
  1255. }
  1256. if (ETy->isFloatingPointTy() || ETy->isIntOrPtrTy()) {
  1257. O << " .";
  1258. O << getPTXFundamentalTypeStr(ETy);
  1259. O << " ";
  1260. getSymbol(GVar)->print(O, MAI);
  1261. return;
  1262. }
  1263. int64_t ElementSize = 0;
  1264. // Although PTX has direct support for struct and array types, the LLVM
  1265. // CodeGen does not make use of these high-level field accesses.
  1266. // Structs and arrays are therefore lowered into arrays of
  1267. // bytes.
  1268. switch (ETy->getTypeID()) {
  1269. case Type::StructTyID:
  1270. case Type::ArrayTyID:
  1271. case Type::FixedVectorTyID:
  1272. ElementSize = DL.getTypeStoreSize(ETy);
  1273. O << " .b8 ";
  1274. getSymbol(GVar)->print(O, MAI);
  1275. O << "[";
  1276. if (ElementSize) {
  1277. O << ElementSize;
  1278. }
  1279. O << "]";
  1280. break;
  1281. default:
  1282. llvm_unreachable("type not supported yet");
  1283. }
  1284. }
  1285. void NVPTXAsmPrinter::printParamName(Function::const_arg_iterator I,
  1286. int paramIndex, raw_ostream &O) {
  1287. getSymbol(I->getParent())->print(O, MAI);
  1288. O << "_param_" << paramIndex;
  1289. }
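  // Print the parenthesized .param list of F, covering image/sampler kernel
  // arguments, aggregate and byval parameters, and scalar arguments.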
void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
  const DataLayout &DL = getDataLayout();
  const AttributeList &PAL = F->getAttributes();
  const NVPTXSubtarget &STI = TM.getSubtarget<NVPTXSubtarget>(*F);
  const auto *TLI = cast<NVPTXTargetLowering>(STI.getTargetLowering());

  Function::const_arg_iterator I, E;
  unsigned paramIndex = 0;
  bool first = true;
  bool isKernelFunc = isKernelFunction(*F);
  bool isABI = (STI.getSmVersion() >= 20);
  bool hasImageHandles = STI.hasImageHandles();

  if (F->arg_empty() && !F->isVarArg()) {
    O << "()\n";
    return;
  }

  O << "(\n";

  for (I = F->arg_begin(), E = F->arg_end(); I != E; ++I, paramIndex++) {
    Type *Ty = I->getType();

    if (!first)
      O << ",\n";

    first = false;

    // Handle image/sampler parameters
    if (isKernelFunction(*F)) {
      if (isSampler(*I) || isImage(*I)) {
        if (isImage(*I)) {
          std::string sname = std::string(I->getName());
          if (isImageWriteOnly(*I) || isImageReadWrite(*I)) {
            if (hasImageHandles)
              O << "\t.param .u64 .ptr .surfref ";
            else
              O << "\t.param .surfref ";
            CurrentFnSym->print(O, MAI);
            O << "_param_" << paramIndex;
          }
          else { // Default image is read_only
            if (hasImageHandles)
              O << "\t.param .u64 .ptr .texref ";
            else
              O << "\t.param .texref ";
            CurrentFnSym->print(O, MAI);
            O << "_param_" << paramIndex;
          }
        } else {
          if (hasImageHandles)
            O << "\t.param .u64 .ptr .samplerref ";
          else
            O << "\t.param .samplerref ";
          CurrentFnSym->print(O, MAI);
          O << "_param_" << paramIndex;
        }
        continue;
      }
    }

    auto getOptimalAlignForParam = [TLI, &DL, &PAL, F,
                                    paramIndex](Type *Ty) -> Align {
      Align TypeAlign = TLI->getFunctionParamOptimizedAlign(F, Ty, DL);
      MaybeAlign ParamAlign = PAL.getParamAlignment(paramIndex);
      return std::max(TypeAlign, ParamAlign.valueOrOne());
    };

    if (!PAL.hasParamAttr(paramIndex, Attribute::ByVal)) {
      if (Ty->isAggregateType() || Ty->isVectorTy() || Ty->isIntegerTy(128)) {
        // Just print .param .align <a> .b8 .param[size];
        // <a>  = optimal alignment for the element type; always multiple of
        //        PAL.getParamAlignment
        // size = typeallocsize of element type
        Align OptimalAlign = getOptimalAlignForParam(Ty);

        O << "\t.param .align " << OptimalAlign.value() << " .b8 ";
        printParamName(I, paramIndex, O);
        O << "[" << DL.getTypeAllocSize(Ty) << "]";

        continue;
      }
      // Just a scalar
      auto *PTy = dyn_cast<PointerType>(Ty);
      unsigned PTySizeInBits = 0;
      if (PTy) {
        PTySizeInBits =
            TLI->getPointerTy(DL, PTy->getAddressSpace()).getSizeInBits();
        assert(PTySizeInBits && "Invalid pointer size");
      }

      if (isKernelFunc) {
        if (PTy) {
          // Special handling for pointer arguments to kernel
          O << "\t.param .u" << PTySizeInBits << " ";

          if (static_cast<NVPTXTargetMachine &>(TM).getDrvInterface() !=
              NVPTX::CUDA) {
            int addrSpace = PTy->getAddressSpace();
            switch (addrSpace) {
            default:
              O << ".ptr ";
              break;
            case ADDRESS_SPACE_CONST:
              O << ".ptr .const ";
              break;
            case ADDRESS_SPACE_SHARED:
              O << ".ptr .shared ";
              break;
            case ADDRESS_SPACE_GLOBAL:
              O << ".ptr .global ";
              break;
            }
            Align ParamAlign = I->getParamAlign().valueOrOne();
            O << ".align " << ParamAlign.value() << " ";
          }
          printParamName(I, paramIndex, O);
          continue;
        }

        // non-pointer scalar to kernel func
        O << "\t.param .";
        // Special case: predicate operands become .u8 types
        if (Ty->isIntegerTy(1))
          O << "u8";
        else
          O << getPTXFundamentalTypeStr(Ty);
        O << " ";
        printParamName(I, paramIndex, O);
        continue;
      }
      // Non-kernel function, just print .param .b<size> for ABI
      // and .reg .b<size> for non-ABI
      unsigned sz = 0;
      if (isa<IntegerType>(Ty)) {
        sz = cast<IntegerType>(Ty)->getBitWidth();
        sz = promoteScalarArgumentSize(sz);
      } else if (PTy) {
        assert(PTySizeInBits && "Invalid pointer size");
        sz = PTySizeInBits;
      } else if (Ty->isHalfTy())
        // PTX ABI requires all scalar parameters to be at least 32
        // bits in size.  fp16 normally uses .b16 as its storage type
        // in PTX, so its size must be adjusted here, too.
        sz = 32;
      else
        sz = Ty->getPrimitiveSizeInBits();
      if (isABI)
        O << "\t.param .b" << sz << " ";
      else
        O << "\t.reg .b" << sz << " ";
      printParamName(I, paramIndex, O);
      continue;
    }

    // param has byVal attribute.
    Type *ETy = PAL.getParamByValType(paramIndex);
    assert(ETy && "Param should have byval type");

    if (isABI || isKernelFunc) {
      // Just print .param .align <a> .b8 .param[size];
      // <a>  = optimal alignment for the element type; always multiple of
      //        PAL.getParamAlignment
      // size = typeallocsize of element type
      Align OptimalAlign =
          isKernelFunc
              ? getOptimalAlignForParam(ETy)
              : TLI->getFunctionByValParamAlign(
                    F, ETy, PAL.getParamAlignment(paramIndex).valueOrOne(), DL);

      unsigned sz = DL.getTypeAllocSize(ETy);
      O << "\t.param .align " << OptimalAlign.value() << " .b8 ";
      printParamName(I, paramIndex, O);
      O << "[" << sz << "]";

      continue;
    } else {
      // Split the ETy into constituent parts and
      // print .param .b<size> <name> for each part.
      // Further, if a part is vector, print the above for
      // each vector element.
      SmallVector<EVT, 16> vtparts;
      ComputeValueVTs(*TLI, DL, ETy, vtparts);
      for (unsigned i = 0, e = vtparts.size(); i != e; ++i) {
        unsigned elems = 1;
        EVT elemtype = vtparts[i];
        if (vtparts[i].isVector()) {
          elems = vtparts[i].getVectorNumElements();
          elemtype = vtparts[i].getVectorElementType();
        }

        for (unsigned j = 0, je = elems; j != je; ++j) {
          unsigned sz = elemtype.getSizeInBits();
          if (elemtype.isInteger())
            sz = promoteScalarArgumentSize(sz);
          O << "\t.reg .b" << sz << " ";
          printParamName(I, paramIndex, O);
          if (j < je - 1)
            O << ",\n";
          ++paramIndex;
        }
        if (i < e - 1)
          O << ",\n";
      }
      --paramIndex;
      continue;
    }
  }

  if (F->isVarArg()) {
    if (!first)
      O << ",\n";
    O << "\t.param .align " << STI.getMaxRequiredAlignment();
    O << " .b8 ";
    getSymbol(F)->print(O, MAI);
    O << "_vararg[]";
  }

  O << "\n)\n";
}
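
// Convenience overload: emit the parameter list for the IR function that
// backs this MachineFunction.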
void NVPTXAsmPrinter::emitFunctionParamList(const MachineFunction &MF,
                                            raw_ostream &O) {
  const Function &F = MF.getFunction();
  emitFunctionParamList(&F, O);
}
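
// Emit the per-function register preamble: the local depot and the %SP/%SPL
// registers when the function has a stack frame, followed by one ".reg"
// declaration per register class covering that class's virtual registers.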
void NVPTXAsmPrinter::setAndEmitFunctionVirtualRegisters(
    const MachineFunction &MF) {
  SmallString<128> Str;
  raw_svector_ostream O(Str);

  // Map the global virtual register number to a register class specific
  // virtual register number starting from 1 with that class.
  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
  //unsigned numRegClasses = TRI->getNumRegClasses();

  // Emit the Fake Stack Object
  const MachineFrameInfo &MFI = MF.getFrameInfo();
  int NumBytes = (int)MFI.getStackSize();
  if (NumBytes) {
    O << "\t.local .align " << MFI.getMaxAlign().value() << " .b8 \t"
      << DEPOTNAME << getFunctionNumber() << "[" << NumBytes << "];\n";
    if (static_cast<const NVPTXTargetMachine &>(MF.getTarget()).is64Bit()) {
      O << "\t.reg .b64 \t%SP;\n";
      O << "\t.reg .b64 \t%SPL;\n";
    } else {
      O << "\t.reg .b32 \t%SP;\n";
      O << "\t.reg .b32 \t%SPL;\n";
    }
  }

  // Go through all virtual registers to establish the mapping between the
  // global virtual register number and the per-class virtual register number.
  // We use the per-class virtual register number in the PTX output.
  unsigned int numVRs = MRI->getNumVirtRegs();
  for (unsigned i = 0; i < numVRs; i++) {
    Register vr = Register::index2VirtReg(i);
    const TargetRegisterClass *RC = MRI->getRegClass(vr);
    DenseMap<unsigned, unsigned> &regmap = VRegMapping[RC];
    int n = regmap.size();
    regmap.insert(std::make_pair(vr, n + 1));
  }

  // Emit register declarations
  // @TODO: Extract out the real register usage
  // O << "\t.reg .pred %p<" << NVPTXNumRegisters << ">;\n";
  // O << "\t.reg .s16 %rc<" << NVPTXNumRegisters << ">;\n";
  // O << "\t.reg .s16 %rs<" << NVPTXNumRegisters << ">;\n";
  // O << "\t.reg .s32 %r<" << NVPTXNumRegisters << ">;\n";
  // O << "\t.reg .s64 %rd<" << NVPTXNumRegisters << ">;\n";
  // O << "\t.reg .f32 %f<" << NVPTXNumRegisters << ">;\n";
  // O << "\t.reg .f64 %fd<" << NVPTXNumRegisters << ">;\n";

  // Emit declaration of the virtual registers or 'physical' registers for
  // each register class
  for (unsigned i = 0; i < TRI->getNumRegClasses(); i++) {
    const TargetRegisterClass *RC = TRI->getRegClass(i);
    DenseMap<unsigned, unsigned> &regmap = VRegMapping[RC];
    std::string rcname = getNVPTXRegClassName(RC);
    std::string rcStr = getNVPTXRegClassStr(RC);
    int n = regmap.size();

    // Only declare those registers that may be used.
    if (n) {
      O << "\t.reg " << rcname << " \t" << rcStr << "<" << (n + 1) << ">;\n";
    }
  }

  OutStreamer->emitRawText(O.str());
}
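
// Print a floating-point constant in PTX hex form: "0f" followed by 8 hex
// digits for float, "0d" followed by 16 hex digits for double.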
void NVPTXAsmPrinter::printFPConstant(const ConstantFP *Fp, raw_ostream &O) {
  APFloat APF = APFloat(Fp->getValueAPF()); // make a copy
  bool ignored;
  unsigned int numHex;
  const char *lead;

  if (Fp->getType()->getTypeID() == Type::FloatTyID) {
    numHex = 8;
    lead = "0f";
    APF.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven, &ignored);
  } else if (Fp->getType()->getTypeID() == Type::DoubleTyID) {
    numHex = 16;
    lead = "0d";
    APF.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven, &ignored);
  } else
    llvm_unreachable("unsupported fp type");

  APInt API = APF.bitcastToAPInt();
  O << lead << format_hex_no_prefix(API.getZExtValue(), numHex, /*Upper=*/true);
}
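
// Print a scalar constant initializer: integers and FP literals directly,
// null pointers as 0, and globals by symbol (wrapped in "generic()" when a
// generic-address-space view of the symbol is being emitted).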
void NVPTXAsmPrinter::printScalarConstant(const Constant *CPV, raw_ostream &O) {
  if (const ConstantInt *CI = dyn_cast<ConstantInt>(CPV)) {
    O << CI->getValue();
    return;
  }
  if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CPV)) {
    printFPConstant(CFP, O);
    return;
  }
  if (isa<ConstantPointerNull>(CPV)) {
    O << "0";
    return;
  }
  if (const GlobalValue *GVar = dyn_cast<GlobalValue>(CPV)) {
    bool IsNonGenericPointer = false;
    if (GVar->getType()->getAddressSpace() != 0) {
      IsNonGenericPointer = true;
    }
    if (EmitGeneric && !isa<Function>(CPV) && !IsNonGenericPointer) {
      O << "generic(";
      getSymbol(GVar)->print(O, MAI);
      O << ")";
    } else {
      getSymbol(GVar)->print(O, MAI);
    }
    return;
  }
  if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
    const MCExpr *E = lowerConstantForGV(cast<Constant>(Cexpr), false);
    printMCExpr(*E, O);
    return;
  }
  llvm_unreachable("Not scalar type found in printScalarConstant()");
}
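
// Append the little-endian byte image of a single constant to AggBuffer. A
// non-zero Bytes requests zero-padding up to that many bytes; pointer-valued
// constants are recorded as symbol references rather than raw data.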
void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes,
                                   AggBuffer *AggBuffer) {
  const DataLayout &DL = getDataLayout();
  int AllocSize = DL.getTypeAllocSize(CPV->getType());
  if (isa<UndefValue>(CPV) || CPV->isNullValue()) {
    // Non-zero Bytes indicates that we need to zero-fill everything.
    // Otherwise, only the space allocated by CPV.
    AggBuffer->addZeros(Bytes ? Bytes : AllocSize);
    return;
  }

  // Helper for filling AggBuffer with APInts.
  auto AddIntToBuffer = [AggBuffer, Bytes](const APInt &Val) {
    size_t NumBytes = (Val.getBitWidth() + 7) / 8;
    SmallVector<unsigned char, 16> Buf(NumBytes);
    for (unsigned I = 0; I < NumBytes; ++I) {
      Buf[I] = Val.extractBitsAsZExtValue(8, I * 8);
    }
    AggBuffer->addBytes(Buf.data(), NumBytes, Bytes);
  };

  switch (CPV->getType()->getTypeID()) {
  case Type::IntegerTyID:
    if (const auto CI = dyn_cast<ConstantInt>(CPV)) {
      AddIntToBuffer(CI->getValue());
      break;
    }
    if (const auto *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
      if (const auto *CI =
              dyn_cast<ConstantInt>(ConstantFoldConstant(Cexpr, DL))) {
        AddIntToBuffer(CI->getValue());
        break;
      }
      if (Cexpr->getOpcode() == Instruction::PtrToInt) {
        Value *V = Cexpr->getOperand(0)->stripPointerCasts();
        AggBuffer->addSymbol(V, Cexpr->getOperand(0));
        AggBuffer->addZeros(AllocSize);
        break;
      }
    }
    llvm_unreachable("unsupported integer const type");
    break;

  case Type::HalfTyID:
  case Type::BFloatTyID:
  case Type::FloatTyID:
  case Type::DoubleTyID:
    AddIntToBuffer(cast<ConstantFP>(CPV)->getValueAPF().bitcastToAPInt());
    break;

  case Type::PointerTyID: {
    if (const GlobalValue *GVar = dyn_cast<GlobalValue>(CPV)) {
      AggBuffer->addSymbol(GVar, GVar);
    } else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
      const Value *v = Cexpr->stripPointerCasts();
      AggBuffer->addSymbol(v, Cexpr);
    }
    AggBuffer->addZeros(AllocSize);
    break;
  }

  case Type::ArrayTyID:
  case Type::FixedVectorTyID:
  case Type::StructTyID: {
    if (isa<ConstantAggregate>(CPV) || isa<ConstantDataSequential>(CPV)) {
      bufferAggregateConstant(CPV, AggBuffer);
      if (Bytes > AllocSize)
        AggBuffer->addZeros(Bytes - AllocSize);
    } else if (isa<ConstantAggregateZero>(CPV))
      AggBuffer->addZeros(Bytes);
    else
      llvm_unreachable("Unexpected Constant type");
    break;
  }

  default:
    llvm_unreachable("unsupported type");
  }
}
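
// Recursively buffer an aggregate constant (array, vector, struct, or wide
// integer) element by element, padding struct members to their layout offsets.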
void NVPTXAsmPrinter::bufferAggregateConstant(const Constant *CPV,
                                              AggBuffer *aggBuffer) {
  const DataLayout &DL = getDataLayout();
  int Bytes;

  // Integers of arbitrary width
  if (const ConstantInt *CI = dyn_cast<ConstantInt>(CPV)) {
    APInt Val = CI->getValue();
    for (unsigned I = 0, E = DL.getTypeAllocSize(CPV->getType()); I < E; ++I) {
      uint8_t Byte = Val.getLoBits(8).getZExtValue();
      aggBuffer->addBytes(&Byte, 1, 1);
      Val.lshrInPlace(8);
    }
    return;
  }

  // Old constants
  if (isa<ConstantArray>(CPV) || isa<ConstantVector>(CPV)) {
    if (CPV->getNumOperands())
      for (unsigned i = 0, e = CPV->getNumOperands(); i != e; ++i)
        bufferLEByte(cast<Constant>(CPV->getOperand(i)), 0, aggBuffer);
    return;
  }

  if (const ConstantDataSequential *CDS =
          dyn_cast<ConstantDataSequential>(CPV)) {
    if (CDS->getNumElements())
      for (unsigned i = 0; i < CDS->getNumElements(); ++i)
        bufferLEByte(cast<Constant>(CDS->getElementAsConstant(i)), 0,
                     aggBuffer);
    return;
  }

  if (isa<ConstantStruct>(CPV)) {
    if (CPV->getNumOperands()) {
      StructType *ST = cast<StructType>(CPV->getType());
      for (unsigned i = 0, e = CPV->getNumOperands(); i != e; ++i) {
        if (i == (e - 1))
          Bytes = DL.getStructLayout(ST)->getElementOffset(0) +
                  DL.getTypeAllocSize(ST) -
                  DL.getStructLayout(ST)->getElementOffset(i);
        else
          Bytes = DL.getStructLayout(ST)->getElementOffset(i + 1) -
                  DL.getStructLayout(ST)->getElementOffset(i);
        bufferLEByte(cast<Constant>(CPV->getOperand(i)), Bytes, aggBuffer);
      }
    }
    return;
  }
  llvm_unreachable("unsupported constant type in printAggregateConstant()");
}

/// lowerConstantForGV - Return an MCExpr for the given Constant.  This is
/// mostly a copy from AsmPrinter::lowerConstant, except customized to only
/// handle expressions that are representable in PTX and create
/// NVPTXGenericMCSymbolRefExpr nodes for addrspacecast instructions.
const MCExpr *
NVPTXAsmPrinter::lowerConstantForGV(const Constant *CV, bool ProcessingGeneric) {
  MCContext &Ctx = OutContext;

  if (CV->isNullValue() || isa<UndefValue>(CV))
    return MCConstantExpr::create(0, Ctx);

  if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV))
    return MCConstantExpr::create(CI->getZExtValue(), Ctx);

  if (const GlobalValue *GV = dyn_cast<GlobalValue>(CV)) {
    const MCSymbolRefExpr *Expr = MCSymbolRefExpr::create(getSymbol(GV), Ctx);
    if (ProcessingGeneric) {
      return NVPTXGenericMCSymbolRefExpr::create(Expr, Ctx);
    } else {
      return Expr;
    }
  }

  const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV);
  if (!CE) {
    llvm_unreachable("Unknown constant value to lower!");
  }

  switch (CE->getOpcode()) {
  default: {
    // If the code isn't optimized, there may be outstanding folding
    // opportunities. Attempt to fold the expression using DataLayout as a
    // last resort before giving up.
    Constant *C = ConstantFoldConstant(CE, getDataLayout());
    if (C != CE)
      return lowerConstantForGV(C, ProcessingGeneric);

    // Otherwise report the problem to the user.
    std::string S;
    raw_string_ostream OS(S);
    OS << "Unsupported expression in static initializer: ";
    CE->printAsOperand(OS, /*PrintType=*/false,
                       !MF ? nullptr : MF->getFunction().getParent());
    report_fatal_error(Twine(OS.str()));
  }

  case Instruction::AddrSpaceCast: {
    // Strip the addrspacecast and pass along the operand
    PointerType *DstTy = cast<PointerType>(CE->getType());
    if (DstTy->getAddressSpace() == 0) {
      return lowerConstantForGV(cast<const Constant>(CE->getOperand(0)), true);
    }
    std::string S;
    raw_string_ostream OS(S);
    OS << "Unsupported expression in static initializer: ";
    CE->printAsOperand(OS, /*PrintType=*/false,
                       !MF ? nullptr : MF->getFunction().getParent());
    report_fatal_error(Twine(OS.str()));
  }

  case Instruction::GetElementPtr: {
    const DataLayout &DL = getDataLayout();

    // Generate a symbolic expression for the byte address
    APInt OffsetAI(DL.getPointerTypeSizeInBits(CE->getType()), 0);
    cast<GEPOperator>(CE)->accumulateConstantOffset(DL, OffsetAI);

    const MCExpr *Base = lowerConstantForGV(CE->getOperand(0),
                                            ProcessingGeneric);
    if (!OffsetAI)
      return Base;

    int64_t Offset = OffsetAI.getSExtValue();
    return MCBinaryExpr::createAdd(Base, MCConstantExpr::create(Offset, Ctx),
                                   Ctx);
  }

  case Instruction::Trunc:
    // We emit the value and depend on the assembler to truncate the generated
    // expression properly.  This is important for differences between
    // blockaddress labels.  Since the two labels are in the same function, it
    // is reasonable to treat their delta as a 32-bit value.
    [[fallthrough]];
  case Instruction::BitCast:
    return lowerConstantForGV(CE->getOperand(0), ProcessingGeneric);

  case Instruction::IntToPtr: {
    const DataLayout &DL = getDataLayout();

    // Handle casts to pointers by changing them into casts to the appropriate
    // integer type.  This promotes constant folding and simplifies this code.
    Constant *Op = CE->getOperand(0);
    Op = ConstantExpr::getIntegerCast(Op, DL.getIntPtrType(CV->getType()),
                                      false /*ZExt*/);
    return lowerConstantForGV(Op, ProcessingGeneric);
  }

  case Instruction::PtrToInt: {
    const DataLayout &DL = getDataLayout();

    // Support only foldable casts to/from pointers that can be eliminated by
    // changing the pointer to the appropriately sized integer type.
    Constant *Op = CE->getOperand(0);
    Type *Ty = CE->getType();

    const MCExpr *OpExpr = lowerConstantForGV(Op, ProcessingGeneric);

    // We can emit the pointer value into this slot if the slot is an
    // integer slot equal to the size of the pointer.
    if (DL.getTypeAllocSize(Ty) == DL.getTypeAllocSize(Op->getType()))
      return OpExpr;

    // Otherwise the pointer is smaller than the resultant integer, mask off
    // the high bits so we are sure to get a proper truncation if the input is
    // a constant expr.
    unsigned InBits = DL.getTypeAllocSizeInBits(Op->getType());
    const MCExpr *MaskExpr =
        MCConstantExpr::create(~0ULL >> (64 - InBits), Ctx);
    return MCBinaryExpr::createAnd(OpExpr, MaskExpr, Ctx);
  }

  // The MC library also has a right-shift operator, but it isn't consistently
  // signed or unsigned between different targets.
  case Instruction::Add: {
    const MCExpr *LHS = lowerConstantForGV(CE->getOperand(0), ProcessingGeneric);
    const MCExpr *RHS = lowerConstantForGV(CE->getOperand(1), ProcessingGeneric);
    switch (CE->getOpcode()) {
    default: llvm_unreachable("Unknown binary operator constant cast expr");
    case Instruction::Add: return MCBinaryExpr::createAdd(LHS, RHS, Ctx);
    }
  }
  }
}

// Copy of MCExpr::print customized for NVPTX
void NVPTXAsmPrinter::printMCExpr(const MCExpr &Expr, raw_ostream &OS) {
  switch (Expr.getKind()) {
  case MCExpr::Target:
    return cast<MCTargetExpr>(&Expr)->printImpl(OS, MAI);
  case MCExpr::Constant:
    OS << cast<MCConstantExpr>(Expr).getValue();
    return;

  case MCExpr::SymbolRef: {
    const MCSymbolRefExpr &SRE = cast<MCSymbolRefExpr>(Expr);
    const MCSymbol &Sym = SRE.getSymbol();
    Sym.print(OS, MAI);
    return;
  }

  case MCExpr::Unary: {
    const MCUnaryExpr &UE = cast<MCUnaryExpr>(Expr);
    switch (UE.getOpcode()) {
    case MCUnaryExpr::LNot:  OS << '!'; break;
    case MCUnaryExpr::Minus: OS << '-'; break;
    case MCUnaryExpr::Not:   OS << '~'; break;
    case MCUnaryExpr::Plus:  OS << '+'; break;
    }
    printMCExpr(*UE.getSubExpr(), OS);
    return;
  }

  case MCExpr::Binary: {
    const MCBinaryExpr &BE = cast<MCBinaryExpr>(Expr);

    // Only print parens around the LHS if it is non-trivial.
    if (isa<MCConstantExpr>(BE.getLHS()) || isa<MCSymbolRefExpr>(BE.getLHS()) ||
        isa<NVPTXGenericMCSymbolRefExpr>(BE.getLHS())) {
      printMCExpr(*BE.getLHS(), OS);
    } else {
      OS << '(';
      printMCExpr(*BE.getLHS(), OS);
      OS << ')';
    }

    switch (BE.getOpcode()) {
    case MCBinaryExpr::Add:
      // Print "X-42" instead of "X+-42".
      if (const MCConstantExpr *RHSC = dyn_cast<MCConstantExpr>(BE.getRHS())) {
        if (RHSC->getValue() < 0) {
          OS << RHSC->getValue();
          return;
        }
      }

      OS << '+';
      break;
    default: llvm_unreachable("Unhandled binary operator");
    }

    // Only print parens around the RHS if it is non-trivial.
    if (isa<MCConstantExpr>(BE.getRHS()) || isa<MCSymbolRefExpr>(BE.getRHS())) {
      printMCExpr(*BE.getRHS(), OS);
    } else {
      OS << '(';
      printMCExpr(*BE.getRHS(), OS);
      OS << ')';
    }
    return;
  }
  }

  llvm_unreachable("Invalid expression kind!");
}

/// PrintAsmOperand - Print out an operand for an inline asm expression.
///
bool NVPTXAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
                                      const char *ExtraCode, raw_ostream &O) {
  if (ExtraCode && ExtraCode[0]) {
    if (ExtraCode[1] != 0)
      return true; // Unknown modifier.

    switch (ExtraCode[0]) {
    default:
      // See if this is a generic print operand
      return AsmPrinter::PrintAsmOperand(MI, OpNo, ExtraCode, O);
    case 'r':
      break;
    }
  }

  printOperand(MI, OpNo, O);

  return false;
}
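
// Print an inline-asm memory operand as "[<address>]"; no extra-code
// modifiers are supported.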
bool NVPTXAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
                                            unsigned OpNo,
                                            const char *ExtraCode,
                                            raw_ostream &O) {
  if (ExtraCode && ExtraCode[0])
    return true; // Unknown modifier

  O << '[';
  printMemOperand(MI, OpNo, O);
  O << ']';

  return false;
}
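
// Print a single machine operand: virtual/physical registers, immediates,
// FP immediates, global addresses, and basic-block labels.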
void NVPTXAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
                                   raw_ostream &O) {
  const MachineOperand &MO = MI->getOperand(opNum);
  switch (MO.getType()) {
  case MachineOperand::MO_Register:
    if (MO.getReg().isPhysical()) {
      if (MO.getReg() == NVPTX::VRDepot)
        O << DEPOTNAME << getFunctionNumber();
      else
        O << NVPTXInstPrinter::getRegisterName(MO.getReg());
    } else {
      emitVirtualRegister(MO.getReg(), O);
    }
    break;

  case MachineOperand::MO_Immediate:
    O << MO.getImm();
    break;

  case MachineOperand::MO_FPImmediate:
    printFPConstant(MO.getFPImm(), O);
    break;

  case MachineOperand::MO_GlobalAddress:
    PrintSymbolOperand(MO, O);
    break;

  case MachineOperand::MO_MachineBasicBlock:
    MO.getMBB()->getSymbol()->print(O, MAI);
    break;

  default:
    llvm_unreachable("Operand type not supported.");
  }
}
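
// Print an address operand as "<base>+<offset>" (omitting "+0"), or as
// "<base>, <offset>" when the "add" modifier is used.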
void NVPTXAsmPrinter::printMemOperand(const MachineInstr *MI, int opNum,
                                      raw_ostream &O, const char *Modifier) {
  printOperand(MI, opNum, O);

  if (Modifier && strcmp(Modifier, "add") == 0) {
    O << ", ";
    printOperand(MI, opNum + 1, O);
  } else {
    if (MI->getOperand(opNum + 1).isImm() &&
        MI->getOperand(opNum + 1).getImm() == 0)
      return; // don't print ',0' or '+0'
    O << "+";
    printOperand(MI, opNum + 1, O);
  }
}

// Force static initialization.
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeNVPTXAsmPrinter() {
  RegisterAsmPrinter<NVPTXAsmPrinter> X(getTheNVPTXTarget32());
  RegisterAsmPrinter<NVPTXAsmPrinter> Y(getTheNVPTXTarget64());
}