NVPTXAsmPrinter.h 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318
  1. //===-- NVPTXAsmPrinter.h - NVPTX LLVM assembly writer ----------*- C++ -*-===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. // This file contains a printer that converts from our internal representation
  10. // of machine-dependent LLVM code to NVPTX assembly language.
  11. //
  12. //===----------------------------------------------------------------------===//
  13. #ifndef LLVM_LIB_TARGET_NVPTX_NVPTXASMPRINTER_H
  14. #define LLVM_LIB_TARGET_NVPTX_NVPTXASMPRINTER_H
  15. #include "NVPTX.h"
  16. #include "NVPTXSubtarget.h"
  17. #include "NVPTXTargetMachine.h"
  18. #include "llvm/ADT/DenseMap.h"
  19. #include "llvm/ADT/SmallVector.h"
  20. #include "llvm/ADT/StringRef.h"
  21. #include "llvm/CodeGen/AsmPrinter.h"
  22. #include "llvm/CodeGen/MachineFunction.h"
  23. #include "llvm/CodeGen/MachineLoopInfo.h"
  24. #include "llvm/IR/Constants.h"
  25. #include "llvm/IR/DebugLoc.h"
  26. #include "llvm/IR/DerivedTypes.h"
  27. #include "llvm/IR/Function.h"
  28. #include "llvm/IR/GlobalValue.h"
  29. #include "llvm/IR/Value.h"
  30. #include "llvm/MC/MCExpr.h"
  31. #include "llvm/MC/MCStreamer.h"
  32. #include "llvm/MC/MCSymbol.h"
  33. #include "llvm/Pass.h"
  34. #include "llvm/Support/Casting.h"
  35. #include "llvm/Support/Compiler.h"
  36. #include "llvm/Support/ErrorHandling.h"
  37. #include "llvm/Support/raw_ostream.h"
  38. #include "llvm/Target/TargetMachine.h"
  39. #include <algorithm>
  40. #include <cassert>
  41. #include <map>
  42. #include <memory>
  43. #include <string>
  44. #include <vector>
  // The ptx syntax and format is very different from that usually seen in a .s
  // file, therefore we are not able to use the MCAsmStreamer interface here.
  //
  // We are handcrafting the output method here.
  //
  // A better approach is to clone the MCAsmStreamer to a MCPTXAsmStreamer
  // (subclass of MCStreamer).
  53. namespace llvm {
  54. class MCOperand;
  55. class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter {
  56. class AggBuffer {
  57. // Used to buffer the emitted string for initializing global
  58. // aggregates.
  59. //
  60. // Normally an aggregate (array, vector or structure) is emitted
  61. // as a u8[]. However, if one element/field of the aggregate
  62. // is a non-NULL address, then the aggregate is emitted as u32[]
  63. // or u64[].
  64. //
  65. // We first layout the aggregate in 'buffer' in bytes, except for
  66. // those symbol addresses. For the i-th symbol address in the
  67. //aggregate, its corresponding 4-byte or 8-byte elements in 'buffer'
  68. // are filled with 0s. symbolPosInBuffer[i-1] records its position
  69. // in 'buffer', and Symbols[i-1] records the Value*.
  70. //
  71. // Once we have this AggBuffer setup, we can choose how to print
  72. // it out.
  73. public:
  74. unsigned numSymbols; // number of symbol addresses
  75. private:
  76. const unsigned size; // size of the buffer in bytes
  77. std::vector<unsigned char> buffer; // the buffer
  78. SmallVector<unsigned, 4> symbolPosInBuffer;
  79. SmallVector<const Value *, 4> Symbols;
  80. // SymbolsBeforeStripping[i] is the original form of Symbols[i] before
  81. // stripping pointer casts, i.e.,
  82. // Symbols[i] == SymbolsBeforeStripping[i]->stripPointerCasts().
  83. //
  84. // We need to keep these values because AggBuffer::print decides whether to
  85. // emit a "generic()" cast for Symbols[i] depending on the address space of
  86. // SymbolsBeforeStripping[i].
  87. SmallVector<const Value *, 4> SymbolsBeforeStripping;
  88. unsigned curpos;
  89. raw_ostream &O;
  90. NVPTXAsmPrinter &AP;
  91. bool EmitGeneric;
  92. public:
  93. AggBuffer(unsigned size, raw_ostream &O, NVPTXAsmPrinter &AP)
  94. : size(size), buffer(size), O(O), AP(AP) {
  95. curpos = 0;
  96. numSymbols = 0;
  97. EmitGeneric = AP.EmitGeneric;
  98. }
  99. // Copy Num bytes from Ptr.
  100. // if Bytes > Num, zero fill up to Bytes.
  101. unsigned addBytes(unsigned char *Ptr, int Num, int Bytes) {
  102. assert((curpos + Num) <= size);
  103. assert((curpos + Bytes) <= size);
  104. for (int i = 0; i < Num; ++i) {
  105. buffer[curpos] = Ptr[i];
  106. curpos++;
  107. }
  108. for (int i = Num; i < Bytes; ++i) {
  109. buffer[curpos] = 0;
  110. curpos++;
  111. }
  112. return curpos;
  113. }
  114. unsigned addZeros(int Num) {
  115. assert((curpos + Num) <= size);
  116. for (int i = 0; i < Num; ++i) {
  117. buffer[curpos] = 0;
  118. curpos++;
  119. }
  120. return curpos;
  121. }
  122. void addSymbol(const Value *GVar, const Value *GVarBeforeStripping) {
  123. symbolPosInBuffer.push_back(curpos);
  124. Symbols.push_back(GVar);
  125. SymbolsBeforeStripping.push_back(GVarBeforeStripping);
  126. numSymbols++;
  127. }
  128. void print() {
  129. if (numSymbols == 0) {
  130. // print out in bytes
  131. for (unsigned i = 0; i < size; i++) {
  132. if (i)
  133. O << ", ";
  134. O << (unsigned int) buffer[i];
  135. }
  136. } else {
  137. // print out in 4-bytes or 8-bytes
  138. unsigned int pos = 0;
  139. unsigned int nSym = 0;
  140. unsigned int nextSymbolPos = symbolPosInBuffer[nSym];
  141. unsigned int nBytes = 4;
  142. if (static_cast<const NVPTXTargetMachine &>(AP.TM).is64Bit())
  143. nBytes = 8;
  144. for (pos = 0; pos < size; pos += nBytes) {
  145. if (pos)
  146. O << ", ";
  147. if (pos == nextSymbolPos) {
  148. const Value *v = Symbols[nSym];
  149. const Value *v0 = SymbolsBeforeStripping[nSym];
  150. if (const GlobalValue *GVar = dyn_cast<GlobalValue>(v)) {
  151. MCSymbol *Name = AP.getSymbol(GVar);
  152. PointerType *PTy = dyn_cast<PointerType>(v0->getType());
  153. bool IsNonGenericPointer = false; // Is v0 a non-generic pointer?
  154. if (PTy && PTy->getAddressSpace() != 0) {
  155. IsNonGenericPointer = true;
  156. }
  157. if (EmitGeneric && !isa<Function>(v) && !IsNonGenericPointer) {
  158. O << "generic(";
  159. Name->print(O, AP.MAI);
  160. O << ")";
  161. } else {
  162. Name->print(O, AP.MAI);
  163. }
  164. } else if (const ConstantExpr *CExpr = dyn_cast<ConstantExpr>(v0)) {
  165. const MCExpr *Expr =
  166. AP.lowerConstantForGV(cast<Constant>(CExpr), false);
  167. AP.printMCExpr(*Expr, O);
  168. } else
  169. llvm_unreachable("symbol type unknown");
  170. nSym++;
  171. if (nSym >= numSymbols)
  172. nextSymbolPos = size + 1;
  173. else
  174. nextSymbolPos = symbolPosInBuffer[nSym];
  175. } else if (nBytes == 4)
  176. O << *(unsigned int *)(&buffer[pos]);
  177. else
  178. O << *(unsigned long long *)(&buffer[pos]);
  179. }
  180. }
  181. }
  182. };
  183. friend class AggBuffer;
  184. private:
  185. StringRef getPassName() const override { return "NVPTX Assembly Printer"; }
  186. const Function *F;
  187. std::string CurrentFnName;
  188. void emitStartOfAsmFile(Module &M) override;
  189. void emitBasicBlockStart(const MachineBasicBlock &MBB) override;
  190. void emitFunctionEntryLabel() override;
  191. void emitFunctionBodyStart() override;
  192. void emitFunctionBodyEnd() override;
  193. void emitImplicitDef(const MachineInstr *MI) const override;
  194. void emitInstruction(const MachineInstr *) override;
  195. void lowerToMCInst(const MachineInstr *MI, MCInst &OutMI);
  196. bool lowerOperand(const MachineOperand &MO, MCOperand &MCOp);
  197. MCOperand GetSymbolRef(const MCSymbol *Symbol);
  198. unsigned encodeVirtualRegister(unsigned Reg);
  199. void printMemOperand(const MachineInstr *MI, int opNum, raw_ostream &O,
  200. const char *Modifier = nullptr);
  201. void printModuleLevelGV(const GlobalVariable *GVar, raw_ostream &O,
  202. bool = false);
  203. void printParamName(Function::const_arg_iterator I, int paramIndex,
  204. raw_ostream &O);
  205. void emitGlobals(const Module &M);
  206. void emitHeader(Module &M, raw_ostream &O, const NVPTXSubtarget &STI);
  207. void emitKernelFunctionDirectives(const Function &F, raw_ostream &O) const;
  208. void emitVirtualRegister(unsigned int vr, raw_ostream &);
  209. void emitFunctionParamList(const Function *, raw_ostream &O);
  210. void emitFunctionParamList(const MachineFunction &MF, raw_ostream &O);
  211. void setAndEmitFunctionVirtualRegisters(const MachineFunction &MF);
  212. void printReturnValStr(const Function *, raw_ostream &O);
  213. void printReturnValStr(const MachineFunction &MF, raw_ostream &O);
  214. bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
  215. const char *ExtraCode, raw_ostream &) override;
  216. void printOperand(const MachineInstr *MI, int opNum, raw_ostream &O);
  217. bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
  218. const char *ExtraCode, raw_ostream &) override;
  219. const MCExpr *lowerConstantForGV(const Constant *CV, bool ProcessingGeneric);
  220. void printMCExpr(const MCExpr &Expr, raw_ostream &OS);
  221. protected:
  222. bool doInitialization(Module &M) override;
  223. bool doFinalization(Module &M) override;
  224. private:
  225. bool GlobalsEmitted;
  226. // This is specific per MachineFunction.
  227. const MachineRegisterInfo *MRI;
  228. // The contents are specific for each
  229. // MachineFunction. But the size of the
  230. // array is not.
  231. typedef DenseMap<unsigned, unsigned> VRegMap;
  232. typedef DenseMap<const TargetRegisterClass *, VRegMap> VRegRCMap;
  233. VRegRCMap VRegMapping;
  234. // List of variables demoted to a function scope.
  235. std::map<const Function *, std::vector<const GlobalVariable *>> localDecls;
  236. void emitPTXGlobalVariable(const GlobalVariable *GVar, raw_ostream &O);
  237. void emitPTXAddressSpace(unsigned int AddressSpace, raw_ostream &O) const;
  238. std::string getPTXFundamentalTypeStr(Type *Ty, bool = true) const;
  239. void printScalarConstant(const Constant *CPV, raw_ostream &O);
  240. void printFPConstant(const ConstantFP *Fp, raw_ostream &O);
  241. void bufferLEByte(const Constant *CPV, int Bytes, AggBuffer *aggBuffer);
  242. void bufferAggregateConstant(const Constant *CV, AggBuffer *aggBuffer);
  243. void emitLinkageDirective(const GlobalValue *V, raw_ostream &O);
  244. void emitDeclarations(const Module &, raw_ostream &O);
  245. void emitDeclaration(const Function *, raw_ostream &O);
  246. void emitDemotedVars(const Function *, raw_ostream &);
  247. bool lowerImageHandleOperand(const MachineInstr *MI, unsigned OpNo,
  248. MCOperand &MCOp);
  249. void lowerImageHandleSymbol(unsigned Index, MCOperand &MCOp);
  250. bool isLoopHeaderOfNoUnroll(const MachineBasicBlock &MBB) const;
  251. // Used to control the need to emit .generic() in the initializer of
  252. // module scope variables.
  253. // Although ptx supports the hybrid mode like the following,
  254. // .global .u32 a;
  255. // .global .u32 b;
  256. // .global .u32 addr[] = {a, generic(b)}
  257. // we have difficulty representing the difference in the NVVM IR.
  258. //
  259. // Since the address value should always be generic in CUDA C and always
  260. // be specific in OpenCL, we use this simple control here.
  261. //
  262. bool EmitGeneric;
  263. public:
  264. NVPTXAsmPrinter(TargetMachine &TM, std::unique_ptr<MCStreamer> Streamer)
  265. : AsmPrinter(TM, std::move(Streamer)),
  266. EmitGeneric(static_cast<NVPTXTargetMachine &>(TM).getDrvInterface() ==
  267. NVPTX::CUDA) {}
  268. bool runOnMachineFunction(MachineFunction &F) override;
  269. void getAnalysisUsage(AnalysisUsage &AU) const override {
  270. AU.addRequired<MachineLoopInfo>();
  271. AsmPrinter::getAnalysisUsage(AU);
  272. }
  273. std::string getVirtualRegisterName(unsigned) const;
  274. const MCSymbol *getFunctionFrameSymbol() const override;
  275. // Make emitGlobalVariable() no-op for NVPTX.
  276. // Global variables have been already emitted by the time the base AsmPrinter
  277. // attempts to do so in doFinalization() (see NVPTXAsmPrinter::emitGlobals()).
  278. void emitGlobalVariable(const GlobalVariable *GV) override {}
  279. };
  280. } // end namespace llvm
  281. #endif // LLVM_LIB_TARGET_NVPTX_NVPTXASMPRINTER_H