12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917191819191920192119221923192419251926192719281929193019311932193319341935193619371938193919401941194219431944194519461947194819491950195119521953195419551956195719581959196019611962196319641965196619671968196919701971197219731974197519761977197819791980198119821983198419851986198719881989199019911992199319941995199619971998199920002001200220032004200520062007200820092010201120122013201420152016201720182019202020212022202320242025202620272028202920302031203220332034203520362037203820392040204120422043204420452046204720482049205020512052205320542055205620572058205920602061206220632064206520662067206820692070207120722073207420752076207720782079208020812082208320842085208620872088208920902091209220932094209520962097209820992100210121022103210421052106210721082109211021112112211321142115211621172118211921202121212221232124212521262127212821292130213121322133213421352136213721382139214021412142214321442145214621472148214921502151215221532154215521562157215821592160216121622163216421652166216721682169217021712172217321742175217621772178217921802181218221832184218521862187218821892190219121922193219421952196219721982199220022012202220322042205220622072208220922102211221222132214221522162217221822192220222122222223222422252226222722282229 |
- //===-- NVPTXAsmPrinter.cpp - NVPTX LLVM assembly writer ------------------===//
- //
- // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
- // See https://llvm.org/LICENSE.txt for license information.
- // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
- //
- //===----------------------------------------------------------------------===//
- //
- // This file contains a printer that converts from our internal representation
- // of machine-dependent LLVM code to NVPTX assembly language.
- //
- //===----------------------------------------------------------------------===//
- #include "NVPTXAsmPrinter.h"
- #include "MCTargetDesc/NVPTXBaseInfo.h"
- #include "MCTargetDesc/NVPTXInstPrinter.h"
- #include "MCTargetDesc/NVPTXMCAsmInfo.h"
- #include "MCTargetDesc/NVPTXTargetStreamer.h"
- #include "NVPTX.h"
- #include "NVPTXMCExpr.h"
- #include "NVPTXMachineFunctionInfo.h"
- #include "NVPTXRegisterInfo.h"
- #include "NVPTXSubtarget.h"
- #include "NVPTXTargetMachine.h"
- #include "NVPTXUtilities.h"
- #include "TargetInfo/NVPTXTargetInfo.h"
- #include "cl_common_defines.h"
- #include "llvm/ADT/APFloat.h"
- #include "llvm/ADT/APInt.h"
- #include "llvm/ADT/DenseMap.h"
- #include "llvm/ADT/DenseSet.h"
- #include "llvm/ADT/SmallString.h"
- #include "llvm/ADT/SmallVector.h"
- #include "llvm/ADT/StringExtras.h"
- #include "llvm/ADT/StringRef.h"
- #include "llvm/ADT/Triple.h"
- #include "llvm/ADT/Twine.h"
- #include "llvm/Analysis/ConstantFolding.h"
- #include "llvm/CodeGen/Analysis.h"
- #include "llvm/CodeGen/MachineBasicBlock.h"
- #include "llvm/CodeGen/MachineFrameInfo.h"
- #include "llvm/CodeGen/MachineFunction.h"
- #include "llvm/CodeGen/MachineInstr.h"
- #include "llvm/CodeGen/MachineLoopInfo.h"
- #include "llvm/CodeGen/MachineModuleInfo.h"
- #include "llvm/CodeGen/MachineOperand.h"
- #include "llvm/CodeGen/MachineRegisterInfo.h"
- #include "llvm/CodeGen/TargetRegisterInfo.h"
- #include "llvm/CodeGen/ValueTypes.h"
- #include "llvm/IR/Attributes.h"
- #include "llvm/IR/BasicBlock.h"
- #include "llvm/IR/Constant.h"
- #include "llvm/IR/Constants.h"
- #include "llvm/IR/DataLayout.h"
- #include "llvm/IR/DebugInfo.h"
- #include "llvm/IR/DebugInfoMetadata.h"
- #include "llvm/IR/DebugLoc.h"
- #include "llvm/IR/DerivedTypes.h"
- #include "llvm/IR/Function.h"
- #include "llvm/IR/GlobalValue.h"
- #include "llvm/IR/GlobalVariable.h"
- #include "llvm/IR/Instruction.h"
- #include "llvm/IR/LLVMContext.h"
- #include "llvm/IR/Module.h"
- #include "llvm/IR/Operator.h"
- #include "llvm/IR/Type.h"
- #include "llvm/IR/User.h"
- #include "llvm/MC/MCExpr.h"
- #include "llvm/MC/MCInst.h"
- #include "llvm/MC/MCInstrDesc.h"
- #include "llvm/MC/MCStreamer.h"
- #include "llvm/MC/MCSymbol.h"
- #include "llvm/MC/TargetRegistry.h"
- #include "llvm/Support/Casting.h"
- #include "llvm/Support/CommandLine.h"
- #include "llvm/Support/Endian.h"
- #include "llvm/Support/ErrorHandling.h"
- #include "llvm/Support/MachineValueType.h"
- #include "llvm/Support/NativeFormatting.h"
- #include "llvm/Support/Path.h"
- #include "llvm/Support/raw_ostream.h"
- #include "llvm/Target/TargetLoweringObjectFile.h"
- #include "llvm/Target/TargetMachine.h"
- #include "llvm/Transforms/Utils/UnrollLoop.h"
- #include <cassert>
- #include <cstdint>
- #include <cstring>
- #include <new>
- #include <string>
- #include <utility>
- #include <vector>
- using namespace llvm;
- #define DEPOTNAME "__local_depot"
- /// DiscoverDependentGlobals - Return a set of GlobalVariables on which \p V
- /// depends.
- static void
- DiscoverDependentGlobals(const Value *V,
- DenseSet<const GlobalVariable *> &Globals) {
- if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
- Globals.insert(GV);
- else {
- if (const User *U = dyn_cast<User>(V)) {
- for (unsigned i = 0, e = U->getNumOperands(); i != e; ++i) {
- DiscoverDependentGlobals(U->getOperand(i), Globals);
- }
- }
- }
- }
- /// VisitGlobalVariableForEmission - Add \p GV to the list of GlobalVariable
- /// instances to be emitted, but only after any dependents have been added
- /// first.s
- static void
- VisitGlobalVariableForEmission(const GlobalVariable *GV,
- SmallVectorImpl<const GlobalVariable *> &Order,
- DenseSet<const GlobalVariable *> &Visited,
- DenseSet<const GlobalVariable *> &Visiting) {
- // Have we already visited this one?
- if (Visited.count(GV))
- return;
- // Do we have a circular dependency?
- if (!Visiting.insert(GV).second)
- report_fatal_error("Circular dependency found in global variable set");
- // Make sure we visit all dependents first
- DenseSet<const GlobalVariable *> Others;
- for (unsigned i = 0, e = GV->getNumOperands(); i != e; ++i)
- DiscoverDependentGlobals(GV->getOperand(i), Others);
- for (const GlobalVariable *GV : Others)
- VisitGlobalVariableForEmission(GV, Order, Visited, Visiting);
- // Now we can visit ourself
- Order.push_back(GV);
- Visited.insert(GV);
- Visiting.erase(GV);
- }
- void NVPTXAsmPrinter::emitInstruction(const MachineInstr *MI) {
- NVPTX_MC::verifyInstructionPredicates(MI->getOpcode(),
- getSubtargetInfo().getFeatureBits());
- MCInst Inst;
- lowerToMCInst(MI, Inst);
- EmitToStreamer(*OutStreamer, Inst);
- }
- // Handle symbol backtracking for targets that do not support image handles
- bool NVPTXAsmPrinter::lowerImageHandleOperand(const MachineInstr *MI,
- unsigned OpNo, MCOperand &MCOp) {
- const MachineOperand &MO = MI->getOperand(OpNo);
- const MCInstrDesc &MCID = MI->getDesc();
- if (MCID.TSFlags & NVPTXII::IsTexFlag) {
- // This is a texture fetch, so operand 4 is a texref and operand 5 is
- // a samplerref
- if (OpNo == 4 && MO.isImm()) {
- lowerImageHandleSymbol(MO.getImm(), MCOp);
- return true;
- }
- if (OpNo == 5 && MO.isImm() && !(MCID.TSFlags & NVPTXII::IsTexModeUnifiedFlag)) {
- lowerImageHandleSymbol(MO.getImm(), MCOp);
- return true;
- }
- return false;
- } else if (MCID.TSFlags & NVPTXII::IsSuldMask) {
- unsigned VecSize =
- 1 << (((MCID.TSFlags & NVPTXII::IsSuldMask) >> NVPTXII::IsSuldShift) - 1);
- // For a surface load of vector size N, the Nth operand will be the surfref
- if (OpNo == VecSize && MO.isImm()) {
- lowerImageHandleSymbol(MO.getImm(), MCOp);
- return true;
- }
- return false;
- } else if (MCID.TSFlags & NVPTXII::IsSustFlag) {
- // This is a surface store, so operand 0 is a surfref
- if (OpNo == 0 && MO.isImm()) {
- lowerImageHandleSymbol(MO.getImm(), MCOp);
- return true;
- }
- return false;
- } else if (MCID.TSFlags & NVPTXII::IsSurfTexQueryFlag) {
- // This is a query, so operand 1 is a surfref/texref
- if (OpNo == 1 && MO.isImm()) {
- lowerImageHandleSymbol(MO.getImm(), MCOp);
- return true;
- }
- return false;
- }
- return false;
- }
- void NVPTXAsmPrinter::lowerImageHandleSymbol(unsigned Index, MCOperand &MCOp) {
- // Ewwww
- LLVMTargetMachine &TM = const_cast<LLVMTargetMachine&>(MF->getTarget());
- NVPTXTargetMachine &nvTM = static_cast<NVPTXTargetMachine&>(TM);
- const NVPTXMachineFunctionInfo *MFI = MF->getInfo<NVPTXMachineFunctionInfo>();
- const char *Sym = MFI->getImageHandleSymbol(Index);
- StringRef SymName = nvTM.getStrPool().save(Sym);
- MCOp = GetSymbolRef(OutContext.getOrCreateSymbol(SymName));
- }
- void NVPTXAsmPrinter::lowerToMCInst(const MachineInstr *MI, MCInst &OutMI) {
- OutMI.setOpcode(MI->getOpcode());
- // Special: Do not mangle symbol operand of CALL_PROTOTYPE
- if (MI->getOpcode() == NVPTX::CALL_PROTOTYPE) {
- const MachineOperand &MO = MI->getOperand(0);
- OutMI.addOperand(GetSymbolRef(
- OutContext.getOrCreateSymbol(Twine(MO.getSymbolName()))));
- return;
- }
- const NVPTXSubtarget &STI = MI->getMF()->getSubtarget<NVPTXSubtarget>();
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = MI->getOperand(i);
- MCOperand MCOp;
- if (!STI.hasImageHandles()) {
- if (lowerImageHandleOperand(MI, i, MCOp)) {
- OutMI.addOperand(MCOp);
- continue;
- }
- }
- if (lowerOperand(MO, MCOp))
- OutMI.addOperand(MCOp);
- }
- }
- bool NVPTXAsmPrinter::lowerOperand(const MachineOperand &MO,
- MCOperand &MCOp) {
- switch (MO.getType()) {
- default: llvm_unreachable("unknown operand type");
- case MachineOperand::MO_Register:
- MCOp = MCOperand::createReg(encodeVirtualRegister(MO.getReg()));
- break;
- case MachineOperand::MO_Immediate:
- MCOp = MCOperand::createImm(MO.getImm());
- break;
- case MachineOperand::MO_MachineBasicBlock:
- MCOp = MCOperand::createExpr(MCSymbolRefExpr::create(
- MO.getMBB()->getSymbol(), OutContext));
- break;
- case MachineOperand::MO_ExternalSymbol:
- MCOp = GetSymbolRef(GetExternalSymbolSymbol(MO.getSymbolName()));
- break;
- case MachineOperand::MO_GlobalAddress:
- MCOp = GetSymbolRef(getSymbol(MO.getGlobal()));
- break;
- case MachineOperand::MO_FPImmediate: {
- const ConstantFP *Cnt = MO.getFPImm();
- const APFloat &Val = Cnt->getValueAPF();
- switch (Cnt->getType()->getTypeID()) {
- default: report_fatal_error("Unsupported FP type"); break;
- case Type::HalfTyID:
- MCOp = MCOperand::createExpr(
- NVPTXFloatMCExpr::createConstantFPHalf(Val, OutContext));
- break;
- case Type::FloatTyID:
- MCOp = MCOperand::createExpr(
- NVPTXFloatMCExpr::createConstantFPSingle(Val, OutContext));
- break;
- case Type::DoubleTyID:
- MCOp = MCOperand::createExpr(
- NVPTXFloatMCExpr::createConstantFPDouble(Val, OutContext));
- break;
- }
- break;
- }
- }
- return true;
- }
- unsigned NVPTXAsmPrinter::encodeVirtualRegister(unsigned Reg) {
- if (Register::isVirtualRegister(Reg)) {
- const TargetRegisterClass *RC = MRI->getRegClass(Reg);
- DenseMap<unsigned, unsigned> &RegMap = VRegMapping[RC];
- unsigned RegNum = RegMap[Reg];
- // Encode the register class in the upper 4 bits
- // Must be kept in sync with NVPTXInstPrinter::printRegName
- unsigned Ret = 0;
- if (RC == &NVPTX::Int1RegsRegClass) {
- Ret = (1 << 28);
- } else if (RC == &NVPTX::Int16RegsRegClass) {
- Ret = (2 << 28);
- } else if (RC == &NVPTX::Int32RegsRegClass) {
- Ret = (3 << 28);
- } else if (RC == &NVPTX::Int64RegsRegClass) {
- Ret = (4 << 28);
- } else if (RC == &NVPTX::Float32RegsRegClass) {
- Ret = (5 << 28);
- } else if (RC == &NVPTX::Float64RegsRegClass) {
- Ret = (6 << 28);
- } else if (RC == &NVPTX::Float16RegsRegClass) {
- Ret = (7 << 28);
- } else if (RC == &NVPTX::Float16x2RegsRegClass) {
- Ret = (8 << 28);
- } else {
- report_fatal_error("Bad register class");
- }
- // Insert the vreg number
- Ret |= (RegNum & 0x0FFFFFFF);
- return Ret;
- } else {
- // Some special-use registers are actually physical registers.
- // Encode this as the register class ID of 0 and the real register ID.
- return Reg & 0x0FFFFFFF;
- }
- }
- MCOperand NVPTXAsmPrinter::GetSymbolRef(const MCSymbol *Symbol) {
- const MCExpr *Expr;
- Expr = MCSymbolRefExpr::create(Symbol, MCSymbolRefExpr::VK_None,
- OutContext);
- return MCOperand::createExpr(Expr);
- }
- void NVPTXAsmPrinter::printReturnValStr(const Function *F, raw_ostream &O) {
- const DataLayout &DL = getDataLayout();
- const NVPTXSubtarget &STI = TM.getSubtarget<NVPTXSubtarget>(*F);
- const auto *TLI = cast<NVPTXTargetLowering>(STI.getTargetLowering());
- Type *Ty = F->getReturnType();
- bool isABI = (STI.getSmVersion() >= 20);
- if (Ty->getTypeID() == Type::VoidTyID)
- return;
- O << " (";
- if (isABI) {
- if (Ty->isFloatingPointTy() || (Ty->isIntegerTy() && !Ty->isIntegerTy(128))) {
- unsigned size = 0;
- if (auto *ITy = dyn_cast<IntegerType>(Ty)) {
- size = ITy->getBitWidth();
- } else {
- assert(Ty->isFloatingPointTy() && "Floating point type expected here");
- size = Ty->getPrimitiveSizeInBits();
- }
- // PTX ABI requires all scalar return values to be at least 32
- // bits in size. fp16 normally uses .b16 as its storage type in
- // PTX, so its size must be adjusted here, too.
- size = promoteScalarArgumentSize(size);
- O << ".param .b" << size << " func_retval0";
- } else if (isa<PointerType>(Ty)) {
- O << ".param .b" << TLI->getPointerTy(DL).getSizeInBits()
- << " func_retval0";
- } else if (Ty->isAggregateType() || Ty->isVectorTy() || Ty->isIntegerTy(128)) {
- unsigned totalsz = DL.getTypeAllocSize(Ty);
- unsigned retAlignment = 0;
- if (!getAlign(*F, 0, retAlignment))
- retAlignment = TLI->getFunctionParamOptimizedAlign(F, Ty, DL).value();
- O << ".param .align " << retAlignment << " .b8 func_retval0[" << totalsz
- << "]";
- } else
- llvm_unreachable("Unknown return type");
- } else {
- SmallVector<EVT, 16> vtparts;
- ComputeValueVTs(*TLI, DL, Ty, vtparts);
- unsigned idx = 0;
- for (unsigned i = 0, e = vtparts.size(); i != e; ++i) {
- unsigned elems = 1;
- EVT elemtype = vtparts[i];
- if (vtparts[i].isVector()) {
- elems = vtparts[i].getVectorNumElements();
- elemtype = vtparts[i].getVectorElementType();
- }
- for (unsigned j = 0, je = elems; j != je; ++j) {
- unsigned sz = elemtype.getSizeInBits();
- if (elemtype.isInteger())
- sz = promoteScalarArgumentSize(sz);
- O << ".reg .b" << sz << " func_retval" << idx;
- if (j < je - 1)
- O << ", ";
- ++idx;
- }
- if (i < e - 1)
- O << ", ";
- }
- }
- O << ") ";
- }
- void NVPTXAsmPrinter::printReturnValStr(const MachineFunction &MF,
- raw_ostream &O) {
- const Function &F = MF.getFunction();
- printReturnValStr(&F, O);
- }
- // Return true if MBB is the header of a loop marked with
- // llvm.loop.unroll.disable or llvm.loop.unroll.count=1.
- bool NVPTXAsmPrinter::isLoopHeaderOfNoUnroll(
- const MachineBasicBlock &MBB) const {
- MachineLoopInfo &LI = getAnalysis<MachineLoopInfo>();
- // We insert .pragma "nounroll" only to the loop header.
- if (!LI.isLoopHeader(&MBB))
- return false;
- // llvm.loop.unroll.disable is marked on the back edges of a loop. Therefore,
- // we iterate through each back edge of the loop with header MBB, and check
- // whether its metadata contains llvm.loop.unroll.disable.
- for (const MachineBasicBlock *PMBB : MBB.predecessors()) {
- if (LI.getLoopFor(PMBB) != LI.getLoopFor(&MBB)) {
- // Edges from other loops to MBB are not back edges.
- continue;
- }
- if (const BasicBlock *PBB = PMBB->getBasicBlock()) {
- if (MDNode *LoopID =
- PBB->getTerminator()->getMetadata(LLVMContext::MD_loop)) {
- if (GetUnrollMetadata(LoopID, "llvm.loop.unroll.disable"))
- return true;
- if (MDNode *UnrollCountMD =
- GetUnrollMetadata(LoopID, "llvm.loop.unroll.count")) {
- if (mdconst::extract<ConstantInt>(UnrollCountMD->getOperand(1))
- ->getZExtValue() == 1)
- return true;
- }
- }
- }
- }
- return false;
- }
- void NVPTXAsmPrinter::emitBasicBlockStart(const MachineBasicBlock &MBB) {
- AsmPrinter::emitBasicBlockStart(MBB);
- if (isLoopHeaderOfNoUnroll(MBB))
- OutStreamer->emitRawText(StringRef("\t.pragma \"nounroll\";\n"));
- }
- void NVPTXAsmPrinter::emitFunctionEntryLabel() {
- SmallString<128> Str;
- raw_svector_ostream O(Str);
- if (!GlobalsEmitted) {
- emitGlobals(*MF->getFunction().getParent());
- GlobalsEmitted = true;
- }
- // Set up
- MRI = &MF->getRegInfo();
- F = &MF->getFunction();
- emitLinkageDirective(F, O);
- if (isKernelFunction(*F))
- O << ".entry ";
- else {
- O << ".func ";
- printReturnValStr(*MF, O);
- }
- CurrentFnSym->print(O, MAI);
- emitFunctionParamList(*MF, O);
- if (isKernelFunction(*F))
- emitKernelFunctionDirectives(*F, O);
- if (shouldEmitPTXNoReturn(F, TM))
- O << ".noreturn";
- OutStreamer->emitRawText(O.str());
- VRegMapping.clear();
- // Emit open brace for function body.
- OutStreamer->emitRawText(StringRef("{\n"));
- setAndEmitFunctionVirtualRegisters(*MF);
- // Emit initial .loc debug directive for correct relocation symbol data.
- if (MMI && MMI->hasDebugInfo())
- emitInitialRawDwarfLocDirective(*MF);
- }
- bool NVPTXAsmPrinter::runOnMachineFunction(MachineFunction &F) {
- bool Result = AsmPrinter::runOnMachineFunction(F);
- // Emit closing brace for the body of function F.
- // The closing brace must be emitted here because we need to emit additional
- // debug labels/data after the last basic block.
- // We need to emit the closing brace here because we don't have function that
- // finished emission of the function body.
- OutStreamer->emitRawText(StringRef("}\n"));
- return Result;
- }
- void NVPTXAsmPrinter::emitFunctionBodyStart() {
- SmallString<128> Str;
- raw_svector_ostream O(Str);
- emitDemotedVars(&MF->getFunction(), O);
- OutStreamer->emitRawText(O.str());
- }
- void NVPTXAsmPrinter::emitFunctionBodyEnd() {
- VRegMapping.clear();
- }
- const MCSymbol *NVPTXAsmPrinter::getFunctionFrameSymbol() const {
- SmallString<128> Str;
- raw_svector_ostream(Str) << DEPOTNAME << getFunctionNumber();
- return OutContext.getOrCreateSymbol(Str);
- }
- void NVPTXAsmPrinter::emitImplicitDef(const MachineInstr *MI) const {
- Register RegNo = MI->getOperand(0).getReg();
- if (RegNo.isVirtual()) {
- OutStreamer->AddComment(Twine("implicit-def: ") +
- getVirtualRegisterName(RegNo));
- } else {
- const NVPTXSubtarget &STI = MI->getMF()->getSubtarget<NVPTXSubtarget>();
- OutStreamer->AddComment(Twine("implicit-def: ") +
- STI.getRegisterInfo()->getName(RegNo));
- }
- OutStreamer->addBlankLine();
- }
- void NVPTXAsmPrinter::emitKernelFunctionDirectives(const Function &F,
- raw_ostream &O) const {
- // If the NVVM IR has some of reqntid* specified, then output
- // the reqntid directive, and set the unspecified ones to 1.
- // If none of reqntid* is specified, don't output reqntid directive.
- unsigned reqntidx, reqntidy, reqntidz;
- bool specified = false;
- if (!getReqNTIDx(F, reqntidx))
- reqntidx = 1;
- else
- specified = true;
- if (!getReqNTIDy(F, reqntidy))
- reqntidy = 1;
- else
- specified = true;
- if (!getReqNTIDz(F, reqntidz))
- reqntidz = 1;
- else
- specified = true;
- if (specified)
- O << ".reqntid " << reqntidx << ", " << reqntidy << ", " << reqntidz
- << "\n";
- // If the NVVM IR has some of maxntid* specified, then output
- // the maxntid directive, and set the unspecified ones to 1.
- // If none of maxntid* is specified, don't output maxntid directive.
- unsigned maxntidx, maxntidy, maxntidz;
- specified = false;
- if (!getMaxNTIDx(F, maxntidx))
- maxntidx = 1;
- else
- specified = true;
- if (!getMaxNTIDy(F, maxntidy))
- maxntidy = 1;
- else
- specified = true;
- if (!getMaxNTIDz(F, maxntidz))
- maxntidz = 1;
- else
- specified = true;
- if (specified)
- O << ".maxntid " << maxntidx << ", " << maxntidy << ", " << maxntidz
- << "\n";
- unsigned mincta;
- if (getMinCTASm(F, mincta))
- O << ".minnctapersm " << mincta << "\n";
- unsigned maxnreg;
- if (getMaxNReg(F, maxnreg))
- O << ".maxnreg " << maxnreg << "\n";
- }
- std::string
- NVPTXAsmPrinter::getVirtualRegisterName(unsigned Reg) const {
- const TargetRegisterClass *RC = MRI->getRegClass(Reg);
- std::string Name;
- raw_string_ostream NameStr(Name);
- VRegRCMap::const_iterator I = VRegMapping.find(RC);
- assert(I != VRegMapping.end() && "Bad register class");
- const DenseMap<unsigned, unsigned> &RegMap = I->second;
- VRegMap::const_iterator VI = RegMap.find(Reg);
- assert(VI != RegMap.end() && "Bad virtual register");
- unsigned MappedVR = VI->second;
- NameStr << getNVPTXRegClassStr(RC) << MappedVR;
- NameStr.flush();
- return Name;
- }
- void NVPTXAsmPrinter::emitVirtualRegister(unsigned int vr,
- raw_ostream &O) {
- O << getVirtualRegisterName(vr);
- }
- void NVPTXAsmPrinter::emitDeclaration(const Function *F, raw_ostream &O) {
- emitLinkageDirective(F, O);
- if (isKernelFunction(*F))
- O << ".entry ";
- else
- O << ".func ";
- printReturnValStr(F, O);
- getSymbol(F)->print(O, MAI);
- O << "\n";
- emitFunctionParamList(F, O);
- if (shouldEmitPTXNoReturn(F, TM))
- O << ".noreturn";
- O << ";\n";
- }
- static bool usedInGlobalVarDef(const Constant *C) {
- if (!C)
- return false;
- if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(C)) {
- return GV->getName() != "llvm.used";
- }
- for (const User *U : C->users())
- if (const Constant *C = dyn_cast<Constant>(U))
- if (usedInGlobalVarDef(C))
- return true;
- return false;
- }
- static bool usedInOneFunc(const User *U, Function const *&oneFunc) {
- if (const GlobalVariable *othergv = dyn_cast<GlobalVariable>(U)) {
- if (othergv->getName() == "llvm.used")
- return true;
- }
- if (const Instruction *instr = dyn_cast<Instruction>(U)) {
- if (instr->getParent() && instr->getParent()->getParent()) {
- const Function *curFunc = instr->getParent()->getParent();
- if (oneFunc && (curFunc != oneFunc))
- return false;
- oneFunc = curFunc;
- return true;
- } else
- return false;
- }
- for (const User *UU : U->users())
- if (!usedInOneFunc(UU, oneFunc))
- return false;
- return true;
- }
- /* Find out if a global variable can be demoted to local scope.
- * Currently, this is valid for CUDA shared variables, which have local
- * scope and global lifetime. So the conditions to check are :
- * 1. Is the global variable in shared address space?
- * 2. Does it have internal linkage?
- * 3. Is the global variable referenced only in one function?
- */
- static bool canDemoteGlobalVar(const GlobalVariable *gv, Function const *&f) {
- if (!gv->hasInternalLinkage())
- return false;
- PointerType *Pty = gv->getType();
- if (Pty->getAddressSpace() != ADDRESS_SPACE_SHARED)
- return false;
- const Function *oneFunc = nullptr;
- bool flag = usedInOneFunc(gv, oneFunc);
- if (!flag)
- return false;
- if (!oneFunc)
- return false;
- f = oneFunc;
- return true;
- }
- static bool useFuncSeen(const Constant *C,
- DenseMap<const Function *, bool> &seenMap) {
- for (const User *U : C->users()) {
- if (const Constant *cu = dyn_cast<Constant>(U)) {
- if (useFuncSeen(cu, seenMap))
- return true;
- } else if (const Instruction *I = dyn_cast<Instruction>(U)) {
- const BasicBlock *bb = I->getParent();
- if (!bb)
- continue;
- const Function *caller = bb->getParent();
- if (!caller)
- continue;
- if (seenMap.find(caller) != seenMap.end())
- return true;
- }
- }
- return false;
- }
- void NVPTXAsmPrinter::emitDeclarations(const Module &M, raw_ostream &O) {
- DenseMap<const Function *, bool> seenMap;
- for (const Function &F : M) {
- if (F.getAttributes().hasFnAttr("nvptx-libcall-callee")) {
- emitDeclaration(&F, O);
- continue;
- }
- if (F.isDeclaration()) {
- if (F.use_empty())
- continue;
- if (F.getIntrinsicID())
- continue;
- emitDeclaration(&F, O);
- continue;
- }
- for (const User *U : F.users()) {
- if (const Constant *C = dyn_cast<Constant>(U)) {
- if (usedInGlobalVarDef(C)) {
- // The use is in the initialization of a global variable
- // that is a function pointer, so print a declaration
- // for the original function
- emitDeclaration(&F, O);
- break;
- }
- // Emit a declaration of this function if the function that
- // uses this constant expr has already been seen.
- if (useFuncSeen(C, seenMap)) {
- emitDeclaration(&F, O);
- break;
- }
- }
- if (!isa<Instruction>(U))
- continue;
- const Instruction *instr = cast<Instruction>(U);
- const BasicBlock *bb = instr->getParent();
- if (!bb)
- continue;
- const Function *caller = bb->getParent();
- if (!caller)
- continue;
- // If a caller has already been seen, then the caller is
- // appearing in the module before the callee. so print out
- // a declaration for the callee.
- if (seenMap.find(caller) != seenMap.end()) {
- emitDeclaration(&F, O);
- break;
- }
- }
- seenMap[&F] = true;
- }
- }
- static bool isEmptyXXStructor(GlobalVariable *GV) {
- if (!GV) return true;
- const ConstantArray *InitList = dyn_cast<ConstantArray>(GV->getInitializer());
- if (!InitList) return true; // Not an array; we don't know how to parse.
- return InitList->getNumOperands() == 0;
- }
- void NVPTXAsmPrinter::emitStartOfAsmFile(Module &M) {
- // Construct a default subtarget off of the TargetMachine defaults. The
- // rest of NVPTX isn't friendly to change subtargets per function and
- // so the default TargetMachine will have all of the options.
- const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM);
- const auto* STI = static_cast<const NVPTXSubtarget*>(NTM.getSubtargetImpl());
- SmallString<128> Str1;
- raw_svector_ostream OS1(Str1);
- // Emit header before any dwarf directives are emitted below.
- emitHeader(M, OS1, *STI);
- OutStreamer->emitRawText(OS1.str());
- }
- /// Rejects module features NVPTX cannot lower, then runs the generic
- /// AsmPrinter initialization and resets the GlobalsEmitted flag.
- ///
- /// The module is rejected (via report_fatal_error) when it contains aliases or
- /// when llvm.global_ctors / llvm.global_dtors is non-empty according to
- /// isEmptyXXStructor(). report_fatal_error() does not return, so there is no
- /// error return path from these checks.
- ///
- /// \returns the result of AsmPrinter::doInitialization(M).
- bool NVPTXAsmPrinter::doInitialization(Module &M) {
-   if (M.alias_size())
-     report_fatal_error("Module has aliases, which NVPTX does not support.");
-   if (!isEmptyXXStructor(M.getNamedGlobal("llvm.global_ctors")))
-     report_fatal_error(
-         "Module has a nontrivial global ctor, which NVPTX does not support.");
-   if (!isEmptyXXStructor(M.getNamedGlobal("llvm.global_dtors")))
-     report_fatal_error(
-         "Module has a nontrivial global dtor, which NVPTX does not support.");
-   // We need to call the parent's one explicitly.
-   const bool Result = AsmPrinter::doInitialization(M);
-   // Globals have not been printed yet for this module.
-   GlobalsEmitted = false;
-   return Result;
- }
- /// Emits, as one raw-text chunk, the declarations produced by
- /// emitDeclarations() followed by every module-level global variable of M.
- void NVPTXAsmPrinter::emitGlobals(const Module &M) {
-   SmallString<128> Text;
-   raw_svector_ostream OS(Text);
-   emitDeclarations(M, OS);
-   // ptxas does not support forward references of globals, so the module-level
-   // globals are first sorted into def-use order: each global is emitted
-   // *after* the globals it depends on. A list keeps the strict ordering while
-   // the two sets give fast membership tests, at the cost of a little memory.
-   SmallVector<const GlobalVariable *, 8> Ordered;
-   DenseSet<const GlobalVariable *> Finished;
-   DenseSet<const GlobalVariable *> InProgress;
-   for (const GlobalVariable &GV : M.globals())
-     VisitGlobalVariableForEmission(&GV, Ordered, Finished, InProgress);
-   assert(Finished.size() == M.getGlobalList().size() &&
-          "Missed a global variable");
-   assert(InProgress.size() == 0 && "Did not fully process a global variable");
-   const auto &NTM = static_cast<const NVPTXTargetMachine &>(TM);
-   const auto &STI =
-       *static_cast<const NVPTXSubtarget *>(NTM.getSubtargetImpl());
-   // Print the globals in the order computed above.
-   for (const GlobalVariable *GV : Ordered)
-     printModuleLevelGV(GV, OS, /*processDemoted=*/false, STI);
-   OS << '\n';
-   OutStreamer->emitRawText(OS.str());
- }
- /// Writes the PTX preamble to O: a banner comment, the .version line, the
- /// .target line (with optional texmode_independent / debug options) and the
- /// .address_size line, followed by a blank line.
- void NVPTXAsmPrinter::emitHeader(Module &M, raw_ostream &O,
-                                  const NVPTXSubtarget &STI) {
-   const auto &NTM = static_cast<const NVPTXTargetMachine &>(TM);
-   O << "//\n"
-     << "// Generated by LLVM NVPTX Back-End\n"
-     << "//\n"
-     << "\n";
-   // The PTX version is stored as major * 10 + minor.
-   const unsigned Version = STI.getPTXVersion();
-   O << ".version " << (Version / 10) << "." << (Version % 10) << "\n";
-   O << ".target " << STI.getTargetName();
-   if (NTM.getDrvInterface() == NVPTX::NVCL)
-     O << ", texmode_independent";
-   // The "debug" target option is requested as soon as one compile unit asks
-   // for line tables or full debug info.
-   bool AnyUnitWantsDebug = false;
-   for (DICompileUnit *CU : M.debug_compile_units()) {
-     const auto Kind = CU->getEmissionKind();
-     if (Kind == DICompileUnit::LineTablesOnly ||
-         Kind == DICompileUnit::FullDebug) {
-       AnyUnitWantsDebug = true;
-       break;
-     }
-   }
-   if (MMI && MMI->hasDebugInfo() && AnyUnitWantsDebug)
-     O << ", debug";
-   O << "\n";
-   O << ".address_size " << (NTM.is64Bit() ? "64" : "32") << "\n";
-   O << "\n";
- }
- /// Finishes emission for module M: emits the globals if that has not happened
- /// yet, runs the generic AsmPrinter finalization, and closes the debug
- /// sections / DWARF file directives through the NVPTX target streamer.
- ///
- /// \returns the result of AsmPrinter::doFinalization(M).
- bool NVPTXAsmPrinter::doFinalization(Module &M) {
-   // Sampled before the base-class finalization is invoked below.
-   const bool ModuleHasDebugInfo = MMI && MMI->hasDebugInfo();
-   // If we did not emit any functions, then the global declarations have not
-   // yet been emitted.
-   if (!GlobalsEmitted) {
-     emitGlobals(M);
-     GlobalsEmitted = true;
-   }
-   const bool Result = AsmPrinter::doFinalization(M);
-   clearAnnotationCache(&M);
-   auto *Streamer =
-       static_cast<NVPTXTargetStreamer *>(OutStreamer->getTargetStreamer());
-   if (ModuleHasDebugInfo) {
-     // Close the last emitted section.
-     Streamer->closeLastSection();
-     // Emit empty .debug_loc section for better support of the empty files.
-     OutStreamer->emitRawText("\t.section\t.debug_loc\t{\t}");
-   }
-   // Output last DWARF .file directives, if any.
-   Streamer->outputDwarfFileDirectives();
-   return Result;
- }
- // This function emits appropriate linkage directives for
- // functions and global variables. Nothing is emitted unless the
- // driver interface is CUDA.
- //
- // extern function declaration -> .extern
- // extern function definition -> .visible
- // external global variable with init -> .visible
- // external without init -> .extern
- // appending -> not allowed, assert.
- // for any linkage other than
- // internal, private, linker_private,
- // linker_private_weak, linker_private_weak_def_auto,
- // we emit -> .weak.
- void NVPTXAsmPrinter::emitLinkageDirective(const GlobalValue *V,
-                                            raw_ostream &O) {
-   if (static_cast<NVPTXTargetMachine &>(TM).getDrvInterface() != NVPTX::CUDA)
-     return;
-   if (V->hasExternalLinkage()) {
-     // Variables are classified by the presence of an initializer, everything
-     // else by declaration vs. definition.
-     if (const auto *GVar = dyn_cast<GlobalVariable>(V))
-       O << (GVar->hasInitializer() ? ".visible " : ".extern ");
-     else
-       O << (V->isDeclaration() ? ".extern " : ".visible ");
-   } else if (V->hasAppendingLinkage()) {
-     std::string msg("Error: Symbol ");
-     if (V->hasName()) {
-       msg.append(std::string(V->getName()));
-       // Separate the symbol name from the rest of the sentence.
-       msg.append(" ");
-     }
-     msg.append("has unsupported appending linkage type");
-     llvm_unreachable(msg.c_str());
-   } else if (!V->hasInternalLinkage() && !V->hasPrivateLinkage()) {
-     O << ".weak ";
-   }
- }
- void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar,
- raw_ostream &O, bool processDemoted,
- const NVPTXSubtarget &STI) {
- /* Writes the PTX text for a single module-level global variable to O.
-  *
-  * GVar           - the global to print.
-  * O              - stream receiving the PTX text.
-  * processDemoted - when false, a global for which canDemoteGlobalVar()
-  *                  succeeds is only recorded in localDecls (plus a comment
-  *                  line in O) instead of being printed; when true the
-  *                  demotion check is bypassed and the global is printed.
-  * STI            - subtarget queried for the PTX/SM version and for
-  *                  mask() operator support.
-  *
-  * Nothing is printed for globals in the "llvm.metadata" section, globals
-  * whose name starts with "llvm." or "nvvm.", and the private globals
-  * filtered further down. Unsupported configurations abort through
-  * report_fatal_error() or llvm_unreachable().
-  *
-  * Skip meta data. */
- if (GVar->hasSection()) {
- if (GVar->getSection() == "llvm.metadata")
- return;
- }
- // Skip LLVM intrinsic global variables
- if (GVar->getName().startswith("llvm.") ||
- GVar->getName().startswith("nvvm."))
- return;
- const DataLayout &DL = getDataLayout();
- // GlobalVariables are always constant pointers themselves.
- PointerType *PTy = GVar->getType();
- Type *ETy = GVar->getValueType();
- if (GVar->hasExternalLinkage()) { // linkage directive precedes everything else
- if (GVar->hasInitializer())
- O << ".visible ";
- else
- O << ".extern ";
- } else if (GVar->hasLinkOnceLinkage() || GVar->hasWeakLinkage() ||
- GVar->hasAvailableExternallyLinkage() ||
- GVar->hasCommonLinkage()) {
- O << ".weak ";
- }
- if (isTexture(*GVar)) { // textures and surfaces are printed as opaque references
- O << ".global .texref " << getTextureName(*GVar) << ";\n";
- return;
- }
- if (isSurface(*GVar)) {
- O << ".global .surfref " << getSurfaceName(*GVar) << ";\n";
- return;
- }
- if (GVar->isDeclaration()) {
- // (extern) declarations, no definition or initializer
- // Currently the only known declaration is for an automatic __local
- // (.shared) promoted to global.
- emitPTXGlobalVariable(GVar, O, STI);
- O << ";\n";
- return;
- }
- if (isSampler(*GVar)) {
- O << ".global .samplerref " << getSamplerName(*GVar);
- const Constant *Initializer = nullptr;
- if (GVar->hasInitializer())
- Initializer = GVar->getInitializer();
- const ConstantInt *CI = nullptr;
- if (Initializer)
- CI = dyn_cast<ConstantInt>(Initializer);
- if (CI) { // only an integer initializer is decoded into sampler settings
- unsigned sample = CI->getZExtValue();
- O << " = { ";
- for (int i = 0,
- addr = ((sample & __CLK_ADDRESS_MASK) >> __CLK_ADDRESS_BASE);
- i < 3; i++) { // addr is computed once in the init clause, so addr_mode_0..2 all print the same mode
- O << "addr_mode_" << i << " = ";
- switch (addr) {
- case 0:
- O << "wrap";
- break;
- case 1:
- O << "clamp_to_border";
- break;
- case 2:
- O << "clamp_to_edge";
- break;
- case 3:
- O << "wrap";
- break;
- case 4:
- O << "mirror";
- break;
- }
- O << ", ";
- }
- O << "filter_mode = ";
- switch ((sample & __CLK_FILTER_MASK) >> __CLK_FILTER_BASE) {
- case 0:
- O << "nearest";
- break;
- case 1:
- O << "linear";
- break;
- case 2:
- llvm_unreachable("Anisotropic filtering is not supported");
- default:
- O << "nearest";
- break;
- }
- if (!((sample & __CLK_NORMALIZED_MASK) >> __CLK_NORMALIZED_BASE)) {
- O << ", force_unnormalized_coords = 1";
- }
- O << " }";
- }
- O << ";\n";
- return;
- }
- if (GVar->hasPrivateLinkage()) { // private globals: drop unrollpragma*/filename* names and unused ones
- if (strncmp(GVar->getName().data(), "unrollpragma", 12) == 0)
- return;
- // FIXME - need better way (e.g. Metadata) to avoid generating this global
- if (strncmp(GVar->getName().data(), "filename", 8) == 0)
- return;
- if (GVar->use_empty())
- return;
- }
- const Function *demotedFunc = nullptr;
- if (!processDemoted && canDemoteGlobalVar(GVar, demotedFunc)) { // record under demotedFunc instead of printing here
- O << "// " << GVar->getName() << " has been demoted\n";
- if (localDecls.find(demotedFunc) != localDecls.end())
- localDecls[demotedFunc].push_back(GVar);
- else {
- std::vector<const GlobalVariable *> temp;
- temp.push_back(GVar);
- localDecls[demotedFunc] = temp;
- }
- return;
- }
- O << "."; // state space, e.g. ".global"
- emitPTXAddressSpace(PTy->getAddressSpace(), O);
- if (isManaged(*GVar)) {
- if (STI.getPTXVersion() < 40 || STI.getSmVersion() < 30) {
- report_fatal_error(
- ".attribute(.managed) requires PTX version >= 4.0 and sm_30");
- }
- O << " .attribute(.managed)";
- }
- if (MaybeAlign A = GVar->getAlign()) // explicit alignment wins over the preferred type alignment
- O << " .align " << A->value();
- else
- O << " .align " << (int)DL.getPrefTypeAlign(ETy).value();
- if (ETy->isFloatingPointTy() || ETy->isPointerTy() ||
- (ETy->isIntegerTy() && ETy->getScalarSizeInBits() <= 64)) { // scalar path
- O << " .";
- // Special case: ABI requires that we use .u8 for predicates
- if (ETy->isIntegerTy(1))
- O << "u8";
- else
- O << getPTXFundamentalTypeStr(ETy, false);
- O << " ";
- getSymbol(GVar)->print(O, MAI);
- // Ptx allows variable initialization only for constant and global state
- // spaces.
- if (GVar->hasInitializer()) {
- if ((PTy->getAddressSpace() == ADDRESS_SPACE_GLOBAL) ||
- (PTy->getAddressSpace() == ADDRESS_SPACE_CONST)) {
- const Constant *Initializer = GVar->getInitializer();
- // 'undef' is treated as there is no value specified.
- if (!Initializer->isNullValue() && !isa<UndefValue>(Initializer)) {
- O << " = ";
- printScalarConstant(Initializer, O);
- }
- } else {
- // The frontend adds zero-initializer to device and constant variables
- // that don't have an initial value, and UndefValue to shared
- // variables, so skip warning for this case.
- if (!GVar->getInitializer()->isNullValue() &&
- !isa<UndefValue>(GVar->getInitializer())) {
- report_fatal_error("initial value of '" + GVar->getName() +
- "' is not allowed in addrspace(" +
- Twine(PTy->getAddressSpace()) + ")");
- }
- }
- }
- } else {
- unsigned int ElementSize = 0;
- // Although PTX has direct support for struct type and array type and
- // LLVM IR is very similar to PTX, the LLVM CodeGen does not support for
- // targets that support these high level field accesses. Structs, arrays
- // and vectors are lowered into arrays of bytes.
- switch (ETy->getTypeID()) {
- case Type::IntegerTyID: // Integers larger than 64 bits
- case Type::StructTyID:
- case Type::ArrayTyID:
- case Type::FixedVectorTyID:
- ElementSize = DL.getTypeStoreSize(ETy);
- // Ptx allows variable initialization only for constant and
- // global state spaces.
- if (((PTy->getAddressSpace() == ADDRESS_SPACE_GLOBAL) ||
- (PTy->getAddressSpace() == ADDRESS_SPACE_CONST)) &&
- GVar->hasInitializer()) {
- const Constant *Initializer = GVar->getInitializer();
- if (!isa<UndefValue>(Initializer) && !Initializer->isNullValue()) {
- AggBuffer aggBuffer(ElementSize, *this);
- bufferAggregateConstant(Initializer, &aggBuffer);
- if (aggBuffer.numSymbols()) { // initializer contains symbol references
- unsigned int ptrSize = MAI->getCodePointerSize();
- if (ElementSize % ptrSize ||
- !aggBuffer.allSymbolsAligned(ptrSize)) {
- // Print in bytes and use the mask() operator for pointers.
- if (!STI.hasMaskOperator())
- report_fatal_error(
- "initialized packed aggregate with pointers '" +
- GVar->getName() +
- "' requires at least PTX ISA version 7.1");
- O << " .u8 ";
- getSymbol(GVar)->print(O, MAI);
- O << "[" << ElementSize << "] = {";
- aggBuffer.printBytes(O);
- O << "}";
- } else { // size and symbol positions are pointer-size multiples: print pointer-sized words
- O << " .u" << ptrSize * 8 << " ";
- getSymbol(GVar)->print(O, MAI);
- O << "[" << ElementSize / ptrSize << "] = {";
- aggBuffer.printWords(O);
- O << "}";
- }
- } else { // plain data without symbols: byte array
- O << " .b8 ";
- getSymbol(GVar)->print(O, MAI);
- O << "[" << ElementSize << "] = {";
- aggBuffer.printBytes(O);
- O << "}";
- }
- } else { // undef or all-zero initializer: declare storage only
- O << " .b8 ";
- getSymbol(GVar)->print(O, MAI);
- if (ElementSize) {
- O << "[";
- O << ElementSize;
- O << "]";
- }
- }
- } else { // no initializer, or an address space where none is printed
- O << " .b8 ";
- getSymbol(GVar)->print(O, MAI);
- if (ElementSize) {
- O << "[";
- O << ElementSize;
- O << "]";
- }
- }
- break;
- default:
- llvm_unreachable("type not supported yet");
- }
- }
- O << ";\n";
- }
- /// Prints the nSym-th recorded symbol to os. A global value is printed by
- /// name, wrapped in generic(...) when EmitGeneric is set, the unstripped value
- /// is an address-space-0 pointer and the symbol is not a function. A constant
- /// expression is lowered with lowerConstantForGV() and printed as an MCExpr.
- void NVPTXAsmPrinter::AggBuffer::printSymbol(unsigned nSym, raw_ostream &os) {
-   const Value *Stripped = Symbols[nSym];
-   const Value *Unstripped = SymbolsBeforeStripping[nSym];
-   if (const auto *GV = dyn_cast<GlobalValue>(Stripped)) {
-     MCSymbol *Sym = AP.getSymbol(GV);
-     const auto *PtrTy = dyn_cast<PointerType>(Unstripped->getType());
-     // Is the unstripped value a generic pointer?
-     const bool IsGenericPtr = PtrTy && PtrTy->getAddressSpace() == 0;
-     const bool Wrap = EmitGeneric && IsGenericPtr && !isa<Function>(Stripped);
-     if (Wrap)
-       os << "generic(";
-     Sym->print(os, AP.MAI);
-     if (Wrap)
-       os << ")";
-     return;
-   }
-   if (const auto *CE = dyn_cast<ConstantExpr>(Unstripped)) {
-     const MCExpr *Lowered = AP.lowerConstantForGV(CE, false);
-     AP.printMCExpr(*Lowered, os);
-     return;
-   }
-   llvm_unreachable("symbol type unknown");
- }
- /// Prints the buffer to os as a comma-separated list of byte values. Where a
- /// symbol was recorded, one mask() entry per pointer byte is printed instead
- /// of the raw bytes.
- void NVPTXAsmPrinter::AggBuffer::printBytes(raw_ostream &os) {
-   const unsigned int PtrBytes = AP.MAI->getCodePointerSize();
-   // Append the buffer size so the "next symbol" lookup always has an entry.
-   symbolPosInBuffer.push_back(size);
-   unsigned int SymIdx = 0;
-   unsigned int NextSymPos = symbolPosInBuffer[SymIdx];
-   unsigned int Pos = 0;
-   while (Pos < size) {
-     if (Pos != 0)
-       os << ", ";
-     if (Pos == NextSymPos) {
-       // Generate a per-byte mask() operator for the symbol, which looks like:
-       // .global .u8 addr[] = {0xFF(foo), 0xFF00(foo), 0xFF0000(foo), ...};
-       // See https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#initializers
-       std::string SymText;
-       llvm::raw_string_ostream SymOS(SymText);
-       printSymbol(SymIdx, SymOS);
-       for (unsigned ByteIdx = 0; ByteIdx < PtrBytes; ++ByteIdx) {
-         if (ByteIdx != 0)
-           os << ", ";
-         llvm::write_hex(os, 0xFFULL << ByteIdx * 8, HexPrintStyle::PrefixUpper);
-         os << "(" << SymText << ")";
-       }
-       Pos += PtrBytes;
-       NextSymPos = symbolPosInBuffer[++SymIdx];
-       assert(NextSymPos >= Pos);
-     } else {
-       // Ordinary data byte.
-       os << static_cast<unsigned int>(buffer[Pos]);
-       ++Pos;
-     }
-   }
- }
- /// Prints the buffer to os as a comma-separated list of pointer-sized words.
- /// A word at a recorded symbol position is printed through printSymbol(); any
- /// other word is read little-endian from the buffer (32-bit when the pointer
- /// size is 4, 64-bit otherwise).
- void NVPTXAsmPrinter::AggBuffer::printWords(raw_ostream &os) {
-   const unsigned int WordBytes = AP.MAI->getCodePointerSize();
-   // Append the buffer size so the "next symbol" lookup always has an entry.
-   symbolPosInBuffer.push_back(size);
-   unsigned int SymIdx = 0;
-   unsigned int NextSymPos = symbolPosInBuffer[SymIdx];
-   assert(NextSymPos % WordBytes == 0);
-   for (unsigned int Pos = 0; Pos < size; Pos += WordBytes) {
-     if (Pos != 0)
-       os << ", ";
-     if (Pos != NextSymPos) {
-       // Plain data word.
-       if (WordBytes == 4)
-         os << support::endian::read32le(&buffer[Pos]);
-       else
-         os << support::endian::read64le(&buffer[Pos]);
-       continue;
-     }
-     printSymbol(SymIdx, os);
-     NextSymPos = symbolPosInBuffer[++SymIdx];
-     assert(NextSymPos % WordBytes == 0);
-     assert(NextSymPos >= Pos + WordBytes);
-   }
- }
- /// Prints every global recorded in localDecls for function f to O, each one
- /// preceded by a "// demoted variable" comment. Does nothing when f has no
- /// entry in localDecls.
- void NVPTXAsmPrinter::emitDemotedVars(const Function *f, raw_ostream &O) {
-   auto Entry = localDecls.find(f);
-   if (Entry == localDecls.end())
-     return;
-   const auto &NTM = static_cast<const NVPTXTargetMachine &>(TM);
-   const auto &STI =
-       *static_cast<const NVPTXSubtarget *>(NTM.getSubtargetImpl());
-   for (const GlobalVariable *Demoted : Entry->second) {
-     O << "\t// demoted variable\n\t";
-     printModuleLevelGV(Demoted, O, /*processDemoted=*/true, STI);
-   }
- }
- /// Prints the PTX state-space keyword ("local", "global", "const" or
- /// "shared") for AddressSpace to O. Any other address space is a fatal error.
- void NVPTXAsmPrinter::emitPTXAddressSpace(unsigned int AddressSpace,
-                                           raw_ostream &O) const {
-   const char *Keyword = nullptr;
-   switch (AddressSpace) {
-   case ADDRESS_SPACE_LOCAL:
-     Keyword = "local";
-     break;
-   case ADDRESS_SPACE_GLOBAL:
-     Keyword = "global";
-     break;
-   case ADDRESS_SPACE_CONST:
-     Keyword = "const";
-     break;
-   case ADDRESS_SPACE_SHARED:
-     Keyword = "shared";
-     break;
-   default:
-     // Does not return.
-     report_fatal_error("Bad address space found while emitting PTX: " +
-                        llvm::Twine(AddressSpace));
-   }
-   O << Keyword;
- }
- /// Returns the PTX fundamental type name for a scalar LLVM type:
- ///   i1 -> "pred", iN (N <= 64) -> "uN", half -> "b16", float -> "f32",
- ///   double -> "f64", pointer -> "b64"/"b32" when useB4PTR is set and
- ///   "u64"/"u32" otherwise.
- /// Every other type is treated as unreachable.
- std::string
- NVPTXAsmPrinter::getPTXFundamentalTypeStr(Type *Ty, bool useB4PTR) const {
-   const Type::TypeID Kind = Ty->getTypeID();
-   if (Kind == Type::IntegerTyID) {
-     const unsigned Width = cast<IntegerType>(Ty)->getBitWidth();
-     if (Width == 1)
-       return "pred";
-     if (Width > 64)
-       llvm_unreachable("Integer too large");
-     return "u" + utostr(Width);
-   }
-   // fp16 is stored as .b16 for compatibility with pre-sm_53 PTX assembly.
-   if (Kind == Type::HalfTyID)
-     return "b16";
-   if (Kind == Type::FloatTyID)
-     return "f32";
-   if (Kind == Type::DoubleTyID)
-     return "f64";
-   if (Kind == Type::PointerTyID) {
-     const unsigned PtrBits =
-         TM.getPointerSizeInBits(Ty->getPointerAddressSpace());
-     assert((PtrBits == 64 || PtrBits == 32) && "Unexpected pointer size");
-     if (PtrBits == 64)
-       return useB4PTR ? "b64" : "u64";
-     return useB4PTR ? "b32" : "u32";
-   }
-   llvm_unreachable("unexpected type");
- }
- /// Prints the declaration part of a global variable to O, without initializer
- /// and without trailing ';': state space, optional .attribute(.managed),
- /// alignment, type and symbol name. i128, structs, arrays and fixed vectors
- /// are printed as .b8 byte arrays.
- void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable *GVar,
-                                             raw_ostream &O,
-                                             const NVPTXSubtarget &STI) {
-   const DataLayout &DL = getDataLayout();
-   // GlobalVariables are always constant pointers themselves.
-   Type *ValueTy = GVar->getValueType();
-   O << ".";
-   emitPTXAddressSpace(GVar->getType()->getAddressSpace(), O);
-   if (isManaged(*GVar)) {
-     const bool ManagedSupported =
-         STI.getPTXVersion() >= 40 && STI.getSmVersion() >= 30;
-     if (!ManagedSupported)
-       report_fatal_error(
-           ".attribute(.managed) requires PTX version >= 4.0 and sm_30");
-     O << " .attribute(.managed)";
-   }
-   // An explicit alignment on the global wins over the preferred alignment of
-   // its value type.
-   O << " .align ";
-   if (MaybeAlign Explicit = GVar->getAlign())
-     O << Explicit->value();
-   else
-     O << static_cast<int>(DL.getPrefTypeAlign(ValueTy).value());
-   // Special case for i128
-   if (ValueTy->isIntegerTy(128)) {
-     O << " .b8 ";
-     getSymbol(GVar)->print(O, MAI);
-     O << "[16]";
-     return;
-   }
-   if (ValueTy->isFloatingPointTy() || ValueTy->isIntOrPtrTy()) {
-     O << " ." << getPTXFundamentalTypeStr(ValueTy) << " ";
-     getSymbol(GVar)->print(O, MAI);
-     return;
-   }
-   // Although PTX has direct support for struct type and array type and LLVM IR
-   // is very similar to PTX, the LLVM CodeGen does not support for targets that
-   // support these high level field accesses. Structs and arrays are lowered
-   // into arrays of bytes.
-   const Type::TypeID Kind = ValueTy->getTypeID();
-   if (Kind != Type::StructTyID && Kind != Type::ArrayTyID &&
-       Kind != Type::FixedVectorTyID)
-     llvm_unreachable("type not supported yet");
-   const int64_t ByteCount = DL.getTypeStoreSize(ValueTy);
-   O << " .b8 ";
-   getSymbol(GVar)->print(O, MAI);
-   O << "[";
-   // A zero-sized aggregate is printed with empty brackets.
-   if (ByteCount != 0)
-     O << ByteCount;
-   O << "]";
- }
- /// Prints the PTX name of a parameter: the symbol of the function owning the
- /// argument I, followed by "_param_" and paramIndex.
- void NVPTXAsmPrinter::printParamName(Function::const_arg_iterator I,
-                                      int paramIndex, raw_ostream &O) {
-   MCSymbol *OwnerSym = getSymbol(I->getParent());
-   OwnerSym->print(O, MAI);
-   O << "_param_";
-   O << paramIndex;
- }
- void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) { /*
-  * Prints the parenthesised PTX parameter list of function F to O.
-  *
-  * A function without arguments that is not variadic prints "()". Otherwise
-  * one declaration per argument is printed, separated by ",\n":
-  *   - kernel image/sampler arguments become .surfref/.texref/.samplerref
-  *     parameters (or .u64 .ptr handles when the subtarget has image handles);
-  *   - non-byval aggregates, vectors and i128 become aligned .b8 arrays;
-  *   - non-byval scalars become .param (or .reg when not using the ABI)
-  *     declarations;
-  *   - byval arguments become an aligned .b8 array, or, for non-kernel
-  *     functions without the ABI, one .reg declaration per scalar piece.
-  * A variadic function gets a trailing "<name>_vararg[]" byte array.
-  */
- const DataLayout &DL = getDataLayout();
- const AttributeList &PAL = F->getAttributes();
- const NVPTXSubtarget &STI = TM.getSubtarget<NVPTXSubtarget>(*F);
- const auto *TLI = cast<NVPTXTargetLowering>(STI.getTargetLowering());
- Function::const_arg_iterator I, E;
- unsigned paramIndex = 0;
- bool first = true;
- bool isKernelFunc = isKernelFunction(*F);
- bool isABI = (STI.getSmVersion() >= 20); // selects .param (true) vs .reg (false) declarations below
- bool hasImageHandles = STI.hasImageHandles();
- if (F->arg_empty() && !F->isVarArg()) {
- O << "()\n";
- return;
- }
- O << "(\n";
- for (I = F->arg_begin(), E = F->arg_end(); I != E; ++I, paramIndex++) {
- Type *Ty = I->getType();
- if (!first)
- O << ",\n";
- first = false;
- // Handle image/sampler parameters
- if (isKernelFunction(*F)) {
- if (isSampler(*I) || isImage(*I)) {
- if (isImage(*I)) {
- std::string sname = std::string(I->getName()); // NOTE(review): sname is not read anywhere in this function
- if (isImageWriteOnly(*I) || isImageReadWrite(*I)) {
- if (hasImageHandles)
- O << "\t.param .u64 .ptr .surfref ";
- else
- O << "\t.param .surfref ";
- CurrentFnSym->print(O, MAI);
- O << "_param_" << paramIndex;
- }
- else { // Default image is read_only
- if (hasImageHandles)
- O << "\t.param .u64 .ptr .texref ";
- else
- O << "\t.param .texref ";
- CurrentFnSym->print(O, MAI);
- O << "_param_" << paramIndex;
- }
- } else {
- if (hasImageHandles)
- O << "\t.param .u64 .ptr .samplerref ";
- else
- O << "\t.param .samplerref ";
- CurrentFnSym->print(O, MAI);
- O << "_param_" << paramIndex;
- }
- continue;
- }
- }
- auto getOptimalAlignForParam = [TLI, &DL, &PAL, F,
- paramIndex](Type *Ty) -> Align { // larger of the lowering's optimized alignment and the parameter's align attribute
- Align TypeAlign = TLI->getFunctionParamOptimizedAlign(F, Ty, DL);
- MaybeAlign ParamAlign = PAL.getParamAlignment(paramIndex);
- return std::max(TypeAlign, ParamAlign.valueOrOne());
- };
- if (!PAL.hasParamAttr(paramIndex, Attribute::ByVal)) {
- if (Ty->isAggregateType() || Ty->isVectorTy() || Ty->isIntegerTy(128)) {
- // Just print .param .align <a> .b8 .param[size];
- // <a> = optimal alignment for the element type; always multiple of
- // PAL.getParamAlignment
- // size = typeallocsize of element type
- Align OptimalAlign = getOptimalAlignForParam(Ty);
- O << "\t.param .align " << OptimalAlign.value() << " .b8 ";
- printParamName(I, paramIndex, O);
- O << "[" << DL.getTypeAllocSize(Ty) << "]";
- continue;
- }
- // Just a scalar
- auto *PTy = dyn_cast<PointerType>(Ty);
- unsigned PTySizeInBits = 0;
- if (PTy) {
- PTySizeInBits =
- TLI->getPointerTy(DL, PTy->getAddressSpace()).getSizeInBits();
- assert(PTySizeInBits && "Invalid pointer size");
- }
- if (isKernelFunc) {
- if (PTy) {
- // Special handling for pointer arguments to kernel
- O << "\t.param .u" << PTySizeInBits << " ";
- if (static_cast<NVPTXTargetMachine &>(TM).getDrvInterface() !=
- NVPTX::CUDA) { // only non-CUDA driver interfaces get the .ptr / state-space / .align annotations
- int addrSpace = PTy->getAddressSpace();
- switch (addrSpace) {
- default:
- O << ".ptr ";
- break;
- case ADDRESS_SPACE_CONST:
- O << ".ptr .const ";
- break;
- case ADDRESS_SPACE_SHARED:
- O << ".ptr .shared ";
- break;
- case ADDRESS_SPACE_GLOBAL:
- O << ".ptr .global ";
- break;
- }
- Align ParamAlign = I->getParamAlign().valueOrOne();
- O << ".align " << ParamAlign.value() << " ";
- }
- printParamName(I, paramIndex, O);
- continue;
- }
- // non-pointer scalar to kernel func
- O << "\t.param .";
- // Special case: predicate operands become .u8 types
- if (Ty->isIntegerTy(1))
- O << "u8";
- else
- O << getPTXFundamentalTypeStr(Ty);
- O << " ";
- printParamName(I, paramIndex, O);
- continue;
- }
- // Non-kernel function, just print .param .b<size> for ABI
- // and .reg .b<size> for non-ABI
- unsigned sz = 0;
- if (isa<IntegerType>(Ty)) {
- sz = cast<IntegerType>(Ty)->getBitWidth();
- sz = promoteScalarArgumentSize(sz);
- } else if (PTy) {
- assert(PTySizeInBits && "Invalid pointer size");
- sz = PTySizeInBits;
- } else if (Ty->isHalfTy())
- // PTX ABI requires all scalar parameters to be at least 32
- // bits in size. fp16 normally uses .b16 as its storage type
- // in PTX, so its size must be adjusted here, too.
- sz = 32;
- else
- sz = Ty->getPrimitiveSizeInBits();
- if (isABI)
- O << "\t.param .b" << sz << " ";
- else
- O << "\t.reg .b" << sz << " ";
- printParamName(I, paramIndex, O);
- continue;
- }
- // param has byVal attribute.
- Type *ETy = PAL.getParamByValType(paramIndex);
- assert(ETy && "Param should have byval type");
- if (isABI || isKernelFunc) {
- // Just print .param .align <a> .b8 .param[size];
- // <a> = optimal alignment for the element type; always multiple of
- // PAL.getParamAlignment
- // size = typeallocsize of element type
- Align OptimalAlign =
- isKernelFunc
- ? getOptimalAlignForParam(ETy)
- : TLI->getFunctionByValParamAlign(
- F, ETy, PAL.getParamAlignment(paramIndex).valueOrOne(), DL);
- unsigned sz = DL.getTypeAllocSize(ETy);
- O << "\t.param .align " << OptimalAlign.value() << " .b8 ";
- printParamName(I, paramIndex, O);
- O << "[" << sz << "]";
- continue;
- } else {
- // Split the ETy into constituent parts and
- // print .param .b<size> <name> for each part.
- // Further, if a part is vector, print the above for
- // each vector element.
- SmallVector<EVT, 16> vtparts;
- ComputeValueVTs(*TLI, DL, ETy, vtparts);
- for (unsigned i = 0, e = vtparts.size(); i != e; ++i) {
- unsigned elems = 1;
- EVT elemtype = vtparts[i];
- if (vtparts[i].isVector()) {
- elems = vtparts[i].getVectorNumElements();
- elemtype = vtparts[i].getVectorElementType();
- }
- for (unsigned j = 0, je = elems; j != je; ++j) {
- unsigned sz = elemtype.getSizeInBits();
- if (elemtype.isInteger())
- sz = promoteScalarArgumentSize(sz);
- O << "\t.reg .b" << sz << " ";
- printParamName(I, paramIndex, O);
- if (j < je - 1)
- O << ",\n";
- ++paramIndex; // every printed piece takes its own parameter index
- }
- if (i < e - 1)
- O << ",\n";
- }
- --paramIndex; // compensates for the paramIndex++ in the enclosing for-loop header
- continue;
- }
- }
- if (F->isVarArg()) { // variadic arguments are represented by one trailing byte array
- if (!first)
- O << ",\n";
- O << "\t.param .align " << STI.getMaxRequiredAlignment();
- O << " .b8 ";
- getSymbol(F)->print(O, MAI);
- O << "_vararg[]";
- }
- O << "\n)\n";
- }
- /// Convenience overload: prints the parameter list of the IR function that
- /// MF was built from.
- void NVPTXAsmPrinter::emitFunctionParamList(const MachineFunction &MF,
-                                             raw_ostream &O) {
-   emitFunctionParamList(&MF.getFunction(), O);
- }
- /// Builds the per-register-class numbering of the virtual registers and emits,
- /// as raw text, the local stack depot (if the frame has a non-zero size) and
- /// one .reg declaration per register class that has at least one register.
- ///
- /// Side effect: fills VRegMapping, which maps each register class to a map
- /// from global virtual register number to a class-local number starting at 1.
- void NVPTXAsmPrinter::setAndEmitFunctionVirtualRegisters(
-     const MachineFunction &MF) {
-   SmallString<128> Str;
-   raw_svector_ostream O(Str);
-   // Map the global virtual register number to a register class specific
-   // virtual register number starting from 1 with that class.
-   const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
-   // Emit the Fake Stack Object
-   const MachineFrameInfo &MFI = MF.getFrameInfo();
-   int NumBytes = (int) MFI.getStackSize();
-   if (NumBytes) {
-     O << "\t.local .align " << MFI.getMaxAlign().value() << " .b8 \t"
-       << DEPOTNAME << getFunctionNumber() << "[" << NumBytes << "];\n";
-     // %SP and %SPL are declared with the pointer width of the target.
-     if (static_cast<const NVPTXTargetMachine &>(MF.getTarget()).is64Bit()) {
-       O << "\t.reg .b64 \t%SP;\n";
-       O << "\t.reg .b64 \t%SPL;\n";
-     } else {
-       O << "\t.reg .b32 \t%SP;\n";
-       O << "\t.reg .b32 \t%SPL;\n";
-     }
-   }
-   // Go through all virtual registers to establish the mapping between the
-   // global virtual register number and the per class virtual register number.
-   // We use the per class virtual register number in the ptx output.
-   unsigned int numVRs = MRI->getNumVirtRegs();
-   for (unsigned i = 0; i < numVRs; i++) {
-     Register vr = Register::index2VirtReg(i);
-     const TargetRegisterClass *RC = MRI->getRegClass(vr);
-     DenseMap<unsigned, unsigned> &regmap = VRegMapping[RC];
-     // The next free class-local number is the current entry count plus one.
-     int n = regmap.size();
-     regmap.insert(std::make_pair(vr, n + 1));
-   }
-   // Emit register declarations
-   // @TODO: Extract out the real register usage
-   // Emit declaration of the virtual registers or 'physical' registers for
-   // each register class
-   for (unsigned i = 0, e = TRI->getNumRegClasses(); i != e; ++i) {
-     const TargetRegisterClass *RC = TRI->getRegClass(i);
-     DenseMap<unsigned, unsigned> &regmap = VRegMapping[RC];
-     std::string rcname = getNVPTXRegClassName(RC);
-     std::string rcStr = getNVPTXRegClassStr(RC);
-     int n = regmap.size();
-     // Only declare those registers that may be used.
-     if (n) {
-       O << "\t.reg " << rcname << " \t" << rcStr << "<" << (n+1)
-         << ">;\n";
-     }
-   }
-   OutStreamer->emitRawText(O.str());
- }
- /// Prints a float or double constant to O as a PTX hexadecimal literal:
- /// "0f" followed by 8 upper-case hex digits for float, "0d" followed by 16 for
- /// double. Other floating-point types are treated as unreachable.
- void NVPTXAsmPrinter::printFPConstant(const ConstantFP *Fp, raw_ostream &O) {
-   // Work on a copy; convert() modifies the value in place.
-   APFloat Value = Fp->getValueAPF();
-   Type *FpTy = Fp->getType();
-   bool LosesInfo;
-   unsigned int HexDigits;
-   const char *Prefix;
-   if (FpTy->isFloatTy()) {
-     Prefix = "0f";
-     HexDigits = 8;
-     Value.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven,
-                   &LosesInfo);
-   } else if (FpTy->isDoubleTy()) {
-     Prefix = "0d";
-     HexDigits = 16;
-     Value.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven,
-                   &LosesInfo);
-   } else {
-     llvm_unreachable("unsupported fp type");
-   }
-   const APInt Bits = Value.bitcastToAPInt();
-   O << Prefix
-     << format_hex_no_prefix(Bits.getZExtValue(), HexDigits, /*Upper=*/true);
- }
- /// Prints a scalar constant initializer to O. Handles integer and
- /// floating-point constants, null pointers (printed as "0"), global values
- /// (optionally wrapped in generic(...)) and constant expressions (lowered with
- /// lowerConstantForGV()). Anything else is treated as unreachable.
- void NVPTXAsmPrinter::printScalarConstant(const Constant *CPV, raw_ostream &O) {
-   if (const auto *IntC = dyn_cast<ConstantInt>(CPV)) {
-     O << IntC->getValue();
-     return;
-   }
-   if (const auto *FpC = dyn_cast<ConstantFP>(CPV)) {
-     printFPConstant(FpC, O);
-     return;
-   }
-   if (isa<ConstantPointerNull>(CPV)) {
-     O << "0";
-     return;
-   }
-   if (const auto *GV = dyn_cast<GlobalValue>(CPV)) {
-     // generic(...) is used only for non-function symbols in address space 0.
-     const bool InGenericSpace = GV->getType()->getAddressSpace() == 0;
-     const bool Wrap = EmitGeneric && !isa<Function>(CPV) && InGenericSpace;
-     if (Wrap)
-       O << "generic(";
-     getSymbol(GV)->print(O, MAI);
-     if (Wrap)
-       O << ")";
-     return;
-   }
-   if (const auto *CE = dyn_cast<ConstantExpr>(CPV)) {
-     const MCExpr *Lowered = lowerConstantForGV(CE, false);
-     printMCExpr(*Lowered, O);
-     return;
-   }
-   llvm_unreachable("Not scalar type found in printScalarConstant()");
- }
- void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes,
- AggBuffer *AggBuffer) { /*
-  * Appends the constant CPV to AggBuffer, least significant byte first.
-  *
-  * CPV       - constant to serialize; integers, floating-point values,
-  *             pointers and nested aggregates are handled below.
-  * Bytes     - forwarded to AggBuffer::addBytes() and used as the zero-fill
-  *             size; 0 means "use the alloc size of CPV" for undef/null
-  *             values. Presumably the padded size of the enclosing struct
-  *             field - confirm against AggBuffer::addBytes().
-  * AggBuffer - destination buffer; pointer-valued constants are additionally
-  *             registered there through addSymbol().
-  */
- const DataLayout &DL = getDataLayout();
- int AllocSize = DL.getTypeAllocSize(CPV->getType());
- if (isa<UndefValue>(CPV) || CPV->isNullValue()) {
- // Non-zero Bytes indicates that we need to zero-fill everything. Otherwise,
- // only the space allocated by CPV.
- AggBuffer->addZeros(Bytes ? Bytes : AllocSize);
- return;
- }
- // Helper for filling AggBuffer with APInts.
- auto AddIntToBuffer = [AggBuffer, Bytes](const APInt &Val) {
- size_t NumBytes = (Val.getBitWidth() + 7) / 8; // bit width rounded up to whole bytes
- SmallVector<unsigned char, 16> Buf(NumBytes);
- for (unsigned I = 0; I < NumBytes; ++I) {
- Buf[I] = Val.extractBitsAsZExtValue(8, I * 8); // byte I holds bits [8*I, 8*I+8)
- }
- AggBuffer->addBytes(Buf.data(), NumBytes, Bytes);
- };
- switch (CPV->getType()->getTypeID()) {
- case Type::IntegerTyID:
- if (const auto CI = dyn_cast<ConstantInt>(CPV)) {
- AddIntToBuffer(CI->getValue());
- break;
- }
- if (const auto *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
- if (const auto *CI =
- dyn_cast<ConstantInt>(ConstantFoldConstant(Cexpr, DL))) { // expression folds to a plain integer
- AddIntToBuffer(CI->getValue());
- break;
- }
- if (Cexpr->getOpcode() == Instruction::PtrToInt) { // integer holding an address: record the symbol, reserve zeroed space
- Value *V = Cexpr->getOperand(0)->stripPointerCasts();
- AggBuffer->addSymbol(V, Cexpr->getOperand(0));
- AggBuffer->addZeros(AllocSize);
- break;
- }
- }
- llvm_unreachable("unsupported integer const type"); // reached when none of the cases above issued a break
- break;
- case Type::HalfTyID:
- case Type::BFloatTyID:
- case Type::FloatTyID:
- case Type::DoubleTyID:
- AddIntToBuffer(cast<ConstantFP>(CPV)->getValueAPF().bitcastToAPInt());
- break;
- case Type::PointerTyID: {
- if (const GlobalValue *GVar = dyn_cast<GlobalValue>(CPV)) {
- AggBuffer->addSymbol(GVar, GVar);
- } else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
- const Value *v = Cexpr->stripPointerCasts();
- AggBuffer->addSymbol(v, Cexpr);
- }
- AggBuffer->addZeros(AllocSize); // zeroed space is added whether or not a symbol was recorded
- break;
- }
- case Type::ArrayTyID:
- case Type::FixedVectorTyID:
- case Type::StructTyID: {
- if (isa<ConstantAggregate>(CPV) || isa<ConstantDataSequential>(CPV)) {
- bufferAggregateConstant(CPV, AggBuffer);
- if (Bytes > AllocSize) // zero-fill up to the requested size
- AggBuffer->addZeros(Bytes - AllocSize);
- } else if (isa<ConstantAggregateZero>(CPV))
- AggBuffer->addZeros(Bytes);
- else
- llvm_unreachable("Unexpected Constant type");
- break;
- }
- default:
- llvm_unreachable("unsupported type");
- }
- }
- /// Serializes an aggregate (or wide integer) constant into aggBuffer. Array,
- /// vector and data-sequential elements are appended one by one through
- /// bufferLEByte(); struct fields are appended with the distance to the next
- /// field (or to the end of the struct) passed as the byte count. Any other
- /// constant kind is treated as unreachable.
- void NVPTXAsmPrinter::bufferAggregateConstant(const Constant *CPV,
-                                               AggBuffer *aggBuffer) {
-   const DataLayout &DL = getDataLayout();
-   // Integers of arbitrary width: peel off the low byte, then shift right.
-   if (const auto *IntC = dyn_cast<ConstantInt>(CPV)) {
-     APInt Remaining = IntC->getValue();
-     const unsigned NumBytes = DL.getTypeAllocSize(CPV->getType());
-     for (unsigned Idx = 0; Idx < NumBytes; ++Idx) {
-       uint8_t LowByte = Remaining.getLoBits(8).getZExtValue();
-       aggBuffer->addBytes(&LowByte, 1, 1);
-       Remaining.lshrInPlace(8);
-     }
-     return;
-   }
-   // Old constants
-   if (isa<ConstantArray>(CPV) || isa<ConstantVector>(CPV)) {
-     for (const Use &Element : CPV->operands())
-       bufferLEByte(cast<Constant>(Element.get()), 0, aggBuffer);
-     return;
-   }
-   if (const auto *Data = dyn_cast<ConstantDataSequential>(CPV)) {
-     for (unsigned Idx = 0, End = Data->getNumElements(); Idx != End; ++Idx)
-       bufferLEByte(cast<Constant>(Data->getElementAsConstant(Idx)), 0,
-                    aggBuffer);
-     return;
-   }
-   if (isa<ConstantStruct>(CPV)) {
-     const unsigned NumFields = CPV->getNumOperands();
-     if (NumFields == 0)
-       return;
-     StructType *ST = cast<StructType>(CPV->getType());
-     const auto *Layout = DL.getStructLayout(ST);
-     for (unsigned Field = 0; Field != NumFields; ++Field) {
-       int Bytes;
-       if (Field + 1 == NumFields)
-         // Last field: everything up to the end of the struct.
-         Bytes = Layout->getElementOffset(0) + DL.getTypeAllocSize(ST) -
-                 Layout->getElementOffset(Field);
-       else
-         // Otherwise: distance to the start of the next field.
-         Bytes = Layout->getElementOffset(Field + 1) -
-                 Layout->getElementOffset(Field);
-       bufferLEByte(cast<Constant>(CPV->getOperand(Field)), Bytes, aggBuffer);
-     }
-     return;
-   }
-   llvm_unreachable("unsupported constant type in printAggregateConstant()");
- }
- /// lowerConstantForGV - Return an MCExpr for the given Constant. This is mostly
- /// a copy from AsmPrinter::lowerConstant, except customized to only handle
- /// expressions that are representable in PTX and create
- /// NVPTXGenericMCSymbolRefExpr nodes for addrspacecast instructions.
- const MCExpr *
- NVPTXAsmPrinter::lowerConstantForGV(const Constant *CV, bool ProcessingGeneric) {
- MCContext &Ctx = OutContext;
- if (CV->isNullValue() || isa<UndefValue>(CV))
- return MCConstantExpr::create(0, Ctx);
- if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV))
- return MCConstantExpr::create(CI->getZExtValue(), Ctx);
- if (const GlobalValue *GV = dyn_cast<GlobalValue>(CV)) {
- const MCSymbolRefExpr *Expr =
- MCSymbolRefExpr::create(getSymbol(GV), Ctx);
- if (ProcessingGeneric) {
- return NVPTXGenericMCSymbolRefExpr::create(Expr, Ctx);
- } else {
- return Expr;
- }
- }
- const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV);
- if (!CE) {
- llvm_unreachable("Unknown constant value to lower!");
- }
- switch (CE->getOpcode()) {
- default: {
- // If the code isn't optimized, there may be outstanding folding
- // opportunities. Attempt to fold the expression using DataLayout as a
- // last resort before giving up.
- Constant *C = ConstantFoldConstant(CE, getDataLayout());
- if (C != CE)
- return lowerConstantForGV(C, ProcessingGeneric);
- // Otherwise report the problem to the user.
- std::string S;
- raw_string_ostream OS(S);
- OS << "Unsupported expression in static initializer: ";
- CE->printAsOperand(OS, /*PrintType=*/false,
- !MF ? nullptr : MF->getFunction().getParent());
- report_fatal_error(Twine(OS.str()));
- }
- case Instruction::AddrSpaceCast: {
- // Strip the addrspacecast and pass along the operand
- PointerType *DstTy = cast<PointerType>(CE->getType());
- if (DstTy->getAddressSpace() == 0) {
- return lowerConstantForGV(cast<const Constant>(CE->getOperand(0)), true);
- }
- std::string S;
- raw_string_ostream OS(S);
- OS << "Unsupported expression in static initializer: ";
- CE->printAsOperand(OS, /*PrintType=*/ false,
- !MF ? nullptr : MF->getFunction().getParent());
- report_fatal_error(Twine(OS.str()));
- }
- case Instruction::GetElementPtr: {
- const DataLayout &DL = getDataLayout();
- // Generate a symbolic expression for the byte address
- APInt OffsetAI(DL.getPointerTypeSizeInBits(CE->getType()), 0);
- cast<GEPOperator>(CE)->accumulateConstantOffset(DL, OffsetAI);
- const MCExpr *Base = lowerConstantForGV(CE->getOperand(0),
- ProcessingGeneric);
- if (!OffsetAI)
- return Base;
- int64_t Offset = OffsetAI.getSExtValue();
- return MCBinaryExpr::createAdd(Base, MCConstantExpr::create(Offset, Ctx),
- Ctx);
- }
- case Instruction::Trunc:
- // We emit the value and depend on the assembler to truncate the generated
- // expression properly. This is important for differences between
- // blockaddress labels. Since the two labels are in the same function, it
- // is reasonable to treat their delta as a 32-bit value.
- [[fallthrough]];
- case Instruction::BitCast:
- return lowerConstantForGV(CE->getOperand(0), ProcessingGeneric);
- case Instruction::IntToPtr: {
- const DataLayout &DL = getDataLayout();
- // Handle casts to pointers by changing them into casts to the appropriate
- // integer type. This promotes constant folding and simplifies this code.
- Constant *Op = CE->getOperand(0);
- Op = ConstantExpr::getIntegerCast(Op, DL.getIntPtrType(CV->getType()),
- false/*ZExt*/);
- return lowerConstantForGV(Op, ProcessingGeneric);
- }
- case Instruction::PtrToInt: {
- const DataLayout &DL = getDataLayout();
- // Support only foldable casts to/from pointers that can be eliminated by
- // changing the pointer to the appropriately sized integer type.
- Constant *Op = CE->getOperand(0);
- Type *Ty = CE->getType();
- const MCExpr *OpExpr = lowerConstantForGV(Op, ProcessingGeneric);
- // We can emit the pointer value into this slot if the slot is an
- // integer slot equal to the size of the pointer.
- if (DL.getTypeAllocSize(Ty) == DL.getTypeAllocSize(Op->getType()))
- return OpExpr;
- // Otherwise the pointer is smaller than the resultant integer, mask off
- // the high bits so we are sure to get a proper truncation if the input is
- // a constant expr.
- unsigned InBits = DL.getTypeAllocSizeInBits(Op->getType());
- const MCExpr *MaskExpr = MCConstantExpr::create(~0ULL >> (64-InBits), Ctx);
- return MCBinaryExpr::createAnd(OpExpr, MaskExpr, Ctx);
- }
- // The MC library also has a right-shift operator, but it isn't consistently
- // signed or unsigned between different targets.
- case Instruction::Add: {
- const MCExpr *LHS = lowerConstantForGV(CE->getOperand(0), ProcessingGeneric);
- const MCExpr *RHS = lowerConstantForGV(CE->getOperand(1), ProcessingGeneric);
- switch (CE->getOpcode()) {
- default: llvm_unreachable("Unknown binary operator constant cast expr");
- case Instruction::Add: return MCBinaryExpr::createAdd(LHS, RHS, Ctx);
- }
- }
- }
- }
- // Copy of MCExpr::print customized for NVPTX
- void NVPTXAsmPrinter::printMCExpr(const MCExpr &Expr, raw_ostream &OS) {
- switch (Expr.getKind()) {
- case MCExpr::Target:
- return cast<MCTargetExpr>(&Expr)->printImpl(OS, MAI);
- case MCExpr::Constant:
- OS << cast<MCConstantExpr>(Expr).getValue();
- return;
- case MCExpr::SymbolRef: {
- const MCSymbolRefExpr &SRE = cast<MCSymbolRefExpr>(Expr);
- const MCSymbol &Sym = SRE.getSymbol();
- Sym.print(OS, MAI);
- return;
- }
- case MCExpr::Unary: {
- const MCUnaryExpr &UE = cast<MCUnaryExpr>(Expr);
- switch (UE.getOpcode()) {
- case MCUnaryExpr::LNot: OS << '!'; break;
- case MCUnaryExpr::Minus: OS << '-'; break;
- case MCUnaryExpr::Not: OS << '~'; break;
- case MCUnaryExpr::Plus: OS << '+'; break;
- }
- printMCExpr(*UE.getSubExpr(), OS);
- return;
- }
- case MCExpr::Binary: {
- const MCBinaryExpr &BE = cast<MCBinaryExpr>(Expr);
- // Only print parens around the LHS if it is non-trivial.
- if (isa<MCConstantExpr>(BE.getLHS()) || isa<MCSymbolRefExpr>(BE.getLHS()) ||
- isa<NVPTXGenericMCSymbolRefExpr>(BE.getLHS())) {
- printMCExpr(*BE.getLHS(), OS);
- } else {
- OS << '(';
- printMCExpr(*BE.getLHS(), OS);
- OS<< ')';
- }
- switch (BE.getOpcode()) {
- case MCBinaryExpr::Add:
- // Print "X-42" instead of "X+-42".
- if (const MCConstantExpr *RHSC = dyn_cast<MCConstantExpr>(BE.getRHS())) {
- if (RHSC->getValue() < 0) {
- OS << RHSC->getValue();
- return;
- }
- }
- OS << '+';
- break;
- default: llvm_unreachable("Unhandled binary operator");
- }
- // Only print parens around the LHS if it is non-trivial.
- if (isa<MCConstantExpr>(BE.getRHS()) || isa<MCSymbolRefExpr>(BE.getRHS())) {
- printMCExpr(*BE.getRHS(), OS);
- } else {
- OS << '(';
- printMCExpr(*BE.getRHS(), OS);
- OS << ')';
- }
- return;
- }
- }
- llvm_unreachable("Invalid expression kind!");
- }
- /// PrintAsmOperand - Print out an operand for an inline asm expression.
- ///
- bool NVPTXAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
- const char *ExtraCode, raw_ostream &O) {
- if (ExtraCode && ExtraCode[0]) {
- if (ExtraCode[1] != 0)
- return true; // Unknown modifier.
- switch (ExtraCode[0]) {
- default:
- // See if this is a generic print operand
- return AsmPrinter::PrintAsmOperand(MI, OpNo, ExtraCode, O);
- case 'r':
- break;
- }
- }
- printOperand(MI, OpNo, O);
- return false;
- }
- bool NVPTXAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
- unsigned OpNo,
- const char *ExtraCode,
- raw_ostream &O) {
- if (ExtraCode && ExtraCode[0])
- return true; // Unknown modifier
- O << '[';
- printMemOperand(MI, OpNo, O);
- O << ']';
- return false;
- }
- void NVPTXAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
- raw_ostream &O) {
- const MachineOperand &MO = MI->getOperand(opNum);
- switch (MO.getType()) {
- case MachineOperand::MO_Register:
- if (MO.getReg().isPhysical()) {
- if (MO.getReg() == NVPTX::VRDepot)
- O << DEPOTNAME << getFunctionNumber();
- else
- O << NVPTXInstPrinter::getRegisterName(MO.getReg());
- } else {
- emitVirtualRegister(MO.getReg(), O);
- }
- break;
- case MachineOperand::MO_Immediate:
- O << MO.getImm();
- break;
- case MachineOperand::MO_FPImmediate:
- printFPConstant(MO.getFPImm(), O);
- break;
- case MachineOperand::MO_GlobalAddress:
- PrintSymbolOperand(MO, O);
- break;
- case MachineOperand::MO_MachineBasicBlock:
- MO.getMBB()->getSymbol()->print(O, MAI);
- break;
- default:
- llvm_unreachable("Operand type not supported.");
- }
- }
- void NVPTXAsmPrinter::printMemOperand(const MachineInstr *MI, int opNum,
- raw_ostream &O, const char *Modifier) {
- printOperand(MI, opNum, O);
- if (Modifier && strcmp(Modifier, "add") == 0) {
- O << ", ";
- printOperand(MI, opNum + 1, O);
- } else {
- if (MI->getOperand(opNum + 1).isImm() &&
- MI->getOperand(opNum + 1).getImm() == 0)
- return; // don't print ',0' or '+0'
- O << "+";
- printOperand(MI, opNum + 1, O);
- }
- }
- // Force static initialization.
- extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeNVPTXAsmPrinter() {
- RegisterAsmPrinter<NVPTXAsmPrinter> X(getTheNVPTXTarget32());
- RegisterAsmPrinter<NVPTXAsmPrinter> Y(getTheNVPTXTarget64());
- }
|