12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
7177817791780178117821783178417851786178717881789179017911792179317941795179617971798179918001801180218031804180518061807180818091810181118121813181418151816181718181819182018211822182318241825182618271828182918301831183218331834183518361837183818391840184118421843184418451846184718481849185018511852185318541855185618571858185918601861186218631864186518661867186818691870187118721873187418751876187718781879188018811882188318841885188618871888188918901891189218931894189518961897189818991900190119021903190419051906190719081909191019111912191319141915191619171918191919201921192219231924192519261927192819291930193119321933193419351936193719381939194019411942194319441945194619471948194919501951195219531954195519561957195819591960196119621963196419651966196719681969197019711972197319741975197619771978197919801981198219831984198519861987198819891990199119921993199419951996199719981999200020012002200320042005200620072008200920102011201220132014201520162017201820192020202120222023202420252026202720282029203020312032203320342035203620372038203920402041204220432044204520462047204820492050205120522053205420552056205720582059206020612062206320642065206620672068206920702071207220732074207520762077207820792080208120822083208420852086208720882089209020912092209320942095209620972098209921002101210221032104210521062107210821092110211121122113211421152116211721182119212021212122212321242125212621272128212921302131213221332134213521362137213821392140214121422143214421452146214721482149215021512152 |
- //===-- NVPTXAsmPrinter.cpp - NVPTX LLVM assembly writer ------------------===//
- //
- // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
- // See https://llvm.org/LICENSE.txt for license information.
- // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
- //
- //===----------------------------------------------------------------------===//
- //
- // This file contains a printer that converts from our internal representation
- // of machine-dependent LLVM code to NVPTX assembly language.
- //
- //===----------------------------------------------------------------------===//
- #include "NVPTXAsmPrinter.h"
- #include "MCTargetDesc/NVPTXBaseInfo.h"
- #include "MCTargetDesc/NVPTXInstPrinter.h"
- #include "MCTargetDesc/NVPTXMCAsmInfo.h"
- #include "MCTargetDesc/NVPTXTargetStreamer.h"
- #include "NVPTX.h"
- #include "NVPTXMCExpr.h"
- #include "NVPTXMachineFunctionInfo.h"
- #include "NVPTXRegisterInfo.h"
- #include "NVPTXSubtarget.h"
- #include "NVPTXTargetMachine.h"
- #include "NVPTXUtilities.h"
- #include "TargetInfo/NVPTXTargetInfo.h"
- #include "cl_common_defines.h"
- #include "llvm/ADT/APFloat.h"
- #include "llvm/ADT/APInt.h"
- #include "llvm/ADT/DenseMap.h"
- #include "llvm/ADT/DenseSet.h"
- #include "llvm/ADT/SmallString.h"
- #include "llvm/ADT/SmallVector.h"
- #include "llvm/ADT/StringExtras.h"
- #include "llvm/ADT/StringRef.h"
- #include "llvm/ADT/Triple.h"
- #include "llvm/ADT/Twine.h"
- #include "llvm/Analysis/ConstantFolding.h"
- #include "llvm/CodeGen/Analysis.h"
- #include "llvm/CodeGen/MachineBasicBlock.h"
- #include "llvm/CodeGen/MachineFrameInfo.h"
- #include "llvm/CodeGen/MachineFunction.h"
- #include "llvm/CodeGen/MachineInstr.h"
- #include "llvm/CodeGen/MachineLoopInfo.h"
- #include "llvm/CodeGen/MachineModuleInfo.h"
- #include "llvm/CodeGen/MachineOperand.h"
- #include "llvm/CodeGen/MachineRegisterInfo.h"
- #include "llvm/CodeGen/TargetLowering.h"
- #include "llvm/CodeGen/TargetRegisterInfo.h"
- #include "llvm/CodeGen/ValueTypes.h"
- #include "llvm/IR/Attributes.h"
- #include "llvm/IR/BasicBlock.h"
- #include "llvm/IR/Constant.h"
- #include "llvm/IR/Constants.h"
- #include "llvm/IR/DataLayout.h"
- #include "llvm/IR/DebugInfo.h"
- #include "llvm/IR/DebugInfoMetadata.h"
- #include "llvm/IR/DebugLoc.h"
- #include "llvm/IR/DerivedTypes.h"
- #include "llvm/IR/Function.h"
- #include "llvm/IR/GlobalValue.h"
- #include "llvm/IR/GlobalVariable.h"
- #include "llvm/IR/Instruction.h"
- #include "llvm/IR/LLVMContext.h"
- #include "llvm/IR/Module.h"
- #include "llvm/IR/Operator.h"
- #include "llvm/IR/Type.h"
- #include "llvm/IR/User.h"
- #include "llvm/MC/MCExpr.h"
- #include "llvm/MC/MCInst.h"
- #include "llvm/MC/MCInstrDesc.h"
- #include "llvm/MC/MCStreamer.h"
- #include "llvm/MC/MCSymbol.h"
- #include "llvm/MC/TargetRegistry.h"
- #include "llvm/Support/Casting.h"
- #include "llvm/Support/CommandLine.h"
- #include "llvm/Support/ErrorHandling.h"
- #include "llvm/Support/MachineValueType.h"
- #include "llvm/Support/Path.h"
- #include "llvm/Support/raw_ostream.h"
- #include "llvm/Target/TargetLoweringObjectFile.h"
- #include "llvm/Target/TargetMachine.h"
- #include "llvm/Transforms/Utils/UnrollLoop.h"
- #include <cassert>
- #include <cstdint>
- #include <cstring>
- #include <new>
- #include <string>
- #include <utility>
- #include <vector>
- using namespace llvm;
- #define DEPOTNAME "__local_depot"
- /// DiscoverDependentGlobals - Return a set of GlobalVariables on which \p V
- /// depends.
- static void
- DiscoverDependentGlobals(const Value *V,
- DenseSet<const GlobalVariable *> &Globals) {
- if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
- Globals.insert(GV);
- else {
- if (const User *U = dyn_cast<User>(V)) {
- for (unsigned i = 0, e = U->getNumOperands(); i != e; ++i) {
- DiscoverDependentGlobals(U->getOperand(i), Globals);
- }
- }
- }
- }
- /// VisitGlobalVariableForEmission - Add \p GV to the list of GlobalVariable
- /// instances to be emitted, but only after any dependents have been added
- /// first.s
- static void
- VisitGlobalVariableForEmission(const GlobalVariable *GV,
- SmallVectorImpl<const GlobalVariable *> &Order,
- DenseSet<const GlobalVariable *> &Visited,
- DenseSet<const GlobalVariable *> &Visiting) {
- // Have we already visited this one?
- if (Visited.count(GV))
- return;
- // Do we have a circular dependency?
- if (!Visiting.insert(GV).second)
- report_fatal_error("Circular dependency found in global variable set");
- // Make sure we visit all dependents first
- DenseSet<const GlobalVariable *> Others;
- for (unsigned i = 0, e = GV->getNumOperands(); i != e; ++i)
- DiscoverDependentGlobals(GV->getOperand(i), Others);
- for (const GlobalVariable *GV : Others)
- VisitGlobalVariableForEmission(GV, Order, Visited, Visiting);
- // Now we can visit ourself
- Order.push_back(GV);
- Visited.insert(GV);
- Visiting.erase(GV);
- }
- void NVPTXAsmPrinter::emitInstruction(const MachineInstr *MI) {
- MCInst Inst;
- lowerToMCInst(MI, Inst);
- EmitToStreamer(*OutStreamer, Inst);
- }
- // Handle symbol backtracking for targets that do not support image handles
- bool NVPTXAsmPrinter::lowerImageHandleOperand(const MachineInstr *MI,
- unsigned OpNo, MCOperand &MCOp) {
- const MachineOperand &MO = MI->getOperand(OpNo);
- const MCInstrDesc &MCID = MI->getDesc();
- if (MCID.TSFlags & NVPTXII::IsTexFlag) {
- // This is a texture fetch, so operand 4 is a texref and operand 5 is
- // a samplerref
- if (OpNo == 4 && MO.isImm()) {
- lowerImageHandleSymbol(MO.getImm(), MCOp);
- return true;
- }
- if (OpNo == 5 && MO.isImm() && !(MCID.TSFlags & NVPTXII::IsTexModeUnifiedFlag)) {
- lowerImageHandleSymbol(MO.getImm(), MCOp);
- return true;
- }
- return false;
- } else if (MCID.TSFlags & NVPTXII::IsSuldMask) {
- unsigned VecSize =
- 1 << (((MCID.TSFlags & NVPTXII::IsSuldMask) >> NVPTXII::IsSuldShift) - 1);
- // For a surface load of vector size N, the Nth operand will be the surfref
- if (OpNo == VecSize && MO.isImm()) {
- lowerImageHandleSymbol(MO.getImm(), MCOp);
- return true;
- }
- return false;
- } else if (MCID.TSFlags & NVPTXII::IsSustFlag) {
- // This is a surface store, so operand 0 is a surfref
- if (OpNo == 0 && MO.isImm()) {
- lowerImageHandleSymbol(MO.getImm(), MCOp);
- return true;
- }
- return false;
- } else if (MCID.TSFlags & NVPTXII::IsSurfTexQueryFlag) {
- // This is a query, so operand 1 is a surfref/texref
- if (OpNo == 1 && MO.isImm()) {
- lowerImageHandleSymbol(MO.getImm(), MCOp);
- return true;
- }
- return false;
- }
- return false;
- }
- void NVPTXAsmPrinter::lowerImageHandleSymbol(unsigned Index, MCOperand &MCOp) {
- // Ewwww
- LLVMTargetMachine &TM = const_cast<LLVMTargetMachine&>(MF->getTarget());
- NVPTXTargetMachine &nvTM = static_cast<NVPTXTargetMachine&>(TM);
- const NVPTXMachineFunctionInfo *MFI = MF->getInfo<NVPTXMachineFunctionInfo>();
- const char *Sym = MFI->getImageHandleSymbol(Index);
- std::string *SymNamePtr =
- nvTM.getManagedStrPool()->getManagedString(Sym);
- MCOp = GetSymbolRef(OutContext.getOrCreateSymbol(StringRef(*SymNamePtr)));
- }
- void NVPTXAsmPrinter::lowerToMCInst(const MachineInstr *MI, MCInst &OutMI) {
- OutMI.setOpcode(MI->getOpcode());
- // Special: Do not mangle symbol operand of CALL_PROTOTYPE
- if (MI->getOpcode() == NVPTX::CALL_PROTOTYPE) {
- const MachineOperand &MO = MI->getOperand(0);
- OutMI.addOperand(GetSymbolRef(
- OutContext.getOrCreateSymbol(Twine(MO.getSymbolName()))));
- return;
- }
- const NVPTXSubtarget &STI = MI->getMF()->getSubtarget<NVPTXSubtarget>();
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = MI->getOperand(i);
- MCOperand MCOp;
- if (!STI.hasImageHandles()) {
- if (lowerImageHandleOperand(MI, i, MCOp)) {
- OutMI.addOperand(MCOp);
- continue;
- }
- }
- if (lowerOperand(MO, MCOp))
- OutMI.addOperand(MCOp);
- }
- }
- bool NVPTXAsmPrinter::lowerOperand(const MachineOperand &MO,
- MCOperand &MCOp) {
- switch (MO.getType()) {
- default: llvm_unreachable("unknown operand type");
- case MachineOperand::MO_Register:
- MCOp = MCOperand::createReg(encodeVirtualRegister(MO.getReg()));
- break;
- case MachineOperand::MO_Immediate:
- MCOp = MCOperand::createImm(MO.getImm());
- break;
- case MachineOperand::MO_MachineBasicBlock:
- MCOp = MCOperand::createExpr(MCSymbolRefExpr::create(
- MO.getMBB()->getSymbol(), OutContext));
- break;
- case MachineOperand::MO_ExternalSymbol:
- MCOp = GetSymbolRef(GetExternalSymbolSymbol(MO.getSymbolName()));
- break;
- case MachineOperand::MO_GlobalAddress:
- MCOp = GetSymbolRef(getSymbol(MO.getGlobal()));
- break;
- case MachineOperand::MO_FPImmediate: {
- const ConstantFP *Cnt = MO.getFPImm();
- const APFloat &Val = Cnt->getValueAPF();
- switch (Cnt->getType()->getTypeID()) {
- default: report_fatal_error("Unsupported FP type"); break;
- case Type::HalfTyID:
- MCOp = MCOperand::createExpr(
- NVPTXFloatMCExpr::createConstantFPHalf(Val, OutContext));
- break;
- case Type::FloatTyID:
- MCOp = MCOperand::createExpr(
- NVPTXFloatMCExpr::createConstantFPSingle(Val, OutContext));
- break;
- case Type::DoubleTyID:
- MCOp = MCOperand::createExpr(
- NVPTXFloatMCExpr::createConstantFPDouble(Val, OutContext));
- break;
- }
- break;
- }
- }
- return true;
- }
- unsigned NVPTXAsmPrinter::encodeVirtualRegister(unsigned Reg) {
- if (Register::isVirtualRegister(Reg)) {
- const TargetRegisterClass *RC = MRI->getRegClass(Reg);
- DenseMap<unsigned, unsigned> &RegMap = VRegMapping[RC];
- unsigned RegNum = RegMap[Reg];
- // Encode the register class in the upper 4 bits
- // Must be kept in sync with NVPTXInstPrinter::printRegName
- unsigned Ret = 0;
- if (RC == &NVPTX::Int1RegsRegClass) {
- Ret = (1 << 28);
- } else if (RC == &NVPTX::Int16RegsRegClass) {
- Ret = (2 << 28);
- } else if (RC == &NVPTX::Int32RegsRegClass) {
- Ret = (3 << 28);
- } else if (RC == &NVPTX::Int64RegsRegClass) {
- Ret = (4 << 28);
- } else if (RC == &NVPTX::Float32RegsRegClass) {
- Ret = (5 << 28);
- } else if (RC == &NVPTX::Float64RegsRegClass) {
- Ret = (6 << 28);
- } else if (RC == &NVPTX::Float16RegsRegClass) {
- Ret = (7 << 28);
- } else if (RC == &NVPTX::Float16x2RegsRegClass) {
- Ret = (8 << 28);
- } else {
- report_fatal_error("Bad register class");
- }
- // Insert the vreg number
- Ret |= (RegNum & 0x0FFFFFFF);
- return Ret;
- } else {
- // Some special-use registers are actually physical registers.
- // Encode this as the register class ID of 0 and the real register ID.
- return Reg & 0x0FFFFFFF;
- }
- }
- MCOperand NVPTXAsmPrinter::GetSymbolRef(const MCSymbol *Symbol) {
- const MCExpr *Expr;
- Expr = MCSymbolRefExpr::create(Symbol, MCSymbolRefExpr::VK_None,
- OutContext);
- return MCOperand::createExpr(Expr);
- }
- void NVPTXAsmPrinter::printReturnValStr(const Function *F, raw_ostream &O) {
- const DataLayout &DL = getDataLayout();
- const NVPTXSubtarget &STI = TM.getSubtarget<NVPTXSubtarget>(*F);
- const TargetLowering *TLI = STI.getTargetLowering();
- Type *Ty = F->getReturnType();
- bool isABI = (STI.getSmVersion() >= 20);
- if (Ty->getTypeID() == Type::VoidTyID)
- return;
- O << " (";
- if (isABI) {
- if (Ty->isFloatingPointTy() || (Ty->isIntegerTy() && !Ty->isIntegerTy(128))) {
- unsigned size = 0;
- if (auto *ITy = dyn_cast<IntegerType>(Ty)) {
- size = ITy->getBitWidth();
- } else {
- assert(Ty->isFloatingPointTy() && "Floating point type expected here");
- size = Ty->getPrimitiveSizeInBits();
- }
- // PTX ABI requires all scalar return values to be at least 32
- // bits in size. fp16 normally uses .b16 as its storage type in
- // PTX, so its size must be adjusted here, too.
- if (size < 32)
- size = 32;
- O << ".param .b" << size << " func_retval0";
- } else if (isa<PointerType>(Ty)) {
- O << ".param .b" << TLI->getPointerTy(DL).getSizeInBits()
- << " func_retval0";
- } else if (Ty->isAggregateType() || Ty->isVectorTy() || Ty->isIntegerTy(128)) {
- unsigned totalsz = DL.getTypeAllocSize(Ty);
- unsigned retAlignment = 0;
- if (!getAlign(*F, 0, retAlignment))
- retAlignment = DL.getABITypeAlignment(Ty);
- O << ".param .align " << retAlignment << " .b8 func_retval0[" << totalsz
- << "]";
- } else
- llvm_unreachable("Unknown return type");
- } else {
- SmallVector<EVT, 16> vtparts;
- ComputeValueVTs(*TLI, DL, Ty, vtparts);
- unsigned idx = 0;
- for (unsigned i = 0, e = vtparts.size(); i != e; ++i) {
- unsigned elems = 1;
- EVT elemtype = vtparts[i];
- if (vtparts[i].isVector()) {
- elems = vtparts[i].getVectorNumElements();
- elemtype = vtparts[i].getVectorElementType();
- }
- for (unsigned j = 0, je = elems; j != je; ++j) {
- unsigned sz = elemtype.getSizeInBits();
- if (elemtype.isInteger() && (sz < 32))
- sz = 32;
- O << ".reg .b" << sz << " func_retval" << idx;
- if (j < je - 1)
- O << ", ";
- ++idx;
- }
- if (i < e - 1)
- O << ", ";
- }
- }
- O << ") ";
- }
- void NVPTXAsmPrinter::printReturnValStr(const MachineFunction &MF,
- raw_ostream &O) {
- const Function &F = MF.getFunction();
- printReturnValStr(&F, O);
- }
- // Return true if MBB is the header of a loop marked with
- // llvm.loop.unroll.disable.
- // TODO: consider "#pragma unroll 1" which is equivalent to "#pragma nounroll".
- bool NVPTXAsmPrinter::isLoopHeaderOfNoUnroll(
- const MachineBasicBlock &MBB) const {
- MachineLoopInfo &LI = getAnalysis<MachineLoopInfo>();
- // We insert .pragma "nounroll" only to the loop header.
- if (!LI.isLoopHeader(&MBB))
- return false;
- // llvm.loop.unroll.disable is marked on the back edges of a loop. Therefore,
- // we iterate through each back edge of the loop with header MBB, and check
- // whether its metadata contains llvm.loop.unroll.disable.
- for (const MachineBasicBlock *PMBB : MBB.predecessors()) {
- if (LI.getLoopFor(PMBB) != LI.getLoopFor(&MBB)) {
- // Edges from other loops to MBB are not back edges.
- continue;
- }
- if (const BasicBlock *PBB = PMBB->getBasicBlock()) {
- if (MDNode *LoopID =
- PBB->getTerminator()->getMetadata(LLVMContext::MD_loop)) {
- if (GetUnrollMetadata(LoopID, "llvm.loop.unroll.disable"))
- return true;
- }
- }
- }
- return false;
- }
- void NVPTXAsmPrinter::emitBasicBlockStart(const MachineBasicBlock &MBB) {
- AsmPrinter::emitBasicBlockStart(MBB);
- if (isLoopHeaderOfNoUnroll(MBB))
- OutStreamer->emitRawText(StringRef("\t.pragma \"nounroll\";\n"));
- }
- void NVPTXAsmPrinter::emitFunctionEntryLabel() {
- SmallString<128> Str;
- raw_svector_ostream O(Str);
- if (!GlobalsEmitted) {
- emitGlobals(*MF->getFunction().getParent());
- GlobalsEmitted = true;
- }
- // Set up
- MRI = &MF->getRegInfo();
- F = &MF->getFunction();
- emitLinkageDirective(F, O);
- if (isKernelFunction(*F))
- O << ".entry ";
- else {
- O << ".func ";
- printReturnValStr(*MF, O);
- }
- CurrentFnSym->print(O, MAI);
- emitFunctionParamList(*MF, O);
- if (isKernelFunction(*F))
- emitKernelFunctionDirectives(*F, O);
- OutStreamer->emitRawText(O.str());
- VRegMapping.clear();
- // Emit open brace for function body.
- OutStreamer->emitRawText(StringRef("{\n"));
- setAndEmitFunctionVirtualRegisters(*MF);
- // Emit initial .loc debug directive for correct relocation symbol data.
- if (MMI && MMI->hasDebugInfo())
- emitInitialRawDwarfLocDirective(*MF);
- }
- bool NVPTXAsmPrinter::runOnMachineFunction(MachineFunction &F) {
- bool Result = AsmPrinter::runOnMachineFunction(F);
- // Emit closing brace for the body of function F.
- // The closing brace must be emitted here because we need to emit additional
- // debug labels/data after the last basic block.
- // We need to emit the closing brace here because we don't have function that
- // finished emission of the function body.
- OutStreamer->emitRawText(StringRef("}\n"));
- return Result;
- }
- void NVPTXAsmPrinter::emitFunctionBodyStart() {
- SmallString<128> Str;
- raw_svector_ostream O(Str);
- emitDemotedVars(&MF->getFunction(), O);
- OutStreamer->emitRawText(O.str());
- }
- void NVPTXAsmPrinter::emitFunctionBodyEnd() {
- VRegMapping.clear();
- }
- const MCSymbol *NVPTXAsmPrinter::getFunctionFrameSymbol() const {
- SmallString<128> Str;
- raw_svector_ostream(Str) << DEPOTNAME << getFunctionNumber();
- return OutContext.getOrCreateSymbol(Str);
- }
- void NVPTXAsmPrinter::emitImplicitDef(const MachineInstr *MI) const {
- Register RegNo = MI->getOperand(0).getReg();
- if (Register::isVirtualRegister(RegNo)) {
- OutStreamer->AddComment(Twine("implicit-def: ") +
- getVirtualRegisterName(RegNo));
- } else {
- const NVPTXSubtarget &STI = MI->getMF()->getSubtarget<NVPTXSubtarget>();
- OutStreamer->AddComment(Twine("implicit-def: ") +
- STI.getRegisterInfo()->getName(RegNo));
- }
- OutStreamer->AddBlankLine();
- }
- void NVPTXAsmPrinter::emitKernelFunctionDirectives(const Function &F,
- raw_ostream &O) const {
- // If the NVVM IR has some of reqntid* specified, then output
- // the reqntid directive, and set the unspecified ones to 1.
- // If none of reqntid* is specified, don't output reqntid directive.
- unsigned reqntidx, reqntidy, reqntidz;
- bool specified = false;
- if (!getReqNTIDx(F, reqntidx))
- reqntidx = 1;
- else
- specified = true;
- if (!getReqNTIDy(F, reqntidy))
- reqntidy = 1;
- else
- specified = true;
- if (!getReqNTIDz(F, reqntidz))
- reqntidz = 1;
- else
- specified = true;
- if (specified)
- O << ".reqntid " << reqntidx << ", " << reqntidy << ", " << reqntidz
- << "\n";
- // If the NVVM IR has some of maxntid* specified, then output
- // the maxntid directive, and set the unspecified ones to 1.
- // If none of maxntid* is specified, don't output maxntid directive.
- unsigned maxntidx, maxntidy, maxntidz;
- specified = false;
- if (!getMaxNTIDx(F, maxntidx))
- maxntidx = 1;
- else
- specified = true;
- if (!getMaxNTIDy(F, maxntidy))
- maxntidy = 1;
- else
- specified = true;
- if (!getMaxNTIDz(F, maxntidz))
- maxntidz = 1;
- else
- specified = true;
- if (specified)
- O << ".maxntid " << maxntidx << ", " << maxntidy << ", " << maxntidz
- << "\n";
- unsigned mincta;
- if (getMinCTASm(F, mincta))
- O << ".minnctapersm " << mincta << "\n";
- unsigned maxnreg;
- if (getMaxNReg(F, maxnreg))
- O << ".maxnreg " << maxnreg << "\n";
- }
- std::string
- NVPTXAsmPrinter::getVirtualRegisterName(unsigned Reg) const {
- const TargetRegisterClass *RC = MRI->getRegClass(Reg);
- std::string Name;
- raw_string_ostream NameStr(Name);
- VRegRCMap::const_iterator I = VRegMapping.find(RC);
- assert(I != VRegMapping.end() && "Bad register class");
- const DenseMap<unsigned, unsigned> &RegMap = I->second;
- VRegMap::const_iterator VI = RegMap.find(Reg);
- assert(VI != RegMap.end() && "Bad virtual register");
- unsigned MappedVR = VI->second;
- NameStr << getNVPTXRegClassStr(RC) << MappedVR;
- NameStr.flush();
- return Name;
- }
- void NVPTXAsmPrinter::emitVirtualRegister(unsigned int vr,
- raw_ostream &O) {
- O << getVirtualRegisterName(vr);
- }
- void NVPTXAsmPrinter::emitDeclaration(const Function *F, raw_ostream &O) {
- emitLinkageDirective(F, O);
- if (isKernelFunction(*F))
- O << ".entry ";
- else
- O << ".func ";
- printReturnValStr(F, O);
- getSymbol(F)->print(O, MAI);
- O << "\n";
- emitFunctionParamList(F, O);
- O << ";\n";
- }
- static bool usedInGlobalVarDef(const Constant *C) {
- if (!C)
- return false;
- if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(C)) {
- return GV->getName() != "llvm.used";
- }
- for (const User *U : C->users())
- if (const Constant *C = dyn_cast<Constant>(U))
- if (usedInGlobalVarDef(C))
- return true;
- return false;
- }
- static bool usedInOneFunc(const User *U, Function const *&oneFunc) {
- if (const GlobalVariable *othergv = dyn_cast<GlobalVariable>(U)) {
- if (othergv->getName() == "llvm.used")
- return true;
- }
- if (const Instruction *instr = dyn_cast<Instruction>(U)) {
- if (instr->getParent() && instr->getParent()->getParent()) {
- const Function *curFunc = instr->getParent()->getParent();
- if (oneFunc && (curFunc != oneFunc))
- return false;
- oneFunc = curFunc;
- return true;
- } else
- return false;
- }
- for (const User *UU : U->users())
- if (!usedInOneFunc(UU, oneFunc))
- return false;
- return true;
- }
- /* Find out if a global variable can be demoted to local scope.
- * Currently, this is valid for CUDA shared variables, which have local
- * scope and global lifetime. So the conditions to check are :
- * 1. Is the global variable in shared address space?
- * 2. Does it have internal linkage?
- * 3. Is the global variable referenced only in one function?
- */
- static bool canDemoteGlobalVar(const GlobalVariable *gv, Function const *&f) {
- if (!gv->hasInternalLinkage())
- return false;
- PointerType *Pty = gv->getType();
- if (Pty->getAddressSpace() != ADDRESS_SPACE_SHARED)
- return false;
- const Function *oneFunc = nullptr;
- bool flag = usedInOneFunc(gv, oneFunc);
- if (!flag)
- return false;
- if (!oneFunc)
- return false;
- f = oneFunc;
- return true;
- }
- static bool useFuncSeen(const Constant *C,
- DenseMap<const Function *, bool> &seenMap) {
- for (const User *U : C->users()) {
- if (const Constant *cu = dyn_cast<Constant>(U)) {
- if (useFuncSeen(cu, seenMap))
- return true;
- } else if (const Instruction *I = dyn_cast<Instruction>(U)) {
- const BasicBlock *bb = I->getParent();
- if (!bb)
- continue;
- const Function *caller = bb->getParent();
- if (!caller)
- continue;
- if (seenMap.find(caller) != seenMap.end())
- return true;
- }
- }
- return false;
- }
- void NVPTXAsmPrinter::emitDeclarations(const Module &M, raw_ostream &O) {
- DenseMap<const Function *, bool> seenMap;
- for (const Function &F : M) {
- if (F.getAttributes().hasFnAttr("nvptx-libcall-callee")) {
- emitDeclaration(&F, O);
- continue;
- }
- if (F.isDeclaration()) {
- if (F.use_empty())
- continue;
- if (F.getIntrinsicID())
- continue;
- emitDeclaration(&F, O);
- continue;
- }
- for (const User *U : F.users()) {
- if (const Constant *C = dyn_cast<Constant>(U)) {
- if (usedInGlobalVarDef(C)) {
- // The use is in the initialization of a global variable
- // that is a function pointer, so print a declaration
- // for the original function
- emitDeclaration(&F, O);
- break;
- }
- // Emit a declaration of this function if the function that
- // uses this constant expr has already been seen.
- if (useFuncSeen(C, seenMap)) {
- emitDeclaration(&F, O);
- break;
- }
- }
- if (!isa<Instruction>(U))
- continue;
- const Instruction *instr = cast<Instruction>(U);
- const BasicBlock *bb = instr->getParent();
- if (!bb)
- continue;
- const Function *caller = bb->getParent();
- if (!caller)
- continue;
- // If a caller has already been seen, then the caller is
- // appearing in the module before the callee. so print out
- // a declaration for the callee.
- if (seenMap.find(caller) != seenMap.end()) {
- emitDeclaration(&F, O);
- break;
- }
- }
- seenMap[&F] = true;
- }
- }
- static bool isEmptyXXStructor(GlobalVariable *GV) {
- if (!GV) return true;
- const ConstantArray *InitList = dyn_cast<ConstantArray>(GV->getInitializer());
- if (!InitList) return true; // Not an array; we don't know how to parse.
- return InitList->getNumOperands() == 0;
- }
- void NVPTXAsmPrinter::emitStartOfAsmFile(Module &M) {
- // Construct a default subtarget off of the TargetMachine defaults. The
- // rest of NVPTX isn't friendly to change subtargets per function and
- // so the default TargetMachine will have all of the options.
- const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM);
- const auto* STI = static_cast<const NVPTXSubtarget*>(NTM.getSubtargetImpl());
- SmallString<128> Str1;
- raw_svector_ostream OS1(Str1);
- // Emit header before any dwarf directives are emitted below.
- emitHeader(M, OS1, *STI);
- OutStreamer->emitRawText(OS1.str());
- }
- bool NVPTXAsmPrinter::doInitialization(Module &M) {
- if (M.alias_size()) {
- report_fatal_error("Module has aliases, which NVPTX does not support.");
- return true; // error
- }
- if (!isEmptyXXStructor(M.getNamedGlobal("llvm.global_ctors"))) {
- report_fatal_error(
- "Module has a nontrivial global ctor, which NVPTX does not support.");
- return true; // error
- }
- if (!isEmptyXXStructor(M.getNamedGlobal("llvm.global_dtors"))) {
- report_fatal_error(
- "Module has a nontrivial global dtor, which NVPTX does not support.");
- return true; // error
- }
- // We need to call the parent's one explicitly.
- bool Result = AsmPrinter::doInitialization(M);
- GlobalsEmitted = false;
- return Result;
- }
- void NVPTXAsmPrinter::emitGlobals(const Module &M) {
- SmallString<128> Str2;
- raw_svector_ostream OS2(Str2);
- emitDeclarations(M, OS2);
- // As ptxas does not support forward references of globals, we need to first
- // sort the list of module-level globals in def-use order. We visit each
- // global variable in order, and ensure that we emit it *after* its dependent
- // globals. We use a little extra memory maintaining both a set and a list to
- // have fast searches while maintaining a strict ordering.
- SmallVector<const GlobalVariable *, 8> Globals;
- DenseSet<const GlobalVariable *> GVVisited;
- DenseSet<const GlobalVariable *> GVVisiting;
- // Visit each global variable, in order
- for (const GlobalVariable &I : M.globals())
- VisitGlobalVariableForEmission(&I, Globals, GVVisited, GVVisiting);
- assert(GVVisited.size() == M.getGlobalList().size() &&
- "Missed a global variable");
- assert(GVVisiting.size() == 0 && "Did not fully process a global variable");
- // Print out module-level global variables in proper order
- for (unsigned i = 0, e = Globals.size(); i != e; ++i)
- printModuleLevelGV(Globals[i], OS2);
- OS2 << '\n';
- OutStreamer->emitRawText(OS2.str());
- }
- void NVPTXAsmPrinter::emitHeader(Module &M, raw_ostream &O,
- const NVPTXSubtarget &STI) {
- O << "//\n";
- O << "// Generated by LLVM NVPTX Back-End\n";
- O << "//\n";
- O << "\n";
- unsigned PTXVersion = STI.getPTXVersion();
- O << ".version " << (PTXVersion / 10) << "." << (PTXVersion % 10) << "\n";
- O << ".target ";
- O << STI.getTargetName();
- const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM);
- if (NTM.getDrvInterface() == NVPTX::NVCL)
- O << ", texmode_independent";
- bool HasFullDebugInfo = false;
- for (DICompileUnit *CU : M.debug_compile_units()) {
- switch(CU->getEmissionKind()) {
- case DICompileUnit::NoDebug:
- case DICompileUnit::DebugDirectivesOnly:
- break;
- case DICompileUnit::LineTablesOnly:
- case DICompileUnit::FullDebug:
- HasFullDebugInfo = true;
- break;
- }
- if (HasFullDebugInfo)
- break;
- }
- if (MMI && MMI->hasDebugInfo() && HasFullDebugInfo)
- O << ", debug";
- O << "\n";
- O << ".address_size ";
- if (NTM.is64Bit())
- O << "64";
- else
- O << "32";
- O << "\n";
- O << "\n";
- }
- bool NVPTXAsmPrinter::doFinalization(Module &M) {
- bool HasDebugInfo = MMI && MMI->hasDebugInfo();
- // If we did not emit any functions, then the global declarations have not
- // yet been emitted.
- if (!GlobalsEmitted) {
- emitGlobals(M);
- GlobalsEmitted = true;
- }
- // call doFinalization
- bool ret = AsmPrinter::doFinalization(M);
- clearAnnotationCache(&M);
- // Close the last emitted section
- if (HasDebugInfo) {
- static_cast<NVPTXTargetStreamer *>(OutStreamer->getTargetStreamer())
- ->closeLastSection();
- // Emit empty .debug_loc section for better support of the empty files.
- OutStreamer->emitRawText("\t.section\t.debug_loc\t{\t}");
- }
- // Output last DWARF .file directives, if any.
- static_cast<NVPTXTargetStreamer *>(OutStreamer->getTargetStreamer())
- ->outputDwarfFileDirectives();
- return ret;
- //bool Result = AsmPrinter::doFinalization(M);
- // Instead of calling the parents doFinalization, we may
- // clone parents doFinalization and customize here.
- // Currently, we if NVISA out the EmitGlobals() in
- // parent's doFinalization, which is too intrusive.
- //
- // Same for the doInitialization.
- //return Result;
- }
- // This function emits appropriate linkage directives for
- // functions and global variables.
- //
- // extern function declaration -> .extern
- // extern function definition -> .visible
- // external global variable with init -> .visible
- // external without init -> .extern
- // appending -> not allowed, assert.
- // for any linkage other than
- // internal, private, linker_private,
- // linker_private_weak, linker_private_weak_def_auto,
- // we emit -> .weak.
- void NVPTXAsmPrinter::emitLinkageDirective(const GlobalValue *V,
- raw_ostream &O) {
- if (static_cast<NVPTXTargetMachine &>(TM).getDrvInterface() == NVPTX::CUDA) {
- if (V->hasExternalLinkage()) {
- if (isa<GlobalVariable>(V)) {
- const GlobalVariable *GVar = cast<GlobalVariable>(V);
- if (GVar) {
- if (GVar->hasInitializer())
- O << ".visible ";
- else
- O << ".extern ";
- }
- } else if (V->isDeclaration())
- O << ".extern ";
- else
- O << ".visible ";
- } else if (V->hasAppendingLinkage()) {
- std::string msg;
- msg.append("Error: ");
- msg.append("Symbol ");
- if (V->hasName())
- msg.append(std::string(V->getName()));
- msg.append("has unsupported appending linkage type");
- llvm_unreachable(msg.c_str());
- } else if (!V->hasInternalLinkage() &&
- !V->hasPrivateLinkage()) {
- O << ".weak ";
- }
- }
- }
- void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar,
- raw_ostream &O,
- bool processDemoted) {
- // Skip meta data
- if (GVar->hasSection()) {
- if (GVar->getSection() == "llvm.metadata")
- return;
- }
- // Skip LLVM intrinsic global variables
- if (GVar->getName().startswith("llvm.") ||
- GVar->getName().startswith("nvvm."))
- return;
- const DataLayout &DL = getDataLayout();
- // GlobalVariables are always constant pointers themselves.
- PointerType *PTy = GVar->getType();
- Type *ETy = GVar->getValueType();
- if (GVar->hasExternalLinkage()) {
- if (GVar->hasInitializer())
- O << ".visible ";
- else
- O << ".extern ";
- } else if (GVar->hasLinkOnceLinkage() || GVar->hasWeakLinkage() ||
- GVar->hasAvailableExternallyLinkage() ||
- GVar->hasCommonLinkage()) {
- O << ".weak ";
- }
- if (isTexture(*GVar)) {
- O << ".global .texref " << getTextureName(*GVar) << ";\n";
- return;
- }
- if (isSurface(*GVar)) {
- O << ".global .surfref " << getSurfaceName(*GVar) << ";\n";
- return;
- }
- if (GVar->isDeclaration()) {
- // (extern) declarations, no definition or initializer
- // Currently the only known declaration is for an automatic __local
- // (.shared) promoted to global.
- emitPTXGlobalVariable(GVar, O);
- O << ";\n";
- return;
- }
- if (isSampler(*GVar)) {
- O << ".global .samplerref " << getSamplerName(*GVar);
- const Constant *Initializer = nullptr;
- if (GVar->hasInitializer())
- Initializer = GVar->getInitializer();
- const ConstantInt *CI = nullptr;
- if (Initializer)
- CI = dyn_cast<ConstantInt>(Initializer);
- if (CI) {
- unsigned sample = CI->getZExtValue();
- O << " = { ";
- for (int i = 0,
- addr = ((sample & __CLK_ADDRESS_MASK) >> __CLK_ADDRESS_BASE);
- i < 3; i++) {
- O << "addr_mode_" << i << " = ";
- switch (addr) {
- case 0:
- O << "wrap";
- break;
- case 1:
- O << "clamp_to_border";
- break;
- case 2:
- O << "clamp_to_edge";
- break;
- case 3:
- O << "wrap";
- break;
- case 4:
- O << "mirror";
- break;
- }
- O << ", ";
- }
- O << "filter_mode = ";
- switch ((sample & __CLK_FILTER_MASK) >> __CLK_FILTER_BASE) {
- case 0:
- O << "nearest";
- break;
- case 1:
- O << "linear";
- break;
- case 2:
- llvm_unreachable("Anisotropic filtering is not supported");
- default:
- O << "nearest";
- break;
- }
- if (!((sample & __CLK_NORMALIZED_MASK) >> __CLK_NORMALIZED_BASE)) {
- O << ", force_unnormalized_coords = 1";
- }
- O << " }";
- }
- O << ";\n";
- return;
- }
- if (GVar->hasPrivateLinkage()) {
- if (strncmp(GVar->getName().data(), "unrollpragma", 12) == 0)
- return;
- // FIXME - need better way (e.g. Metadata) to avoid generating this global
- if (strncmp(GVar->getName().data(), "filename", 8) == 0)
- return;
- if (GVar->use_empty())
- return;
- }
- const Function *demotedFunc = nullptr;
- if (!processDemoted && canDemoteGlobalVar(GVar, demotedFunc)) {
- O << "// " << GVar->getName() << " has been demoted\n";
- if (localDecls.find(demotedFunc) != localDecls.end())
- localDecls[demotedFunc].push_back(GVar);
- else {
- std::vector<const GlobalVariable *> temp;
- temp.push_back(GVar);
- localDecls[demotedFunc] = temp;
- }
- return;
- }
- O << ".";
- emitPTXAddressSpace(PTy->getAddressSpace(), O);
- if (isManaged(*GVar)) {
- O << " .attribute(.managed)";
- }
- if (MaybeAlign A = GVar->getAlign())
- O << " .align " << A->value();
- else
- O << " .align " << (int)DL.getPrefTypeAlignment(ETy);
- if (ETy->isFloatingPointTy() || ETy->isPointerTy() ||
- (ETy->isIntegerTy() && ETy->getScalarSizeInBits() <= 64)) {
- O << " .";
- // Special case: ABI requires that we use .u8 for predicates
- if (ETy->isIntegerTy(1))
- O << "u8";
- else
- O << getPTXFundamentalTypeStr(ETy, false);
- O << " ";
- getSymbol(GVar)->print(O, MAI);
- // Ptx allows variable initilization only for constant and global state
- // spaces.
- if (GVar->hasInitializer()) {
- if ((PTy->getAddressSpace() == ADDRESS_SPACE_GLOBAL) ||
- (PTy->getAddressSpace() == ADDRESS_SPACE_CONST)) {
- const Constant *Initializer = GVar->getInitializer();
- // 'undef' is treated as there is no value specified.
- if (!Initializer->isNullValue() && !isa<UndefValue>(Initializer)) {
- O << " = ";
- printScalarConstant(Initializer, O);
- }
- } else {
- // The frontend adds zero-initializer to device and constant variables
- // that don't have an initial value, and UndefValue to shared
- // variables, so skip warning for this case.
- if (!GVar->getInitializer()->isNullValue() &&
- !isa<UndefValue>(GVar->getInitializer())) {
- report_fatal_error("initial value of '" + GVar->getName() +
- "' is not allowed in addrspace(" +
- Twine(PTy->getAddressSpace()) + ")");
- }
- }
- }
- } else {
- unsigned int ElementSize = 0;
- // Although PTX has direct support for struct type and array type and
- // LLVM IR is very similar to PTX, the LLVM CodeGen does not support for
- // targets that support these high level field accesses. Structs, arrays
- // and vectors are lowered into arrays of bytes.
- switch (ETy->getTypeID()) {
- case Type::IntegerTyID: // Integers larger than 64 bits
- case Type::StructTyID:
- case Type::ArrayTyID:
- case Type::FixedVectorTyID:
- ElementSize = DL.getTypeStoreSize(ETy);
- // Ptx allows variable initilization only for constant and
- // global state spaces.
- if (((PTy->getAddressSpace() == ADDRESS_SPACE_GLOBAL) ||
- (PTy->getAddressSpace() == ADDRESS_SPACE_CONST)) &&
- GVar->hasInitializer()) {
- const Constant *Initializer = GVar->getInitializer();
- if (!isa<UndefValue>(Initializer) && !Initializer->isNullValue()) {
- AggBuffer aggBuffer(ElementSize, O, *this);
- bufferAggregateConstant(Initializer, &aggBuffer);
- if (aggBuffer.numSymbols) {
- if (static_cast<const NVPTXTargetMachine &>(TM).is64Bit()) {
- O << " .u64 ";
- getSymbol(GVar)->print(O, MAI);
- O << "[";
- O << ElementSize / 8;
- } else {
- O << " .u32 ";
- getSymbol(GVar)->print(O, MAI);
- O << "[";
- O << ElementSize / 4;
- }
- O << "]";
- } else {
- O << " .b8 ";
- getSymbol(GVar)->print(O, MAI);
- O << "[";
- O << ElementSize;
- O << "]";
- }
- O << " = {";
- aggBuffer.print();
- O << "}";
- } else {
- O << " .b8 ";
- getSymbol(GVar)->print(O, MAI);
- if (ElementSize) {
- O << "[";
- O << ElementSize;
- O << "]";
- }
- }
- } else {
- O << " .b8 ";
- getSymbol(GVar)->print(O, MAI);
- if (ElementSize) {
- O << "[";
- O << ElementSize;
- O << "]";
- }
- }
- break;
- default:
- llvm_unreachable("type not supported yet");
- }
- }
- O << ";\n";
- }
- void NVPTXAsmPrinter::emitDemotedVars(const Function *f, raw_ostream &O) {
- if (localDecls.find(f) == localDecls.end())
- return;
- std::vector<const GlobalVariable *> &gvars = localDecls[f];
- for (const GlobalVariable *GV : gvars) {
- O << "\t// demoted variable\n\t";
- printModuleLevelGV(GV, O, true);
- }
- }
- void NVPTXAsmPrinter::emitPTXAddressSpace(unsigned int AddressSpace,
- raw_ostream &O) const {
- switch (AddressSpace) {
- case ADDRESS_SPACE_LOCAL:
- O << "local";
- break;
- case ADDRESS_SPACE_GLOBAL:
- O << "global";
- break;
- case ADDRESS_SPACE_CONST:
- O << "const";
- break;
- case ADDRESS_SPACE_SHARED:
- O << "shared";
- break;
- default:
- report_fatal_error("Bad address space found while emitting PTX: " +
- llvm::Twine(AddressSpace));
- break;
- }
- }
- std::string
- NVPTXAsmPrinter::getPTXFundamentalTypeStr(Type *Ty, bool useB4PTR) const {
- switch (Ty->getTypeID()) {
- case Type::IntegerTyID: {
- unsigned NumBits = cast<IntegerType>(Ty)->getBitWidth();
- if (NumBits == 1)
- return "pred";
- else if (NumBits <= 64) {
- std::string name = "u";
- return name + utostr(NumBits);
- } else {
- llvm_unreachable("Integer too large");
- break;
- }
- break;
- }
- case Type::HalfTyID:
- // fp16 is stored as .b16 for compatibility with pre-sm_53 PTX assembly.
- return "b16";
- case Type::FloatTyID:
- return "f32";
- case Type::DoubleTyID:
- return "f64";
- case Type::PointerTyID:
- if (static_cast<const NVPTXTargetMachine &>(TM).is64Bit())
- if (useB4PTR)
- return "b64";
- else
- return "u64";
- else if (useB4PTR)
- return "b32";
- else
- return "u32";
- default:
- break;
- }
- llvm_unreachable("unexpected type");
- }
- void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable *GVar,
- raw_ostream &O) {
- const DataLayout &DL = getDataLayout();
- // GlobalVariables are always constant pointers themselves.
- Type *ETy = GVar->getValueType();
- O << ".";
- emitPTXAddressSpace(GVar->getType()->getAddressSpace(), O);
- if (MaybeAlign A = GVar->getAlign())
- O << " .align " << A->value();
- else
- O << " .align " << (int)DL.getPrefTypeAlignment(ETy);
- // Special case for i128
- if (ETy->isIntegerTy(128)) {
- O << " .b8 ";
- getSymbol(GVar)->print(O, MAI);
- O << "[16]";
- return;
- }
- if (ETy->isFloatingPointTy() || ETy->isIntOrPtrTy()) {
- O << " .";
- O << getPTXFundamentalTypeStr(ETy);
- O << " ";
- getSymbol(GVar)->print(O, MAI);
- return;
- }
- int64_t ElementSize = 0;
- // Although PTX has direct support for struct type and array type and LLVM IR
- // is very similar to PTX, the LLVM CodeGen does not support for targets that
- // support these high level field accesses. Structs and arrays are lowered
- // into arrays of bytes.
- switch (ETy->getTypeID()) {
- case Type::StructTyID:
- case Type::ArrayTyID:
- case Type::FixedVectorTyID:
- ElementSize = DL.getTypeStoreSize(ETy);
- O << " .b8 ";
- getSymbol(GVar)->print(O, MAI);
- O << "[";
- if (ElementSize) {
- O << ElementSize;
- }
- O << "]";
- break;
- default:
- llvm_unreachable("type not supported yet");
- }
- }
- static unsigned int getOpenCLAlignment(const DataLayout &DL, Type *Ty) {
- if (Ty->isSingleValueType())
- return DL.getPrefTypeAlignment(Ty);
- auto *ATy = dyn_cast<ArrayType>(Ty);
- if (ATy)
- return getOpenCLAlignment(DL, ATy->getElementType());
- auto *STy = dyn_cast<StructType>(Ty);
- if (STy) {
- unsigned int alignStruct = 1;
- // Go through each element of the struct and find the
- // largest alignment.
- for (unsigned i = 0, e = STy->getNumElements(); i != e; i++) {
- Type *ETy = STy->getElementType(i);
- unsigned int align = getOpenCLAlignment(DL, ETy);
- if (align > alignStruct)
- alignStruct = align;
- }
- return alignStruct;
- }
- auto *FTy = dyn_cast<FunctionType>(Ty);
- if (FTy)
- return DL.getPointerPrefAlignment().value();
- return DL.getPrefTypeAlignment(Ty);
- }
- void NVPTXAsmPrinter::printParamName(Function::const_arg_iterator I,
- int paramIndex, raw_ostream &O) {
- getSymbol(I->getParent())->print(O, MAI);
- O << "_param_" << paramIndex;
- }
- void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
- const DataLayout &DL = getDataLayout();
- const AttributeList &PAL = F->getAttributes();
- const NVPTXSubtarget &STI = TM.getSubtarget<NVPTXSubtarget>(*F);
- const TargetLowering *TLI = STI.getTargetLowering();
- Function::const_arg_iterator I, E;
- unsigned paramIndex = 0;
- bool first = true;
- bool isKernelFunc = isKernelFunction(*F);
- bool isABI = (STI.getSmVersion() >= 20);
- bool hasImageHandles = STI.hasImageHandles();
- MVT thePointerTy = TLI->getPointerTy(DL);
- if (F->arg_empty()) {
- O << "()\n";
- return;
- }
- O << "(\n";
- for (I = F->arg_begin(), E = F->arg_end(); I != E; ++I, paramIndex++) {
- Type *Ty = I->getType();
- if (!first)
- O << ",\n";
- first = false;
- // Handle image/sampler parameters
- if (isKernelFunction(*F)) {
- if (isSampler(*I) || isImage(*I)) {
- if (isImage(*I)) {
- std::string sname = std::string(I->getName());
- if (isImageWriteOnly(*I) || isImageReadWrite(*I)) {
- if (hasImageHandles)
- O << "\t.param .u64 .ptr .surfref ";
- else
- O << "\t.param .surfref ";
- CurrentFnSym->print(O, MAI);
- O << "_param_" << paramIndex;
- }
- else { // Default image is read_only
- if (hasImageHandles)
- O << "\t.param .u64 .ptr .texref ";
- else
- O << "\t.param .texref ";
- CurrentFnSym->print(O, MAI);
- O << "_param_" << paramIndex;
- }
- } else {
- if (hasImageHandles)
- O << "\t.param .u64 .ptr .samplerref ";
- else
- O << "\t.param .samplerref ";
- CurrentFnSym->print(O, MAI);
- O << "_param_" << paramIndex;
- }
- continue;
- }
- }
- if (!PAL.hasParamAttr(paramIndex, Attribute::ByVal)) {
- if (Ty->isAggregateType() || Ty->isVectorTy() || Ty->isIntegerTy(128)) {
- // Just print .param .align <a> .b8 .param[size];
- // <a> = PAL.getparamalignment
- // size = typeallocsize of element type
- const Align align = DL.getValueOrABITypeAlignment(
- PAL.getParamAlignment(paramIndex), Ty);
- unsigned sz = DL.getTypeAllocSize(Ty);
- O << "\t.param .align " << align.value() << " .b8 ";
- printParamName(I, paramIndex, O);
- O << "[" << sz << "]";
- continue;
- }
- // Just a scalar
- auto *PTy = dyn_cast<PointerType>(Ty);
- if (isKernelFunc) {
- if (PTy) {
- // Special handling for pointer arguments to kernel
- O << "\t.param .u" << thePointerTy.getSizeInBits() << " ";
- if (static_cast<NVPTXTargetMachine &>(TM).getDrvInterface() !=
- NVPTX::CUDA) {
- Type *ETy = PTy->getPointerElementType();
- int addrSpace = PTy->getAddressSpace();
- switch (addrSpace) {
- default:
- O << ".ptr ";
- break;
- case ADDRESS_SPACE_CONST:
- O << ".ptr .const ";
- break;
- case ADDRESS_SPACE_SHARED:
- O << ".ptr .shared ";
- break;
- case ADDRESS_SPACE_GLOBAL:
- O << ".ptr .global ";
- break;
- }
- O << ".align " << (int)getOpenCLAlignment(DL, ETy) << " ";
- }
- printParamName(I, paramIndex, O);
- continue;
- }
- // non-pointer scalar to kernel func
- O << "\t.param .";
- // Special case: predicate operands become .u8 types
- if (Ty->isIntegerTy(1))
- O << "u8";
- else
- O << getPTXFundamentalTypeStr(Ty);
- O << " ";
- printParamName(I, paramIndex, O);
- continue;
- }
- // Non-kernel function, just print .param .b<size> for ABI
- // and .reg .b<size> for non-ABI
- unsigned sz = 0;
- if (isa<IntegerType>(Ty)) {
- sz = cast<IntegerType>(Ty)->getBitWidth();
- if (sz < 32)
- sz = 32;
- } else if (isa<PointerType>(Ty))
- sz = thePointerTy.getSizeInBits();
- else if (Ty->isHalfTy())
- // PTX ABI requires all scalar parameters to be at least 32
- // bits in size. fp16 normally uses .b16 as its storage type
- // in PTX, so its size must be adjusted here, too.
- sz = 32;
- else
- sz = Ty->getPrimitiveSizeInBits();
- if (isABI)
- O << "\t.param .b" << sz << " ";
- else
- O << "\t.reg .b" << sz << " ";
- printParamName(I, paramIndex, O);
- continue;
- }
- // param has byVal attribute. So should be a pointer
- auto *PTy = dyn_cast<PointerType>(Ty);
- assert(PTy && "Param with byval attribute should be a pointer type");
- Type *ETy = PTy->getPointerElementType();
- if (isABI || isKernelFunc) {
- // Just print .param .align <a> .b8 .param[size];
- // <a> = PAL.getparamalignment
- // size = typeallocsize of element type
- Align align =
- DL.getValueOrABITypeAlignment(PAL.getParamAlignment(paramIndex), ETy);
- // Work around a bug in ptxas. When PTX code takes address of
- // byval parameter with alignment < 4, ptxas generates code to
- // spill argument into memory. Alas on sm_50+ ptxas generates
- // SASS code that fails with misaligned access. To work around
- // the problem, make sure that we align byval parameters by at
- // least 4. Matching change must be made in LowerCall() where we
- // prepare parameters for the call.
- //
- // TODO: this will need to be undone when we get to support multi-TU
- // device-side compilation as it breaks ABI compatibility with nvcc.
- // Hopefully ptxas bug is fixed by then.
- if (!isKernelFunc && align < Align(4))
- align = Align(4);
- unsigned sz = DL.getTypeAllocSize(ETy);
- O << "\t.param .align " << align.value() << " .b8 ";
- printParamName(I, paramIndex, O);
- O << "[" << sz << "]";
- continue;
- } else {
- // Split the ETy into constituent parts and
- // print .param .b<size> <name> for each part.
- // Further, if a part is vector, print the above for
- // each vector element.
- SmallVector<EVT, 16> vtparts;
- ComputeValueVTs(*TLI, DL, ETy, vtparts);
- for (unsigned i = 0, e = vtparts.size(); i != e; ++i) {
- unsigned elems = 1;
- EVT elemtype = vtparts[i];
- if (vtparts[i].isVector()) {
- elems = vtparts[i].getVectorNumElements();
- elemtype = vtparts[i].getVectorElementType();
- }
- for (unsigned j = 0, je = elems; j != je; ++j) {
- unsigned sz = elemtype.getSizeInBits();
- if (elemtype.isInteger() && (sz < 32))
- sz = 32;
- O << "\t.reg .b" << sz << " ";
- printParamName(I, paramIndex, O);
- if (j < je - 1)
- O << ",\n";
- ++paramIndex;
- }
- if (i < e - 1)
- O << ",\n";
- }
- --paramIndex;
- continue;
- }
- }
- O << "\n)\n";
- }
- void NVPTXAsmPrinter::emitFunctionParamList(const MachineFunction &MF,
- raw_ostream &O) {
- const Function &F = MF.getFunction();
- emitFunctionParamList(&F, O);
- }
- void NVPTXAsmPrinter::setAndEmitFunctionVirtualRegisters(
- const MachineFunction &MF) {
- SmallString<128> Str;
- raw_svector_ostream O(Str);
- // Map the global virtual register number to a register class specific
- // virtual register number starting from 1 with that class.
- const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
- //unsigned numRegClasses = TRI->getNumRegClasses();
- // Emit the Fake Stack Object
- const MachineFrameInfo &MFI = MF.getFrameInfo();
- int NumBytes = (int) MFI.getStackSize();
- if (NumBytes) {
- O << "\t.local .align " << MFI.getMaxAlign().value() << " .b8 \t"
- << DEPOTNAME << getFunctionNumber() << "[" << NumBytes << "];\n";
- if (static_cast<const NVPTXTargetMachine &>(MF.getTarget()).is64Bit()) {
- O << "\t.reg .b64 \t%SP;\n";
- O << "\t.reg .b64 \t%SPL;\n";
- } else {
- O << "\t.reg .b32 \t%SP;\n";
- O << "\t.reg .b32 \t%SPL;\n";
- }
- }
- // Go through all virtual registers to establish the mapping between the
- // global virtual
- // register number and the per class virtual register number.
- // We use the per class virtual register number in the ptx output.
- unsigned int numVRs = MRI->getNumVirtRegs();
- for (unsigned i = 0; i < numVRs; i++) {
- Register vr = Register::index2VirtReg(i);
- const TargetRegisterClass *RC = MRI->getRegClass(vr);
- DenseMap<unsigned, unsigned> ®map = VRegMapping[RC];
- int n = regmap.size();
- regmap.insert(std::make_pair(vr, n + 1));
- }
- // Emit register declarations
- // @TODO: Extract out the real register usage
- // O << "\t.reg .pred %p<" << NVPTXNumRegisters << ">;\n";
- // O << "\t.reg .s16 %rc<" << NVPTXNumRegisters << ">;\n";
- // O << "\t.reg .s16 %rs<" << NVPTXNumRegisters << ">;\n";
- // O << "\t.reg .s32 %r<" << NVPTXNumRegisters << ">;\n";
- // O << "\t.reg .s64 %rd<" << NVPTXNumRegisters << ">;\n";
- // O << "\t.reg .f32 %f<" << NVPTXNumRegisters << ">;\n";
- // O << "\t.reg .f64 %fd<" << NVPTXNumRegisters << ">;\n";
- // Emit declaration of the virtual registers or 'physical' registers for
- // each register class
- for (unsigned i=0; i< TRI->getNumRegClasses(); i++) {
- const TargetRegisterClass *RC = TRI->getRegClass(i);
- DenseMap<unsigned, unsigned> ®map = VRegMapping[RC];
- std::string rcname = getNVPTXRegClassName(RC);
- std::string rcStr = getNVPTXRegClassStr(RC);
- int n = regmap.size();
- // Only declare those registers that may be used.
- if (n) {
- O << "\t.reg " << rcname << " \t" << rcStr << "<" << (n+1)
- << ">;\n";
- }
- }
- OutStreamer->emitRawText(O.str());
- }
- void NVPTXAsmPrinter::printFPConstant(const ConstantFP *Fp, raw_ostream &O) {
- APFloat APF = APFloat(Fp->getValueAPF()); // make a copy
- bool ignored;
- unsigned int numHex;
- const char *lead;
- if (Fp->getType()->getTypeID() == Type::FloatTyID) {
- numHex = 8;
- lead = "0f";
- APF.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven, &ignored);
- } else if (Fp->getType()->getTypeID() == Type::DoubleTyID) {
- numHex = 16;
- lead = "0d";
- APF.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven, &ignored);
- } else
- llvm_unreachable("unsupported fp type");
- APInt API = APF.bitcastToAPInt();
- O << lead << format_hex_no_prefix(API.getZExtValue(), numHex, /*Upper=*/true);
- }
- void NVPTXAsmPrinter::printScalarConstant(const Constant *CPV, raw_ostream &O) {
- if (const ConstantInt *CI = dyn_cast<ConstantInt>(CPV)) {
- O << CI->getValue();
- return;
- }
- if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CPV)) {
- printFPConstant(CFP, O);
- return;
- }
- if (isa<ConstantPointerNull>(CPV)) {
- O << "0";
- return;
- }
- if (const GlobalValue *GVar = dyn_cast<GlobalValue>(CPV)) {
- bool IsNonGenericPointer = false;
- if (GVar->getType()->getAddressSpace() != 0) {
- IsNonGenericPointer = true;
- }
- if (EmitGeneric && !isa<Function>(CPV) && !IsNonGenericPointer) {
- O << "generic(";
- getSymbol(GVar)->print(O, MAI);
- O << ")";
- } else {
- getSymbol(GVar)->print(O, MAI);
- }
- return;
- }
- if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
- const Value *v = Cexpr->stripPointerCasts();
- PointerType *PTy = dyn_cast<PointerType>(Cexpr->getType());
- bool IsNonGenericPointer = false;
- if (PTy && PTy->getAddressSpace() != 0) {
- IsNonGenericPointer = true;
- }
- if (const GlobalValue *GVar = dyn_cast<GlobalValue>(v)) {
- if (EmitGeneric && !isa<Function>(v) && !IsNonGenericPointer) {
- O << "generic(";
- getSymbol(GVar)->print(O, MAI);
- O << ")";
- } else {
- getSymbol(GVar)->print(O, MAI);
- }
- return;
- } else {
- lowerConstant(CPV)->print(O, MAI);
- return;
- }
- }
- llvm_unreachable("Not scalar type found in printScalarConstant()");
- }
- void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes,
- AggBuffer *AggBuffer) {
- const DataLayout &DL = getDataLayout();
- int AllocSize = DL.getTypeAllocSize(CPV->getType());
- if (isa<UndefValue>(CPV) || CPV->isNullValue()) {
- // Non-zero Bytes indicates that we need to zero-fill everything. Otherwise,
- // only the space allocated by CPV.
- AggBuffer->addZeros(Bytes ? Bytes : AllocSize);
- return;
- }
- // Helper for filling AggBuffer with APInts.
- auto AddIntToBuffer = [AggBuffer, Bytes](const APInt &Val) {
- size_t NumBytes = (Val.getBitWidth() + 7) / 8;
- SmallVector<unsigned char, 16> Buf(NumBytes);
- for (unsigned I = 0; I < NumBytes; ++I) {
- Buf[I] = Val.extractBitsAsZExtValue(8, I * 8);
- }
- AggBuffer->addBytes(Buf.data(), NumBytes, Bytes);
- };
- switch (CPV->getType()->getTypeID()) {
- case Type::IntegerTyID:
- if (const auto CI = dyn_cast<ConstantInt>(CPV)) {
- AddIntToBuffer(CI->getValue());
- break;
- }
- if (const auto *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
- if (const auto *CI =
- dyn_cast<ConstantInt>(ConstantFoldConstant(Cexpr, DL))) {
- AddIntToBuffer(CI->getValue());
- break;
- }
- if (Cexpr->getOpcode() == Instruction::PtrToInt) {
- Value *V = Cexpr->getOperand(0)->stripPointerCasts();
- AggBuffer->addSymbol(V, Cexpr->getOperand(0));
- AggBuffer->addZeros(AllocSize);
- break;
- }
- }
- llvm_unreachable("unsupported integer const type");
- break;
- case Type::HalfTyID:
- case Type::FloatTyID:
- case Type::DoubleTyID:
- AddIntToBuffer(cast<ConstantFP>(CPV)->getValueAPF().bitcastToAPInt());
- break;
- case Type::PointerTyID: {
- if (const GlobalValue *GVar = dyn_cast<GlobalValue>(CPV)) {
- AggBuffer->addSymbol(GVar, GVar);
- } else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
- const Value *v = Cexpr->stripPointerCasts();
- AggBuffer->addSymbol(v, Cexpr);
- }
- AggBuffer->addZeros(AllocSize);
- break;
- }
- case Type::ArrayTyID:
- case Type::FixedVectorTyID:
- case Type::StructTyID: {
- if (isa<ConstantAggregate>(CPV) || isa<ConstantDataSequential>(CPV)) {
- bufferAggregateConstant(CPV, AggBuffer);
- if (Bytes > AllocSize)
- AggBuffer->addZeros(Bytes - AllocSize);
- } else if (isa<ConstantAggregateZero>(CPV))
- AggBuffer->addZeros(Bytes);
- else
- llvm_unreachable("Unexpected Constant type");
- break;
- }
- default:
- llvm_unreachable("unsupported type");
- }
- }
- void NVPTXAsmPrinter::bufferAggregateConstant(const Constant *CPV,
- AggBuffer *aggBuffer) {
- const DataLayout &DL = getDataLayout();
- int Bytes;
- // Integers of arbitrary width
- if (const ConstantInt *CI = dyn_cast<ConstantInt>(CPV)) {
- APInt Val = CI->getValue();
- for (unsigned I = 0, E = DL.getTypeAllocSize(CPV->getType()); I < E; ++I) {
- uint8_t Byte = Val.getLoBits(8).getZExtValue();
- aggBuffer->addBytes(&Byte, 1, 1);
- Val.lshrInPlace(8);
- }
- return;
- }
- // Old constants
- if (isa<ConstantArray>(CPV) || isa<ConstantVector>(CPV)) {
- if (CPV->getNumOperands())
- for (unsigned i = 0, e = CPV->getNumOperands(); i != e; ++i)
- bufferLEByte(cast<Constant>(CPV->getOperand(i)), 0, aggBuffer);
- return;
- }
- if (const ConstantDataSequential *CDS =
- dyn_cast<ConstantDataSequential>(CPV)) {
- if (CDS->getNumElements())
- for (unsigned i = 0; i < CDS->getNumElements(); ++i)
- bufferLEByte(cast<Constant>(CDS->getElementAsConstant(i)), 0,
- aggBuffer);
- return;
- }
- if (isa<ConstantStruct>(CPV)) {
- if (CPV->getNumOperands()) {
- StructType *ST = cast<StructType>(CPV->getType());
- for (unsigned i = 0, e = CPV->getNumOperands(); i != e; ++i) {
- if (i == (e - 1))
- Bytes = DL.getStructLayout(ST)->getElementOffset(0) +
- DL.getTypeAllocSize(ST) -
- DL.getStructLayout(ST)->getElementOffset(i);
- else
- Bytes = DL.getStructLayout(ST)->getElementOffset(i + 1) -
- DL.getStructLayout(ST)->getElementOffset(i);
- bufferLEByte(cast<Constant>(CPV->getOperand(i)), Bytes, aggBuffer);
- }
- }
- return;
- }
- llvm_unreachable("unsupported constant type in printAggregateConstant()");
- }
- /// lowerConstantForGV - Return an MCExpr for the given Constant. This is mostly
- /// a copy from AsmPrinter::lowerConstant, except customized to only handle
- /// expressions that are representable in PTX and create
- /// NVPTXGenericMCSymbolRefExpr nodes for addrspacecast instructions.
- const MCExpr *
- NVPTXAsmPrinter::lowerConstantForGV(const Constant *CV, bool ProcessingGeneric) {
- MCContext &Ctx = OutContext;
- if (CV->isNullValue() || isa<UndefValue>(CV))
- return MCConstantExpr::create(0, Ctx);
- if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV))
- return MCConstantExpr::create(CI->getZExtValue(), Ctx);
- if (const GlobalValue *GV = dyn_cast<GlobalValue>(CV)) {
- const MCSymbolRefExpr *Expr =
- MCSymbolRefExpr::create(getSymbol(GV), Ctx);
- if (ProcessingGeneric) {
- return NVPTXGenericMCSymbolRefExpr::create(Expr, Ctx);
- } else {
- return Expr;
- }
- }
- const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV);
- if (!CE) {
- llvm_unreachable("Unknown constant value to lower!");
- }
- switch (CE->getOpcode()) {
- default: {
- // If the code isn't optimized, there may be outstanding folding
- // opportunities. Attempt to fold the expression using DataLayout as a
- // last resort before giving up.
- Constant *C = ConstantFoldConstant(CE, getDataLayout());
- if (C != CE)
- return lowerConstantForGV(C, ProcessingGeneric);
- // Otherwise report the problem to the user.
- std::string S;
- raw_string_ostream OS(S);
- OS << "Unsupported expression in static initializer: ";
- CE->printAsOperand(OS, /*PrintType=*/false,
- !MF ? nullptr : MF->getFunction().getParent());
- report_fatal_error(Twine(OS.str()));
- }
- case Instruction::AddrSpaceCast: {
- // Strip the addrspacecast and pass along the operand
- PointerType *DstTy = cast<PointerType>(CE->getType());
- if (DstTy->getAddressSpace() == 0) {
- return lowerConstantForGV(cast<const Constant>(CE->getOperand(0)), true);
- }
- std::string S;
- raw_string_ostream OS(S);
- OS << "Unsupported expression in static initializer: ";
- CE->printAsOperand(OS, /*PrintType=*/ false,
- !MF ? nullptr : MF->getFunction().getParent());
- report_fatal_error(Twine(OS.str()));
- }
- case Instruction::GetElementPtr: {
- const DataLayout &DL = getDataLayout();
- // Generate a symbolic expression for the byte address
- APInt OffsetAI(DL.getPointerTypeSizeInBits(CE->getType()), 0);
- cast<GEPOperator>(CE)->accumulateConstantOffset(DL, OffsetAI);
- const MCExpr *Base = lowerConstantForGV(CE->getOperand(0),
- ProcessingGeneric);
- if (!OffsetAI)
- return Base;
- int64_t Offset = OffsetAI.getSExtValue();
- return MCBinaryExpr::createAdd(Base, MCConstantExpr::create(Offset, Ctx),
- Ctx);
- }
- case Instruction::Trunc:
- // We emit the value and depend on the assembler to truncate the generated
- // expression properly. This is important for differences between
- // blockaddress labels. Since the two labels are in the same function, it
- // is reasonable to treat their delta as a 32-bit value.
- LLVM_FALLTHROUGH;
- case Instruction::BitCast:
- return lowerConstantForGV(CE->getOperand(0), ProcessingGeneric);
- case Instruction::IntToPtr: {
- const DataLayout &DL = getDataLayout();
- // Handle casts to pointers by changing them into casts to the appropriate
- // integer type. This promotes constant folding and simplifies this code.
- Constant *Op = CE->getOperand(0);
- Op = ConstantExpr::getIntegerCast(Op, DL.getIntPtrType(CV->getType()),
- false/*ZExt*/);
- return lowerConstantForGV(Op, ProcessingGeneric);
- }
- case Instruction::PtrToInt: {
- const DataLayout &DL = getDataLayout();
- // Support only foldable casts to/from pointers that can be eliminated by
- // changing the pointer to the appropriately sized integer type.
- Constant *Op = CE->getOperand(0);
- Type *Ty = CE->getType();
- const MCExpr *OpExpr = lowerConstantForGV(Op, ProcessingGeneric);
- // We can emit the pointer value into this slot if the slot is an
- // integer slot equal to the size of the pointer.
- if (DL.getTypeAllocSize(Ty) == DL.getTypeAllocSize(Op->getType()))
- return OpExpr;
- // Otherwise the pointer is smaller than the resultant integer, mask off
- // the high bits so we are sure to get a proper truncation if the input is
- // a constant expr.
- unsigned InBits = DL.getTypeAllocSizeInBits(Op->getType());
- const MCExpr *MaskExpr = MCConstantExpr::create(~0ULL >> (64-InBits), Ctx);
- return MCBinaryExpr::createAnd(OpExpr, MaskExpr, Ctx);
- }
- // The MC library also has a right-shift operator, but it isn't consistently
- // signed or unsigned between different targets.
- case Instruction::Add: {
- const MCExpr *LHS = lowerConstantForGV(CE->getOperand(0), ProcessingGeneric);
- const MCExpr *RHS = lowerConstantForGV(CE->getOperand(1), ProcessingGeneric);
- switch (CE->getOpcode()) {
- default: llvm_unreachable("Unknown binary operator constant cast expr");
- case Instruction::Add: return MCBinaryExpr::createAdd(LHS, RHS, Ctx);
- }
- }
- }
- }
- // NVPTX-customized copy of MCExpr::print.
- //
- // Writes a textual form of Expr to OS. Target expressions are delegated to
- // their own printImpl, constants print their value, symbol references print
- // the symbol, unary expressions print the operator followed by the operand,
- // and binary expressions support addition only (any other binary opcode is
- // treated as unreachable).
- void NVPTXAsmPrinter::printMCExpr(const MCExpr &Expr, raw_ostream &OS) {
-   // Emits a sub-expression wrapped in parentheses.
-   auto PrintParenthesized = [&](const MCExpr &Sub) {
-     OS << '(';
-     printMCExpr(Sub, OS);
-     OS << ')';
-   };
-   switch (Expr.getKind()) {
-   case MCExpr::Target:
-     cast<MCTargetExpr>(&Expr)->printImpl(OS, MAI);
-     return;
-   case MCExpr::Constant:
-     OS << cast<MCConstantExpr>(Expr).getValue();
-     return;
-   case MCExpr::SymbolRef: {
-     const auto &SymRef = cast<MCSymbolRefExpr>(Expr);
-     SymRef.getSymbol().print(OS, MAI);
-     return;
-   }
-   case MCExpr::Unary: {
-     const auto &Unary = cast<MCUnaryExpr>(Expr);
-     // The operator character goes first, then the operand.
-     switch (Unary.getOpcode()) {
-     case MCUnaryExpr::LNot:
-       OS << '!';
-       break;
-     case MCUnaryExpr::Minus:
-       OS << '-';
-       break;
-     case MCUnaryExpr::Not:
-       OS << '~';
-       break;
-     case MCUnaryExpr::Plus:
-       OS << '+';
-       break;
-     }
-     printMCExpr(*Unary.getSubExpr(), OS);
-     return;
-   }
-   case MCExpr::Binary: {
-     const auto &Binary = cast<MCBinaryExpr>(Expr);
-     const MCExpr *Left = Binary.getLHS();
-     const MCExpr *Right = Binary.getRHS();
-     // The left operand is printed bare when it is a constant, a symbol
-     // reference, or a generic symbol reference; otherwise it gets parens.
-     const bool LeftIsTrivial = isa<MCConstantExpr>(Left) ||
-                                isa<MCSymbolRefExpr>(Left) ||
-                                isa<NVPTXGenericMCSymbolRefExpr>(Left);
-     if (LeftIsTrivial)
-       printMCExpr(*Left, OS);
-     else
-       PrintParenthesized(*Left);
-     // Addition is the only binary operator handled here.
-     if (Binary.getOpcode() != MCBinaryExpr::Add)
-       llvm_unreachable("Unhandled binary operator");
-     // Print "X-42" instead of "X+-42": a negative constant already carries
-     // its own sign, so it is emitted directly and nothing else follows.
-     const auto *RightConst = dyn_cast<MCConstantExpr>(Right);
-     if (RightConst && RightConst->getValue() < 0) {
-       OS << RightConst->getValue();
-       return;
-     }
-     OS << '+';
-     // The right operand is printed bare only when it is a constant or a
-     // symbol reference; otherwise it gets parens.
-     if (isa<MCConstantExpr>(Right) || isa<MCSymbolRefExpr>(Right))
-       printMCExpr(*Right, OS);
-     else
-       PrintParenthesized(*Right);
-     return;
-   }
-   }
-   llvm_unreachable("Invalid expression kind!");
- }
- /// PrintAsmOperand - Emit operand OpNo of the inline asm instruction MI to O.
- ///
- /// A modifier longer than one character is rejected by returning true. The
- /// 'r' modifier, or no modifier at all, prints the operand via printOperand
- /// and returns false. Any other single-character modifier is forwarded to
- /// AsmPrinter::PrintAsmOperand and its result is returned.
- bool NVPTXAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
-                                       const char *ExtraCode, raw_ostream &O) {
-   const bool HasModifier = ExtraCode && ExtraCode[0];
-   if (HasModifier) {
-     // Unknown modifier.
-     if (ExtraCode[1] != 0)
-       return true;
-     // See if this is a generic print operand.
-     if (ExtraCode[0] != 'r')
-       return AsmPrinter::PrintAsmOperand(MI, OpNo, ExtraCode, O);
-   }
-   printOperand(MI, OpNo, O);
-   return false;
- }
- /// Emit the memory operand starting at OpNo of the inline asm instruction MI
- /// to O, enclosed in square brackets.
- ///
- /// No modifiers are accepted: a non-empty ExtraCode yields true without
- /// printing anything. Otherwise the operand is printed and false is returned.
- bool NVPTXAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
-                                             unsigned OpNo,
-                                             const char *ExtraCode,
-                                             raw_ostream &O) {
-   const bool HasModifier = ExtraCode && ExtraCode[0];
-   if (HasModifier) {
-     // Unknown modifier
-     return true;
-   }
-   O << '[';
-   printMemOperand(MI, OpNo, O);
-   O << ']';
-   return false;
- }
- // Print operand number opNum of MI to O.
- //
- // Registers, immediates, floating-point immediates, global addresses and
- // basic blocks are handled; any other operand type is treated as unreachable.
- void NVPTXAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
-                                    raw_ostream &O) {
-   const MachineOperand &Operand = MI->getOperand(opNum);
-   switch (Operand.getType()) {
-   case MachineOperand::MO_Register: {
-     const auto Reg = Operand.getReg();
-     if (!Register::isPhysicalRegister(Reg)) {
-       emitVirtualRegister(Reg, O);
-       break;
-     }
-     // VRDepot is printed as DEPOTNAME followed by the function number rather
-     // than by its register name.
-     if (Reg == NVPTX::VRDepot)
-       O << DEPOTNAME << getFunctionNumber();
-     else
-       O << NVPTXInstPrinter::getRegisterName(Reg);
-     break;
-   }
-   case MachineOperand::MO_Immediate:
-     O << Operand.getImm();
-     break;
-   case MachineOperand::MO_FPImmediate:
-     printFPConstant(Operand.getFPImm(), O);
-     break;
-   case MachineOperand::MO_GlobalAddress:
-     PrintSymbolOperand(Operand, O);
-     break;
-   case MachineOperand::MO_MachineBasicBlock:
-     Operand.getMBB()->getSymbol()->print(O, MAI);
-     break;
-   default:
-     llvm_unreachable("Operand type not supported.");
-   }
- }
- // Print the two-operand memory reference found at opNum and opNum + 1 of MI.
- //
- // The first operand is always printed. With Modifier equal to "add" the
- // second operand follows after ", ". Otherwise the second operand follows
- // after "+", unless it is an immediate zero, in which case it is omitted.
- void NVPTXAsmPrinter::printMemOperand(const MachineInstr *MI, int opNum,
-                                       raw_ostream &O, const char *Modifier) {
-   printOperand(MI, opNum, O);
-   const int SecondIdx = opNum + 1;
-   const bool IsAddForm = Modifier && strcmp(Modifier, "add") == 0;
-   if (IsAddForm) {
-     O << ", ";
-     printOperand(MI, SecondIdx, O);
-     return;
-   }
-   const MachineOperand &Second = MI->getOperand(SecondIdx);
-   // don't print ',0' or '+0'
-   if (Second.isImm() && Second.getImm() == 0)
-     return;
-   O << "+";
-   printOperand(MI, SecondIdx, O);
- }
- // Force static initialization: register NVPTXAsmPrinter as the asm printer
- // for the targets returned by getTheNVPTXTarget32() and getTheNVPTXTarget64().
- extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeNVPTXAsmPrinter() {
-   RegisterAsmPrinter<NVPTXAsmPrinter> PrinterForTarget32(getTheNVPTXTarget32());
-   RegisterAsmPrinter<NVPTXAsmPrinter> PrinterForTarget64(getTheNVPTXTarget64());
- }
|