//===- VarLenCodeEmitterGen.cpp - CEG for variable-length insts -----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// The CodeEmitterGen component for variable-length instructions.
//
// The basic CodeEmitterGen is almost exclusively designed for fixed-
// length instructions. A good analogy for its encoding scheme is how printf
// works: The (immutable) formatting string represents the fixed values in the
// encoded instruction. Placeholders (i.e. %something), on the other hand,
// represent encoding for instruction operands.
// ```
// printf("1101 %src 1001 %dst", <encoded value for operand `src`>,
//                               <encoded value for operand `dst`>);
// ```
// VarLenCodeEmitterGen in this file provides an alternative encoding scheme
// that works more like a C++ stream operator:
// ```
// OS << 0b1101;
// if (Cond)
//   OS << OperandEncoding0;
// OS << 0b1001 << OperandEncoding1;
// ```
// You are free to concatenate arbitrary types (and sizes) of encoding
// fragments on any bit position, bringing more flexibility to defining
// encodings for variable-length instructions.
//
// In a more specific way, instruction encoding is represented by a DAG type
// `Inst` field. Here is an example:
// ```
// dag Inst = (descend 0b1101, (operand "$src", 4), 0b1001,
//                     (operand "$dst", 4));
// ```
// It represents the following instruction encoding:
// ```
// MSB                                                     LSB
// 1101<encoding for operand src>1001<encoding for operand dst>
// ```
// For more details about DAG operators in the above snippet, please
// refer to \file include/llvm/Target/Target.td.
//
// VarLenCodeEmitter will convert the above DAG into the same helper function
// generated by CodeEmitter, `MCCodeEmitter::getBinaryCodeForInstr` (except
// for a few details).
// //===----------------------------------------------------------------------===// #include "VarLenCodeEmitterGen.h" #include "CodeGenHwModes.h" #include "CodeGenInstruction.h" #include "CodeGenTarget.h" #include "InfoByHwMode.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/Support/raw_ostream.h" #include "llvm/TableGen/Error.h" using namespace llvm; namespace { class VarLenCodeEmitterGen { RecordKeeper &Records; DenseMap VarLenInsts; // Emit based values (i.e. fixed bits in the encoded instructions) void emitInstructionBaseValues( raw_ostream &OS, ArrayRef NumberedInstructions, CodeGenTarget &Target, int HwMode = -1); std::string getInstructionCase(Record *R, CodeGenTarget &Target); std::string getInstructionCaseForEncoding(Record *R, Record *EncodingDef, CodeGenTarget &Target); public: explicit VarLenCodeEmitterGen(RecordKeeper &R) : Records(R) {} void run(raw_ostream &OS); }; } // end anonymous namespace // Get the name of custom encoder or decoder, if there is any. // Returns `{encoder name, decoder name}`. 
static std::pair getCustomCoders(ArrayRef Args) { std::pair Result; for (const auto *Arg : Args) { const auto *DI = dyn_cast(Arg); if (!DI) continue; const Init *Op = DI->getOperator(); if (!isa(Op)) continue; // syntax: `( "function name")` StringRef OpName = cast(Op)->getDef()->getName(); if (OpName != "encoder" && OpName != "decoder") continue; if (!DI->getNumArgs() || !isa(DI->getArg(0))) PrintFatalError("expected '" + OpName + "' directive to be followed by a custom function name."); StringRef FuncName = cast(DI->getArg(0))->getValue(); if (OpName == "encoder") Result.first = FuncName; else Result.second = FuncName; } return Result; } VarLenInst::VarLenInst(const DagInit *DI, const RecordVal *TheDef) : TheDef(TheDef), NumBits(0U) { buildRec(DI); for (const auto &S : Segments) NumBits += S.BitWidth; } void VarLenInst::buildRec(const DagInit *DI) { assert(TheDef && "The def record is nullptr ?"); std::string Op = DI->getOperator()->getAsString(); if (Op == "ascend" || Op == "descend") { bool Reverse = Op == "descend"; int i = Reverse ? DI->getNumArgs() - 1 : 0; int e = Reverse ? -1 : DI->getNumArgs(); int s = Reverse ? 
-1 : 1; for (; i != e; i += s) { const Init *Arg = DI->getArg(i); if (const auto *BI = dyn_cast(Arg)) { if (!BI->isComplete()) PrintFatalError(TheDef->getLoc(), "Expecting complete bits init in `" + Op + "`"); Segments.push_back({BI->getNumBits(), BI}); } else if (const auto *BI = dyn_cast(Arg)) { if (!BI->isConcrete()) PrintFatalError(TheDef->getLoc(), "Expecting concrete bit init in `" + Op + "`"); Segments.push_back({1, BI}); } else if (const auto *SubDI = dyn_cast(Arg)) { buildRec(SubDI); } else { PrintFatalError(TheDef->getLoc(), "Unrecognized type of argument in `" + Op + "`: " + Arg->getAsString()); } } } else if (Op == "operand") { // (operand , <# of bits>, // [(encoder )][, (decoder )]) if (DI->getNumArgs() < 2) PrintFatalError(TheDef->getLoc(), "Expecting at least 2 arguments for `operand`"); HasDynamicSegment = true; const Init *OperandName = DI->getArg(0), *NumBits = DI->getArg(1); if (!isa(OperandName) || !isa(NumBits)) PrintFatalError(TheDef->getLoc(), "Invalid argument types for `operand`"); auto NumBitsVal = cast(NumBits)->getValue(); if (NumBitsVal <= 0) PrintFatalError(TheDef->getLoc(), "Invalid number of bits for `operand`"); auto [CustomEncoder, CustomDecoder] = getCustomCoders(DI->getArgs().slice(2)); Segments.push_back({static_cast(NumBitsVal), OperandName, CustomEncoder, CustomDecoder}); } else if (Op == "slice") { // (slice , , , // [(encoder )][, (decoder )]) if (DI->getNumArgs() < 3) PrintFatalError(TheDef->getLoc(), "Expecting at least 3 arguments for `slice`"); HasDynamicSegment = true; Init *OperandName = DI->getArg(0), *HiBit = DI->getArg(1), *LoBit = DI->getArg(2); if (!isa(OperandName) || !isa(HiBit) || !isa(LoBit)) PrintFatalError(TheDef->getLoc(), "Invalid argument types for `slice`"); auto HiBitVal = cast(HiBit)->getValue(), LoBitVal = cast(LoBit)->getValue(); if (HiBitVal < 0 || LoBitVal < 0) PrintFatalError(TheDef->getLoc(), "Invalid bit range for `slice`"); bool NeedSwap = false; unsigned NumBits = 0U; if (HiBitVal < LoBitVal) 
{ NeedSwap = true; NumBits = static_cast(LoBitVal - HiBitVal + 1); } else { NumBits = static_cast(HiBitVal - LoBitVal + 1); } auto [CustomEncoder, CustomDecoder] = getCustomCoders(DI->getArgs().slice(3)); if (NeedSwap) { // Normalization: Hi bit should always be the second argument. Init *const NewArgs[] = {OperandName, LoBit, HiBit}; Segments.push_back({NumBits, DagInit::get(DI->getOperator(), nullptr, NewArgs, {}), CustomEncoder, CustomDecoder}); } else { Segments.push_back({NumBits, DI, CustomEncoder, CustomDecoder}); } } } void VarLenCodeEmitterGen::run(raw_ostream &OS) { CodeGenTarget Target(Records); auto Insts = Records.getAllDerivedDefinitions("Instruction"); auto NumberedInstructions = Target.getInstructionsByEnumValue(); const CodeGenHwModes &HWM = Target.getHwModes(); // The set of HwModes used by instruction encodings. std::set HwModes; for (const CodeGenInstruction *CGI : NumberedInstructions) { Record *R = CGI->TheDef; // Create the corresponding VarLenInst instance. if (R->getValueAsString("Namespace") == "TargetOpcode" || R->getValueAsBit("isPseudo")) continue; if (const RecordVal *RV = R->getValue("EncodingInfos")) { if (auto *DI = dyn_cast_or_null(RV->getValue())) { EncodingInfoByHwMode EBM(DI->getDef(), HWM); for (auto &KV : EBM) { HwModes.insert(KV.first); Record *EncodingDef = KV.second; RecordVal *RV = EncodingDef->getValue("Inst"); DagInit *DI = cast(RV->getValue()); VarLenInsts.insert({EncodingDef, VarLenInst(DI, RV)}); } continue; } } RecordVal *RV = R->getValue("Inst"); DagInit *DI = cast(RV->getValue()); VarLenInsts.insert({R, VarLenInst(DI, RV)}); } // Emit function declaration OS << "void " << Target.getName() << "MCCodeEmitter::getBinaryCodeForInstr(const MCInst &MI,\n" << " SmallVectorImpl &Fixups,\n" << " APInt &Inst,\n" << " APInt &Scratch,\n" << " const MCSubtargetInfo &STI) const {\n"; // Emit instruction base values if (HwModes.empty()) { emitInstructionBaseValues(OS, NumberedInstructions, Target); } else { for (unsigned HwMode : 
HwModes) emitInstructionBaseValues(OS, NumberedInstructions, Target, (int)HwMode); } if (!HwModes.empty()) { OS << " const unsigned **Index;\n"; OS << " const uint64_t *InstBits;\n"; OS << " unsigned HwMode = STI.getHwMode();\n"; OS << " switch (HwMode) {\n"; OS << " default: llvm_unreachable(\"Unknown hardware mode!\"); break;\n"; for (unsigned I : HwModes) { OS << " case " << I << ": InstBits = InstBits_" << HWM.getMode(I).Name << "; Index = Index_" << HWM.getMode(I).Name << "; break;\n"; } OS << " };\n"; } // Emit helper function to retrieve base values. OS << " auto getInstBits = [&](unsigned Opcode) -> APInt {\n" << " unsigned NumBits = Index[Opcode][0];\n" << " if (!NumBits)\n" << " return APInt::getZeroWidth();\n" << " unsigned Idx = Index[Opcode][1];\n" << " ArrayRef Data(&InstBits[Idx], " << "APInt::getNumWords(NumBits));\n" << " return APInt(NumBits, Data);\n" << " };\n"; // Map to accumulate all the cases. std::map> CaseMap; // Construct all cases statement for each opcode for (Record *R : Insts) { if (R->getValueAsString("Namespace") == "TargetOpcode" || R->getValueAsBit("isPseudo")) continue; std::string InstName = (R->getValueAsString("Namespace") + "::" + R->getName()).str(); std::string Case = getInstructionCase(R, Target); CaseMap[Case].push_back(std::move(InstName)); } // Emit initial function code OS << " const unsigned opcode = MI.getOpcode();\n" << " switch (opcode) {\n"; // Emit each case statement for (const auto &C : CaseMap) { const std::string &Case = C.first; const auto &InstList = C.second; ListSeparator LS("\n"); for (const auto &InstName : InstList) OS << LS << " case " << InstName << ":"; OS << " {\n"; OS << Case; OS << " break;\n" << " }\n"; } // Default case: unhandled opcode OS << " default:\n" << " std::string msg;\n" << " raw_string_ostream Msg(msg);\n" << " Msg << \"Not supported instr: \" << MI;\n" << " report_fatal_error(Msg.str().c_str());\n" << " }\n"; OS << "}\n\n"; } static void emitInstBits(raw_ostream &IS, raw_ostream 
&SS, const APInt &Bits, unsigned &Index) { if (!Bits.getNumWords()) { IS.indent(4) << "{/*NumBits*/0, /*Index*/0},"; return; } IS.indent(4) << "{/*NumBits*/" << Bits.getBitWidth() << ", " << "/*Index*/" << Index << "},"; SS.indent(4); for (unsigned I = 0; I < Bits.getNumWords(); ++I, ++Index) SS << "UINT64_C(" << utostr(Bits.getRawData()[I]) << "),"; } void VarLenCodeEmitterGen::emitInstructionBaseValues( raw_ostream &OS, ArrayRef NumberedInstructions, CodeGenTarget &Target, int HwMode) { std::string IndexArray, StorageArray; raw_string_ostream IS(IndexArray), SS(StorageArray); const CodeGenHwModes &HWM = Target.getHwModes(); if (HwMode == -1) { IS << " static const unsigned Index[][2] = {\n"; SS << " static const uint64_t InstBits[] = {\n"; } else { StringRef Name = HWM.getMode(HwMode).Name; IS << " static const unsigned Index_" << Name << "[][2] = {\n"; SS << " static const uint64_t InstBits_" << Name << "[] = {\n"; } unsigned NumFixedValueWords = 0U; for (const CodeGenInstruction *CGI : NumberedInstructions) { Record *R = CGI->TheDef; if (R->getValueAsString("Namespace") == "TargetOpcode" || R->getValueAsBit("isPseudo")) { IS.indent(4) << "{/*NumBits*/0, /*Index*/0},\n"; continue; } Record *EncodingDef = R; if (const RecordVal *RV = R->getValue("EncodingInfos")) { if (auto *DI = dyn_cast_or_null(RV->getValue())) { EncodingInfoByHwMode EBM(DI->getDef(), HWM); if (EBM.hasMode(HwMode)) EncodingDef = EBM.get(HwMode); } } auto It = VarLenInsts.find(EncodingDef); if (It == VarLenInsts.end()) PrintFatalError(EncodingDef, "VarLenInst not found for this record"); const VarLenInst &VLI = It->second; unsigned i = 0U, BitWidth = VLI.size(); // Start by filling in fixed values. APInt Value(BitWidth, 0); auto SI = VLI.begin(), SE = VLI.end(); // Scan through all the segments that have fixed-bits values. 
while (i < BitWidth && SI != SE) { unsigned SegmentNumBits = SI->BitWidth; if (const auto *BI = dyn_cast(SI->Value)) { for (unsigned Idx = 0U; Idx != SegmentNumBits; ++Idx) { auto *B = cast(BI->getBit(Idx)); Value.setBitVal(i + Idx, B->getValue()); } } if (const auto *BI = dyn_cast(SI->Value)) Value.setBitVal(i, BI->getValue()); i += SegmentNumBits; ++SI; } emitInstBits(IS, SS, Value, NumFixedValueWords); IS << '\t' << "// " << R->getName() << "\n"; if (Value.getNumWords()) SS << '\t' << "// " << R->getName() << "\n"; } IS.indent(4) << "{/*NumBits*/0, /*Index*/0}\n };\n"; SS.indent(4) << "UINT64_C(0)\n };\n"; OS << IS.str() << SS.str(); } std::string VarLenCodeEmitterGen::getInstructionCase(Record *R, CodeGenTarget &Target) { std::string Case; if (const RecordVal *RV = R->getValue("EncodingInfos")) { if (auto *DI = dyn_cast_or_null(RV->getValue())) { const CodeGenHwModes &HWM = Target.getHwModes(); EncodingInfoByHwMode EBM(DI->getDef(), HWM); Case += " switch (HwMode) {\n"; Case += " default: llvm_unreachable(\"Unhandled HwMode\");\n"; for (auto &KV : EBM) { Case += " case " + itostr(KV.first) + ": {\n"; Case += getInstructionCaseForEncoding(R, KV.second, Target); Case += " break;\n"; Case += " }\n"; } Case += " }\n"; return Case; } } return getInstructionCaseForEncoding(R, R, Target); } std::string VarLenCodeEmitterGen::getInstructionCaseForEncoding( Record *R, Record *EncodingDef, CodeGenTarget &Target) { auto It = VarLenInsts.find(EncodingDef); if (It == VarLenInsts.end()) PrintFatalError(EncodingDef, "Parsed encoding record not found"); const VarLenInst &VLI = It->second; size_t BitWidth = VLI.size(); CodeGenInstruction &CGI = Target.getInstruction(R); std::string Case; raw_string_ostream SS(Case); // Resize the scratch buffer. if (BitWidth && !VLI.isFixedValueOnly()) SS.indent(6) << "Scratch = Scratch.zext(" << BitWidth << ");\n"; // Populate based value. SS.indent(6) << "Inst = getInstBits(opcode);\n"; // Process each segment in VLI. 
size_t Offset = 0U; for (const auto &ES : VLI) { unsigned NumBits = ES.BitWidth; const Init *Val = ES.Value; // If it's a StringInit or DagInit, it's a reference to an operand // or part of an operand. if (isa(Val) || isa(Val)) { StringRef OperandName; unsigned LoBit = 0U; if (const auto *SV = dyn_cast(Val)) { OperandName = SV->getValue(); } else { // Normalized: (slice , , ) const auto *DV = cast(Val); OperandName = cast(DV->getArg(0))->getValue(); LoBit = static_cast(cast(DV->getArg(2))->getValue()); } auto OpIdx = CGI.Operands.ParseOperandName(OperandName); unsigned FlatOpIdx = CGI.Operands.getFlattenedOperandNumber(OpIdx); StringRef CustomEncoder = CGI.Operands[OpIdx.first].EncoderMethodNames[OpIdx.second]; if (ES.CustomEncoder.size()) CustomEncoder = ES.CustomEncoder; SS.indent(6) << "Scratch.clearAllBits();\n"; SS.indent(6) << "// op: " << OperandName.drop_front(1) << "\n"; if (CustomEncoder.empty()) SS.indent(6) << "getMachineOpValue(MI, MI.getOperand(" << utostr(FlatOpIdx) << ")"; else SS.indent(6) << CustomEncoder << "(MI, /*OpIdx=*/" << utostr(FlatOpIdx); SS << ", /*Pos=*/" << utostr(Offset) << ", Scratch, Fixups, STI);\n"; SS.indent(6) << "Inst.insertBits(" << "Scratch.extractBits(" << utostr(NumBits) << ", " << utostr(LoBit) << ")" << ", " << Offset << ");\n"; } Offset += NumBits; } StringRef PostEmitter = R->getValueAsString("PostEncoderMethod"); if (!PostEmitter.empty()) SS.indent(6) << "Inst = " << PostEmitter << "(MI, Inst, STI);\n"; return Case; } namespace llvm { void emitVarLenCodeEmitter(RecordKeeper &R, raw_ostream &OS) { VarLenCodeEmitterGen(R).run(OS); } } // end namespace llvm