123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513 |
- //===- VarLenCodeEmitterGen.cpp - CEG for variable-length insts -----------===//
- //
- // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
- // See https://llvm.org/LICENSE.txt for license information.
- // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
- //
- //===----------------------------------------------------------------------===//
- //
- // The CodeEmitterGen component for variable-length instructions.
- //
- // The basic CodeEmitterGen is almost exclusively designed for fixed-
- // length instructions. A good analogy for its encoding scheme is how printf
- // works: the (immutable) format string represents the fixed values in the
- // encoded instruction. Placeholders (i.e. %something), on the other hand,
- // represent the encodings of instruction operands.
- // ```
- // printf("1101 %src 1001 %dst", <encoded value for operand `src`>,
- // <encoded value for operand `dst`>);
- // ```
- // VarLenCodeEmitterGen in this file provides an alternative encoding scheme
- // that works more like a C++ stream operator:
- // ```
- // OS << 0b1101;
- // if (Cond)
- // OS << OperandEncoding0;
- // OS << 0b1001 << OperandEncoding1;
- // ```
- // You are free to concatenate arbitrary types (and sizes) of encoding
- // fragments at any bit position, which gives more flexibility in defining
- // encodings for variable-length instructions.
- //
- // In a more specific way, instruction encoding is represented by a DAG type
- // `Inst` field. Here is an example:
- // ```
- // dag Inst = (descend 0b1101, (operand "$src", 4), 0b1001,
- // (operand "$dst", 4));
- // ```
- // It represents the following instruction encoding:
- // ```
- // MSB LSB
- // 1101<encoding for operand src>1001<encoding for operand dst>
- // ```
- // For more details about DAG operators in the above snippet, please
- // refer to \file include/llvm/Target/Target.td.
- //
- // VarLenCodeEmitter will convert the above DAG into the same helper function
- // generated by CodeEmitter, `MCCodeEmitter::getBinaryCodeForInstr` (except
- // for a few details).
- //
- //===----------------------------------------------------------------------===//
- #include "VarLenCodeEmitterGen.h"
- #include "CodeGenHwModes.h"
- #include "CodeGenInstruction.h"
- #include "CodeGenTarget.h"
- #include "InfoByHwMode.h"
- #include "llvm/ADT/ArrayRef.h"
- #include "llvm/ADT/DenseMap.h"
- #include "llvm/Support/raw_ostream.h"
- #include "llvm/TableGen/Error.h"
- using namespace llvm;
- namespace {
- class VarLenCodeEmitterGen {
- RecordKeeper &Records;
- DenseMap<Record *, VarLenInst> VarLenInsts;
- // Emit based values (i.e. fixed bits in the encoded instructions)
- void emitInstructionBaseValues(
- raw_ostream &OS,
- ArrayRef<const CodeGenInstruction *> NumberedInstructions,
- CodeGenTarget &Target, int HwMode = -1);
- std::string getInstructionCase(Record *R, CodeGenTarget &Target);
- std::string getInstructionCaseForEncoding(Record *R, Record *EncodingDef,
- CodeGenTarget &Target);
- public:
- explicit VarLenCodeEmitterGen(RecordKeeper &R) : Records(R) {}
- void run(raw_ostream &OS);
- };
- } // end anonymous namespace
- // Get the name of custom encoder or decoder, if there is any.
- // Returns `{encoder name, decoder name}`.
- static std::pair<StringRef, StringRef> getCustomCoders(ArrayRef<Init *> Args) {
- std::pair<StringRef, StringRef> Result;
- for (const auto *Arg : Args) {
- const auto *DI = dyn_cast<DagInit>(Arg);
- if (!DI)
- continue;
- const Init *Op = DI->getOperator();
- if (!isa<DefInit>(Op))
- continue;
- // syntax: `(<encoder | decoder> "function name")`
- StringRef OpName = cast<DefInit>(Op)->getDef()->getName();
- if (OpName != "encoder" && OpName != "decoder")
- continue;
- if (!DI->getNumArgs() || !isa<StringInit>(DI->getArg(0)))
- PrintFatalError("expected '" + OpName +
- "' directive to be followed by a custom function name.");
- StringRef FuncName = cast<StringInit>(DI->getArg(0))->getValue();
- if (OpName == "encoder")
- Result.first = FuncName;
- else
- Result.second = FuncName;
- }
- return Result;
- }
- VarLenInst::VarLenInst(const DagInit *DI, const RecordVal *TheDef)
- : TheDef(TheDef), NumBits(0U) {
- buildRec(DI);
- for (const auto &S : Segments)
- NumBits += S.BitWidth;
- }
- void VarLenInst::buildRec(const DagInit *DI) {
- assert(TheDef && "The def record is nullptr ?");
- std::string Op = DI->getOperator()->getAsString();
- if (Op == "ascend" || Op == "descend") {
- bool Reverse = Op == "descend";
- int i = Reverse ? DI->getNumArgs() - 1 : 0;
- int e = Reverse ? -1 : DI->getNumArgs();
- int s = Reverse ? -1 : 1;
- for (; i != e; i += s) {
- const Init *Arg = DI->getArg(i);
- if (const auto *BI = dyn_cast<BitsInit>(Arg)) {
- if (!BI->isComplete())
- PrintFatalError(TheDef->getLoc(),
- "Expecting complete bits init in `" + Op + "`");
- Segments.push_back({BI->getNumBits(), BI});
- } else if (const auto *BI = dyn_cast<BitInit>(Arg)) {
- if (!BI->isConcrete())
- PrintFatalError(TheDef->getLoc(),
- "Expecting concrete bit init in `" + Op + "`");
- Segments.push_back({1, BI});
- } else if (const auto *SubDI = dyn_cast<DagInit>(Arg)) {
- buildRec(SubDI);
- } else {
- PrintFatalError(TheDef->getLoc(), "Unrecognized type of argument in `" +
- Op + "`: " + Arg->getAsString());
- }
- }
- } else if (Op == "operand") {
- // (operand <operand name>, <# of bits>,
- // [(encoder <custom encoder>)][, (decoder <custom decoder>)])
- if (DI->getNumArgs() < 2)
- PrintFatalError(TheDef->getLoc(),
- "Expecting at least 2 arguments for `operand`");
- HasDynamicSegment = true;
- const Init *OperandName = DI->getArg(0), *NumBits = DI->getArg(1);
- if (!isa<StringInit>(OperandName) || !isa<IntInit>(NumBits))
- PrintFatalError(TheDef->getLoc(), "Invalid argument types for `operand`");
- auto NumBitsVal = cast<IntInit>(NumBits)->getValue();
- if (NumBitsVal <= 0)
- PrintFatalError(TheDef->getLoc(), "Invalid number of bits for `operand`");
- auto [CustomEncoder, CustomDecoder] =
- getCustomCoders(DI->getArgs().slice(2));
- Segments.push_back({static_cast<unsigned>(NumBitsVal), OperandName,
- CustomEncoder, CustomDecoder});
- } else if (Op == "slice") {
- // (slice <operand name>, <high / low bit>, <low / high bit>,
- // [(encoder <custom encoder>)][, (decoder <custom decoder>)])
- if (DI->getNumArgs() < 3)
- PrintFatalError(TheDef->getLoc(),
- "Expecting at least 3 arguments for `slice`");
- HasDynamicSegment = true;
- Init *OperandName = DI->getArg(0), *HiBit = DI->getArg(1),
- *LoBit = DI->getArg(2);
- if (!isa<StringInit>(OperandName) || !isa<IntInit>(HiBit) ||
- !isa<IntInit>(LoBit))
- PrintFatalError(TheDef->getLoc(), "Invalid argument types for `slice`");
- auto HiBitVal = cast<IntInit>(HiBit)->getValue(),
- LoBitVal = cast<IntInit>(LoBit)->getValue();
- if (HiBitVal < 0 || LoBitVal < 0)
- PrintFatalError(TheDef->getLoc(), "Invalid bit range for `slice`");
- bool NeedSwap = false;
- unsigned NumBits = 0U;
- if (HiBitVal < LoBitVal) {
- NeedSwap = true;
- NumBits = static_cast<unsigned>(LoBitVal - HiBitVal + 1);
- } else {
- NumBits = static_cast<unsigned>(HiBitVal - LoBitVal + 1);
- }
- auto [CustomEncoder, CustomDecoder] =
- getCustomCoders(DI->getArgs().slice(3));
- if (NeedSwap) {
- // Normalization: Hi bit should always be the second argument.
- Init *const NewArgs[] = {OperandName, LoBit, HiBit};
- Segments.push_back({NumBits,
- DagInit::get(DI->getOperator(), nullptr, NewArgs, {}),
- CustomEncoder, CustomDecoder});
- } else {
- Segments.push_back({NumBits, DI, CustomEncoder, CustomDecoder});
- }
- }
- }
- void VarLenCodeEmitterGen::run(raw_ostream &OS) {
- CodeGenTarget Target(Records);
- auto Insts = Records.getAllDerivedDefinitions("Instruction");
- auto NumberedInstructions = Target.getInstructionsByEnumValue();
- const CodeGenHwModes &HWM = Target.getHwModes();
- // The set of HwModes used by instruction encodings.
- std::set<unsigned> HwModes;
- for (const CodeGenInstruction *CGI : NumberedInstructions) {
- Record *R = CGI->TheDef;
- // Create the corresponding VarLenInst instance.
- if (R->getValueAsString("Namespace") == "TargetOpcode" ||
- R->getValueAsBit("isPseudo"))
- continue;
- if (const RecordVal *RV = R->getValue("EncodingInfos")) {
- if (auto *DI = dyn_cast_or_null<DefInit>(RV->getValue())) {
- EncodingInfoByHwMode EBM(DI->getDef(), HWM);
- for (auto &KV : EBM) {
- HwModes.insert(KV.first);
- Record *EncodingDef = KV.second;
- RecordVal *RV = EncodingDef->getValue("Inst");
- DagInit *DI = cast<DagInit>(RV->getValue());
- VarLenInsts.insert({EncodingDef, VarLenInst(DI, RV)});
- }
- continue;
- }
- }
- RecordVal *RV = R->getValue("Inst");
- DagInit *DI = cast<DagInit>(RV->getValue());
- VarLenInsts.insert({R, VarLenInst(DI, RV)});
- }
- // Emit function declaration
- OS << "void " << Target.getName()
- << "MCCodeEmitter::getBinaryCodeForInstr(const MCInst &MI,\n"
- << " SmallVectorImpl<MCFixup> &Fixups,\n"
- << " APInt &Inst,\n"
- << " APInt &Scratch,\n"
- << " const MCSubtargetInfo &STI) const {\n";
- // Emit instruction base values
- if (HwModes.empty()) {
- emitInstructionBaseValues(OS, NumberedInstructions, Target);
- } else {
- for (unsigned HwMode : HwModes)
- emitInstructionBaseValues(OS, NumberedInstructions, Target, (int)HwMode);
- }
- if (!HwModes.empty()) {
- OS << " const unsigned **Index;\n";
- OS << " const uint64_t *InstBits;\n";
- OS << " unsigned HwMode = STI.getHwMode();\n";
- OS << " switch (HwMode) {\n";
- OS << " default: llvm_unreachable(\"Unknown hardware mode!\"); break;\n";
- for (unsigned I : HwModes) {
- OS << " case " << I << ": InstBits = InstBits_" << HWM.getMode(I).Name
- << "; Index = Index_" << HWM.getMode(I).Name << "; break;\n";
- }
- OS << " };\n";
- }
- // Emit helper function to retrieve base values.
- OS << " auto getInstBits = [&](unsigned Opcode) -> APInt {\n"
- << " unsigned NumBits = Index[Opcode][0];\n"
- << " if (!NumBits)\n"
- << " return APInt::getZeroWidth();\n"
- << " unsigned Idx = Index[Opcode][1];\n"
- << " ArrayRef<uint64_t> Data(&InstBits[Idx], "
- << "APInt::getNumWords(NumBits));\n"
- << " return APInt(NumBits, Data);\n"
- << " };\n";
- // Map to accumulate all the cases.
- std::map<std::string, std::vector<std::string>> CaseMap;
- // Construct all cases statement for each opcode
- for (Record *R : Insts) {
- if (R->getValueAsString("Namespace") == "TargetOpcode" ||
- R->getValueAsBit("isPseudo"))
- continue;
- std::string InstName =
- (R->getValueAsString("Namespace") + "::" + R->getName()).str();
- std::string Case = getInstructionCase(R, Target);
- CaseMap[Case].push_back(std::move(InstName));
- }
- // Emit initial function code
- OS << " const unsigned opcode = MI.getOpcode();\n"
- << " switch (opcode) {\n";
- // Emit each case statement
- for (const auto &C : CaseMap) {
- const std::string &Case = C.first;
- const auto &InstList = C.second;
- ListSeparator LS("\n");
- for (const auto &InstName : InstList)
- OS << LS << " case " << InstName << ":";
- OS << " {\n";
- OS << Case;
- OS << " break;\n"
- << " }\n";
- }
- // Default case: unhandled opcode
- OS << " default:\n"
- << " std::string msg;\n"
- << " raw_string_ostream Msg(msg);\n"
- << " Msg << \"Not supported instr: \" << MI;\n"
- << " report_fatal_error(Msg.str().c_str());\n"
- << " }\n";
- OS << "}\n\n";
- }
- static void emitInstBits(raw_ostream &IS, raw_ostream &SS, const APInt &Bits,
- unsigned &Index) {
- if (!Bits.getNumWords()) {
- IS.indent(4) << "{/*NumBits*/0, /*Index*/0},";
- return;
- }
- IS.indent(4) << "{/*NumBits*/" << Bits.getBitWidth() << ", "
- << "/*Index*/" << Index << "},";
- SS.indent(4);
- for (unsigned I = 0; I < Bits.getNumWords(); ++I, ++Index)
- SS << "UINT64_C(" << utostr(Bits.getRawData()[I]) << "),";
- }
- void VarLenCodeEmitterGen::emitInstructionBaseValues(
- raw_ostream &OS, ArrayRef<const CodeGenInstruction *> NumberedInstructions,
- CodeGenTarget &Target, int HwMode) {
- std::string IndexArray, StorageArray;
- raw_string_ostream IS(IndexArray), SS(StorageArray);
- const CodeGenHwModes &HWM = Target.getHwModes();
- if (HwMode == -1) {
- IS << " static const unsigned Index[][2] = {\n";
- SS << " static const uint64_t InstBits[] = {\n";
- } else {
- StringRef Name = HWM.getMode(HwMode).Name;
- IS << " static const unsigned Index_" << Name << "[][2] = {\n";
- SS << " static const uint64_t InstBits_" << Name << "[] = {\n";
- }
- unsigned NumFixedValueWords = 0U;
- for (const CodeGenInstruction *CGI : NumberedInstructions) {
- Record *R = CGI->TheDef;
- if (R->getValueAsString("Namespace") == "TargetOpcode" ||
- R->getValueAsBit("isPseudo")) {
- IS.indent(4) << "{/*NumBits*/0, /*Index*/0},\n";
- continue;
- }
- Record *EncodingDef = R;
- if (const RecordVal *RV = R->getValue("EncodingInfos")) {
- if (auto *DI = dyn_cast_or_null<DefInit>(RV->getValue())) {
- EncodingInfoByHwMode EBM(DI->getDef(), HWM);
- if (EBM.hasMode(HwMode))
- EncodingDef = EBM.get(HwMode);
- }
- }
- auto It = VarLenInsts.find(EncodingDef);
- if (It == VarLenInsts.end())
- PrintFatalError(EncodingDef, "VarLenInst not found for this record");
- const VarLenInst &VLI = It->second;
- unsigned i = 0U, BitWidth = VLI.size();
- // Start by filling in fixed values.
- APInt Value(BitWidth, 0);
- auto SI = VLI.begin(), SE = VLI.end();
- // Scan through all the segments that have fixed-bits values.
- while (i < BitWidth && SI != SE) {
- unsigned SegmentNumBits = SI->BitWidth;
- if (const auto *BI = dyn_cast<BitsInit>(SI->Value)) {
- for (unsigned Idx = 0U; Idx != SegmentNumBits; ++Idx) {
- auto *B = cast<BitInit>(BI->getBit(Idx));
- Value.setBitVal(i + Idx, B->getValue());
- }
- }
- if (const auto *BI = dyn_cast<BitInit>(SI->Value))
- Value.setBitVal(i, BI->getValue());
- i += SegmentNumBits;
- ++SI;
- }
- emitInstBits(IS, SS, Value, NumFixedValueWords);
- IS << '\t' << "// " << R->getName() << "\n";
- if (Value.getNumWords())
- SS << '\t' << "// " << R->getName() << "\n";
- }
- IS.indent(4) << "{/*NumBits*/0, /*Index*/0}\n };\n";
- SS.indent(4) << "UINT64_C(0)\n };\n";
- OS << IS.str() << SS.str();
- }
- std::string VarLenCodeEmitterGen::getInstructionCase(Record *R,
- CodeGenTarget &Target) {
- std::string Case;
- if (const RecordVal *RV = R->getValue("EncodingInfos")) {
- if (auto *DI = dyn_cast_or_null<DefInit>(RV->getValue())) {
- const CodeGenHwModes &HWM = Target.getHwModes();
- EncodingInfoByHwMode EBM(DI->getDef(), HWM);
- Case += " switch (HwMode) {\n";
- Case += " default: llvm_unreachable(\"Unhandled HwMode\");\n";
- for (auto &KV : EBM) {
- Case += " case " + itostr(KV.first) + ": {\n";
- Case += getInstructionCaseForEncoding(R, KV.second, Target);
- Case += " break;\n";
- Case += " }\n";
- }
- Case += " }\n";
- return Case;
- }
- }
- return getInstructionCaseForEncoding(R, R, Target);
- }
- std::string VarLenCodeEmitterGen::getInstructionCaseForEncoding(
- Record *R, Record *EncodingDef, CodeGenTarget &Target) {
- auto It = VarLenInsts.find(EncodingDef);
- if (It == VarLenInsts.end())
- PrintFatalError(EncodingDef, "Parsed encoding record not found");
- const VarLenInst &VLI = It->second;
- size_t BitWidth = VLI.size();
- CodeGenInstruction &CGI = Target.getInstruction(R);
- std::string Case;
- raw_string_ostream SS(Case);
- // Resize the scratch buffer.
- if (BitWidth && !VLI.isFixedValueOnly())
- SS.indent(6) << "Scratch = Scratch.zext(" << BitWidth << ");\n";
- // Populate based value.
- SS.indent(6) << "Inst = getInstBits(opcode);\n";
- // Process each segment in VLI.
- size_t Offset = 0U;
- for (const auto &ES : VLI) {
- unsigned NumBits = ES.BitWidth;
- const Init *Val = ES.Value;
- // If it's a StringInit or DagInit, it's a reference to an operand
- // or part of an operand.
- if (isa<StringInit>(Val) || isa<DagInit>(Val)) {
- StringRef OperandName;
- unsigned LoBit = 0U;
- if (const auto *SV = dyn_cast<StringInit>(Val)) {
- OperandName = SV->getValue();
- } else {
- // Normalized: (slice <operand name>, <high bit>, <low bit>)
- const auto *DV = cast<DagInit>(Val);
- OperandName = cast<StringInit>(DV->getArg(0))->getValue();
- LoBit = static_cast<unsigned>(cast<IntInit>(DV->getArg(2))->getValue());
- }
- auto OpIdx = CGI.Operands.ParseOperandName(OperandName);
- unsigned FlatOpIdx = CGI.Operands.getFlattenedOperandNumber(OpIdx);
- StringRef CustomEncoder =
- CGI.Operands[OpIdx.first].EncoderMethodNames[OpIdx.second];
- if (ES.CustomEncoder.size())
- CustomEncoder = ES.CustomEncoder;
- SS.indent(6) << "Scratch.clearAllBits();\n";
- SS.indent(6) << "// op: " << OperandName.drop_front(1) << "\n";
- if (CustomEncoder.empty())
- SS.indent(6) << "getMachineOpValue(MI, MI.getOperand("
- << utostr(FlatOpIdx) << ")";
- else
- SS.indent(6) << CustomEncoder << "(MI, /*OpIdx=*/" << utostr(FlatOpIdx);
- SS << ", /*Pos=*/" << utostr(Offset) << ", Scratch, Fixups, STI);\n";
- SS.indent(6) << "Inst.insertBits("
- << "Scratch.extractBits(" << utostr(NumBits) << ", "
- << utostr(LoBit) << ")"
- << ", " << Offset << ");\n";
- }
- Offset += NumBits;
- }
- StringRef PostEmitter = R->getValueAsString("PostEncoderMethod");
- if (!PostEmitter.empty())
- SS.indent(6) << "Inst = " << PostEmitter << "(MI, Inst, STI);\n";
- return Case;
- }
- namespace llvm {
- void emitVarLenCodeEmitter(RecordKeeper &R, raw_ostream &OS) {
- VarLenCodeEmitterGen(R).run(OS);
- }
- } // end namespace llvm
|