VarLenCodeEmitterGen.cpp 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513
  1. //===- VarLenCodeEmitterGen.cpp - CEG for variable-length insts -----------===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. // The CodeEmitterGen component for variable-length instructions.
  10. //
  11. // The basic CodeEmitterGen is almost exclusively designed for fixed-
  12. // length instructions. A good analogy for its encoding scheme is how printf
  13. // works: The (immutable) formatting string represents the fixed values in the
  14. // encoded instruction. Placeholders (i.e. %something), on the other hand,
  15. // represent encoding for instruction operands.
  16. // ```
  17. // printf("1101 %src 1001 %dst", <encoded value for operand `src`>,
  18. // <encoded value for operand `dst`>);
  19. // ```
  20. // VarLenCodeEmitterGen in this file provides an alternative encoding scheme
  21. // that works more like a C++ stream operator:
  22. // ```
  23. // OS << 0b1101;
  24. // if (Cond)
  25. // OS << OperandEncoding0;
  26. // OS << 0b1001 << OperandEncoding1;
  27. // ```
  28. // You are free to concatenate encoding fragments of arbitrary types (and
  29. // sizes) at any bit position, which gives more flexibility when defining
  30. // encodings for variable-length instructions.
  31. //
  32. // In a more specific way, instruction encoding is represented by a DAG type
  33. // `Inst` field. Here is an example:
  34. // ```
  35. // dag Inst = (descend 0b1101, (operand "$src", 4), 0b1001,
  36. // (operand "$dst", 4));
  37. // ```
  38. // It represents the following instruction encoding:
  39. // ```
  40. // MSB LSB
  41. // 1101<encoding for operand src>1001<encoding for operand dst>
  42. // ```
  43. // For more details about DAG operators in the above snippet, please
  44. // refer to \file include/llvm/Target/Target.td.
  45. //
  46. // VarLenCodeEmitter will convert the above DAG into the same helper function
  47. // generated by CodeEmitter, `MCCodeEmitter::getBinaryCodeForInstr` (except
  48. // for a few details).
  49. //
  50. //===----------------------------------------------------------------------===//
  51. #include "VarLenCodeEmitterGen.h"
  52. #include "CodeGenHwModes.h"
  53. #include "CodeGenInstruction.h"
  54. #include "CodeGenTarget.h"
  55. #include "InfoByHwMode.h"
  56. #include "llvm/ADT/ArrayRef.h"
  57. #include "llvm/ADT/DenseMap.h"
  58. #include "llvm/Support/raw_ostream.h"
  59. #include "llvm/TableGen/Error.h"
  60. using namespace llvm;
  61. namespace {
  62. class VarLenCodeEmitterGen {
  63. RecordKeeper &Records;
  64. DenseMap<Record *, VarLenInst> VarLenInsts;
  65. // Emit based values (i.e. fixed bits in the encoded instructions)
  66. void emitInstructionBaseValues(
  67. raw_ostream &OS,
  68. ArrayRef<const CodeGenInstruction *> NumberedInstructions,
  69. CodeGenTarget &Target, int HwMode = -1);
  70. std::string getInstructionCase(Record *R, CodeGenTarget &Target);
  71. std::string getInstructionCaseForEncoding(Record *R, Record *EncodingDef,
  72. CodeGenTarget &Target);
  73. public:
  74. explicit VarLenCodeEmitterGen(RecordKeeper &R) : Records(R) {}
  75. void run(raw_ostream &OS);
  76. };
  77. } // end anonymous namespace
  78. // Get the name of custom encoder or decoder, if there is any.
  79. // Returns `{encoder name, decoder name}`.
  80. static std::pair<StringRef, StringRef> getCustomCoders(ArrayRef<Init *> Args) {
  81. std::pair<StringRef, StringRef> Result;
  82. for (const auto *Arg : Args) {
  83. const auto *DI = dyn_cast<DagInit>(Arg);
  84. if (!DI)
  85. continue;
  86. const Init *Op = DI->getOperator();
  87. if (!isa<DefInit>(Op))
  88. continue;
  89. // syntax: `(<encoder | decoder> "function name")`
  90. StringRef OpName = cast<DefInit>(Op)->getDef()->getName();
  91. if (OpName != "encoder" && OpName != "decoder")
  92. continue;
  93. if (!DI->getNumArgs() || !isa<StringInit>(DI->getArg(0)))
  94. PrintFatalError("expected '" + OpName +
  95. "' directive to be followed by a custom function name.");
  96. StringRef FuncName = cast<StringInit>(DI->getArg(0))->getValue();
  97. if (OpName == "encoder")
  98. Result.first = FuncName;
  99. else
  100. Result.second = FuncName;
  101. }
  102. return Result;
  103. }
  104. VarLenInst::VarLenInst(const DagInit *DI, const RecordVal *TheDef)
  105. : TheDef(TheDef), NumBits(0U) {
  106. buildRec(DI);
  107. for (const auto &S : Segments)
  108. NumBits += S.BitWidth;
  109. }
  110. void VarLenInst::buildRec(const DagInit *DI) {
  111. assert(TheDef && "The def record is nullptr ?");
  112. std::string Op = DI->getOperator()->getAsString();
  113. if (Op == "ascend" || Op == "descend") {
  114. bool Reverse = Op == "descend";
  115. int i = Reverse ? DI->getNumArgs() - 1 : 0;
  116. int e = Reverse ? -1 : DI->getNumArgs();
  117. int s = Reverse ? -1 : 1;
  118. for (; i != e; i += s) {
  119. const Init *Arg = DI->getArg(i);
  120. if (const auto *BI = dyn_cast<BitsInit>(Arg)) {
  121. if (!BI->isComplete())
  122. PrintFatalError(TheDef->getLoc(),
  123. "Expecting complete bits init in `" + Op + "`");
  124. Segments.push_back({BI->getNumBits(), BI});
  125. } else if (const auto *BI = dyn_cast<BitInit>(Arg)) {
  126. if (!BI->isConcrete())
  127. PrintFatalError(TheDef->getLoc(),
  128. "Expecting concrete bit init in `" + Op + "`");
  129. Segments.push_back({1, BI});
  130. } else if (const auto *SubDI = dyn_cast<DagInit>(Arg)) {
  131. buildRec(SubDI);
  132. } else {
  133. PrintFatalError(TheDef->getLoc(), "Unrecognized type of argument in `" +
  134. Op + "`: " + Arg->getAsString());
  135. }
  136. }
  137. } else if (Op == "operand") {
  138. // (operand <operand name>, <# of bits>,
  139. // [(encoder <custom encoder>)][, (decoder <custom decoder>)])
  140. if (DI->getNumArgs() < 2)
  141. PrintFatalError(TheDef->getLoc(),
  142. "Expecting at least 2 arguments for `operand`");
  143. HasDynamicSegment = true;
  144. const Init *OperandName = DI->getArg(0), *NumBits = DI->getArg(1);
  145. if (!isa<StringInit>(OperandName) || !isa<IntInit>(NumBits))
  146. PrintFatalError(TheDef->getLoc(), "Invalid argument types for `operand`");
  147. auto NumBitsVal = cast<IntInit>(NumBits)->getValue();
  148. if (NumBitsVal <= 0)
  149. PrintFatalError(TheDef->getLoc(), "Invalid number of bits for `operand`");
  150. auto [CustomEncoder, CustomDecoder] =
  151. getCustomCoders(DI->getArgs().slice(2));
  152. Segments.push_back({static_cast<unsigned>(NumBitsVal), OperandName,
  153. CustomEncoder, CustomDecoder});
  154. } else if (Op == "slice") {
  155. // (slice <operand name>, <high / low bit>, <low / high bit>,
  156. // [(encoder <custom encoder>)][, (decoder <custom decoder>)])
  157. if (DI->getNumArgs() < 3)
  158. PrintFatalError(TheDef->getLoc(),
  159. "Expecting at least 3 arguments for `slice`");
  160. HasDynamicSegment = true;
  161. Init *OperandName = DI->getArg(0), *HiBit = DI->getArg(1),
  162. *LoBit = DI->getArg(2);
  163. if (!isa<StringInit>(OperandName) || !isa<IntInit>(HiBit) ||
  164. !isa<IntInit>(LoBit))
  165. PrintFatalError(TheDef->getLoc(), "Invalid argument types for `slice`");
  166. auto HiBitVal = cast<IntInit>(HiBit)->getValue(),
  167. LoBitVal = cast<IntInit>(LoBit)->getValue();
  168. if (HiBitVal < 0 || LoBitVal < 0)
  169. PrintFatalError(TheDef->getLoc(), "Invalid bit range for `slice`");
  170. bool NeedSwap = false;
  171. unsigned NumBits = 0U;
  172. if (HiBitVal < LoBitVal) {
  173. NeedSwap = true;
  174. NumBits = static_cast<unsigned>(LoBitVal - HiBitVal + 1);
  175. } else {
  176. NumBits = static_cast<unsigned>(HiBitVal - LoBitVal + 1);
  177. }
  178. auto [CustomEncoder, CustomDecoder] =
  179. getCustomCoders(DI->getArgs().slice(3));
  180. if (NeedSwap) {
  181. // Normalization: Hi bit should always be the second argument.
  182. Init *const NewArgs[] = {OperandName, LoBit, HiBit};
  183. Segments.push_back({NumBits,
  184. DagInit::get(DI->getOperator(), nullptr, NewArgs, {}),
  185. CustomEncoder, CustomDecoder});
  186. } else {
  187. Segments.push_back({NumBits, DI, CustomEncoder, CustomDecoder});
  188. }
  189. }
  190. }
  191. void VarLenCodeEmitterGen::run(raw_ostream &OS) {
  192. CodeGenTarget Target(Records);
  193. auto Insts = Records.getAllDerivedDefinitions("Instruction");
  194. auto NumberedInstructions = Target.getInstructionsByEnumValue();
  195. const CodeGenHwModes &HWM = Target.getHwModes();
  196. // The set of HwModes used by instruction encodings.
  197. std::set<unsigned> HwModes;
  198. for (const CodeGenInstruction *CGI : NumberedInstructions) {
  199. Record *R = CGI->TheDef;
  200. // Create the corresponding VarLenInst instance.
  201. if (R->getValueAsString("Namespace") == "TargetOpcode" ||
  202. R->getValueAsBit("isPseudo"))
  203. continue;
  204. if (const RecordVal *RV = R->getValue("EncodingInfos")) {
  205. if (auto *DI = dyn_cast_or_null<DefInit>(RV->getValue())) {
  206. EncodingInfoByHwMode EBM(DI->getDef(), HWM);
  207. for (auto &KV : EBM) {
  208. HwModes.insert(KV.first);
  209. Record *EncodingDef = KV.second;
  210. RecordVal *RV = EncodingDef->getValue("Inst");
  211. DagInit *DI = cast<DagInit>(RV->getValue());
  212. VarLenInsts.insert({EncodingDef, VarLenInst(DI, RV)});
  213. }
  214. continue;
  215. }
  216. }
  217. RecordVal *RV = R->getValue("Inst");
  218. DagInit *DI = cast<DagInit>(RV->getValue());
  219. VarLenInsts.insert({R, VarLenInst(DI, RV)});
  220. }
  221. // Emit function declaration
  222. OS << "void " << Target.getName()
  223. << "MCCodeEmitter::getBinaryCodeForInstr(const MCInst &MI,\n"
  224. << " SmallVectorImpl<MCFixup> &Fixups,\n"
  225. << " APInt &Inst,\n"
  226. << " APInt &Scratch,\n"
  227. << " const MCSubtargetInfo &STI) const {\n";
  228. // Emit instruction base values
  229. if (HwModes.empty()) {
  230. emitInstructionBaseValues(OS, NumberedInstructions, Target);
  231. } else {
  232. for (unsigned HwMode : HwModes)
  233. emitInstructionBaseValues(OS, NumberedInstructions, Target, (int)HwMode);
  234. }
  235. if (!HwModes.empty()) {
  236. OS << " const unsigned **Index;\n";
  237. OS << " const uint64_t *InstBits;\n";
  238. OS << " unsigned HwMode = STI.getHwMode();\n";
  239. OS << " switch (HwMode) {\n";
  240. OS << " default: llvm_unreachable(\"Unknown hardware mode!\"); break;\n";
  241. for (unsigned I : HwModes) {
  242. OS << " case " << I << ": InstBits = InstBits_" << HWM.getMode(I).Name
  243. << "; Index = Index_" << HWM.getMode(I).Name << "; break;\n";
  244. }
  245. OS << " };\n";
  246. }
  247. // Emit helper function to retrieve base values.
  248. OS << " auto getInstBits = [&](unsigned Opcode) -> APInt {\n"
  249. << " unsigned NumBits = Index[Opcode][0];\n"
  250. << " if (!NumBits)\n"
  251. << " return APInt::getZeroWidth();\n"
  252. << " unsigned Idx = Index[Opcode][1];\n"
  253. << " ArrayRef<uint64_t> Data(&InstBits[Idx], "
  254. << "APInt::getNumWords(NumBits));\n"
  255. << " return APInt(NumBits, Data);\n"
  256. << " };\n";
  257. // Map to accumulate all the cases.
  258. std::map<std::string, std::vector<std::string>> CaseMap;
  259. // Construct all cases statement for each opcode
  260. for (Record *R : Insts) {
  261. if (R->getValueAsString("Namespace") == "TargetOpcode" ||
  262. R->getValueAsBit("isPseudo"))
  263. continue;
  264. std::string InstName =
  265. (R->getValueAsString("Namespace") + "::" + R->getName()).str();
  266. std::string Case = getInstructionCase(R, Target);
  267. CaseMap[Case].push_back(std::move(InstName));
  268. }
  269. // Emit initial function code
  270. OS << " const unsigned opcode = MI.getOpcode();\n"
  271. << " switch (opcode) {\n";
  272. // Emit each case statement
  273. for (const auto &C : CaseMap) {
  274. const std::string &Case = C.first;
  275. const auto &InstList = C.second;
  276. ListSeparator LS("\n");
  277. for (const auto &InstName : InstList)
  278. OS << LS << " case " << InstName << ":";
  279. OS << " {\n";
  280. OS << Case;
  281. OS << " break;\n"
  282. << " }\n";
  283. }
  284. // Default case: unhandled opcode
  285. OS << " default:\n"
  286. << " std::string msg;\n"
  287. << " raw_string_ostream Msg(msg);\n"
  288. << " Msg << \"Not supported instr: \" << MI;\n"
  289. << " report_fatal_error(Msg.str().c_str());\n"
  290. << " }\n";
  291. OS << "}\n\n";
  292. }
  293. static void emitInstBits(raw_ostream &IS, raw_ostream &SS, const APInt &Bits,
  294. unsigned &Index) {
  295. if (!Bits.getNumWords()) {
  296. IS.indent(4) << "{/*NumBits*/0, /*Index*/0},";
  297. return;
  298. }
  299. IS.indent(4) << "{/*NumBits*/" << Bits.getBitWidth() << ", "
  300. << "/*Index*/" << Index << "},";
  301. SS.indent(4);
  302. for (unsigned I = 0; I < Bits.getNumWords(); ++I, ++Index)
  303. SS << "UINT64_C(" << utostr(Bits.getRawData()[I]) << "),";
  304. }
  305. void VarLenCodeEmitterGen::emitInstructionBaseValues(
  306. raw_ostream &OS, ArrayRef<const CodeGenInstruction *> NumberedInstructions,
  307. CodeGenTarget &Target, int HwMode) {
  308. std::string IndexArray, StorageArray;
  309. raw_string_ostream IS(IndexArray), SS(StorageArray);
  310. const CodeGenHwModes &HWM = Target.getHwModes();
  311. if (HwMode == -1) {
  312. IS << " static const unsigned Index[][2] = {\n";
  313. SS << " static const uint64_t InstBits[] = {\n";
  314. } else {
  315. StringRef Name = HWM.getMode(HwMode).Name;
  316. IS << " static const unsigned Index_" << Name << "[][2] = {\n";
  317. SS << " static const uint64_t InstBits_" << Name << "[] = {\n";
  318. }
  319. unsigned NumFixedValueWords = 0U;
  320. for (const CodeGenInstruction *CGI : NumberedInstructions) {
  321. Record *R = CGI->TheDef;
  322. if (R->getValueAsString("Namespace") == "TargetOpcode" ||
  323. R->getValueAsBit("isPseudo")) {
  324. IS.indent(4) << "{/*NumBits*/0, /*Index*/0},\n";
  325. continue;
  326. }
  327. Record *EncodingDef = R;
  328. if (const RecordVal *RV = R->getValue("EncodingInfos")) {
  329. if (auto *DI = dyn_cast_or_null<DefInit>(RV->getValue())) {
  330. EncodingInfoByHwMode EBM(DI->getDef(), HWM);
  331. if (EBM.hasMode(HwMode))
  332. EncodingDef = EBM.get(HwMode);
  333. }
  334. }
  335. auto It = VarLenInsts.find(EncodingDef);
  336. if (It == VarLenInsts.end())
  337. PrintFatalError(EncodingDef, "VarLenInst not found for this record");
  338. const VarLenInst &VLI = It->second;
  339. unsigned i = 0U, BitWidth = VLI.size();
  340. // Start by filling in fixed values.
  341. APInt Value(BitWidth, 0);
  342. auto SI = VLI.begin(), SE = VLI.end();
  343. // Scan through all the segments that have fixed-bits values.
  344. while (i < BitWidth && SI != SE) {
  345. unsigned SegmentNumBits = SI->BitWidth;
  346. if (const auto *BI = dyn_cast<BitsInit>(SI->Value)) {
  347. for (unsigned Idx = 0U; Idx != SegmentNumBits; ++Idx) {
  348. auto *B = cast<BitInit>(BI->getBit(Idx));
  349. Value.setBitVal(i + Idx, B->getValue());
  350. }
  351. }
  352. if (const auto *BI = dyn_cast<BitInit>(SI->Value))
  353. Value.setBitVal(i, BI->getValue());
  354. i += SegmentNumBits;
  355. ++SI;
  356. }
  357. emitInstBits(IS, SS, Value, NumFixedValueWords);
  358. IS << '\t' << "// " << R->getName() << "\n";
  359. if (Value.getNumWords())
  360. SS << '\t' << "// " << R->getName() << "\n";
  361. }
  362. IS.indent(4) << "{/*NumBits*/0, /*Index*/0}\n };\n";
  363. SS.indent(4) << "UINT64_C(0)\n };\n";
  364. OS << IS.str() << SS.str();
  365. }
  366. std::string VarLenCodeEmitterGen::getInstructionCase(Record *R,
  367. CodeGenTarget &Target) {
  368. std::string Case;
  369. if (const RecordVal *RV = R->getValue("EncodingInfos")) {
  370. if (auto *DI = dyn_cast_or_null<DefInit>(RV->getValue())) {
  371. const CodeGenHwModes &HWM = Target.getHwModes();
  372. EncodingInfoByHwMode EBM(DI->getDef(), HWM);
  373. Case += " switch (HwMode) {\n";
  374. Case += " default: llvm_unreachable(\"Unhandled HwMode\");\n";
  375. for (auto &KV : EBM) {
  376. Case += " case " + itostr(KV.first) + ": {\n";
  377. Case += getInstructionCaseForEncoding(R, KV.second, Target);
  378. Case += " break;\n";
  379. Case += " }\n";
  380. }
  381. Case += " }\n";
  382. return Case;
  383. }
  384. }
  385. return getInstructionCaseForEncoding(R, R, Target);
  386. }
  387. std::string VarLenCodeEmitterGen::getInstructionCaseForEncoding(
  388. Record *R, Record *EncodingDef, CodeGenTarget &Target) {
  389. auto It = VarLenInsts.find(EncodingDef);
  390. if (It == VarLenInsts.end())
  391. PrintFatalError(EncodingDef, "Parsed encoding record not found");
  392. const VarLenInst &VLI = It->second;
  393. size_t BitWidth = VLI.size();
  394. CodeGenInstruction &CGI = Target.getInstruction(R);
  395. std::string Case;
  396. raw_string_ostream SS(Case);
  397. // Resize the scratch buffer.
  398. if (BitWidth && !VLI.isFixedValueOnly())
  399. SS.indent(6) << "Scratch = Scratch.zext(" << BitWidth << ");\n";
  400. // Populate based value.
  401. SS.indent(6) << "Inst = getInstBits(opcode);\n";
  402. // Process each segment in VLI.
  403. size_t Offset = 0U;
  404. for (const auto &ES : VLI) {
  405. unsigned NumBits = ES.BitWidth;
  406. const Init *Val = ES.Value;
  407. // If it's a StringInit or DagInit, it's a reference to an operand
  408. // or part of an operand.
  409. if (isa<StringInit>(Val) || isa<DagInit>(Val)) {
  410. StringRef OperandName;
  411. unsigned LoBit = 0U;
  412. if (const auto *SV = dyn_cast<StringInit>(Val)) {
  413. OperandName = SV->getValue();
  414. } else {
  415. // Normalized: (slice <operand name>, <high bit>, <low bit>)
  416. const auto *DV = cast<DagInit>(Val);
  417. OperandName = cast<StringInit>(DV->getArg(0))->getValue();
  418. LoBit = static_cast<unsigned>(cast<IntInit>(DV->getArg(2))->getValue());
  419. }
  420. auto OpIdx = CGI.Operands.ParseOperandName(OperandName);
  421. unsigned FlatOpIdx = CGI.Operands.getFlattenedOperandNumber(OpIdx);
  422. StringRef CustomEncoder =
  423. CGI.Operands[OpIdx.first].EncoderMethodNames[OpIdx.second];
  424. if (ES.CustomEncoder.size())
  425. CustomEncoder = ES.CustomEncoder;
  426. SS.indent(6) << "Scratch.clearAllBits();\n";
  427. SS.indent(6) << "// op: " << OperandName.drop_front(1) << "\n";
  428. if (CustomEncoder.empty())
  429. SS.indent(6) << "getMachineOpValue(MI, MI.getOperand("
  430. << utostr(FlatOpIdx) << ")";
  431. else
  432. SS.indent(6) << CustomEncoder << "(MI, /*OpIdx=*/" << utostr(FlatOpIdx);
  433. SS << ", /*Pos=*/" << utostr(Offset) << ", Scratch, Fixups, STI);\n";
  434. SS.indent(6) << "Inst.insertBits("
  435. << "Scratch.extractBits(" << utostr(NumBits) << ", "
  436. << utostr(LoBit) << ")"
  437. << ", " << Offset << ");\n";
  438. }
  439. Offset += NumBits;
  440. }
  441. StringRef PostEmitter = R->getValueAsString("PostEncoderMethod");
  442. if (!PostEmitter.empty())
  443. SS.indent(6) << "Inst = " << PostEmitter << "(MI, Inst, STI);\n";
  444. return Case;
  445. }
  446. namespace llvm {
  447. void emitVarLenCodeEmitter(RecordKeeper &R, raw_ostream &OS) {
  448. VarLenCodeEmitterGen(R).run(OS);
  449. }
  450. } // end namespace llvm