X86FastTileConfig.cpp 6.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188
  1. //===-- X86FastTileConfig.cpp - Fast Tile Register Configure---------------===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
/// \file Pass to configure the shape of AMX physical registers.
/// AMX registers need to be configured before use. The ldtilecfg instruction
/// is inserted before the FastRegAllocation pass; however, at that time we
/// don't know the shape of each physical tile register, because register
/// allocation is not done yet. This pass runs after the register allocation
/// pass. It collects the shape information of each physical tile register
/// and stores the shape in the stack slot that is allocated for loading the
/// config into the tile config register.
  17. //
  18. //===----------------------------------------------------------------------===//
  19. #include "X86.h"
  20. #include "X86InstrBuilder.h"
  21. #include "X86MachineFunctionInfo.h"
  22. #include "X86RegisterInfo.h"
  23. #include "X86Subtarget.h"
  24. #include "llvm/CodeGen/MachineFrameInfo.h"
  25. #include "llvm/CodeGen/MachineFunctionPass.h"
  26. #include "llvm/CodeGen/MachineInstr.h"
  27. #include "llvm/CodeGen/MachineRegisterInfo.h"
  28. #include "llvm/CodeGen/Passes.h"
  29. #include "llvm/CodeGen/TargetInstrInfo.h"
  30. #include "llvm/CodeGen/TargetRegisterInfo.h"
  31. #include "llvm/InitializePasses.h"
  32. using namespace llvm;
  33. #define DEBUG_TYPE "fasttileconfig"
  34. namespace {
  35. class X86FastTileConfig : public MachineFunctionPass {
  36. // context
  37. MachineFunction *MF = nullptr;
  38. const TargetInstrInfo *TII = nullptr;
  39. MachineRegisterInfo *MRI = nullptr;
  40. const TargetRegisterInfo *TRI = nullptr;
  41. X86MachineFunctionInfo *X86FI = nullptr;
  42. bool configBasicBlock(MachineBasicBlock &MBB);
  43. public:
  44. X86FastTileConfig() : MachineFunctionPass(ID) {}
  45. /// Return the pass name.
  46. StringRef getPassName() const override {
  47. return "Fast Tile Register Configure";
  48. }
  49. void getAnalysisUsage(AnalysisUsage &AU) const override {
  50. AU.setPreservesAll();
  51. MachineFunctionPass::getAnalysisUsage(AU);
  52. }
  53. /// Perform register allocation.
  54. bool runOnMachineFunction(MachineFunction &MFunc) override;
  55. MachineFunctionProperties getRequiredProperties() const override {
  56. return MachineFunctionProperties().set(
  57. MachineFunctionProperties::Property::NoPHIs);
  58. }
  59. static char ID;
  60. };
  61. } // end anonymous namespace
  62. char X86FastTileConfig::ID = 0;
  63. INITIALIZE_PASS_BEGIN(X86FastTileConfig, DEBUG_TYPE,
  64. "Fast Tile Register Configure", false, false)
  65. INITIALIZE_PASS_END(X86FastTileConfig, DEBUG_TYPE,
  66. "Fast Tile Register Configure", false, false)
  67. static bool isTileDef(MachineRegisterInfo *MRI, MachineInstr &MI) {
  68. // There is no phi instruction after register allocation.
  69. assert(MI.isPHI() == false);
  70. // The instruction must have 3 operands: tile def, row, col.
  71. // It should be AMX pseudo instruction that have shape operand.
  72. if (MI.isDebugInstr() || MI.isCopy() || MI.getNumOperands() < 3 ||
  73. !MI.isPseudo())
  74. return false;
  75. MachineOperand &MO = MI.getOperand(0);
  76. if (MO.isReg()) {
  77. Register Reg = MO.getReg();
  78. // FIXME it may be used after Greedy RA and the physical
  79. // register is not rewritten yet.
  80. if (Reg.isVirtual() &&
  81. MRI->getRegClass(Reg)->getID() == X86::TILERegClassID)
  82. return true;
  83. if (Reg >= X86::TMM0 && Reg <= X86::TMM7)
  84. return true;
  85. }
  86. return false;
  87. }
  88. // PreTileConfig should configure the tile registers based on basic
  89. // block.
  90. bool X86FastTileConfig::configBasicBlock(MachineBasicBlock &MBB) {
  91. bool Change = false;
  92. SmallVector<std::pair<unsigned, ShapeT>, 6> ShapeInfos;
  93. for (MachineInstr &MI : reverse(MBB)) {
  94. if (!isTileDef(MRI, MI) && MI.getOpcode() != X86::PLDTILECFGV)
  95. continue;
  96. // AMX instructions that define tile register.
  97. if (MI.getOpcode() != X86::PLDTILECFGV) {
  98. MachineOperand &Row = MI.getOperand(1);
  99. MachineOperand &Col = MI.getOperand(2);
  100. unsigned TMMIdx = MI.getOperand(0).getReg() - X86::TMM0;
  101. ShapeInfos.push_back({TMMIdx, ShapeT(&Row, &Col)});
  102. } else { // PLDTILECFGV
  103. // Rewrite the shape information to memory. Stack slot should have
  104. // been initialized to zero in pre config.
  105. int SS = MI.getOperand(0).getIndex(); // tile config stack slot.
  106. for (auto &ShapeInfo : ShapeInfos) {
  107. DebugLoc DL;
  108. unsigned TMMIdx = ShapeInfo.first;
  109. Register RowReg = ShapeInfo.second.getRow()->getReg();
  110. Register ColReg = ShapeInfo.second.getCol()->getReg();
  111. // Here is the data format for the tile config.
  112. // 0 palette
  113. // 1 start_row
  114. // 2-15 reserved, must be zero
  115. // 16-17 tile0.colsb Tile 0 bytes per row.
  116. // 18-19 tile1.colsb Tile 1 bytes per row.
  117. // 20-21 tile2.colsb Tile 2 bytes per row.
  118. // ... (sequence continues)
  119. // 30-31 tile7.colsb Tile 7 bytes per row.
  120. // 32-47 reserved, must be zero
  121. // 48 tile0.rows Tile 0 rows.
  122. // 49 tile1.rows Tile 1 rows.
  123. // 50 tile2.rows Tile 2 rows.
  124. // ... (sequence continues)
  125. // 55 tile7.rows Tile 7 rows.
  126. // 56-63 reserved, must be zero
  127. int RowOffset = 48 + TMMIdx;
  128. int ColOffset = 16 + TMMIdx * 2;
  129. Register SubRowReg = TRI->getSubReg(RowReg, X86::sub_8bit);
  130. BuildMI(MBB, MI, DL, TII->get(X86::IMPLICIT_DEF), SubRowReg);
  131. MachineInstrBuilder StoreRow =
  132. BuildMI(MBB, MI, DL, TII->get(X86::MOV8mr));
  133. addFrameReference(StoreRow, SS, RowOffset).addReg(SubRowReg);
  134. MachineInstrBuilder StoreCol =
  135. BuildMI(MBB, MI, DL, TII->get(X86::MOV16mr));
  136. addFrameReference(StoreCol, SS, ColOffset).addReg(ColReg);
  137. }
  138. ShapeInfos.clear();
  139. Change = true;
  140. }
  141. }
  142. if (Change)
  143. X86FI->setHasVirtualTileReg(true);
  144. return Change;
  145. }
  146. bool X86FastTileConfig::runOnMachineFunction(MachineFunction &MFunc) {
  147. MF = &MFunc;
  148. MRI = &MFunc.getRegInfo();
  149. const TargetSubtargetInfo *ST = &MFunc.getSubtarget<X86Subtarget>();
  150. TRI = ST->getRegisterInfo();
  151. TII = MFunc.getSubtarget().getInstrInfo();
  152. X86FI = MFunc.getInfo<X86MachineFunctionInfo>();
  153. bool Change = false;
  154. // Loop over all of the basic blocks, eliminating virtual register references
  155. for (MachineBasicBlock &MBB : MFunc)
  156. Change |= configBasicBlock(MBB);
  157. return Change;
  158. }
  159. FunctionPass *llvm::createX86FastTileConfigPass() {
  160. return new X86FastTileConfig();
  161. }