X86FastTileConfig.cpp 9.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305
  1. //===-- X86FastTileConfig.cpp - Fast Tile Register Configure---------------===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
/// \file Pass to configure the shape of AMX physical registers.
/// AMX registers need to be configured before use. The ldtilecfg instruction
/// is inserted before the FastRegAllocation pass, but at that time we don't
/// know the shape of each physical tile register, because register
/// allocation is not done yet. This pass runs after the register allocation
/// pass. It collects the shape information of each physical tile register
/// and stores the shapes in the stack slot that is allocated for loading
/// the tile config register.
  17. //
  18. //===----------------------------------------------------------------------===//
  19. #include "X86.h"
  20. #include "X86InstrBuilder.h"
  21. #include "X86MachineFunctionInfo.h"
  22. #include "X86RegisterInfo.h"
  23. #include "X86Subtarget.h"
  24. #include "llvm/CodeGen/MachineFrameInfo.h"
  25. #include "llvm/CodeGen/MachineFunctionPass.h"
  26. #include "llvm/CodeGen/MachineInstr.h"
  27. #include "llvm/CodeGen/MachineRegisterInfo.h"
  28. #include "llvm/CodeGen/Passes.h"
  29. #include "llvm/CodeGen/TargetInstrInfo.h"
  30. #include "llvm/CodeGen/TargetRegisterInfo.h"
  31. #include "llvm/InitializePasses.h"
  32. using namespace llvm;
  33. #define DEBUG_TYPE "fasttileconfig"
  34. namespace {
  35. class X86FastTileConfig : public MachineFunctionPass {
  36. // context
  37. MachineFunction *MF = nullptr;
  38. const X86Subtarget *ST = nullptr;
  39. const TargetRegisterInfo *TRI = nullptr;
  40. const TargetInstrInfo *TII = nullptr;
  41. MachineRegisterInfo *MRI = nullptr;
  42. X86MachineFunctionInfo *X86FI = nullptr;
  43. MachineInstr *getTileConfigPoint();
  44. void tileConfig();
  45. public:
  46. X86FastTileConfig() : MachineFunctionPass(ID) {}
  47. bool fastTileConfig();
  48. bool isTileLoad(MachineInstr &MI);
  49. bool isTileStore(MachineInstr &MI);
  50. bool isAMXInstr(MachineInstr &MI);
  51. MachineInstr *getKeyAMXInstr(MachineInstr *MI);
  52. void getTileShapesCfg(MachineInstr *MI,
  53. SmallVector<MachineOperand *> &ShapedTiles);
  54. void getShapeCfgInstrs(MachineInstr *MI,
  55. std::map<unsigned, MachineInstr *> &RowCfgs,
  56. std::map<unsigned, MachineInstr *> &ColCfgs);
  57. /// Return the pass name.
  58. StringRef getPassName() const override {
  59. return "Fast Tile Register Configure";
  60. }
  61. void materializeTileCfg(MachineInstr *MI);
  62. void rewriteTileCfg(SmallVector<MachineOperand *> &ShapedTiles,
  63. std::map<unsigned, MachineInstr *> &RowCfgs,
  64. std::map<unsigned, MachineInstr *> &ColCfgs);
  65. /// Perform register allocation.
  66. bool runOnMachineFunction(MachineFunction &MFunc) override;
  67. MachineFunctionProperties getRequiredProperties() const override {
  68. return MachineFunctionProperties().set(
  69. MachineFunctionProperties::Property::NoPHIs);
  70. }
  71. static char ID;
  72. };
  73. } // end anonymous namespace
  74. char X86FastTileConfig::ID = 0;
  75. INITIALIZE_PASS_BEGIN(X86FastTileConfig, DEBUG_TYPE,
  76. "Fast Tile Register Configure", false, false)
  77. INITIALIZE_PASS_END(X86FastTileConfig, DEBUG_TYPE,
  78. "Fast Tile Register Configure", false, false)
  79. static bool isTilePhysReg(MachineOperand &Op) {
  80. if (!Op.isReg())
  81. return false;
  82. Register Reg = Op.getReg();
  83. if (Reg >= X86::TMM0 && Reg <= X86::TMM7)
  84. return true;
  85. return false;
  86. }
  87. static unsigned getTilePhysRegIdx(MachineOperand *Op) {
  88. assert(isTilePhysReg(*Op) && "Tile Operand is invalid");
  89. return Op->getReg() - X86::TMM0;
  90. }
  91. static inline void adjustRowCfg(unsigned TIdx, MachineInstr *MI) {
  92. unsigned Offset = 48 + TIdx;
  93. MI->getOperand(3).ChangeToImmediate(Offset);
  94. }
  95. static inline void adjustColCfg(unsigned TIdx, MachineInstr *MI) {
  96. unsigned Offset = 16 + TIdx * 2;
  97. MI->getOperand(3).ChangeToImmediate(Offset);
  98. }
  99. bool X86FastTileConfig::isTileLoad(MachineInstr &MI) {
  100. return MI.getOpcode() == X86::PTILELOADDV ||
  101. MI.getOpcode() == X86::PTILELOADDT1V;
  102. }
  103. bool X86FastTileConfig::isTileStore(MachineInstr &MI) {
  104. return MI.getOpcode() == X86::PTILESTOREDV;
  105. }
  106. bool X86FastTileConfig::isAMXInstr(MachineInstr &MI) {
  107. // TODO: May need to handle some special nontile amx instrucion.
  108. if (MI.getOpcode() == X86::PLDTILECFGV || MI.isDebugInstr())
  109. return false;
  110. return llvm::any_of(MI.operands(), isTilePhysReg);
  111. }
  112. MachineInstr *X86FastTileConfig::getKeyAMXInstr(MachineInstr *MI) {
  113. auto Cfg = MachineBasicBlock::iterator(MI);
  114. MachineBasicBlock *MBB = MI->getParent();
  115. MachineInstr *KeyMI = nullptr;
  116. int KeyAMXNum = 0;
  117. for (auto II = Cfg; II != MBB->end(); II++) {
  118. if (isTileLoad(*II)) {
  119. KeyMI = &*II;
  120. continue;
  121. }
  122. if (isTileStore(*II)) {
  123. assert(KeyMI && "Key AMX Should be found before!");
  124. break;
  125. }
  126. if (isAMXInstr(*II)) {
  127. assert((KeyAMXNum == 0) && "Too many Key AMX instruction!");
  128. KeyAMXNum++;
  129. KeyMI = &*II;
  130. }
  131. }
  132. assert(KeyMI && "There must be an AMX instruction.");
  133. return KeyMI;
  134. }
  135. // Orderly get the tiles in key amx instruction, uses before defs.
  136. void X86FastTileConfig::getTileShapesCfg(
  137. MachineInstr *CfgMI, SmallVector<MachineOperand *> &ShapedTiles) {
  138. MachineInstr *KeyMI = getKeyAMXInstr(CfgMI);
  139. SmallVector<MachineOperand *> DefTiles;
  140. for (MachineOperand &MO : KeyMI->operands()) {
  141. if (!isTilePhysReg(MO))
  142. continue;
  143. if (MO.isDef())
  144. DefTiles.push_back(&MO);
  145. else
  146. ShapedTiles.push_back(&MO);
  147. }
  148. ShapedTiles.append(DefTiles);
  149. }
  150. // We pre-config the shapes at position named with "amx.tmm.N.shape.row* and
  151. // amx.shape.N.col*" at pass "Pre AMX Tile Config".
  152. // The 'N' implies the order of tiles in key amx intrinsic.
  153. void X86FastTileConfig::getShapeCfgInstrs(
  154. MachineInstr *MI, std::map<unsigned, MachineInstr *> &RowCfgs,
  155. std::map<unsigned, MachineInstr *> &ColCfgs) {
  156. auto Cfg = MachineBasicBlock::iterator(MI);
  157. MachineBasicBlock *MBB = MI->getParent();
  158. for (auto II = Cfg; II != MBB->begin(); II--) {
  159. if (isAMXInstr(*II) || II->isTerminator() || II->isCall())
  160. break;
  161. if (!II->mayStore() || !II->hasOneMemOperand())
  162. continue;
  163. const Value *MemPtr = II->memoperands()[0]->getValue();
  164. if (!MemPtr)
  165. continue;
  166. StringRef Name = MemPtr->getName();
  167. if (!Name.startswith("amx.tmm."))
  168. continue;
  169. // Get the 'N'th tile shape config in key amx instruction.
  170. auto N = Name.find(".shape");
  171. StringRef STileIdx = Name.slice(8, N);
  172. unsigned Idx;
  173. STileIdx.getAsInteger(10, Idx);
  174. // And related them with their store instructions.
  175. if (Name.contains("row"))
  176. RowCfgs[Idx] = &*II;
  177. else if (Name.contains("col"))
  178. ColCfgs[Idx] = &*II;
  179. else
  180. llvm_unreachable("Invalid tile shape info!");
  181. }
  182. assert((RowCfgs.size() == ColCfgs.size()) &&
  183. "The number of tile row and col must be equal!");
  184. }
  185. // Here is the data format for the tile config.
  186. // 0 palette = 1 now.
  187. // 1 start_row = 0 now.
  188. // 2-15 reserved, must be zero
  189. // 16-17 tile0.colsb Tile 0 bytes per row.
  190. // 18-19 tile1.colsb Tile 1 bytes per row.
  191. // 20-21 tile2.colsb Tile 2 bytes per row.
  192. // ... (sequence continues)
  193. // 30-31 tile7.colsb Tile 7 bytes per row.
  194. // 32-47 reserved, must be zero
  195. // 48 tile0.rows Tile 0 rows.
  196. // 49 tile1.rows Tile 1 rows.
  197. // 50 tile2.rows Tile 2 rows.
  198. // ... (sequence continues)
  199. // 55 tile7.rows Tile 7 rows.
  200. // 56-63 reserved, must be zero
  201. void X86FastTileConfig::rewriteTileCfg(
  202. SmallVector<MachineOperand *> &ShapedTiles,
  203. std::map<unsigned, MachineInstr *> &RowCfgs,
  204. std::map<unsigned, MachineInstr *> &ColCfgs) {
  205. assert((RowCfgs.size() == ShapedTiles.size()) &&
  206. "The number of tile shapes not equal with the number of tiles!");
  207. // Orderly get the tiles and adjust the shape config.
  208. for (unsigned I = 0, E = ShapedTiles.size(); I < E; I++) {
  209. MachineOperand *MO = ShapedTiles[I];
  210. unsigned TmmIdx = getTilePhysRegIdx(MO);
  211. if (I == TmmIdx)
  212. continue;
  213. adjustRowCfg(TmmIdx, RowCfgs[I]);
  214. adjustColCfg(TmmIdx, ColCfgs[I]);
  215. }
  216. }
  217. // We have already preconfig the shapes before fast register allocation at
  218. // X86PreAMXConfig::preWriteTileCfg(). Now, we have done fast register
  219. // allocation, the shapes pre-written before may not rightly corresponding
  220. // to the correct tmm registers, so we need adjust them.
  221. void X86FastTileConfig::materializeTileCfg(MachineInstr *CfgMI) {
  222. SmallVector<MachineOperand *> ShapedTiles;
  223. std::map<unsigned, MachineInstr *> RowCfgs;
  224. std::map<unsigned, MachineInstr *> ColCfgs;
  225. // Orderly keep the tile uses and def in ShapedTiles;
  226. getTileShapesCfg(CfgMI, ShapedTiles);
  227. assert(ShapedTiles.size() && "Not find shapes config!");
  228. getShapeCfgInstrs(CfgMI, RowCfgs, ColCfgs);
  229. rewriteTileCfg(ShapedTiles, RowCfgs, ColCfgs);
  230. }
  231. bool X86FastTileConfig::fastTileConfig() {
  232. bool Changed = false;
  233. for (MachineBasicBlock &MBB : *MF) {
  234. SmallVector<MachineInstr *, 2> CFGs;
  235. for (MachineInstr &MI : MBB)
  236. if (MI.getOpcode() == X86::PLDTILECFGV)
  237. CFGs.push_back(&MI);
  238. for (auto *MI : CFGs)
  239. materializeTileCfg(MI);
  240. if (!CFGs.empty())
  241. Changed = true;
  242. }
  243. if (Changed)
  244. X86FI->setHasVirtualTileReg(true);
  245. return Changed;
  246. }
  247. bool X86FastTileConfig::runOnMachineFunction(MachineFunction &MFunc) {
  248. MF = &MFunc;
  249. MRI = &MFunc.getRegInfo();
  250. ST = &MFunc.getSubtarget<X86Subtarget>();
  251. TRI = ST->getRegisterInfo();
  252. TII = MFunc.getSubtarget().getInstrInfo();
  253. X86FI = MFunc.getInfo<X86MachineFunctionInfo>();
  254. return fastTileConfig();
  255. }
  256. FunctionPass *llvm::createX86FastTileConfigPass() {
  257. return new X86FastTileConfig();
  258. }