//===-- X86FastPreTileConfig.cpp - Fast Tile Register Configure------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file Pass to preconfig the shape of physical tile registers.
/// It inserts ldtilecfg ahead of each group of tile registers. The algorithm
/// walks each instruction of the basic block in reverse order. All the tile
/// registers that live out of the basic block are spilled and reloaded before
/// their users. It also checks the dependency of the shape to ensure the
/// shape is defined before ldtilecfg.
//
//===----------------------------------------------------------------------===//

#include "X86.h"
#include "X86InstrBuilder.h"
#include "X86MachineFunctionInfo.h"
#include "X86RegisterInfo.h"
#include "X86Subtarget.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/InitializePasses.h"
#include "llvm/Support/Debug.h"

using namespace llvm;

#define DEBUG_TYPE "fastpretileconfig"

STATISTIC(NumStores, "Number of stores added");
STATISTIC(NumLoads, "Number of loads added");

namespace {

class X86FastPreTileConfig : public MachineFunctionPass {
  MachineFunction *MF = nullptr;
  const X86Subtarget *ST = nullptr;
  const TargetInstrInfo *TII = nullptr;
  MachineRegisterInfo *MRI = nullptr;
  X86MachineFunctionInfo *X86FI = nullptr;
  MachineFrameInfo *MFI = nullptr;
  const TargetRegisterInfo *TRI = nullptr;
  MachineBasicBlock *MBB = nullptr;
  int CfgSS = -1;
  struct PHIInfo {
    Register Row;
    Register Col;
    Register StackAddr;
  };
  DenseMap<MachineInstr *, struct PHIInfo> VisitedPHIs;

  /// Maps virtual regs to the frame index where these values are spilled.
  IndexedMap<int, VirtReg2IndexFunctor> StackSlotForVirtReg;

  /// Has a bit set for each tile virtual register for which it was determined
  /// that it is alive across blocks.
  BitVector MayLiveAcrossBlocks;

  int getStackSpaceFor(Register VirtReg);
  void InitializeTileConfigStackSpace();
  bool mayLiveOut(Register VirtReg, MachineInstr *CfgMI);
  void spill(MachineBasicBlock::iterator Before, Register VirtReg, bool Kill);
  void reload(MachineBasicBlock::iterator UseMI, Register VirtReg,
              MachineOperand *RowMO, MachineOperand *ColMO);
  void canonicalizePHIs(MachineBasicBlock &MBB);
  void convertPHI(MachineBasicBlock *MBB, MachineInstr &PHI);
  void convertPHIs(MachineBasicBlock &MBB);
  bool configBasicBlock(MachineBasicBlock &MBB);

public:
  X86FastPreTileConfig() : MachineFunctionPass(ID), StackSlotForVirtReg(-1) {}

  /// Return the pass name.
  StringRef getPassName() const override {
    return "Fast Tile Register Preconfigure";
  }

  /// Perform tile register configuration.
  bool runOnMachineFunction(MachineFunction &MFunc) override;

  static char ID;
};

} // end anonymous namespace

char X86FastPreTileConfig::ID = 0;

INITIALIZE_PASS_BEGIN(X86FastPreTileConfig, DEBUG_TYPE,
                      "Fast Tile Register Preconfigure", false, false)
INITIALIZE_PASS_END(X86FastPreTileConfig, DEBUG_TYPE,
                    "Fast Tile Register Preconfigure", false, false)

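// Return true if position \p A is at or before position \p B within \p MBB,
// i.e. \p A dominates \p B inside this single block. \p B being the end
// iterator is treated as dominated by everything. This is a linear scan of
// the block.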
static bool dominates(MachineBasicBlock &MBB,
                      MachineBasicBlock::const_iterator A,
                      MachineBasicBlock::const_iterator B) {
  auto MBBEnd = MBB.end();
  if (B == MBBEnd)
    return true;

  MachineBasicBlock::const_iterator I = MBB.begin();
  for (; &*I != A && &*I != B; ++I)
    ;

  return &*I == A;
}

/// This allocates space for the specified virtual register to be held on the
/// stack.
int X86FastPreTileConfig::getStackSpaceFor(Register VirtReg) {
  // Find the location Reg would belong...
  int SS = StackSlotForVirtReg[VirtReg];
  // Already has space allocated?
  if (SS != -1)
    return SS;

  // Allocate a new stack object for this spill location...
  const TargetRegisterClass &RC = *MRI->getRegClass(VirtReg);
  unsigned Size = TRI->getSpillSize(RC);
  Align Alignment = TRI->getSpillAlign(RC);
  int FrameIdx = MFI->CreateSpillStackObject(Size, Alignment);

  // Assign the slot.
  StackSlotForVirtReg[VirtReg] = FrameIdx;
  return FrameIdx;
}

/// Returns false if \p VirtReg is known to not live out of the current config.
/// If \p VirtReg lives out of the current MBB, it must live out of the current
/// config.
bool X86FastPreTileConfig::mayLiveOut(Register VirtReg, MachineInstr *CfgMI) {
  if (MayLiveAcrossBlocks.test(Register::virtReg2Index(VirtReg)))
    return true;

  for (const MachineInstr &UseInst : MRI->use_nodbg_instructions(VirtReg)) {
    if (UseInst.getParent() != MBB) {
      MayLiveAcrossBlocks.set(Register::virtReg2Index(VirtReg));
      return true;
    }

    // The use and def are in the same MBB. If the tile register is
    // reconfigured, it is clobbered and we need to spill and reload the
    // tile register.
    if (CfgMI) {
      if (dominates(*MBB, *CfgMI, UseInst)) {
        MayLiveAcrossBlocks.set(Register::virtReg2Index(VirtReg));
        return true;
      }
    }
  }

  return false;
}

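// Zero-initialize the stack slot that backs the tile configuration and set
// the palette byte to 1. This is emitted once, at the first non-PHI position
// of the entry block, after at least one ldtilecfg has been inserted.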
void X86FastPreTileConfig::InitializeTileConfigStackSpace() {
  MachineBasicBlock &MBB = MF->front();
  MachineInstr *MI = &*MBB.getFirstNonPHI();
  DebugLoc DL;
  if (ST->hasAVX512()) {
    Register Zmm = MRI->createVirtualRegister(&X86::VR512RegClass);
    BuildMI(MBB, MI, DL, TII->get(X86::AVX512_512_SET0), Zmm);
    addFrameReference(BuildMI(MBB, MI, DL, TII->get(X86::VMOVUPSZmr)), CfgSS)
        .addReg(Zmm);
  } else if (ST->hasAVX2()) {
    Register Ymm = MRI->createVirtualRegister(&X86::VR256RegClass);
    BuildMI(MBB, MI, DL, TII->get(X86::AVX_SET0), Ymm);
    addFrameReference(BuildMI(MBB, MI, DL, TII->get(X86::VMOVUPSYmr)), CfgSS)
        .addReg(Ymm);
    addFrameReference(BuildMI(MBB, MI, DL, TII->get(X86::VMOVUPSYmr)), CfgSS,
                      32)
        .addReg(Ymm);
  } else {
    assert(ST->hasSSE2() && "AMX should assume SSE2 enabled");
    unsigned StoreOpc = ST->hasAVX() ? X86::VMOVUPSmr : X86::MOVUPSmr;
    Register Xmm = MRI->createVirtualRegister(&X86::VR128RegClass);
    BuildMI(MBB, MI, DL, TII->get(X86::V_SET0), Xmm);
    addFrameReference(BuildMI(MBB, MI, DL, TII->get(StoreOpc)), CfgSS)
        .addReg(Xmm);
    addFrameReference(BuildMI(MBB, MI, DL, TII->get(StoreOpc)), CfgSS, 16)
        .addReg(Xmm);
    addFrameReference(BuildMI(MBB, MI, DL, TII->get(StoreOpc)), CfgSS, 32)
        .addReg(Xmm);
    addFrameReference(BuildMI(MBB, MI, DL, TII->get(StoreOpc)), CfgSS, 48)
        .addReg(Xmm);
  }
  // Fill in the palette first.
  addFrameReference(BuildMI(MBB, MI, DL, TII->get(X86::MOV8mi)), CfgSS)
      .addImm(1);
}

/// Insert spill instruction for \p VirtReg before \p Before.
/// TODO: Update DBG_VALUEs with \p VirtReg operands with the stack slot.
void X86FastPreTileConfig::spill(MachineBasicBlock::iterator Before,
                                 Register VirtReg, bool Kill) {
  LLVM_DEBUG(dbgs() << "Spilling " << printReg(VirtReg, TRI) << " \n");
  int FI = getStackSpaceFor(VirtReg);
  LLVM_DEBUG(dbgs() << " to stack slot #" << FI << '\n');

  const TargetRegisterClass &RC = *MRI->getRegClass(VirtReg);
  // Don't need shape information for the tile store, because it is adjacent
  // to the tile def instruction.
  TII->storeRegToStackSlot(*MBB, Before, VirtReg, Kill, FI, &RC, TRI,
                           Register());
  ++NumStores;

  // TODO: update DBG_VALUEs
}

/// Insert reload instruction for \p OrigReg before \p UseMI.
void X86FastPreTileConfig::reload(MachineBasicBlock::iterator UseMI,
                                  Register OrigReg, MachineOperand *RowMO,
                                  MachineOperand *ColMO) {
  int FI = getStackSpaceFor(OrigReg);
  const TargetRegisterClass &RC = *MRI->getRegClass(OrigReg);
  Register TileReg;
  // Fold copy to tileload
  // BB1:
  // spill src to s
  //
  // BB2:
  // t = copy src
  // -->
  // t = tileload (s)
  if (UseMI->isCopy())
    TileReg = UseMI->getOperand(0).getReg();
  else
    TileReg = MRI->createVirtualRegister(&RC);
  // Can't use TII->loadRegFromStackSlot(), because we need the shape
  // information for the reload.
  // tileloadd (%sp, %idx), %tmm
  unsigned Opc = X86::PTILELOADDV;
  Register StrideReg = MRI->createVirtualRegister(&X86::GR64_NOSPRegClass);
  // FIXME: MBB is not the parent of UseMI.
  MachineInstr *NewMI = BuildMI(*UseMI->getParent(), UseMI, DebugLoc(),
                                TII->get(X86::MOV64ri), StrideReg)
                            .addImm(64);
  NewMI = addFrameReference(
      BuildMI(*UseMI->getParent(), UseMI, DebugLoc(), TII->get(Opc), TileReg)
          .addReg(RowMO->getReg())
          .addReg(ColMO->getReg()),
      FI);
  MachineOperand &MO = NewMI->getOperand(5);
  MO.setReg(StrideReg);
  MO.setIsKill(true);
  RowMO->setIsKill(false);
  ColMO->setIsKill(false);
  // Erase the copy instruction after it is folded.
  if (UseMI->isCopy()) {
    UseMI->eraseFromParent();
  } else {
    // Replace the register in the user MI.
    for (auto &MO : UseMI->operands()) {
      if (MO.isReg() && MO.getReg() == OrigReg)
        MO.setReg(TileReg);
    }
  }

  ++NumLoads;
  LLVM_DEBUG(dbgs() << "Reloading " << printReg(OrigReg, TRI) << " into "
                    << printReg(TileReg, TRI) << '\n');
}

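// Return true if \p MI is a pseudo instruction that defines a tile register
// (a virtual register of the TILE class or a physical TMM register) together
// with its row and column shape operands.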
static bool isTileDef(MachineRegisterInfo *MRI, MachineInstr &MI) {
  // The instruction must have 3 operands: tile def, row, col.
  if (MI.isDebugInstr() || MI.getNumOperands() < 3 || !MI.isPseudo())
    return false;
  MachineOperand &MO = MI.getOperand(0);

  if (MO.isReg()) {
    Register Reg = MO.getReg();
    // FIXME: it may be used after Greedy RA and the physical
    // register is not rewritten yet.
    if (Reg.isVirtual() &&
        MRI->getRegClass(Reg)->getID() == X86::TILERegClassID)
      return true;
    if (Reg >= X86::TMM0 && Reg <= X86::TMM7)
      return true;
  }

  return false;
}

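// Look through COPY instructions to the defining tile instruction of
// \p TileReg and return its row/column shape operands.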
static ShapeT getShape(MachineRegisterInfo *MRI, Register TileReg) {
  MachineInstr *MI = MRI->getVRegDef(TileReg);
  if (isTileDef(MRI, *MI)) {
    MachineOperand *RowMO = &MI->getOperand(1);
    MachineOperand *ColMO = &MI->getOperand(2);
    return ShapeT(RowMO, ColMO, MRI);
  } else if (MI->isCopy()) {
    TileReg = MI->getOperand(1).getReg();
    return getShape(MRI, TileReg);
  }

  // The def should not be a PHI node, because we walk the MBB in reverse post
  // order.
  assert(MI->isPHI() && "Unexpected PHI when get shape.");
  llvm_unreachable("Unexpected MI when get shape.");
}

// BB0:
// spill t0 to s0
// BB1:
// spill t1 to s1
//
// BB2:
// t = phi [t0, bb0] [t1, bb1]
// -->
// row = phi [r0, bb0] [r1, bb1]
// col = phi [c0, bb0] [c1, bb1]
//   s = phi [s0, bb0] [s1, bb1]
//   t = tileload row, col, s
// The new instructions are inserted after the PHI node. The order of the
// original PHI nodes is not preserved.
void X86FastPreTileConfig::convertPHI(MachineBasicBlock *MBB,
                                      MachineInstr &PHI) {
  // 1. Create an instruction to get the stack slot address of each incoming
  //    block.
  // 2. Create a PHI node for the stack address.
  // 3. Create PHI nodes for the shape. If one of the incoming shapes is an
  //    immediate, use the immediate and delete the PHI node.
  // 4. Create a tileload instruction from the stack address.
  Register StackAddrReg = MRI->createVirtualRegister(&X86::GR64_NOSPRegClass);
  MachineInstrBuilder AddrPHI = BuildMI(*MBB, ++PHI.getIterator(), DebugLoc(),
                                        TII->get(X86::PHI), StackAddrReg);
  Register RowReg = MRI->createVirtualRegister(&X86::GR16RegClass);
  MachineInstrBuilder RowPHI = BuildMI(*MBB, ++PHI.getIterator(), DebugLoc(),
                                       TII->get(X86::PHI), RowReg);
  Register ColReg = MRI->createVirtualRegister(&X86::GR16RegClass);
  MachineInstrBuilder ColPHI = BuildMI(*MBB, ++PHI.getIterator(), DebugLoc(),
                                       TII->get(X86::PHI), ColReg);
  // Record the mapping of the phi node and its row/column information.
  VisitedPHIs[&PHI] = {RowReg, ColReg, StackAddrReg};

  for (unsigned I = 1, E = PHI.getNumOperands(); I != E; I += 2) {
    // Get the 2 incoming values: the tile register and the MBB.
    Register InTileReg = PHI.getOperand(I).getReg();
    // Mark it as live-out, so that it will be spilled when the incoming MBB
    // is visited. Otherwise, since the phi will be deleted, the spill would
    // be missed when visiting the incoming MBB.
    MayLiveAcrossBlocks.set(Register::virtReg2Index(InTileReg));
    MachineBasicBlock *InMBB = PHI.getOperand(I + 1).getMBB();

    MachineInstr *TileDefMI = MRI->getVRegDef(InTileReg);
    MachineBasicBlock::iterator InsertPos;
    if (TileDefMI->isPHI()) {
      InsertPos = TileDefMI->getParent()->getFirstNonPHI();
      if (VisitedPHIs.count(TileDefMI)) { // circular phi reference
        //        def t1
        //       /       \
        //  def t2       t3 = phi(t1, t4) <--
        //       \       /                  |
        //       t4 = phi(t2, t3)------------
        //
        // For each (row, column and stack address) append the phi incoming
        // value.
        // Create r3 = phi(r1, r4)
        // Create r4 = phi(r2, r3)
        Register InRowReg = VisitedPHIs[TileDefMI].Row;
        Register InColReg = VisitedPHIs[TileDefMI].Col;
        Register InStackAddrReg = VisitedPHIs[TileDefMI].StackAddr;
        RowPHI.addReg(InRowReg).addMBB(InMBB);
        ColPHI.addReg(InColReg).addMBB(InMBB);
        AddrPHI.addReg(InStackAddrReg).addMBB(InMBB);
        continue;
      } else {
        // Recursively convert the PHI to a tileload.
        convertPHI(TileDefMI->getParent(), *TileDefMI);
        // The PHI node is converted to a tileload instruction. Get the stack
        // address from the tileload operands.
        MachineInstr *TileLoad = MRI->getVRegDef(InTileReg);
        assert(TileLoad && TileLoad->getOpcode() == X86::PTILELOADDV);
        Register InRowReg = TileLoad->getOperand(1).getReg();
        Register InColReg = TileLoad->getOperand(2).getReg();
        Register InStackAddrReg = TileLoad->getOperand(3).getReg();
        RowPHI.addReg(InRowReg).addMBB(InMBB);
        ColPHI.addReg(InColReg).addMBB(InMBB);
        AddrPHI.addReg(InStackAddrReg).addMBB(InMBB);
      }
    } else {
      InsertPos = TileDefMI->getIterator();

      // Fill the incoming operands of the row/column phi instruction.
      ShapeT Shape = getShape(MRI, InTileReg);
      Shape.getRow()->setIsKill(false);
      Shape.getCol()->setIsKill(false);
      RowPHI.addReg(Shape.getRow()->getReg()).addMBB(InMBB);
      ColPHI.addReg(Shape.getCol()->getReg()).addMBB(InMBB);

      // The incoming tile register lives out of its def BB, so it will be
      // spilled. Create an MI to get the spill stack slot address for the
      // tile register.
      int FI = getStackSpaceFor(InTileReg);
      Register InStackAddrReg =
          MRI->createVirtualRegister(&X86::GR64_NOSPRegClass);
      addOffset(BuildMI(*TileDefMI->getParent(), InsertPos, DebugLoc(),
                        TII->get(X86::LEA64r), InStackAddrReg)
                    .addFrameIndex(FI),
                0);
      AddrPHI.addReg(InStackAddrReg).addMBB(InMBB);
    }
  }
  MachineBasicBlock::iterator InsertPos = MBB->getFirstNonPHI();
  Register StrideReg = MRI->createVirtualRegister(&X86::GR64_NOSPRegClass);
  BuildMI(*MBB, InsertPos, DebugLoc(), TII->get(X86::MOV64ri), StrideReg)
      .addImm(64);
  Register TileReg = PHI.getOperand(0).getReg();
  MachineInstr *NewMI = addDirectMem(
      BuildMI(*MBB, InsertPos, DebugLoc(), TII->get(X86::PTILELOADDV), TileReg)
          .addReg(RowReg)
          .addReg(ColReg),
      StackAddrReg);
  MachineOperand &MO = NewMI->getOperand(5);
  MO.setReg(StrideReg);
  MO.setIsKill(true);
  PHI.eraseFromParent();
  VisitedPHIs.erase(&PHI);
}

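// Return true if \p MI defines a virtual register of the TILE register class.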
static bool isTileRegDef(MachineRegisterInfo *MRI, MachineInstr &MI) {
  MachineOperand &MO = MI.getOperand(0);
  if (MO.isReg() && MO.getReg().isVirtual() &&
      MRI->getRegClass(MO.getReg())->getID() == X86::TILERegClassID)
    return true;
  return false;
}

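// Canonicalize the tile PHIs in \p MBB: if a PHI's incoming value from this
// same block is defined by another PHI in the block, replace that operand
// with the other PHI's own incoming value from this block, so that later PHI
// conversion does not have to chase PHI-of-PHI chains (see the example below).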
void X86FastPreTileConfig::canonicalizePHIs(MachineBasicBlock &MBB) {
  SmallVector<MachineInstr *, 8> PHIs;

  for (MachineInstr &MI : MBB) {
    if (!MI.isPHI())
      break;
    if (!isTileRegDef(MRI, MI))
      continue;
    PHIs.push_back(&MI);
  }
  // Canonicalize the phi nodes first. One tile phi may depend on a previous
  // phi node. For the case below, we need to convert %t4.
  //
  // BB0:
  // %t3 = phi (t1 BB1, t2 BB0)
  // %t4 = phi (t5 BB1, t3 BB0)
  // -->
  // %t3 = phi (t1 BB1, t2 BB0)
  // %t4 = phi (t5 BB1, t2 BB0)
  //
  while (!PHIs.empty()) {
    MachineInstr *PHI = PHIs.pop_back_val();

    // Find the operand that is incoming from the same MBB and whose def is
    // also a phi node.
    MachineOperand *InMO = nullptr;
    MachineInstr *DefMI = nullptr;
    for (unsigned I = 1, E = PHI->getNumOperands(); I != E; I += 2) {
      Register InTileReg = PHI->getOperand(I).getReg();
      MachineBasicBlock *InMBB = PHI->getOperand(I + 1).getMBB();
      DefMI = MRI->getVRegDef(InTileReg);
      if (InMBB != &MBB || !DefMI->isPHI())
        continue;

      InMO = &PHI->getOperand(I);
      break;
    }
    // If no such operand is found, do nothing.
    if (!InMO)
      continue;

    // The current phi node depends on a previous phi node. Break the
    // dependency.
    Register DefTileReg;
    for (unsigned I = 1, E = DefMI->getNumOperands(); I != E; I += 2) {
      MachineBasicBlock *InMBB = PHI->getOperand(I + 1).getMBB();
      if (InMBB != &MBB)
        continue;
      DefTileReg = DefMI->getOperand(I).getReg();
      InMO->setReg(DefTileReg);
      break;
    }
  }
}

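// Convert all tile PHIs in \p MBB (and, through recursion in convertPHI, any
// tile PHIs they depend on) into PTILELOADDV instructions that reload the
// tile from the spill slots of the incoming values.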
void X86FastPreTileConfig::convertPHIs(MachineBasicBlock &MBB) {
  SmallVector<MachineInstr *, 8> PHIs;
  for (MachineInstr &MI : MBB) {
    if (!MI.isPHI())
      break;
    if (!isTileRegDef(MRI, MI))
      continue;
    PHIs.push_back(&MI);
  }
  while (!PHIs.empty()) {
    MachineInstr *MI = PHIs.pop_back_val();
    VisitedPHIs.clear();
    convertPHI(&MBB, *MI);
  }
}

// PreTileConfig configures the tile registers on a per-basic-block basis.
bool X86FastPreTileConfig::configBasicBlock(MachineBasicBlock &MBB) {
  this->MBB = &MBB;
  bool Change = false;
  MachineInstr *LastShapeMI = nullptr;
  MachineInstr *LastTileCfg = nullptr;
  bool HasUnconfigTile = false;
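
  // Insert a PLDTILECFGV that loads the tile configuration from the stack
  // slot CfgSS right before \p Before, creating the slot on first use, and
  // reset the last-shape tracking.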
  auto Config = [&](MachineInstr &Before) {
    if (CfgSS == -1)
      CfgSS = MFI->CreateStackObject(ST->getTileConfigSize(),
                                     ST->getTileConfigAlignment(), false);
    LastTileCfg = addFrameReference(
        BuildMI(MBB, Before, DebugLoc(), TII->get(X86::PLDTILECFGV)), CfgSS);
    LastShapeMI = nullptr;
    Change = true;
  };
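
  // Return true if \p MI has any operand that is a virtual tile register.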
  auto HasTileOperand = [](MachineRegisterInfo *MRI, MachineInstr &MI) {
    for (const MachineOperand &MO : MI.operands()) {
      if (!MO.isReg())
        continue;
      Register Reg = MO.getReg();
      if (Reg.isVirtual() &&
          MRI->getRegClass(Reg)->getID() == X86::TILERegClassID)
        return true;
    }
    return false;
  };
  for (MachineInstr &MI : reverse(MBB)) {
    // We have transformed the phi nodes before configuring the BB.
    if (MI.isPHI())
      break;
    // Don't collect the shape of a used tile; the tile should be defined
    // before the tile use. Spill and reload happen if there is only a tile
    // use after ldtilecfg, so the shape can be collected from the reload.
    // Take the code below as an example: %t is reloaded before the tilestore.
    // call
    // ....
    // tilestore %r, %c, %t
    // -->
    // call
    // ldtilecfg
    // %t = tileload %r, %c
    // tilestore %r, %c, %t
    if (HasTileOperand(MRI, MI))
      HasUnconfigTile = true;
    // According to the AMX ABI, all the tile registers including the config
    // register are volatile. The caller needs to save/restore the config
    // register.
    if (MI.isCall() && HasUnconfigTile) {
      MachineBasicBlock::iterator I;
      if (LastShapeMI && dominates(MBB, MI, LastShapeMI))
        I = ++LastShapeMI->getIterator();
      else
        I = ++MI.getIterator();
      Config(*I);
      HasUnconfigTile = false;
      continue;
    }
    if (!isTileDef(MRI, MI))
      continue;
    //
    //---------------------------------------------------------------------
    // Don't handle COPY instructions. If the src and dst of the COPY can be
    // in the same config, as in the case below, we just check the shape of
    // t0.
    // def row0
    // def col0
    // ldtilecfg
    // t0 = tilezero(row0, col0)
    // t1 = copy t0
    // ...
    // If the src and dst of the COPY can NOT be in the same config, as in the
    // case below, a reload is generated before the copy instruction.
    // def row0
    // def col0
    // t0 = tilezero(row0, col0)
    // spill t0
    // ...
    // def row1
    // def col1
    // ldtilecfg
    // t1 = tilezero(row1, col1)
    // reload t0
    // t1 = copy t0
    //---------------------------------------------------------------------
    //
    // If MI dominates the last shape def instruction, we need to insert
    // ldtilecfg after LastShapeMI now. The config doesn't include the
    // current MI.
    // def row0
    // def col0
    // tilezero(row0, col0)  <- MI
    // def row1
    // def col1
    // ldtilecfg             <- insert
    // tilezero(row1, col1)
    if (LastShapeMI && dominates(MBB, MI, LastShapeMI))
      Config(*(++LastShapeMI->getIterator()));
    MachineOperand *RowMO = &MI.getOperand(1);
    MachineOperand *ColMO = &MI.getOperand(2);
    MachineInstr *RowMI = MRI->getVRegDef(RowMO->getReg());
    MachineInstr *ColMI = MRI->getVRegDef(ColMO->getReg());
    // If the shape is defined in the current MBB, check the domination.
    // FIXME: how about loops?
    if (RowMI->getParent() == &MBB) {
      if (!LastShapeMI)
        LastShapeMI = RowMI;
      else if (dominates(MBB, LastShapeMI, RowMI))
        LastShapeMI = RowMI;
    }
    if (ColMI->getParent() == &MBB) {
      if (!LastShapeMI)
        LastShapeMI = ColMI;
      else if (dominates(MBB, LastShapeMI, ColMI))
        LastShapeMI = ColMI;
    }
    // If a user of the tile register lives out of the tile config, spill the
    // register and reload it before the user.
    Register TileReg = MI.getOperand(0).getReg();
    if (mayLiveOut(TileReg, LastTileCfg))
      spill(++MI.getIterator(), TileReg, false);
    for (MachineInstr &UseMI : MRI->use_instructions(TileReg)) {
      if (UseMI.getParent() == &MBB) {
        // The use should not cross the ldtilecfg.
        if (!LastTileCfg || !dominates(MBB, LastTileCfg, UseMI))
          continue;
        // Reload before UseMI.
        reload(UseMI.getIterator(), TileReg, RowMO, ColMO);
      } else {
        // Don't reload for phi instructions; we handle phi reloads separately.
        // TODO: merge the reloads for the same user MBB.
        if (!UseMI.isPHI())
          reload(UseMI.getIterator(), TileReg, RowMO, ColMO);
      }
    }
  }
  // Configure tile registers at the head of the MBB.
  if (HasUnconfigTile) {
    MachineInstr *Before;
    if (LastShapeMI == nullptr || LastShapeMI->isPHI())
      Before = &*MBB.getFirstNonPHI();
    else
      Before = &*(++LastShapeMI->getIterator());

    Config(*Before);
  }

  return Change;
}

bool X86FastPreTileConfig::runOnMachineFunction(MachineFunction &MFunc) {
  MF = &MFunc;
  MRI = &MFunc.getRegInfo();
  ST = &MFunc.getSubtarget<X86Subtarget>();
  TII = ST->getInstrInfo();
  X86FI = MFunc.getInfo<X86MachineFunctionInfo>();
  MFI = &MFunc.getFrameInfo();
  TRI = ST->getRegisterInfo();
  CfgSS = -1;

  unsigned NumVirtRegs = MRI->getNumVirtRegs();
  // Bail out early if there is no tile register to config.
  bool HasVirtTileReg = false;
  for (unsigned I = 0, E = NumVirtRegs; I != E; ++I) {
    Register VirtReg = Register::index2VirtReg(I);
    if (MRI->getRegClass(VirtReg)->getID() == X86::TILERegClassID) {
      HasVirtTileReg = true;
      break;
    }
  }
  if (!HasVirtTileReg)
    return false;

  StackSlotForVirtReg.resize(NumVirtRegs);
  MayLiveAcrossBlocks.clear();
  // We will create new virtual registers during config, so multiplying by 3
  // makes sure the virtual register number doesn't exceed the size of the
  // bit vector.
  MayLiveAcrossBlocks.resize(NumVirtRegs * 3);
  bool Change = false;
  assert(MRI->isSSA());
  // Canonicalize the phi nodes first.
  for (MachineBasicBlock &MBB : MFunc)
    canonicalizePHIs(MBB);

  // Loop over all of the basic blocks in reverse post order and insert
  // ldtilecfg for tile registers. The reverse post order facilitates the
  // PHI node conversion.
  ReversePostOrderTraversal<MachineFunction *> RPOT(MF);
  for (MachineBasicBlock *MBB : RPOT) {
    convertPHIs(*MBB);
    Change |= configBasicBlock(*MBB);
  }

  if (Change)
    InitializeTileConfigStackSpace();

  StackSlotForVirtReg.clear();
  return Change;
}

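// Factory used by the X86 target when setting up its codegen pipeline.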
FunctionPass *llvm::createX86FastPreTileConfigPass() {
  return new X86FastPreTileConfig();
}