- //===-- X86FastPreTileConfig.cpp - Fast Tile Register Configure ----------===//
- //
- // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
- // See https://llvm.org/LICENSE.txt for license information.
- // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
- //
- //===----------------------------------------------------------------------===//
- //
- /// \file Pass to preconfig the shape of physical tile registers
- /// It inserts ldtilecfg ahead of each group of tile registers. The algorithm
- /// walks each instruction of the basic block in reverse order. All the tile
- /// registers that live out of the basic block are spilled and reloaded
- /// before their users. It also checks the dependency of the shape to ensure
- /// the shape is defined before ldtilecfg.
- //
- //===----------------------------------------------------------------------===//
- #include "X86.h"
- #include "X86InstrBuilder.h"
- #include "X86MachineFunctionInfo.h"
- #include "X86RegisterInfo.h"
- #include "X86Subtarget.h"
- #include "llvm/ADT/DepthFirstIterator.h"
- #include "llvm/ADT/PostOrderIterator.h"
- #include "llvm/ADT/Statistic.h"
- #include "llvm/CodeGen/MachineFrameInfo.h"
- #include "llvm/CodeGen/MachineFunctionPass.h"
- #include "llvm/CodeGen/MachineInstr.h"
- #include "llvm/CodeGen/MachineRegisterInfo.h"
- #include "llvm/CodeGen/Passes.h"
- #include "llvm/CodeGen/TargetInstrInfo.h"
- #include "llvm/CodeGen/TargetRegisterInfo.h"
- #include "llvm/InitializePasses.h"
- #include "llvm/Support/Debug.h"
- using namespace llvm;
- #define DEBUG_TYPE "fastpretileconfig"
- STATISTIC(NumStores, "Number of stores added");
- STATISTIC(NumLoads, "Number of loads added");
- namespace {
- class X86FastPreTileConfig : public MachineFunctionPass {
- MachineFunction *MF = nullptr;
- const X86Subtarget *ST = nullptr;
- const TargetInstrInfo *TII = nullptr;
- MachineRegisterInfo *MRI = nullptr;
- X86MachineFunctionInfo *X86FI = nullptr;
- MachineFrameInfo *MFI = nullptr;
- const TargetRegisterInfo *TRI = nullptr;
- MachineBasicBlock *MBB = nullptr;
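- // Stack slot (frame index) for the tile config; -1 until it is allocated.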
- int CfgSS = -1;
- struct PHIInfo {
- Register Row;
- Register Col;
- Register StackAddr;
- };
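- // Maps each visited tile PHI to the row/column/stack-address registers
- // created for it, so that circular PHI references can reuse them.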
- DenseMap<MachineInstr *, struct PHIInfo> VisitedPHIs;
- /// Maps virtual regs to the frame index where these values are spilled.
- IndexedMap<int, VirtReg2IndexFunctor> StackSlotForVirtReg;
- /// Has a bit set for each tile virtual register that was determined to be
- /// live across basic blocks.
- BitVector MayLiveAcrossBlocks;
- int getStackSpaceFor(Register VirtReg);
- void InitializeTileConfigStackSpace();
- bool mayLiveOut(Register VirtReg, MachineInstr *CfgMI);
- void spill(MachineBasicBlock::iterator Before, Register VirtReg, bool Kill);
- void reload(MachineBasicBlock::iterator UseMI, Register VirtReg,
- MachineOperand *RowMO, MachineOperand *ColMO);
- void canonicalizePHIs(MachineBasicBlock &MBB);
- void convertPHI(MachineBasicBlock *MBB, MachineInstr &PHI);
- void convertPHIs(MachineBasicBlock &MBB);
- bool configBasicBlock(MachineBasicBlock &MBB);
- public:
- X86FastPreTileConfig() : MachineFunctionPass(ID), StackSlotForVirtReg(-1) {}
- /// Return the pass name.
- StringRef getPassName() const override {
- return "Fast Tile Register Preconfigure";
- }
- /// Perform the tile register configuration.
- bool runOnMachineFunction(MachineFunction &MFunc) override;
- static char ID;
- };
- } // end anonymous namespace
- char X86FastPreTileConfig::ID = 0;
- INITIALIZE_PASS_BEGIN(X86FastPreTileConfig, DEBUG_TYPE,
- "Fast Tile Register Preconfigure", false, false)
- INITIALIZE_PASS_END(X86FastPreTileConfig, DEBUG_TYPE,
- "Fast Tile Register Preconfigure", false, false)
- static bool dominates(MachineBasicBlock &MBB,
- MachineBasicBlock::const_iterator A,
- MachineBasicBlock::const_iterator B) {
- auto MBBEnd = MBB.end();
- if (B == MBBEnd)
- return true;
- MachineBasicBlock::const_iterator I = MBB.begin();
- for (; &*I != A && &*I != B; ++I)
- ;
- return &*I == A;
- }
- /// This allocates space for the specified virtual register to be held on the
- /// stack.
- int X86FastPreTileConfig::getStackSpaceFor(Register VirtReg) {
- // Find the location Reg would belong...
- int SS = StackSlotForVirtReg[VirtReg];
- // Already has space allocated?
- if (SS != -1)
- return SS;
- // Allocate a new stack object for this spill location...
- const TargetRegisterClass &RC = *MRI->getRegClass(VirtReg);
- unsigned Size = TRI->getSpillSize(RC);
- Align Alignment = TRI->getSpillAlign(RC);
- int FrameIdx = MFI->CreateSpillStackObject(Size, Alignment);
- // Assign the slot.
- StackSlotForVirtReg[VirtReg] = FrameIdx;
- return FrameIdx;
- }
- /// Returns false if \p VirtReg is known to not live out of the current config.
- /// If \p VirtReg lives out of the current MBB, it must live out of the
- /// current config.
- bool X86FastPreTileConfig::mayLiveOut(Register VirtReg, MachineInstr *CfgMI) {
- if (MayLiveAcrossBlocks.test(Register::virtReg2Index(VirtReg)))
- return true;
- for (const MachineInstr &UseInst : MRI->use_nodbg_instructions(VirtReg)) {
- if (UseInst.getParent() != MBB) {
- MayLiveAcrossBlocks.set(Register::virtReg2Index(VirtReg));
- return true;
- }
- // The use and def are in the same MBB. If the tile register is
- // reconfigured, it is clobbered and we need to spill and reload
- // the tile register.
- if (CfgMI) {
- if (dominates(*MBB, *CfgMI, UseInst)) {
- MayLiveAcrossBlocks.set(Register::virtReg2Index(VirtReg));
- return true;
- }
- }
- }
- return false;
- }
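- /// Zero-initialize the tile config stack object with the widest available
- /// vector stores, then write 1 to the palette byte at offset 0.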
- void X86FastPreTileConfig::InitializeTileConfigStackSpace() {
- MachineBasicBlock &MBB = MF->front();
- MachineInstr *MI = &*MBB.getFirstNonPHI();
- DebugLoc DL;
- if (ST->hasAVX512()) {
- Register Zmm = MRI->createVirtualRegister(&X86::VR512RegClass);
- BuildMI(MBB, MI, DL, TII->get(X86::AVX512_512_SET0), Zmm);
- addFrameReference(BuildMI(MBB, MI, DL, TII->get(X86::VMOVUPSZmr)), CfgSS)
- .addReg(Zmm);
- } else if (ST->hasAVX2()) {
- Register Ymm = MRI->createVirtualRegister(&X86::VR256RegClass);
- BuildMI(MBB, MI, DL, TII->get(X86::AVX_SET0), Ymm);
- addFrameReference(BuildMI(MBB, MI, DL, TII->get(X86::VMOVUPSYmr)), CfgSS)
- .addReg(Ymm);
- addFrameReference(BuildMI(MBB, MI, DL, TII->get(X86::VMOVUPSYmr)), CfgSS,
- 32)
- .addReg(Ymm);
- } else {
- assert(ST->hasSSE2() && "AMX should assume SSE2 enabled");
- unsigned StoreOpc = ST->hasAVX() ? X86::VMOVUPSmr : X86::MOVUPSmr;
- Register Xmm = MRI->createVirtualRegister(&X86::VR128RegClass);
- BuildMI(MBB, MI, DL, TII->get(X86::V_SET0), Xmm);
- addFrameReference(BuildMI(MBB, MI, DL, TII->get(StoreOpc)), CfgSS)
- .addReg(Xmm);
- addFrameReference(BuildMI(MBB, MI, DL, TII->get(StoreOpc)), CfgSS, 16)
- .addReg(Xmm);
- addFrameReference(BuildMI(MBB, MI, DL, TII->get(StoreOpc)), CfgSS, 32)
- .addReg(Xmm);
- addFrameReference(BuildMI(MBB, MI, DL, TII->get(StoreOpc)), CfgSS, 48)
- .addReg(Xmm);
- }
- // Fill in the palette, i.e. the first byte of the tile config.
- addFrameReference(BuildMI(MBB, MI, DL, TII->get(X86::MOV8mi)), CfgSS)
- .addImm(1);
- }
- /// Insert spill instruction for \p VirtReg before \p Before.
- /// TODO: Update DBG_VALUEs with \p VirtReg operands with the stack slot.
- void X86FastPreTileConfig::spill(MachineBasicBlock::iterator Before,
- Register VirtReg, bool Kill) {
- LLVM_DEBUG(dbgs() << "Spilling " << printReg(VirtReg, TRI) << " \n");
- int FI = getStackSpaceFor(VirtReg);
- LLVM_DEBUG(dbgs() << " to stack slot #" << FI << '\n');
- const TargetRegisterClass &RC = *MRI->getRegClass(VirtReg);
- // No shape information is needed for the tile store, because it is
- // adjacent to the tile def instruction.
- TII->storeRegToStackSlot(*MBB, Before, VirtReg, Kill, FI, &RC, TRI,
- Register());
- ++NumStores;
- // TODO: update DBG_VALUEs
- }
- /// Insert reload instruction for \p OrigReg before \p UseMI.
- void X86FastPreTileConfig::reload(MachineBasicBlock::iterator UseMI,
- Register OrigReg, MachineOperand *RowMO,
- MachineOperand *ColMO) {
- int FI = getStackSpaceFor(OrigReg);
- const TargetRegisterClass &RC = *MRI->getRegClass(OrigReg);
- Register TileReg;
- // Fold copy to tileload
- // BB1:
- // spill src to s
- //
- // BB2:
- // t = copy src
- // -->
- // t = tileload (s)
- if (UseMI->isCopy())
- TileReg = UseMI->getOperand(0).getReg();
- else
- TileReg = MRI->createVirtualRegister(&RC);
- // Can't use TII->loadRegFromStackSlot(), because we need the shape
- // information for reload.
- // tileloadd (%sp, %idx), %tmm
- unsigned Opc = X86::PTILELOADDV;
- Register StrideReg = MRI->createVirtualRegister(&X86::GR64_NOSPRegClass);
- // FIXME: MBB is not the parent of UseMI.
- MachineInstr *NewMI = BuildMI(*UseMI->getParent(), UseMI, DebugLoc(),
- TII->get(X86::MOV64ri), StrideReg)
- .addImm(64);
- NewMI = addFrameReference(
- BuildMI(*UseMI->getParent(), UseMI, DebugLoc(), TII->get(Opc), TileReg)
- .addReg(RowMO->getReg())
- .addReg(ColMO->getReg()),
- FI);
- MachineOperand &MO = NewMI->getOperand(5);
- MO.setReg(StrideReg);
- MO.setIsKill(true);
- RowMO->setIsKill(false);
- ColMO->setIsKill(false);
- // Erase copy instruction after it is folded.
- if (UseMI->isCopy()) {
- UseMI->eraseFromParent();
- } else {
- // Replace the register in the user MI.
- for (auto &MO : UseMI->operands()) {
- if (MO.isReg() && MO.getReg() == OrigReg)
- MO.setReg(TileReg);
- }
- }
- ++NumLoads;
- LLVM_DEBUG(dbgs() << "Reloading " << printReg(OrigReg, TRI) << " into "
- << printReg(TileReg, TRI) << '\n');
- }
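- /// Return true if \p MI is a pseudo instruction that defines a tile
- /// register, either virtual or physical (TMM0-TMM7).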
- static bool isTileDef(MachineRegisterInfo *MRI, MachineInstr &MI) {
- // The instruction must have 3 operands: tile def, row, col.
- if (MI.isDebugInstr() || MI.getNumOperands() < 3 || !MI.isPseudo())
- return false;
- MachineOperand &MO = MI.getOperand(0);
- if (MO.isReg()) {
- Register Reg = MO.getReg();
- // FIXME: It may be used after greedy RA and the physical
- // register is not rewritten yet.
- if (Reg.isVirtual() &&
- MRI->getRegClass(Reg)->getID() == X86::TILERegClassID)
- return true;
- if (Reg >= X86::TMM0 && Reg <= X86::TMM7)
- return true;
- }
- return false;
- }
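- /// Look through COPYs to the defining instruction of \p TileReg and return
- /// its row/column shape operands.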
- static ShapeT getShape(MachineRegisterInfo *MRI, Register TileReg) {
- MachineInstr *MI = MRI->getVRegDef(TileReg);
- if (isTileDef(MRI, *MI)) {
- MachineOperand *RowMO = &MI->getOperand(1);
- MachineOperand *ColMO = &MI->getOperand(2);
- return ShapeT(RowMO, ColMO, MRI);
- } else if (MI->isCopy()) {
- TileReg = MI->getOperand(1).getReg();
- return getShape(MRI, TileReg);
- }
- // The def should not be a PHI node, because we walk the MBB in reverse
- // post order.
- assert(MI->isPHI() && "Unexpected PHI when get shape.");
- llvm_unreachable("Unexpected MI when get shape.");
- }
- // BB0:
- // spill t0 to s0
- // BB1:
- // spill t1 to s1
- //
- // BB2:
- // t = phi [t0, bb0] [t1, bb1]
- // -->
- // row = phi [r0, bb0] [r1, bb1]
- // col = phi [c0, bb0] [c1, bb1]
- // s = phi [s0, bb0] [s1, bb1]
- // t = tileload row, col, s
- // The new phi instructions are inserted right after the original phi node,
- // and the tile load is inserted at the first non-phi position. The order
- // of the original phi nodes is not preserved.
- void X86FastPreTileConfig::convertPHI(MachineBasicBlock *MBB,
- MachineInstr &PHI) {
- // 1. Create instructions to get the stack slot address of each incoming
- // block.
- // 2. Create a PHI node for the stack address.
- // 3. Create PHI nodes for the shape. If one of the incoming shapes is an
- // immediate, use the immediate and delete the PHI node.
- // 4. Create a tileload instruction from the stack address.
- Register StackAddrReg = MRI->createVirtualRegister(&X86::GR64_NOSPRegClass);
- MachineInstrBuilder AddrPHI = BuildMI(*MBB, ++PHI.getIterator(), DebugLoc(),
- TII->get(X86::PHI), StackAddrReg);
- Register RowReg = MRI->createVirtualRegister(&X86::GR16RegClass);
- MachineInstrBuilder RowPHI = BuildMI(*MBB, ++PHI.getIterator(), DebugLoc(),
- TII->get(X86::PHI), RowReg);
- Register ColReg = MRI->createVirtualRegister(&X86::GR16RegClass);
- MachineInstrBuilder ColPHI = BuildMI(*MBB, ++PHI.getIterator(), DebugLoc(),
- TII->get(X86::PHI), ColReg);
- // Record the mapping from the phi node to its row/column information.
- VisitedPHIs[&PHI] = {RowReg, ColReg, StackAddrReg};
- for (unsigned I = 1, E = PHI.getNumOperands(); I != E; I += 2) {
- // Get each pair of incoming tile register and incoming MBB.
- Register InTileReg = PHI.getOperand(I).getReg();
- // Mark it as live out, so that it will be spilled when we visit the
- // incoming MBB. Otherwise, since the phi will be deleted, the spill
- // would be missed when visiting the incoming MBB.
- MayLiveAcrossBlocks.set(Register::virtReg2Index(InTileReg));
- MachineBasicBlock *InMBB = PHI.getOperand(I + 1).getMBB();
- MachineInstr *TileDefMI = MRI->getVRegDef(InTileReg);
- MachineBasicBlock::iterator InsertPos;
- if (TileDefMI->isPHI()) {
- InsertPos = TileDefMI->getParent()->getFirstNonPHI();
- if (VisitedPHIs.count(TileDefMI)) { // circular phi reference
- // def t1
- //  / \
- // def t2   t3 = phi(t1, t4) <--
- //  \ /     |
- //   t4 = phi(t2, t3)-------------
- //
- // For each of row, column and stack address, append the phi incoming value.
- // Create r3 = phi(r1, r4)
- // Create r4 = phi(r2, r3)
- Register InRowReg = VisitedPHIs[TileDefMI].Row;
- Register InColReg = VisitedPHIs[TileDefMI].Col;
- Register InStackAddrReg = VisitedPHIs[TileDefMI].StackAddr;
- RowPHI.addReg(InRowReg).addMBB(InMBB);
- ColPHI.addReg(InColReg).addMBB(InMBB);
- AddrPHI.addReg(InStackAddrReg).addMBB(InMBB);
- continue;
- } else {
- // Recursively convert PHI to tileload
- convertPHI(TileDefMI->getParent(), *TileDefMI);
- // The PHI node is converted to a tileload instruction. Get the stack
- // address from the tileload operands.
- MachineInstr *TileLoad = MRI->getVRegDef(InTileReg);
- assert(TileLoad && TileLoad->getOpcode() == X86::PTILELOADDV);
- Register InRowReg = TileLoad->getOperand(1).getReg();
- Register InColReg = TileLoad->getOperand(2).getReg();
- Register InStackAddrReg = TileLoad->getOperand(3).getReg();
- RowPHI.addReg(InRowReg).addMBB(InMBB);
- ColPHI.addReg(InColReg).addMBB(InMBB);
- AddrPHI.addReg(InStackAddrReg).addMBB(InMBB);
- }
- } else {
- InsertPos = TileDefMI->getIterator();
- // Fill the incoming operands of the row/column phi instructions.
- ShapeT Shape = getShape(MRI, InTileReg);
- Shape.getRow()->setIsKill(false);
- Shape.getCol()->setIsKill(false);
- RowPHI.addReg(Shape.getRow()->getReg()).addMBB(InMBB);
- ColPHI.addReg(Shape.getCol()->getReg()).addMBB(InMBB);
- // The incoming tile register lives out of its def BB, so it will be
- // spilled. Create an MI to get the spill stack slot address for it.
- int FI = getStackSpaceFor(InTileReg);
- Register InStackAddrReg =
- MRI->createVirtualRegister(&X86::GR64_NOSPRegClass);
- addOffset(BuildMI(*TileDefMI->getParent(), InsertPos, DebugLoc(),
- TII->get(X86::LEA64r), InStackAddrReg)
- .addFrameIndex(FI),
- 0);
- AddrPHI.addReg(InStackAddrReg).addMBB(InMBB);
- }
- }
- MachineBasicBlock::iterator InsertPos = MBB->getFirstNonPHI();
- Register StrideReg = MRI->createVirtualRegister(&X86::GR64_NOSPRegClass);
- BuildMI(*MBB, InsertPos, DebugLoc(), TII->get(X86::MOV64ri), StrideReg)
- .addImm(64);
- Register TileReg = PHI.getOperand(0).getReg();
- MachineInstr *NewMI = addDirectMem(
- BuildMI(*MBB, InsertPos, DebugLoc(), TII->get(X86::PTILELOADDV), TileReg)
- .addReg(RowReg)
- .addReg(ColReg),
- StackAddrReg);
- MachineOperand &MO = NewMI->getOperand(5);
- MO.setReg(StrideReg);
- MO.setIsKill(true);
- PHI.eraseFromParent();
- VisitedPHIs.erase(&PHI);
- }
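- /// Return true if \p MI defines a virtual tile register.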
- static bool isTileRegDef(MachineRegisterInfo *MRI, MachineInstr &MI) {
- MachineOperand &MO = MI.getOperand(0);
- if (MO.isReg() && MO.getReg().isVirtual() &&
- MRI->getRegClass(MO.getReg())->getID() == X86::TILERegClassID)
- return true;
- return false;
- }
- void X86FastPreTileConfig::canonicalizePHIs(MachineBasicBlock &MBB) {
- SmallVector<MachineInstr *, 8> PHIs;
- for (MachineInstr &MI : MBB) {
- if (!MI.isPHI())
- break;
- if (!isTileRegDef(MRI, MI))
- continue;
- PHIs.push_back(&MI);
- }
- // Canonicalize the phi nodes first. One tile phi may depend on a previous
- // phi node. For the case below, we need to convert %t4.
- //
- // BB0:
- // %t3 = phi (t1 BB1, t2 BB0)
- // %t4 = phi (t5 BB1, t3 BB0)
- // -->
- // %t3 = phi (t1 BB1, t2 BB0)
- // %t4 = phi (t5 BB1, t2 BB0)
- //
- while (!PHIs.empty()) {
- MachineInstr *PHI = PHIs.pop_back_val();
- // Find the operand that is incoming from the same MBB and whose def
- // is also a phi node.
- MachineOperand *InMO = nullptr;
- MachineInstr *DefMI = nullptr;
- for (unsigned I = 1, E = PHI->getNumOperands(); I != E; I += 2) {
- Register InTileReg = PHI->getOperand(I).getReg();
- MachineBasicBlock *InMBB = PHI->getOperand(I + 1).getMBB();
- DefMI = MRI->getVRegDef(InTileReg);
- if (InMBB != &MBB || !DefMI->isPHI())
- continue;
- InMO = &PHI->getOperand(I);
- break;
- }
- // If no such operand is found, do nothing.
- if (!InMO)
- continue;
- // Current phi node depends on previous phi node. Break the
- // dependency.
- Register DefTileReg;
- for (unsigned I = 1, E = DefMI->getNumOperands(); I != E; I += 2) {
- MachineBasicBlock *InMBB = DefMI->getOperand(I + 1).getMBB();
- if (InMBB != &MBB)
- continue;
- DefTileReg = DefMI->getOperand(I).getReg();
- InMO->setReg(DefTileReg);
- break;
- }
- }
- }
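- /// Convert all the tile PHI nodes in \p MBB into tile loads from a
- /// PHI-merged stack slot address.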
- void X86FastPreTileConfig::convertPHIs(MachineBasicBlock &MBB) {
- SmallVector<MachineInstr *, 8> PHIs;
- for (MachineInstr &MI : MBB) {
- if (!MI.isPHI())
- break;
- if (!isTileRegDef(MRI, MI))
- continue;
- PHIs.push_back(&MI);
- }
- while (!PHIs.empty()) {
- MachineInstr *MI = PHIs.pop_back_val();
- VisitedPHIs.clear();
- convertPHI(&MBB, *MI);
- }
- }
- // PreTileConfig should configure the tile registers on a per-basic-block
- // basis.
- bool X86FastPreTileConfig::configBasicBlock(MachineBasicBlock &MBB) {
- this->MBB = &MBB;
- bool Change = false;
- MachineInstr *LastShapeMI = nullptr;
- MachineInstr *LastTileCfg = nullptr;
- bool HasUnconfigTile = false;
- auto Config = [&](MachineInstr &Before) {
- if (CfgSS == -1)
- CfgSS = MFI->CreateStackObject(ST->getTileConfigSize(),
- ST->getTileConfigAlignment(), false);
- LastTileCfg = addFrameReference(
- BuildMI(MBB, Before, DebugLoc(), TII->get(X86::PLDTILECFGV)), CfgSS);
- LastShapeMI = nullptr;
- Change = true;
- };
- auto HasTileOperand = [](MachineRegisterInfo *MRI, MachineInstr &MI) {
- for (const MachineOperand &MO : MI.operands()) {
- if (!MO.isReg())
- continue;
- Register Reg = MO.getReg();
- if (Reg.isVirtual() &&
- MRI->getRegClass(Reg)->getID() == X86::TILERegClassID)
- return true;
- }
- return false;
- };
- for (MachineInstr &MI : reverse(MBB)) {
- // We have transformed the phi nodes before configuring the BB.
- if (MI.isPHI())
- break;
- // Don't collect the shape of a used tile; the tile should be defined
- // before the tile use. Spill and reload happen if there is only a tile
- // use after ldtilecfg, so the shape can be collected from the reload.
- // Take the code below for example; %t is reloaded before the tilestore:
- // call
- // ....
- // tilestore %r, %c, %t
- // -->
- // call
- // ldtilecfg
- // %t = tileload %r, %c
- // tilestore %r, %c, %t
- if (HasTileOperand(MRI, MI))
- HasUnconfigTile = true;
- // According to the AMX ABI, all the tile registers, including the config
- // register, are volatile. The caller needs to save/restore the config
- // register.
- if (MI.isCall() && HasUnconfigTile) {
- MachineBasicBlock::iterator I;
- if (LastShapeMI && dominates(MBB, MI, LastShapeMI))
- I = ++LastShapeMI->getIterator();
- else
- I = ++MI.getIterator();
- Config(*I);
- HasUnconfigTile = false;
- continue;
- }
- if (!isTileDef(MRI, MI))
- continue;
- //
- //---------------------------------------------------------------------
- // Don't handle COPY instructions. If the src and dst of the COPY can be
- // in the same config, as in the case below, we just check the shape of t0.
- // def row0
- // def col0
- // ldtilecfg
- // t0 = tilezero(row0, col0)
- // t1 = copy t0
- // ...
- // If the src and dst of the COPY can NOT be in the same config, as in the
- // case below, a reload is generated before the copy instruction.
- // def row0
- // def col0
- // t0 = tilezero(row0, col0)
- // spill t0
- // ...
- // def row1
- // def col1
- // ldtilecfg
- // t1 = tilezero(row1, col1)
- // reload t0
- // t1 = copy t0
- //---------------------------------------------------------------------
- //
- // If MI dominates the last shape def instruction, we need to insert
- // ldtilecfg after LastShapeMI now. The config doesn't include the
- // current MI.
- // def row0
- // def col0
- // tilezero(row0, col0) <- MI
- // def row1
- // def col1
- // ldtilecfg <- insert
- // tilezero(row1, col1)
- if (LastShapeMI && dominates(MBB, MI, LastShapeMI))
- Config(*(++LastShapeMI->getIterator()));
- MachineOperand *RowMO = &MI.getOperand(1);
- MachineOperand *ColMO = &MI.getOperand(2);
- MachineInstr *RowMI = MRI->getVRegDef(RowMO->getReg());
- MachineInstr *ColMI = MRI->getVRegDef(ColMO->getReg());
- // If the shape is defined in the current MBB, check the domination.
- // FIXME: how about loops?
- if (RowMI->getParent() == &MBB) {
- if (!LastShapeMI)
- LastShapeMI = RowMI;
- else if (dominates(MBB, LastShapeMI, RowMI))
- LastShapeMI = RowMI;
- }
- if (ColMI->getParent() == &MBB) {
- if (!LastShapeMI)
- LastShapeMI = ColMI;
- else if (dominates(MBB, LastShapeMI, ColMI))
- LastShapeMI = ColMI;
- }
- // If a user of the tile register lives out of the current tile config,
- // spill the register and reload it before the user.
- Register TileReg = MI.getOperand(0).getReg();
- if (mayLiveOut(TileReg, LastTileCfg))
- spill(++MI.getIterator(), TileReg, false);
- for (MachineInstr &UseMI : MRI->use_instructions(TileReg)) {
- if (UseMI.getParent() == &MBB) {
- // Only reload for a use that comes after the ldtilecfg.
- if (!LastTileCfg || !dominates(MBB, LastTileCfg, UseMI))
- continue;
- // Reload before UseMI.
- reload(UseMI.getIterator(), TileReg, RowMO, ColMO);
- } else {
- // Don't reload for phi instruction, we handle phi reload separately.
- // TODO: merge the reload for the same user MBB.
- if (!UseMI.isPHI())
- reload(UseMI.getIterator(), TileReg, RowMO, ColMO);
- }
- }
- }
- // Configure tile registers at the head of the MBB.
- if (HasUnconfigTile) {
- MachineInstr *Before;
- if (LastShapeMI == nullptr || LastShapeMI->isPHI())
- Before = &*MBB.getFirstNonPHI();
- else
- Before = &*(++LastShapeMI->getIterator());
- Config(*Before);
- }
- return Change;
- }
- bool X86FastPreTileConfig::runOnMachineFunction(MachineFunction &MFunc) {
- MF = &MFunc;
- MRI = &MFunc.getRegInfo();
- ST = &MFunc.getSubtarget<X86Subtarget>();
- TII = ST->getInstrInfo();
- X86FI = MFunc.getInfo<X86MachineFunctionInfo>();
- MFI = &MFunc.getFrameInfo();
- TRI = ST->getRegisterInfo();
- CfgSS = -1;
- unsigned NumVirtRegs = MRI->getNumVirtRegs();
- // Bail out early if there is no tile register to configure.
- bool HasVirtTileReg = false;
- for (unsigned I = 0, E = NumVirtRegs; I != E; ++I) {
- Register VirtReg = Register::index2VirtReg(I);
- if (MRI->getRegClass(VirtReg)->getID() == X86::TILERegClassID) {
- HasVirtTileReg = true;
- break;
- }
- }
- if (!HasVirtTileReg)
- return false;
- StackSlotForVirtReg.resize(NumVirtRegs);
- MayLiveAcrossBlocks.clear();
- // We will create new virtual registers during config. The *3 is to make
- // sure the virtual register number doesn't exceed the size of the bit
- // vector.
- MayLiveAcrossBlocks.resize(NumVirtRegs * 3);
- bool Change = false;
- assert(MRI->isSSA());
- // Canonicalize the phi nodes first.
- for (MachineBasicBlock &MBB : MFunc)
- canonicalizePHIs(MBB);
- // Loop over all of the basic blocks in reverse post order and insert
- // ldtilecfg for tile registers. The reverse post order facilitates the
- // PHI node conversion.
- ReversePostOrderTraversal<MachineFunction *> RPOT(MF);
- for (MachineBasicBlock *MBB : RPOT) {
- convertPHIs(*MBB);
- Change |= configBasicBlock(*MBB);
- }
- if (Change)
- InitializeTileConfigStackSpace();
- StackSlotForVirtReg.clear();
- return Change;
- }
- FunctionPass *llvm::createX86FastPreTileConfigPass() {
- return new X86FastPreTileConfig();
- }
|