123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188 |
//===-- X86FastTileConfig.cpp - Fast Tile Register Configure---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file Pass to configure the shapes of AMX physical tile registers.
/// AMX registers need to be configured before use. The ldtilecfg instruction
/// is inserted before the fast register allocation pass; however, at that
/// time we don't know the shape of each physical tile register, because
/// register allocation has not been done yet. This pass runs after the
/// register allocation pass. It collects the shape information of each
/// physical tile register and stores the shape in the stack slot that is
/// allocated for loading the configuration into the tile config register.
//
//===----------------------------------------------------------------------===//
- #include "X86.h"
- #include "X86InstrBuilder.h"
- #include "X86MachineFunctionInfo.h"
- #include "X86RegisterInfo.h"
- #include "X86Subtarget.h"
- #include "llvm/CodeGen/MachineFrameInfo.h"
- #include "llvm/CodeGen/MachineFunctionPass.h"
- #include "llvm/CodeGen/MachineInstr.h"
- #include "llvm/CodeGen/MachineRegisterInfo.h"
- #include "llvm/CodeGen/Passes.h"
- #include "llvm/CodeGen/TargetInstrInfo.h"
- #include "llvm/CodeGen/TargetRegisterInfo.h"
- #include "llvm/InitializePasses.h"
- using namespace llvm;
- #define DEBUG_TYPE "fasttileconfig"
- namespace {
- class X86FastTileConfig : public MachineFunctionPass {
- // context
- MachineFunction *MF = nullptr;
- const TargetInstrInfo *TII = nullptr;
- MachineRegisterInfo *MRI = nullptr;
- const TargetRegisterInfo *TRI = nullptr;
- X86MachineFunctionInfo *X86FI = nullptr;
- bool configBasicBlock(MachineBasicBlock &MBB);
- public:
- X86FastTileConfig() : MachineFunctionPass(ID) {}
- /// Return the pass name.
- StringRef getPassName() const override {
- return "Fast Tile Register Configure";
- }
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.setPreservesAll();
- MachineFunctionPass::getAnalysisUsage(AU);
- }
- /// Perform register allocation.
- bool runOnMachineFunction(MachineFunction &MFunc) override;
- MachineFunctionProperties getRequiredProperties() const override {
- return MachineFunctionProperties().set(
- MachineFunctionProperties::Property::NoPHIs);
- }
- static char ID;
- };
- } // end anonymous namespace
- char X86FastTileConfig::ID = 0;
- INITIALIZE_PASS_BEGIN(X86FastTileConfig, DEBUG_TYPE,
- "Fast Tile Register Configure", false, false)
- INITIALIZE_PASS_END(X86FastTileConfig, DEBUG_TYPE,
- "Fast Tile Register Configure", false, false)
- static bool isTileDef(MachineRegisterInfo *MRI, MachineInstr &MI) {
- // There is no phi instruction after register allocation.
- assert(MI.isPHI() == false);
- // The instruction must have 3 operands: tile def, row, col.
- // It should be AMX pseudo instruction that have shape operand.
- if (MI.isDebugInstr() || MI.isCopy() || MI.getNumOperands() < 3 ||
- !MI.isPseudo())
- return false;
- MachineOperand &MO = MI.getOperand(0);
- if (MO.isReg()) {
- Register Reg = MO.getReg();
- // FIXME it may be used after Greedy RA and the physical
- // register is not rewritten yet.
- if (Reg.isVirtual() &&
- MRI->getRegClass(Reg)->getID() == X86::TILERegClassID)
- return true;
- if (Reg >= X86::TMM0 && Reg <= X86::TMM7)
- return true;
- }
- return false;
- }
- // PreTileConfig should configure the tile registers based on basic
- // block.
- bool X86FastTileConfig::configBasicBlock(MachineBasicBlock &MBB) {
- bool Change = false;
- SmallVector<std::pair<unsigned, ShapeT>, 6> ShapeInfos;
- for (MachineInstr &MI : reverse(MBB)) {
- if (!isTileDef(MRI, MI) && MI.getOpcode() != X86::PLDTILECFGV)
- continue;
- // AMX instructions that define tile register.
- if (MI.getOpcode() != X86::PLDTILECFGV) {
- MachineOperand &Row = MI.getOperand(1);
- MachineOperand &Col = MI.getOperand(2);
- unsigned TMMIdx = MI.getOperand(0).getReg() - X86::TMM0;
- ShapeInfos.push_back({TMMIdx, ShapeT(&Row, &Col)});
- } else { // PLDTILECFGV
- // Rewrite the shape information to memory. Stack slot should have
- // been initialized to zero in pre config.
- int SS = MI.getOperand(0).getIndex(); // tile config stack slot.
- for (auto &ShapeInfo : ShapeInfos) {
- DebugLoc DL;
- unsigned TMMIdx = ShapeInfo.first;
- Register RowReg = ShapeInfo.second.getRow()->getReg();
- Register ColReg = ShapeInfo.second.getCol()->getReg();
- // Here is the data format for the tile config.
- // 0 palette
- // 1 start_row
- // 2-15 reserved, must be zero
- // 16-17 tile0.colsb Tile 0 bytes per row.
- // 18-19 tile1.colsb Tile 1 bytes per row.
- // 20-21 tile2.colsb Tile 2 bytes per row.
- // ... (sequence continues)
- // 30-31 tile7.colsb Tile 7 bytes per row.
- // 32-47 reserved, must be zero
- // 48 tile0.rows Tile 0 rows.
- // 49 tile1.rows Tile 1 rows.
- // 50 tile2.rows Tile 2 rows.
- // ... (sequence continues)
- // 55 tile7.rows Tile 7 rows.
- // 56-63 reserved, must be zero
- int RowOffset = 48 + TMMIdx;
- int ColOffset = 16 + TMMIdx * 2;
- Register SubRowReg = TRI->getSubReg(RowReg, X86::sub_8bit);
- BuildMI(MBB, MI, DL, TII->get(X86::IMPLICIT_DEF), SubRowReg);
- MachineInstrBuilder StoreRow =
- BuildMI(MBB, MI, DL, TII->get(X86::MOV8mr));
- addFrameReference(StoreRow, SS, RowOffset).addReg(SubRowReg);
- MachineInstrBuilder StoreCol =
- BuildMI(MBB, MI, DL, TII->get(X86::MOV16mr));
- addFrameReference(StoreCol, SS, ColOffset).addReg(ColReg);
- }
- ShapeInfos.clear();
- Change = true;
- }
- }
- if (Change)
- X86FI->setHasVirtualTileReg(true);
- return Change;
- }
- bool X86FastTileConfig::runOnMachineFunction(MachineFunction &MFunc) {
- MF = &MFunc;
- MRI = &MFunc.getRegInfo();
- const TargetSubtargetInfo *ST = &MFunc.getSubtarget<X86Subtarget>();
- TRI = ST->getRegisterInfo();
- TII = MFunc.getSubtarget().getInstrInfo();
- X86FI = MFunc.getInfo<X86MachineFunctionInfo>();
- bool Change = false;
- // Loop over all of the basic blocks, eliminating virtual register references
- for (MachineBasicBlock &MBB : MFunc)
- Change |= configBasicBlock(MBB);
- return Change;
- }
- FunctionPass *llvm::createX86FastTileConfigPass() {
- return new X86FastTileConfig();
- }
|