- //===--------------------- InstrBuilder.cpp ---------------------*- C++ -*-===//
- //
- // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
- // See https://llvm.org/LICENSE.txt for license information.
- // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
- //
- //===----------------------------------------------------------------------===//
- /// \file
- ///
- /// This file implements the InstrBuilder interface.
- ///
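- /// Typical usage, as a sketch only (Insts, Instruments, and Processed are
- /// hypothetical locals that mirror how the llvm-mca driver exercises this
- /// interface):
- ///
- /// \code
- ///   mca::InstrBuilder IB(STI, MCII, MRI, &MCIA, IM);
- ///   SmallVector<std::unique_ptr<mca::Instruction>> Processed;
- ///   for (const MCInst &MCI : Insts) {
- ///     auto Inst = IB.createInstruction(MCI, Instruments);
- ///     if (!Inst)
- ///       return Inst.takeError(); // E.g. an unsupported instruction.
- ///     Processed.push_back(std::move(*Inst));
- ///   }
- /// \endcode
- ///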
- //===----------------------------------------------------------------------===//
- #include "llvm/MCA/InstrBuilder.h"
- #include "llvm/ADT/APInt.h"
- #include "llvm/ADT/DenseMap.h"
- #include "llvm/ADT/Statistic.h"
- #include "llvm/MC/MCInst.h"
- #include "llvm/Support/Debug.h"
- #include "llvm/Support/WithColor.h"
- #include "llvm/Support/raw_ostream.h"
- #define DEBUG_TYPE "llvm-mca-instrbuilder"
- namespace llvm {
- namespace mca {
- char RecycledInstErr::ID = 0;
- InstrBuilder::InstrBuilder(const llvm::MCSubtargetInfo &sti,
- const llvm::MCInstrInfo &mcii,
- const llvm::MCRegisterInfo &mri,
- const llvm::MCInstrAnalysis *mcia,
- const mca::InstrumentManager &im)
- : STI(sti), MCII(mcii), MRI(mri), MCIA(mcia), IM(im), FirstCallInst(true),
- FirstReturnInst(true) {
- const MCSchedModel &SM = STI.getSchedModel();
- ProcResourceMasks.resize(SM.getNumProcResourceKinds());
- computeProcResourceMasks(STI.getSchedModel(), ProcResourceMasks);
- }
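- // Note on the mask encoding assumed throughout this file (as produced by
- // computeProcResourceMasks): every processor resource unit owns a distinct
- // bit, and a resource group mask is the OR of its units' bits plus one
- // extra, more significant bit that identifies the group itself. For example,
- // in a hypothetical model with units P0=0b001 and P1=0b010, group P01 gets
- // mask 0b111, and PowerOf2Floor(0b111) == 0b100 recovers the group bit.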
- static void initializeUsedResources(InstrDesc &ID,
- const MCSchedClassDesc &SCDesc,
- const MCSubtargetInfo &STI,
- ArrayRef<uint64_t> ProcResourceMasks) {
- const MCSchedModel &SM = STI.getSchedModel();
- // Populate resources consumed.
- using ResourcePlusCycles = std::pair<uint64_t, ResourceUsage>;
- SmallVector<ResourcePlusCycles, 4> Worklist;
- // Track cycles contributed by resources that are in a "Super" relationship.
- // This is required if we want to correctly match the behavior of method
- // SubtargetEmitter::ExpandProcResource() in Tablegen. When computing the set
- // of "consumed" processor resources and resource cycles, the logic in
- // ExpandProcResource() doesn't update the number of resource cycles
- // contributed by a "Super" resource to a group.
- // We need to take this into account when we find that a processor resource is
- // part of a group, and it is also used as the "Super" of other resources.
- // This map stores the number of cycles contributed by sub-resources that are
- // part of a "Super" resource. The key value is the "Super" resource mask ID.
- DenseMap<uint64_t, unsigned> SuperResources;
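- // For example (hypothetical model): if resource P0 declares P01 as its
- // "Super" resource, a write that consumes P0 for 2cy adds 2 to
- // SuperResources[Mask(P01)], so that those cycles can later be discounted
- // from what P01 appears to consume.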
- unsigned NumProcResources = SM.getNumProcResourceKinds();
- APInt Buffers(NumProcResources, 0);
- bool AllInOrderResources = true;
- bool AnyDispatchHazards = false;
- for (unsigned I = 0, E = SCDesc.NumWriteProcResEntries; I < E; ++I) {
- const MCWriteProcResEntry *PRE = STI.getWriteProcResBegin(&SCDesc) + I;
- const MCProcResourceDesc &PR = *SM.getProcResource(PRE->ProcResourceIdx);
- if (!PRE->Cycles) {
- #ifndef NDEBUG
- WithColor::warning()
- << "Ignoring invalid write of zero cycles on processor resource "
- << PR.Name << "\n";
- WithColor::note() << "found in scheduling class " << SCDesc.Name
- << " (write index #" << I << ")\n";
- #endif
- continue;
- }
- uint64_t Mask = ProcResourceMasks[PRE->ProcResourceIdx];
- if (PR.BufferSize < 0) {
- AllInOrderResources = false;
- } else {
- Buffers.setBit(getResourceStateIndex(Mask));
- AnyDispatchHazards |= (PR.BufferSize == 0);
- AllInOrderResources &= (PR.BufferSize <= 1);
- }
- CycleSegment RCy(0, PRE->Cycles, false);
- Worklist.emplace_back(ResourcePlusCycles(Mask, ResourceUsage(RCy)));
- if (PR.SuperIdx) {
- uint64_t Super = ProcResourceMasks[PR.SuperIdx];
- SuperResources[Super] += PRE->Cycles;
- }
- }
- ID.MustIssueImmediately = AllInOrderResources && AnyDispatchHazards;
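- // That is: an instruction must issue immediately if every consumed resource
- // is in-order (BufferSize <= 1) and at least one of them declares a
- // dispatch hazard (BufferSize == 0).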
- // Sort elements by mask popcount, so that we prioritize resource units over
- // resource groups, and smaller groups over larger groups.
- sort(Worklist, [](const ResourcePlusCycles &A, const ResourcePlusCycles &B) {
- unsigned popcntA = llvm::popcount(A.first);
- unsigned popcntB = llvm::popcount(B.first);
- if (popcntA < popcntB)
- return true;
- if (popcntA > popcntB)
- return false;
- return A.first < B.first;
- });
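- // E.g., with hypothetical masks, a single unit (mask 0b0001, popcount 1)
- // sorts before a two-unit group (mask 0b0111, popcount 3 including the
- // group bit), which in turn sorts before any larger group.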
- uint64_t UsedResourceUnits = 0;
- uint64_t UsedResourceGroups = 0;
- uint64_t UnitsFromResourceGroups = 0;
- // Remove cycles contributed by smaller resources, and check if there
- // are partially overlapping resource groups.
- ID.HasPartiallyOverlappingGroups = false;
- for (unsigned I = 0, E = Worklist.size(); I < E; ++I) {
- ResourcePlusCycles &A = Worklist[I];
- if (!A.second.size()) {
- assert(llvm::popcount(A.first) > 1 && "Expected a group!");
- UsedResourceGroups |= PowerOf2Floor(A.first);
- continue;
- }
- ID.Resources.emplace_back(A);
- uint64_t NormalizedMask = A.first;
- if (llvm::popcount(A.first) == 1) {
- UsedResourceUnits |= A.first;
- } else {
- // Remove the leading 1 from the resource group mask.
- NormalizedMask ^= PowerOf2Floor(NormalizedMask);
- if (UnitsFromResourceGroups & NormalizedMask)
- ID.HasPartiallyOverlappingGroups = true;
- UnitsFromResourceGroups |= NormalizedMask;
- UsedResourceGroups |= (A.first ^ NormalizedMask);
- }
- for (unsigned J = I + 1; J < E; ++J) {
- ResourcePlusCycles &B = Worklist[J];
- if ((NormalizedMask & B.first) == NormalizedMask) {
- B.second.CS.subtract(A.second.size() - SuperResources[A.first]);
- if (llvm::popcount(B.first) > 1)
- B.second.NumUnits++;
- }
- }
- }
- // A SchedWrite may specify a number of cycles in which a resource group
- // is reserved. For example (on target x86; cpu Haswell):
- //
- // SchedWriteRes<[HWPort0, HWPort1, HWPort01]> {
- // let ResourceCycles = [2, 2, 3];
- // }
- //
- // This means:
- // Resource units HWPort0 and HWPort1 are both used for 2cy.
- // Resource group HWPort01 is the union of HWPort0 and HWPort1.
- // Since this write touches both HWPort0 and HWPort1 for 2cy, HWPort01
- // will not be usable for 2 entire cycles from instruction issue.
- //
- // On top of those 2cy, SchedWriteRes explicitly specifies an extra latency
- // of 3 cycles for HWPort01. This tool assumes that the 3cy latency is an
- // extra delay on top of the 2-cycle latency.
- // During those extra cycles, HWPort01 is not usable by other instructions.
- for (ResourcePlusCycles &RPC : ID.Resources) {
- if (llvm::popcount(RPC.first) > 1 && !RPC.second.isReserved()) {
- // Remove the leading 1 from the resource group mask.
- uint64_t Mask = RPC.first ^ PowerOf2Floor(RPC.first);
- unsigned MaxResourceUnits = llvm::popcount(Mask);
- if (RPC.second.NumUnits > MaxResourceUnits) {
- RPC.second.setReserved();
- RPC.second.NumUnits = MaxResourceUnits;
- }
- }
- }
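- // In the Haswell example above, HWPort01 ends up with NumUnits == 3 (its
- // initial count of 1, plus one increment per consumed sub-resource), which
- // exceeds its two units; the group is therefore marked as reserved.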
- // Identify extra buffers that are consumed through super resources.
- for (const std::pair<uint64_t, unsigned> &SR : SuperResources) {
- for (unsigned I = 1, E = NumProcResources; I < E; ++I) {
- const MCProcResourceDesc &PR = *SM.getProcResource(I);
- if (PR.BufferSize == -1)
- continue;
- uint64_t Mask = ProcResourceMasks[I];
- if (Mask != SR.first && ((Mask & SR.first) == SR.first))
- Buffers.setBit(getResourceStateIndex(Mask));
- }
- }
- ID.UsedBuffers = Buffers.getZExtValue();
- ID.UsedProcResUnits = UsedResourceUnits;
- ID.UsedProcResGroups = UsedResourceGroups;
- LLVM_DEBUG({
- for (const std::pair<uint64_t, ResourceUsage> &R : ID.Resources)
- dbgs() << "\t\tResource Mask=" << format_hex(R.first, 16) << ", "
- << "Reserved=" << R.second.isReserved() << ", "
- << "#Units=" << R.second.NumUnits << ", "
- << "cy=" << R.second.size() << '\n';
- uint64_t BufferIDs = ID.UsedBuffers;
- while (BufferIDs) {
- uint64_t Current = BufferIDs & (-BufferIDs);
- dbgs() << "\t\tBuffer Mask=" << format_hex(Current, 16) << '\n';
- BufferIDs ^= Current;
- }
- dbgs() << "\t\t Used Units=" << format_hex(ID.UsedProcResUnits, 16) << '\n';
- dbgs() << "\t\tUsed Groups=" << format_hex(ID.UsedProcResGroups, 16)
- << '\n';
- dbgs() << "\t\tHasPartiallyOverlappingGroups="
- << ID.HasPartiallyOverlappingGroups << '\n';
- });
- }
- static void computeMaxLatency(InstrDesc &ID, const MCInstrDesc &MCDesc,
- const MCSchedClassDesc &SCDesc,
- const MCSubtargetInfo &STI) {
- if (MCDesc.isCall()) {
- // We cannot estimate how long this call will take.
- // Artificially set an arbitrarily high latency (100cy).
- ID.MaxLatency = 100U;
- return;
- }
- int Latency = MCSchedModel::computeInstrLatency(STI, SCDesc);
- // If latency is unknown, then conservatively assume a MaxLatency of 100cy.
- ID.MaxLatency = Latency < 0 ? 100U : static_cast<unsigned>(Latency);
- }
- static Error verifyOperands(const MCInstrDesc &MCDesc, const MCInst &MCI) {
- // Count register definitions, and skip non-register operands in the process.
- unsigned I, E;
- unsigned NumExplicitDefs = MCDesc.getNumDefs();
- for (I = 0, E = MCI.getNumOperands(); NumExplicitDefs && I < E; ++I) {
- const MCOperand &Op = MCI.getOperand(I);
- if (Op.isReg())
- --NumExplicitDefs;
- }
- if (NumExplicitDefs) {
- return make_error<InstructionError<MCInst>>(
- "Expected more register operand definitions.", MCI);
- }
- if (MCDesc.hasOptionalDef()) {
- // Always assume that the optional definition is the last operand.
- const MCOperand &Op = MCI.getOperand(MCDesc.getNumOperands() - 1);
- if (I == MCI.getNumOperands() || !Op.isReg()) {
- std::string Message =
- "expected a register operand for an optional definition. Instruction "
- "has not been correctly analyzed.";
- return make_error<InstructionError<MCInst>>(Message, MCI);
- }
- }
- return ErrorSuccess();
- }
- void InstrBuilder::populateWrites(InstrDesc &ID, const MCInst &MCI,
- unsigned SchedClassID) {
- const MCInstrDesc &MCDesc = MCII.get(MCI.getOpcode());
- const MCSchedModel &SM = STI.getSchedModel();
- const MCSchedClassDesc &SCDesc = *SM.getSchedClassDesc(SchedClassID);
- // Assumptions made by this algorithm:
- // 1. The number of explicit and implicit register definitions in a MCInst
- // matches the number of explicit and implicit definitions according to
- // the opcode descriptor (MCInstrDesc).
- // 2. Uses start at index #(MCDesc.getNumDefs()).
- // 3. There can only be a single optional register definition, and it is
- // either the last operand of the sequence (excluding extra operands
- // contributed by variadic opcodes) or one of the explicit register
- // definitions. The latter occurs for some Thumb1 instructions.
- //
- // These assumptions work quite well for most out-of-order in-tree targets
- // like x86. This is mainly because the vast majority of instructions are
- // expanded to MCInst using straightforward lowering logic that preserves
- // the ordering of the operands.
- //
- // About assumption 1.
- // The algorithm allows non-register operands between register operand
- // definitions. This helps to handle some special ARM instructions with
- // implicit operand increment (-mtriple=armv7):
- //
- // vld1.32 {d18, d19}, [r1]! @ <MCInst #1463 VLD1q32wb_fixed
- // @ <MCOperand Reg:59>
- // @ <MCOperand Imm:0> (!!)
- // @ <MCOperand Reg:67>
- // @ <MCOperand Imm:0>
- // @ <MCOperand Imm:14>
- // @ <MCOperand Reg:0>>
- //
- // MCDesc reports:
- // 6 explicit operands.
- // 1 optional definition
- // 2 explicit definitions (!!)
- //
- // The presence of an 'Imm' operand between the two register definitions
- // breaks the assumption that "register definitions are always at the
- // beginning of the operand sequence".
- //
- // To work around this issue, this algorithm ignores (i.e. skips) any
- // non-register operands between register definitions. The optional
- // definition is still at index #(NumOperands-1).
- //
- // According to assumption 2, register reads start at #(NumExplicitDefs-1).
- // That means register R1 from the example is both read and written.
- unsigned NumExplicitDefs = MCDesc.getNumDefs();
- unsigned NumImplicitDefs = MCDesc.implicit_defs().size();
- unsigned NumWriteLatencyEntries = SCDesc.NumWriteLatencyEntries;
- unsigned TotalDefs = NumExplicitDefs + NumImplicitDefs;
- if (MCDesc.hasOptionalDef())
- TotalDefs++;
- unsigned NumVariadicOps = MCI.getNumOperands() - MCDesc.getNumOperands();
- ID.Writes.resize(TotalDefs + NumVariadicOps);
- // Iterate over the operands list, and skip non-register operands.
- // The first NumExplicitDefs register operands are expected to be register
- // definitions.
- unsigned CurrentDef = 0;
- unsigned OptionalDefIdx = MCDesc.getNumOperands() - 1;
- unsigned i = 0;
- for (; i < MCI.getNumOperands() && CurrentDef < NumExplicitDefs; ++i) {
- const MCOperand &Op = MCI.getOperand(i);
- if (!Op.isReg())
- continue;
- if (MCDesc.operands()[CurrentDef].isOptionalDef()) {
- OptionalDefIdx = CurrentDef++;
- continue;
- }
- WriteDescriptor &Write = ID.Writes[CurrentDef];
- Write.OpIndex = i;
- if (CurrentDef < NumWriteLatencyEntries) {
- const MCWriteLatencyEntry &WLE =
- *STI.getWriteLatencyEntry(&SCDesc, CurrentDef);
- // Conservatively default to MaxLatency.
- Write.Latency =
- WLE.Cycles < 0 ? ID.MaxLatency : static_cast<unsigned>(WLE.Cycles);
- Write.SClassOrWriteResourceID = WLE.WriteResourceID;
- } else {
- // Assign a default latency for this write.
- Write.Latency = ID.MaxLatency;
- Write.SClassOrWriteResourceID = 0;
- }
- Write.IsOptionalDef = false;
- LLVM_DEBUG({
- dbgs() << "\t\t[Def] OpIdx=" << Write.OpIndex
- << ", Latency=" << Write.Latency
- << ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n';
- });
- CurrentDef++;
- }
- assert(CurrentDef == NumExplicitDefs &&
- "Expected more register operand definitions.");
- for (CurrentDef = 0; CurrentDef < NumImplicitDefs; ++CurrentDef) {
- unsigned Index = NumExplicitDefs + CurrentDef;
- WriteDescriptor &Write = ID.Writes[Index];
- Write.OpIndex = ~CurrentDef;
- Write.RegisterID = MCDesc.implicit_defs()[CurrentDef];
- if (Index < NumWriteLatencyEntries) {
- const MCWriteLatencyEntry &WLE =
- *STI.getWriteLatencyEntry(&SCDesc, Index);
- // Conservatively default to MaxLatency.
- Write.Latency =
- WLE.Cycles < 0 ? ID.MaxLatency : static_cast<unsigned>(WLE.Cycles);
- Write.SClassOrWriteResourceID = WLE.WriteResourceID;
- } else {
- // Assign a default latency for this write.
- Write.Latency = ID.MaxLatency;
- Write.SClassOrWriteResourceID = 0;
- }
- Write.IsOptionalDef = false;
- assert(Write.RegisterID != 0 && "Expected a valid phys register!");
- LLVM_DEBUG({
- dbgs() << "\t\t[Def][I] OpIdx=" << ~Write.OpIndex
- << ", PhysReg=" << MRI.getName(Write.RegisterID)
- << ", Latency=" << Write.Latency
- << ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n';
- });
- }
- if (MCDesc.hasOptionalDef()) {
- WriteDescriptor &Write = ID.Writes[NumExplicitDefs + NumImplicitDefs];
- Write.OpIndex = OptionalDefIdx;
- // Assign a default latency for this write.
- Write.Latency = ID.MaxLatency;
- Write.SClassOrWriteResourceID = 0;
- Write.IsOptionalDef = true;
- LLVM_DEBUG({
- dbgs() << "\t\t[Def][O] OpIdx=" << Write.OpIndex
- << ", Latency=" << Write.Latency
- << ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n';
- });
- }
- if (!NumVariadicOps)
- return;
- bool AssumeUsesOnly = !MCDesc.variadicOpsAreDefs();
- CurrentDef = NumExplicitDefs + NumImplicitDefs + MCDesc.hasOptionalDef();
- for (unsigned I = 0, OpIndex = MCDesc.getNumOperands();
- I < NumVariadicOps && !AssumeUsesOnly; ++I, ++OpIndex) {
- const MCOperand &Op = MCI.getOperand(OpIndex);
- if (!Op.isReg())
- continue;
- WriteDescriptor &Write = ID.Writes[CurrentDef];
- Write.OpIndex = OpIndex;
- // Assign a default latency for this write.
- Write.Latency = ID.MaxLatency;
- Write.SClassOrWriteResourceID = 0;
- Write.IsOptionalDef = false;
- ++CurrentDef;
- LLVM_DEBUG({
- dbgs() << "\t\t[Def][V] OpIdx=" << Write.OpIndex
- << ", Latency=" << Write.Latency
- << ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n';
- });
- }
- ID.Writes.resize(CurrentDef);
- }
- void InstrBuilder::populateReads(InstrDesc &ID, const MCInst &MCI,
- unsigned SchedClassID) {
- const MCInstrDesc &MCDesc = MCII.get(MCI.getOpcode());
- unsigned NumExplicitUses = MCDesc.getNumOperands() - MCDesc.getNumDefs();
- unsigned NumImplicitUses = MCDesc.implicit_uses().size();
- // Remove the optional definition.
- if (MCDesc.hasOptionalDef())
- --NumExplicitUses;
- unsigned NumVariadicOps = MCI.getNumOperands() - MCDesc.getNumOperands();
- unsigned TotalUses = NumExplicitUses + NumImplicitUses + NumVariadicOps;
- ID.Reads.resize(TotalUses);
- unsigned CurrentUse = 0;
- for (unsigned I = 0, OpIndex = MCDesc.getNumDefs(); I < NumExplicitUses;
- ++I, ++OpIndex) {
- const MCOperand &Op = MCI.getOperand(OpIndex);
- if (!Op.isReg())
- continue;
- ReadDescriptor &Read = ID.Reads[CurrentUse];
- Read.OpIndex = OpIndex;
- Read.UseIndex = I;
- Read.SchedClassID = SchedClassID;
- ++CurrentUse;
- LLVM_DEBUG(dbgs() << "\t\t[Use] OpIdx=" << Read.OpIndex
- << ", UseIndex=" << Read.UseIndex << '\n');
- }
- // For the purpose of ReadAdvance, implicit uses come directly after explicit
- // uses. The "UseIndex" must be updated according to that implicit layout.
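- // For example, an instruction with two explicit uses and one implicit use
- // assigns UseIndex 0 and 1 to the explicit reads, and UseIndex 2 to the
- // implicit read.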
- for (unsigned I = 0; I < NumImplicitUses; ++I) {
- ReadDescriptor &Read = ID.Reads[CurrentUse + I];
- Read.OpIndex = ~I;
- Read.UseIndex = NumExplicitUses + I;
- Read.RegisterID = MCDesc.implicit_uses()[I];
- Read.SchedClassID = SchedClassID;
- LLVM_DEBUG(dbgs() << "\t\t[Use][I] OpIdx=" << ~Read.OpIndex
- << ", UseIndex=" << Read.UseIndex << ", RegisterID="
- << MRI.getName(Read.RegisterID) << '\n');
- }
- CurrentUse += NumImplicitUses;
- bool AssumeDefsOnly = MCDesc.variadicOpsAreDefs();
- for (unsigned I = 0, OpIndex = MCDesc.getNumOperands();
- I < NumVariadicOps && !AssumeDefsOnly; ++I, ++OpIndex) {
- const MCOperand &Op = MCI.getOperand(OpIndex);
- if (!Op.isReg())
- continue;
- ReadDescriptor &Read = ID.Reads[CurrentUse];
- Read.OpIndex = OpIndex;
- Read.UseIndex = NumExplicitUses + NumImplicitUses + I;
- Read.SchedClassID = SchedClassID;
- ++CurrentUse;
- LLVM_DEBUG(dbgs() << "\t\t[Use][V] OpIdx=" << Read.OpIndex
- << ", UseIndex=" << Read.UseIndex << '\n');
- }
- ID.Reads.resize(CurrentUse);
- }
- Error InstrBuilder::verifyInstrDesc(const InstrDesc &ID,
- const MCInst &MCI) const {
- if (ID.NumMicroOps != 0)
- return ErrorSuccess();
- bool UsesBuffers = ID.UsedBuffers;
- bool UsesResources = !ID.Resources.empty();
- if (!UsesBuffers && !UsesResources)
- return ErrorSuccess();
- // FIXME: see PR44797. We should revisit these checks and possibly move them
- // in CodeGenSchedule.cpp.
- StringRef Message = "found an inconsistent instruction that decodes to zero "
- "opcodes and that consumes scheduler resources.";
- return make_error<InstructionError<MCInst>>(std::string(Message), MCI);
- }
- Expected<const InstrDesc &>
- InstrBuilder::createInstrDescImpl(const MCInst &MCI,
- const SmallVector<SharedInstrument> &IVec) {
- assert(STI.getSchedModel().hasInstrSchedModel() &&
- "Itineraries are not yet supported!");
- // Obtain the instruction descriptor from the opcode.
- unsigned short Opcode = MCI.getOpcode();
- const MCInstrDesc &MCDesc = MCII.get(Opcode);
- const MCSchedModel &SM = STI.getSchedModel();
- // Then obtain the scheduling class information from the instruction.
- // Allow the InstrumentManager to override and use a different SchedClassID.
- unsigned SchedClassID = IM.getSchedClassID(MCII, MCI, IVec);
- bool IsVariant = SM.getSchedClassDesc(SchedClassID)->isVariant();
- // Try to resolve variant scheduling classes.
- if (IsVariant) {
- unsigned CPUID = SM.getProcessorID();
- while (SchedClassID && SM.getSchedClassDesc(SchedClassID)->isVariant())
- SchedClassID =
- STI.resolveVariantSchedClass(SchedClassID, &MCI, &MCII, CPUID);
- if (!SchedClassID) {
- return make_error<InstructionError<MCInst>>(
- "unable to resolve scheduling class for write variant.", MCI);
- }
- }
- // Check if this instruction is supported. Otherwise, report an error.
- const MCSchedClassDesc &SCDesc = *SM.getSchedClassDesc(SchedClassID);
- if (SCDesc.NumMicroOps == MCSchedClassDesc::InvalidNumMicroOps) {
- return make_error<InstructionError<MCInst>>(
- "found an unsupported instruction in the input assembly sequence.",
- MCI);
- }
- LLVM_DEBUG(dbgs() << "\n\t\tOpcode Name= " << MCII.getName(Opcode) << '\n');
- LLVM_DEBUG(dbgs() << "\t\tSchedClassID=" << SchedClassID << '\n');
- LLVM_DEBUG(dbgs() << "\t\tOpcode=" << Opcode << '\n');
- // Create a new empty descriptor.
- std::unique_ptr<InstrDesc> ID = std::make_unique<InstrDesc>();
- ID->NumMicroOps = SCDesc.NumMicroOps;
- ID->SchedClassID = SchedClassID;
- if (MCDesc.isCall() && FirstCallInst) {
- // We don't correctly model calls.
- WithColor::warning() << "found a call in the input assembly sequence.\n";
- WithColor::note() << "call instructions are not correctly modeled. "
- << "Assume a latency of 100cy.\n";
- FirstCallInst = false;
- }
- if (MCDesc.isReturn() && FirstReturnInst) {
- WithColor::warning() << "found a return instruction in the input"
- << " assembly sequence.\n";
- WithColor::note() << "program counter updates are ignored.\n";
- FirstReturnInst = false;
- }
- initializeUsedResources(*ID, SCDesc, STI, ProcResourceMasks);
- computeMaxLatency(*ID, MCDesc, SCDesc, STI);
- if (Error Err = verifyOperands(MCDesc, MCI))
- return std::move(Err);
- populateWrites(*ID, MCI, SchedClassID);
- populateReads(*ID, MCI, SchedClassID);
- LLVM_DEBUG(dbgs() << "\t\tMaxLatency=" << ID->MaxLatency << '\n');
- LLVM_DEBUG(dbgs() << "\t\tNumMicroOps=" << ID->NumMicroOps << '\n');
- // Validation check on the instruction descriptor.
- if (Error Err = verifyInstrDesc(*ID, MCI))
- return std::move(Err);
- // Now add the new descriptor.
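- // Descriptors of non-variant, non-variadic opcodes are recyclable and are
- // cached by (Opcode, SchedClassID); variant and variadic descriptors are
- // instead keyed by the specific MCInst.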
- bool IsVariadic = MCDesc.isVariadic();
- if ((ID->IsRecyclable = !IsVariadic && !IsVariant)) {
- auto DKey = std::make_pair(MCI.getOpcode(), SchedClassID);
- Descriptors[DKey] = std::move(ID);
- return *Descriptors[DKey];
- }
- auto VDKey = std::make_pair(&MCI, SchedClassID);
- VariantDescriptors[VDKey] = std::move(ID);
- return *VariantDescriptors[VDKey];
- }
- Expected<const InstrDesc &>
- InstrBuilder::getOrCreateInstrDesc(const MCInst &MCI,
- const SmallVector<SharedInstrument> &IVec) {
- // Cache lookup using the SchedClassID provided by the InstrumentManager.
- unsigned SchedClassID = IM.getSchedClassID(MCII, MCI, IVec);
- auto DKey = std::make_pair(MCI.getOpcode(), SchedClassID);
- if (Descriptors.find_as(DKey) != Descriptors.end())
- return *Descriptors[DKey];
- unsigned CPUID = STI.getSchedModel().getProcessorID();
- SchedClassID = STI.resolveVariantSchedClass(SchedClassID, &MCI, &MCII, CPUID);
- auto VDKey = std::make_pair(&MCI, SchedClassID);
- if (VariantDescriptors.find(VDKey) != VariantDescriptors.end())
- return *VariantDescriptors[VDKey];
- return createInstrDescImpl(MCI, IVec);
- }
- STATISTIC(NumVariantInst, "Number of MCInsts that don't have a static Desc");
- Expected<std::unique_ptr<Instruction>>
- InstrBuilder::createInstruction(const MCInst &MCI,
- const SmallVector<SharedInstrument> &IVec) {
- Expected<const InstrDesc &> DescOrErr = getOrCreateInstrDesc(MCI, IVec);
- if (!DescOrErr)
- return DescOrErr.takeError();
- const InstrDesc &D = *DescOrErr;
- Instruction *NewIS = nullptr;
- std::unique_ptr<Instruction> CreatedIS;
- bool IsInstRecycled = false;
- if (!D.IsRecyclable)
- ++NumVariantInst;
- if (D.IsRecyclable && InstRecycleCB) {
- if (auto *I = InstRecycleCB(D)) {
- NewIS = I;
- NewIS->reset();
- IsInstRecycled = true;
- }
- }
- if (!IsInstRecycled) {
- CreatedIS = std::make_unique<Instruction>(D, MCI.getOpcode());
- NewIS = CreatedIS.get();
- }
- const MCInstrDesc &MCDesc = MCII.get(MCI.getOpcode());
- const MCSchedClassDesc &SCDesc =
- *STI.getSchedModel().getSchedClassDesc(D.SchedClassID);
- NewIS->setMayLoad(MCDesc.mayLoad());
- NewIS->setMayStore(MCDesc.mayStore());
- NewIS->setHasSideEffects(MCDesc.hasUnmodeledSideEffects());
- NewIS->setBeginGroup(SCDesc.BeginGroup);
- NewIS->setEndGroup(SCDesc.EndGroup);
- NewIS->setRetireOOO(SCDesc.RetireOOO);
- // Check if this is a dependency-breaking instruction.
- APInt Mask;
- bool IsZeroIdiom = false;
- bool IsDepBreaking = false;
- if (MCIA) {
- unsigned ProcID = STI.getSchedModel().getProcessorID();
- IsZeroIdiom = MCIA->isZeroIdiom(MCI, Mask, ProcID);
- IsDepBreaking =
- IsZeroIdiom || MCIA->isDependencyBreaking(MCI, Mask, ProcID);
- if (MCIA->isOptimizableRegisterMove(MCI, ProcID))
- NewIS->setOptimizableMove();
- }
- // Initialize Reads first.
- MCPhysReg RegID = 0;
- size_t Idx = 0U;
- for (const ReadDescriptor &RD : D.Reads) {
- if (!RD.isImplicitRead()) {
- // Explicit read.
- const MCOperand &Op = MCI.getOperand(RD.OpIndex);
- // Skip non-register operands.
- if (!Op.isReg())
- continue;
- RegID = Op.getReg();
- } else {
- // Implicit read.
- RegID = RD.RegisterID;
- }
- // Skip invalid register operands.
- if (!RegID)
- continue;
- // Okay, this is a register operand. Create a ReadState for it.
- ReadState *RS = nullptr;
- if (IsInstRecycled && Idx < NewIS->getUses().size()) {
- NewIS->getUses()[Idx] = ReadState(RD, RegID);
- RS = &NewIS->getUses()[Idx++];
- } else {
- NewIS->getUses().emplace_back(RD, RegID);
- RS = &NewIS->getUses().back();
- ++Idx;
- }
- if (IsDepBreaking) {
- // A mask of all zeroes means: explicit input operands are not
- // independent.
- if (Mask.isZero()) {
- if (!RD.isImplicitRead())
- RS->setIndependentFromDef();
- } else {
- // Check if this register operand is independent according to `Mask`.
- // Note that Mask may not have enough bits to describe all explicit and
- // implicit input operands. If this register operand doesn't have a
- // corresponding bit in Mask, then conservatively assume that it is
- // dependent.
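- // E.g., for a hypothetical Mask == 0b10, only the read with UseIndex 1
- // is marked as independent from prior definitions.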
- if (Mask.getBitWidth() > RD.UseIndex) {
- // Okay. This mask describes register use `RD.UseIndex`.
- if (Mask[RD.UseIndex])
- RS->setIndependentFromDef();
- }
- }
- }
- }
- if (IsInstRecycled && Idx < NewIS->getUses().size())
- NewIS->getUses().pop_back_n(NewIS->getUses().size() - Idx);
- // Early exit if there are no writes.
- if (D.Writes.empty()) {
- if (IsInstRecycled)
- return llvm::make_error<RecycledInstErr>(NewIS);
- else
- return std::move(CreatedIS);
- }
- // Track register writes that implicitly clear the upper portion of the
- // underlying super-registers using an APInt.
- APInt WriteMask(D.Writes.size(), 0);
- // Now query the MCInstrAnalysis object to obtain information about which
- // register writes implicitly clear the upper portion of a super-register.
- if (MCIA)
- MCIA->clearsSuperRegisters(MRI, MCI, WriteMask);
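- // On X86, for example, a write to EAX implicitly zeroes the upper half of
- // RAX; the WriteMask bit of such a write is set by the call above.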
- // Initialize writes.
- unsigned WriteIndex = 0;
- Idx = 0U;
- for (const WriteDescriptor &WD : D.Writes) {
- RegID = WD.isImplicitWrite() ? WD.RegisterID
- : MCI.getOperand(WD.OpIndex).getReg();
- // Check if this is an optional definition that references NoReg.
- if (WD.IsOptionalDef && !RegID) {
- ++WriteIndex;
- continue;
- }
- assert(RegID && "Expected a valid register ID!");
- if (IsInstRecycled && Idx < NewIS->getDefs().size()) {
- NewIS->getDefs()[Idx++] =
- WriteState(WD, RegID,
- /* ClearsSuperRegs */ WriteMask[WriteIndex],
- /* WritesZero */ IsZeroIdiom);
- } else {
- NewIS->getDefs().emplace_back(WD, RegID,
- /* ClearsSuperRegs */ WriteMask[WriteIndex],
- /* WritesZero */ IsZeroIdiom);
- ++Idx;
- }
- ++WriteIndex;
- }
- if (IsInstRecycled && Idx < NewIS->getDefs().size())
- NewIS->getDefs().pop_back_n(NewIS->getDefs().size() - Idx);
- if (IsInstRecycled)
- return llvm::make_error<RecycledInstErr>(NewIS);
- else
- return std::move(CreatedIS);
- }
- } // namespace mca
- } // namespace llvm