12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471 |
- //=- X86ScheduleBdVer2.td - X86 BdVer2 (Piledriver) Scheduling * tablegen -*-=//
- //
- // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
- // See https://llvm.org/LICENSE.txt for license information.
- // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
- //
- //===----------------------------------------------------------------------===//
- //
- // This file defines the machine model for AMD bdver2 (Piledriver) to support
- // instruction scheduling and other instruction cost heuristics.
- // Based on:
- // * AMD Software Optimization Guide for AMD Family 15h Processors.
- // https://support.amd.com/TechDocs/47414_15h_sw_opt_guide.pdf
- // * The microarchitecture of Intel, AMD and VIA CPUs, By Agner Fog
- // http://www.agner.org/optimize/microarchitecture.pdf
- // * https://www.realworldtech.com/bulldozer/
- // Yes, that is for Bulldozer aka bdver1, not Piledriver aka bdver2.
- //
- //===----------------------------------------------------------------------===//
- def BdVer2Model : SchedMachineModel { // Machine model record for bdver2; the `let SchedModel = BdVer2Model in` scope after it attaches the remaining definitions to this model.
- let IssueWidth = 4; // Up to 4 IPC can be decoded, issued, retired.
- let MicroOpBufferSize = 128; // RCU reorder buffer size, which is unconfirmed (same 128 that is passed to PdRCU).
- let LoopMicroOpBufferSize = -1; // There does not seem to be a loop buffer.
- let LoadLatency = 4; // L1 data cache has a 4-cycle load-to-use latency.
- let HighLatency = 25; // FIXME: any better choice?
- let MispredictPenalty = 20; // Minimum branch misdirection penalty.
- let PostRAScheduler = 1; // Enable Post RegAlloc Scheduler pass.
- // FIXME: Incomplete. This flag is set to allow the scheduler to assign
- // a default model to unrecognized opcodes.
- let CompleteModel = 0; // 0 marks the model as incomplete, per the FIXME above.
- } // SchedMachineModel
- let SchedModel = BdVer2Model in {
- //===----------------------------------------------------------------------===//
- // Pipes
- //===----------------------------------------------------------------------===//
- // There are a total of eight pipes.
- //===----------------------------------------------------------------------===//
- // Integer execution pipes
- //
- // Two EX (ALU) pipes, declared as separate single-unit resources so that a write can name one specific pipe.
- def PdEX0 : ProcResource<1>; // ALU, Integer Pipe0
- def PdEX1 : ProcResource<1>; // ALU, Integer Pipe1
- def PdEX01 : ProcResGroup<[PdEX0, PdEX1]>; // Group covering both EX pipes.
- // Two AGLU pipes, identical.
- def PdAGLU01 : ProcResource<2>; // AGU, Integer Pipe[23]; declared as one resource with 2 units rather than two defs.
- //===----------------------------------------------------------------------===//
- // Floating point execution pipes
- //
- // Four FPU pipes, each declared as its own single-unit resource.
- def PdFPU0 : ProcResource<1>; // Vector/FPU Pipe0
- def PdFPU1 : ProcResource<1>; // Vector/FPU Pipe1
- def PdFPU2 : ProcResource<1>; // Vector/FPU Pipe2
- def PdFPU3 : ProcResource<1>; // Vector/FPU Pipe3
- // FPU grouping: pipes 0/1 and pipes 2/3 are paired into two groups.
- def PdFPU01 : ProcResGroup<[PdFPU0, PdFPU1]>;
- def PdFPU23 : ProcResGroup<[PdFPU2, PdFPU3]>;
- //===----------------------------------------------------------------------===//
- // RCU
- //===----------------------------------------------------------------------===//
- // The Retire Control Unit on Piledriver can retire up to 4 macro-ops per cycle.
- // On the other hand, the RCU reorder buffer size for Piledriver does not
- // seem to be specified in any trustworthy source.
- // But as per https://www.realworldtech.com/bulldozer/6/ the Bulldozer had
- // RCU reorder buffer size of 128. So that is a good guess for now.
- def PdRCU : RetireControlUnit<128, 4>; // <reorder buffer size (the guess above), macro-ops retired per cycle>
- //===----------------------------------------------------------------------===//
- // Pipelines
- //===----------------------------------------------------------------------===//
- // There are a total of two pipelines, each one with its own scheduler.
- //===----------------------------------------------------------------------===//
- // Integer Pipeline Scheduling
- //
- // There is one Integer Scheduler per core.
- // Integer physical register file has 96 registers of 64-bit.
- def PdIntegerPRF : RegisterFile<96, [GR64, CCR]>; // 96 physical registers, listed for the GR64 and CCR register classes.
- // Unified Integer, Memory Scheduler has 40 entries.
- def PdEX : ProcResGroup<[PdEX0, PdEX1, PdAGLU01]> { // Group of both EX pipes plus the AGLU pipes.
- // Up to 4 IPC can be decoded, issued, retired.
- let BufferSize = 40; // The 40 scheduler entries noted above, attached to the group as a whole.
- }
- //===----------------------------------------------------------------------===//
- // FPU Pipeline Scheduling
- //
- // The FPU unit is shared between the two cores.
- // FP physical register file has 160 registers of 128-bit.
- // Operations on 256-bit data types are cracked into two COPs.
- def PdFpuPRF : RegisterFile<160, [VR64, VR128, VR256], [1, 1, 2]>; // The [1, 1, 2] list lines up with [VR64, VR128, VR256]; VR256 counts double, consistent with the 128-bit registers noted above.
- // Unified FP Scheduler has 64 entries.
- def PdFPU : ProcResGroup<[PdFPU0, PdFPU1, PdFPU2, PdFPU3]> { // Group of all four FPU pipes.
- // Up to 4 IPC can be decoded, issued, retired.
- let BufferSize = 64; // The 64 scheduler entries noted above, attached to the group as a whole.
- }
- //===----------------------------------------------------------------------===//
- // Functional units
- //===----------------------------------------------------------------------===//
- //===----------------------------------------------------------------------===//
- // Load-Store Units
- //
- let Super = PdAGLU01 in // Declares PdAGLU01 as the super-resource of PdLoad.
- def PdLoad : ProcResource<2> { // Load resource with 2 units.
- // For Piledriver, the load queue is 40 entries deep.
- let BufferSize = 40;
- }
- def PdLoadQueue : LoadQueue<PdLoad>; // Names PdLoad as the resource behind the load queue.
- let Super = PdAGLU01 in // Declares PdAGLU01 as the super-resource of PdStore.
- def PdStore : ProcResource<1> { // Store resource with 1 unit.
- // For Piledriver, the store queue is 24 entries deep.
- let BufferSize = 24;
- }
- def PdStoreQueue : StoreQueue<PdStore>; // Names PdStore as the resource behind the store queue.
- //===----------------------------------------------------------------------===//
- // Integer Execution Units
- //
- def PdDiv : ProcResource<1>; // PdEX0; unpipelined integer division. NOTE(review): the WriteDiv*/WriteIDiv* entries in this file list PdDiv together with PdEX1, not PdEX0 - confirm which pipe is intended.
- def PdCount : ProcResource<1>; // PdEX0; POPCNT, LZCNT
- def PdMul : ProcResource<1>; // PdEX1; integer multiplication
- def PdBranch : ProcResource<1>; // PdEX1; JMP, fused branches
- //===----------------------------------------------------------------------===//
- // Floating-Point Units
- //
- // Two FMAC/FPFMA units. (Throughout this list the trailing comment names the FPU pipe(s) the unit is associated with.)
- def PdFPFMA : ProcResource<2>; // PdFPU0, PdFPU1
- // One 128-bit integer multiply-accumulate unit.
- def PdFPMMA : ProcResource<1>; // PdFPU0
- // One fp conversion unit.
- def PdFPCVT : ProcResource<1>; // PdFPU0
- // One unit for shuffles, packs, permutes, shifts.
- def PdFPXBR : ProcResource<1>; // PdFPU1
- // Two 128-bit packed integer units.
- def PdFPMAL : ProcResource<2>; // PdFPU2, PdFPU3
- // One FP store unit.
- def PdFPSTO : ProcResource<1>; // PdFPU3
- //===----------------------------------------------------------------------===//
- // Basic helper classes.
- //===----------------------------------------------------------------------===//
- // Many SchedWrites are defined in pairs with and without a folded load.
- // Instructions with folded loads are usually micro-fused, so they only appear
- // as two micro-ops when dispatched by the schedulers.
- // PdWriteRes itself defines the resource usage of one SchedWrite; the *Pair
- // multiclasses after it cover the variants with and without folded loads.
- multiclass PdWriteRes<SchedWrite SchedRW, // SchedRW: the scheduling write being described.
- list<ProcResourceKind> ExePorts, int Lat = 1, // ExePorts: resources consumed; Lat: latency (default 1).
- list<int> Res = [], int UOps = 1> { // Res: cycles per entry of ExePorts (default: empty list); UOps: micro-op count (default 1).
- def : WriteRes<SchedRW, ExePorts> { // Anonymous record: each instantiation adds one WriteRes.
- let Latency = Lat;
- let ResourceCycles = Res;
- let NumMicroOps = UOps;
- }
- }
- multiclass __pdWriteResPair<X86FoldableSchedWrite SchedRW, // Shared helper behind the Ex/XMM/YMM pair multiclasses: defines SchedRW and SchedRW.Folded.
- list<ProcResourceKind> ExePorts, int Lat, // Resources and latency of the register form.
- list<int> Res, int UOps, // Resource cycles and micro-op count of the register form.
- int LoadLat, int LoadRes, int LoadUOps> { // Extra latency, PdLoad cycles and micro-ops applied to the folded-load form.
- defm : PdWriteRes<SchedRW, ExePorts, Lat, Res, UOps>; // Register form: parameters passed through unchanged.
- defm : PdWriteRes<SchedRW.Folded, // Folded-load form.
- !listconcat([PdLoad], ExePorts), // PdLoad is prepended to the resource list.
- !add(Lat, LoadLat), // Latency = Lat + LoadLat.
- !if(!and(!empty(Res), !eq(LoadRes, 1)), // No explicit cycles were given and the load takes 1 cycle:
- [], // keep ResourceCycles empty.
- !listconcat([LoadRes], // Otherwise: LoadRes cycles for PdLoad, followed by
- !if(!empty(Res),
- !listsplat(1, !size(ExePorts)), // one cycle per ExePorts entry when Res is empty,
- Res))), // or the caller's Res as given.
- !add(UOps, LoadUOps)>; // Micro-ops = UOps + LoadUOps.
- }
- multiclass PdWriteResExPair<X86FoldableSchedWrite SchedRW, // Pair helper whose folded form adds 4 cycles of latency and 3 PdLoad cycles.
- list<ProcResourceKind> ExePorts, int Lat = 1, // Latency defaults to 1.
- list<int> Res = [], int UOps = 1, // Resource cycles default to an empty list, micro-ops to 1.
- int LoadUOps = 0> { // By default the folded form has the same micro-op count as the register form.
- defm : __pdWriteResPair<SchedRW, ExePorts, Lat, Res, UOps,
- /*LoadLat*/4, /*LoadRes*/3, LoadUOps>;
- }
- multiclass PdWriteResXMMPair<X86FoldableSchedWrite SchedRW, // Pair helper whose folded form adds 5 cycles of latency and 3 PdLoad cycles.
- list<ProcResourceKind> ExePorts, int Lat = 1, // Latency defaults to 1.
- list<int> Res = [], int UOps = 1, // Resource cycles default to an empty list, micro-ops to 1.
- int LoadUOps = 0> { // By default the folded form has the same micro-op count as the register form.
- defm : __pdWriteResPair<SchedRW, ExePorts, Lat, Res, UOps,
- /*LoadLat*/5, /*LoadRes*/3, LoadUOps>;
- }
- multiclass PdWriteResYMMPair<X86FoldableSchedWrite SchedRW, // Pair helper with the same load extras as the XMM pair (5 cycles of latency, 3 PdLoad cycles).
- list<ProcResourceKind> ExePorts, int Lat, // Unlike the Ex/XMM pair helpers, Lat has no default here.
- list<int> Res = [], int UOps = 2, // Micro-ops default to 2 instead of 1.
- int LoadUOps = 0> { // By default the folded form has the same micro-op count as the register form.
- defm : __pdWriteResPair<SchedRW, ExePorts, Lat, Res, UOps,
- /*LoadLat*/5, /*LoadRes*/3, LoadUOps>;
- }
- //===----------------------------------------------------------------------===//
- // Here be dragons.
- //===----------------------------------------------------------------------===//
- // L1 data cache has a 4-cycle load-to-use latency, so ReadAfterLd registers
- // needn't be available until 4 cycles after the memory operand.
- def : ReadAdvance<ReadAfterLd, 4>;
- // Vector loads are 5 cycles, so ReadAfterVec*Ld registers needn't be available
- // until 5 cycles after the memory operand.
- def : ReadAdvance<ReadAfterVecLd, 5>;
- def : ReadAdvance<ReadAfterVecXLd, 5>;
- def : ReadAdvance<ReadAfterVecYLd, 5>;
- // Transfer from int domain to ivec domain incurs additional latency of 8..10cy
- // Reference: Agner, Microarchitecture, "AMD Bulldozer, Piledriver, Steamroller
- // and Excavator pipeline", "Data delay between different execution domains"
- def : ReadAdvance<ReadInt2Fpu, -10>; // Negative advance: uses the upper end (10cy) of the 8..10cy range quoted above.
- // A folded store needs a cycle on the PdStore for the store data.
- def : WriteRes<WriteRMW, [PdStore]>;
- ////////////////////////////////////////////////////////////////////////////////
- // Loads, stores, and moves, not folded with other operations.
- ////////////////////////////////////////////////////////////////////////////////
- def : WriteRes<WriteLoad, [PdLoad]> { let Latency = 5; let ResourceCycles = [2]; }
- def : WriteRes<WriteStore, [PdStore]>;
- def : WriteRes<WriteStoreNT, [PdStore]>;
- def : WriteRes<WriteMove, [PdEX01]> { let ResourceCycles = [2]; }
- defm : X86WriteResUnsupported<WriteVecMaskedGatherWriteback>;
- // Load/store MXCSR.
- // FIXME: These are copy and pasted from WriteLoad/Store.
- def : WriteRes<WriteLDMXCSR, [PdLoad]> { let Latency = 5; }
- def : WriteRes<WriteSTMXCSR, [PdStore]> { let NumMicroOps = 2; let ResourceCycles = [18]; }
- // Treat misc copies as a move.
- def : InstRW<[WriteMove], (instrs COPY)>;
- ////////////////////////////////////////////////////////////////////////////////
- // Idioms that clear a register, like xorps %xmm0, %xmm0.
- // These can often bypass execution ports completely.
- ////////////////////////////////////////////////////////////////////////////////
- def : WriteRes<WriteZero, [/*No ExePorts*/]>;
- ////////////////////////////////////////////////////////////////////////////////
- // Branches don't produce values, so they have no latency, but they still
- // consume resources. Indirect branches can fold loads.
- ////////////////////////////////////////////////////////////////////////////////
- defm : PdWriteResExPair<WriteJump, [PdEX1, PdBranch]>;
- ////////////////////////////////////////////////////////////////////////////////
- // Special case scheduling classes.
- ////////////////////////////////////////////////////////////////////////////////
- def : WriteRes<WriteSystem, [PdEX01]> { let Latency = 100; }
- def : WriteRes<WriteMicrocoded, [PdEX01]> { let Latency = 100; }
- def : WriteRes<WriteFence, [PdStore]>;
- def PdWriteXLAT : SchedWriteRes<[PdEX01]> {
- let Latency = 6;
- }
- def : InstRW<[PdWriteXLAT], (instrs XLAT)>;
- def PdWriteLARrr : SchedWriteRes<[PdEX01]> {
- let Latency = 184;
- let ResourceCycles = [375];
- let NumMicroOps = 45;
- }
- def : InstRW<[PdWriteLARrr], (instregex "LAR(16|32|64)rr",
- "LSL(16|32|64)rr")>;
- // Nops don't have dependencies, so there's no actual latency, but we set this
- // to '1' to tell the scheduler that the nop uses an ALU slot for a cycle.
- def : WriteRes<WriteNop, [PdEX01]> { let ResourceCycles = [2]; }
- ////////////////////////////////////////////////////////////////////////////////
- // Arithmetic.
- ////////////////////////////////////////////////////////////////////////////////
- defm : PdWriteResExPair<WriteALU, [PdEX01], 1, [2]>;
- def PdWriteALURMW : SchedWriteRes<[PdLoad, PdEX01, PdStore]> {
- let Latency = 6;
- let ResourceCycles = [3, 2, 1];
- let NumMicroOps = 1;
- }
- def : SchedAlias<WriteALURMW, PdWriteALURMW>;
- def PdWriteLXADD : SchedWriteRes<[PdEX01]> {
- let Latency = 6;
- let ResourceCycles = [88];
- let NumMicroOps = 4;
- }
- def : InstRW<[PdWriteLXADD], (instrs LXADD8, LXADD16, LXADD32, LXADD64)>;
- def PdWriteBMI1 : SchedWriteRes<[PdEX01]> {
- let Latency = 2;
- let ResourceCycles = [2];
- let NumMicroOps = 2;
- }
- def : InstRW<[PdWriteBMI1],
- (instrs BLCFILL32rr, BLCFILL64rr, BLCI32rr, BLCI64rr,
- BLCIC32rr, BLCIC64rr, BLCMSK32rr, BLCMSK64rr,
- BLCS32rr, BLCS64rr, BLSFILL32rr, BLSFILL64rr,
- BLSIC32rr, BLSIC64rr, T1MSKC32rr, T1MSKC64rr,
- TZMSK32rr, TZMSK64rr)>;
- def PdWriteBMI1m : SchedWriteRes<[PdLoad, PdEX01]> {
- let Latency = 6;
- let ResourceCycles = [3, 3];
- let NumMicroOps = 2;
- }
- def : InstRW<[PdWriteBMI1m],
- (instrs BLCFILL32rm, BLCFILL64rm, BLCI32rm, BLCI64rm,
- BLCIC32rm, BLCIC64rm, BLCMSK32rm, BLCMSK64rm,
- BLCS32rm, BLCS64rm, BLSFILL32rm, BLSFILL64rm,
- BLSIC32rm, BLSIC64rm, T1MSKC32rm, T1MSKC64rm,
- TZMSK32rm, TZMSK64rm)>;
- defm : PdWriteResExPair<WriteADC, [PdEX01], 1, [2]>;
- def PdWriteADCSBB64ri32 : SchedWriteRes<[PdEX01]> {
- let ResourceCycles = [3];
- }
- def : InstRW<[PdWriteADCSBB64ri32], (instrs ADC64ri32, SBB64ri32)>;
- defm : PdWriteRes<WriteBSWAP32, [PdEX01]>;
- defm : PdWriteRes<WriteBSWAP64, [PdEX01]>;
- defm : PdWriteRes<WriteCMPXCHG, [PdEX1], 3, [3], 5>;
- defm : PdWriteRes<WriteCMPXCHGRMW, [PdEX1, PdStore, PdLoad], 3, [44, 1, 1], 2>;
- defm : PdWriteRes<WriteXCHG, [PdEX1], 1, [], 2>;
- def PdWriteCMPXCHG8rr : SchedWriteRes<[PdEX1]> {
- let Latency = 3;
- let ResourceCycles = [3];
- let NumMicroOps = 3;
- }
- def : InstRW<[PdWriteCMPXCHG8rr], (instrs CMPXCHG8rr)>;
- def PdWriteCMPXCHG8rm : SchedWriteRes<[PdEX1]> {
- let Latency = 3;
- let ResourceCycles = [23];
- let NumMicroOps = 5;
- }
- def : InstRW<[PdWriteCMPXCHG8rm], (instrs CMPXCHG8rm)>;
- def PdWriteCMPXCHG16rm_CMPXCHG32rm_CMPXCHG64rm : SchedWriteRes<[PdEX1]> {
- let Latency = 3;
- let ResourceCycles = [21];
- let NumMicroOps = 6;
- }
- def : InstRW<[PdWriteCMPXCHG16rm_CMPXCHG32rm_CMPXCHG64rm],
- (instrs CMPXCHG16rm, CMPXCHG32rm, CMPXCHG64rm)>;
- def PdWriteCMPXCHG8B : SchedWriteRes<[PdEX1]> {
- let Latency = 3;
- let ResourceCycles = [26];
- let NumMicroOps = 18;
- }
- def : InstRW<[PdWriteCMPXCHG8B], (instrs CMPXCHG8B)>;
- def PdWriteCMPXCHG16B : SchedWriteRes<[PdEX1]> {
- let Latency = 3;
- let ResourceCycles = [69];
- let NumMicroOps = 22;
- }
- def : InstRW<[PdWriteCMPXCHG16B], (instrs CMPXCHG16B)>;
- def PdWriteXADD : SchedWriteRes<[PdEX1]> {
- let Latency = 1;
- let ResourceCycles = [1];
- let NumMicroOps = 2;
- }
- def : InstRW<[PdWriteXADD], (instrs XADD8rr, XADD16rr, XADD32rr, XADD64rr)>;
- def PdWriteXADDm : SchedWriteRes<[PdEX1]> {
- let Latency = 6;
- let ResourceCycles = [20];
- let NumMicroOps = 4;
- }
- def : InstRW<[PdWriteXADDm], (instrs XADD8rm, XADD16rm, XADD32rm, XADD64rm)>;
- defm : PdWriteResExPair<WriteIMul8, [PdEX1, PdMul], 4, [1, 4]>;
- defm : PdWriteResExPair<WriteIMul16, [PdEX1, PdMul], 4, [1, 5], 2>;
- defm : PdWriteResExPair<WriteIMul16Imm, [PdEX1, PdMul], 5, [1, 5], 2>;
- defm : PdWriteResExPair<WriteIMul16Reg, [PdEX1, PdMul], 4, [1, 2]>;
- defm : PdWriteResExPair<WriteIMul32, [PdEX1, PdMul], 4, [1, 4]>;
- defm : PdWriteResExPair<WriteIMul32Imm, [PdEX1, PdMul], 4, [1, 2], 1, 1>;
- defm : PdWriteResExPair<WriteIMul32Reg, [PdEX1, PdMul], 4, [1, 2]>;
- defm : PdWriteResExPair<WriteIMul64, [PdEX1, PdMul], 6, [1, 6]>;
- defm : PdWriteResExPair<WriteIMul64Imm, [PdEX1, PdMul], 6, [1, 4],1, 1>;
- defm : PdWriteResExPair<WriteIMul64Reg, [PdEX1, PdMul], 6, [1, 4]>;
- // BMI2 MULX
- defm : X86WriteResUnsupported<WriteIMulH>;
- defm : X86WriteResUnsupported<WriteIMulHLd>;
- defm : X86WriteResPairUnsupported<WriteMULX32>;
- defm : X86WriteResPairUnsupported<WriteMULX64>;
- defm : PdWriteResExPair<WriteDiv8, [PdEX1, PdDiv], 12, [1, 12]>;
- defm : PdWriteResExPair<WriteDiv16, [PdEX1, PdDiv], 15, [1, 15], 2>;
- defm : PdWriteResExPair<WriteDiv32, [PdEX1, PdDiv], 14, [1, 14], 2>;
- defm : PdWriteResExPair<WriteDiv64, [PdEX1, PdDiv], 14, [1, 14], 2>;
- defm : PdWriteResExPair<WriteIDiv8, [PdEX1, PdDiv], 12, [1, 12]>;
- defm : PdWriteResExPair<WriteIDiv16, [PdEX1, PdDiv], 15, [1, 17], 2>;
- defm : PdWriteResExPair<WriteIDiv32, [PdEX1, PdDiv], 14, [1, 25], 2>;
- defm : PdWriteResExPair<WriteIDiv64, [PdEX1, PdDiv], 14, [1, 14], 2>;
- defm : PdWriteResExPair<WriteCRC32, [PdEX01], 2, [4], 3>;
- def PdWriteCRC32r32r16 : SchedWriteRes<[PdEX01]> {
- let Latency = 5;
- let ResourceCycles = [10];
- let NumMicroOps = 5;
- }
- def : InstRW<[PdWriteCRC32r32r16], (instrs CRC32r32r16)>;
- def PdWriteCRC32r32r32 : SchedWriteRes<[PdEX01]> {
- let Latency = 6;
- let ResourceCycles = [12];
- let NumMicroOps = 7;
- }
- def : InstRW<[PdWriteCRC32r32r32], (instrs CRC32r32r32)>;
- def PdWriteCRC32r64r64 : SchedWriteRes<[PdEX01]> {
- let Latency = 10;
- let ResourceCycles = [17];
- let NumMicroOps = 11;
- }
- def : InstRW<[PdWriteCRC32r64r64], (instrs CRC32r64r64)>;
- defm : PdWriteResExPair<WriteCMOV, [PdEX01]>; // Conditional move.
- def PdWriteCMOVm : SchedWriteRes<[PdLoad, PdEX01]> { // Load-folded CMOV cost (2 micro-ops) used for the condition codes selected in PdWriteCMOVmVar.
- let Latency = 5;
- let ResourceCycles = [3, 3];
- let NumMicroOps = 2;
- }
- def PdWriteCMOVmVar : SchedWriteVariant<[ // Chooses by immediate operand 7 (presumably the condition-code operand - confirm): BE, A, L, GE, LE, G get PdWriteCMOVm.
- SchedVar<MCSchedPredicate<CheckImmOperand_s<7, "X86::COND_BE">>, [PdWriteCMOVm]>,
- SchedVar<MCSchedPredicate<CheckImmOperand_s<7, "X86::COND_A">>, [PdWriteCMOVm]>,
- SchedVar<MCSchedPredicate<CheckImmOperand_s<7, "X86::COND_L">>, [PdWriteCMOVm]>,
- SchedVar<MCSchedPredicate<CheckImmOperand_s<7, "X86::COND_GE">>, [PdWriteCMOVm]>,
- SchedVar<MCSchedPredicate<CheckImmOperand_s<7, "X86::COND_LE">>, [PdWriteCMOVm]>,
- SchedVar<MCSchedPredicate<CheckImmOperand_s<7, "X86::COND_G">>, [PdWriteCMOVm]>,
- SchedVar<NoSchedPred, [WriteCMOV.Folded]> // Catch-all: every other condition code uses the generic folded WriteCMOV.
- ]>;
- def : InstRW<[PdWriteCMOVmVar], (instrs CMOV16rm, CMOV32rm, CMOV64rm)>; // Applies the variant to the load-folded CMOV forms.
- defm : PdWriteRes<WriteFCMOV, [PdFPU0, PdFPFMA]>; // x87 conditional move.
- def : WriteRes<WriteSETCC, [PdEX01]>; // Setcc.
- def : WriteRes<WriteSETCCStore, [PdEX01, PdStore]>;
- def PdWriteSETGEmSETGmSETLEmSETLm : SchedWriteRes<[PdEX01]> { // Cost of SETCCm for the GE/G/LE/L conditions: 2 cycles on PdEX01, 2 micro-ops.
- let ResourceCycles = [2];
- let NumMicroOps = 2;
- }
- def PdSETGEmSETGmSETLEmSETLm : SchedWriteVariant<[ // Chooses by immediate operand 5 (presumably the condition-code operand - confirm).
- SchedVar<MCSchedPredicate<CheckImmOperand_s<5, "X86::COND_GE">>, [PdWriteSETGEmSETGmSETLEmSETLm]>,
- SchedVar<MCSchedPredicate<CheckImmOperand_s<5, "X86::COND_G">>, [PdWriteSETGEmSETGmSETLEmSETLm]>,
- SchedVar<MCSchedPredicate<CheckImmOperand_s<5, "X86::COND_LE">>, [PdWriteSETGEmSETGmSETLEmSETLm]>,
- SchedVar<MCSchedPredicate<CheckImmOperand_s<5, "X86::COND_L">>, [PdWriteSETGEmSETGmSETLEmSETLm]>,
- SchedVar<NoSchedPred, [WriteSETCCStore]> // Catch-all: every other condition code uses the generic WriteSETCCStore.
- ]>;
- def : InstRW<[PdSETGEmSETGmSETLEmSETLm], (instrs SETCCm)>; // Applies the variant to SETCCm.
- defm : PdWriteRes<WriteLAHFSAHF, [PdEX01], 2, [4], 2>;
- def PdWriteLAHF : SchedWriteRes<[PdEX01]> {
- let Latency = 2;
- let ResourceCycles = [4];
- let NumMicroOps = 4;
- }
- def : InstRW<[PdWriteLAHF], (instrs LAHF)>;
- def PdWriteSAHF : SchedWriteRes<[PdEX01]> {
- let Latency = 2;
- let ResourceCycles = [2];
- let NumMicroOps = 2;
- }
- def : InstRW<[PdWriteSAHF], (instrs SAHF)>;
- defm : PdWriteRes<WriteBitTest, [PdEX01], 1, [2], 1>;
- defm : PdWriteRes<WriteBitTestImmLd, [PdEX01, PdLoad], 5, [2, 3], 1>;
- defm : PdWriteRes<WriteBitTestRegLd, [PdEX01, PdLoad], 5, [7, 2], 7>;
- defm : PdWriteRes<WriteBitTestSet, [PdEX01], 2, [2], 2>;
- defm : PdWriteRes<WriteBitTestSetImmLd, [PdEX01, PdLoad], 6, [1, 1], 4>;
- defm : PdWriteRes<WriteBitTestSetRegLd, [PdEX01, PdLoad], 6, [1, 1], 10>;
- def PdWriteBTSIm : SchedWriteRes<[PdEX01, PdLoad]> {
- let Latency = 7;
- let ResourceCycles = [42, 1];
- let NumMicroOps = 4;
- }
- def : SchedAlias<WriteBitTestSetImmRMW, PdWriteBTSIm>;
- def PdWriteBTSRm : SchedWriteRes<[PdEX01, PdLoad]> {
- let Latency = 7;
- let ResourceCycles = [44, 1];
- let NumMicroOps = 10;
- }
- def : SchedAlias<WriteBitTestSetRegRMW, PdWriteBTSRm>;
- // This is for simple LEAs with one or two input operands.
- def : WriteRes<WriteLEA, [PdEX01]> { let ResourceCycles = [2]; }
- // This write is used for slow LEA instructions.
- def PdWrite3OpsLEA : SchedWriteRes<[PdEX01]> { // Same resource usage as WriteLEA, but with Latency set to 2.
- let Latency = 2;
- let ResourceCycles = [2];
- }
- // On Piledriver, a slow LEA is either a 3Ops LEA (base, index, offset),
- // or an LEA with a `Scale` value different from 1.
- def PdSlowLEAPredicate : MCSchedPredicate<
- CheckAny<[ // True when either of the two conditions listed here holds.
- // A 3-operand LEA (base, index, offset).
- IsThreeOperandsLEAFn,
- // An LEA with a "Scale" different from 1.
- CheckAll<[
- CheckIsImmOperand<2>, // Operand 2 must be an immediate...
- CheckNot<CheckImmOperand<2, 1>> // ...whose value is not 1.
- ]>
- ]>
- >;
- def PdWriteLEA : SchedWriteVariant<[ // Slow LEAs get PdWrite3OpsLEA; all others fall back to WriteLEA.
- SchedVar<PdSlowLEAPredicate, [PdWrite3OpsLEA]>,
- SchedVar<NoSchedPred, [WriteLEA]>
- ]>;
- def : InstRW<[PdWriteLEA], (instrs LEA32r, LEA64r, LEA64_32r)>; // LEA16r is not in this list; it gets its own write.
- def PdWriteLEA16r : SchedWriteRes<[PdEX01]> { // 16-bit LEA: 3 cycles on PdEX01, 2 micro-ops.
- let ResourceCycles = [3];
- let NumMicroOps = 2;
- }
- def : InstRW<[PdWriteLEA16r], (instrs LEA16r)>;
- // Bit counts.
- defm : PdWriteResExPair<WriteBSF, [PdEX01], 3, [6], 6, 2>;
- defm : PdWriteResExPair<WriteBSR, [PdEX01], 4, [8], 7, 2>;
- defm : PdWriteResExPair<WritePOPCNT, [PdEX01], 4, [4]>;
- defm : PdWriteResExPair<WriteLZCNT, [PdEX0], 2, [2], 2>;
- defm : PdWriteResExPair<WriteTZCNT, [PdEX0], 2, [2], 2>;
- // BMI1 BEXTR, BMI2 BZHI
- defm : PdWriteResExPair<WriteBEXTR, [PdEX01], 2, [2], 2>;
- defm : PdWriteResExPair<WriteBLS, [PdEX01], 2, [2], 2>;
- defm : PdWriteResExPair<WriteBZHI, [PdEX01]>;
- def PdWriteBEXTRI : SchedWriteRes<[PdEX01]> {
- let Latency = 2;
- let ResourceCycles = [4];
- let NumMicroOps = 2;
- }
- def : InstRW<[PdWriteBEXTRI], (instrs BEXTRI32ri, BEXTRI64ri)>;
- def PdWriteBEXTRIm : SchedWriteRes<[PdEX01]> {
- let Latency = 2;
- let ResourceCycles = [5];
- let NumMicroOps = 2;
- }
- def : InstRW<[PdWriteBEXTRIm], (instrs BEXTRI32mi, BEXTRI64mi)>;
- ////////////////////////////////////////////////////////////////////////////////
- // Integer shifts and rotates.
- ////////////////////////////////////////////////////////////////////////////////
- defm : PdWriteResExPair<WriteShift, [PdEX01], 1, [2]>;
- defm : PdWriteResExPair<WriteShiftCL, [PdEX01]>;
- defm : PdWriteResExPair<WriteRotate, [PdEX01], 1, [2]>;
- defm : PdWriteResExPair<WriteRotateCL, [PdEX01]>;
- def PdWriteRCL8rCL : SchedWriteRes<[PdEX01]> {
- let Latency = 12;
- let ResourceCycles = [24];
- let NumMicroOps = 26;
- }
- def : InstRW<[PdWriteRCL8rCL], (instrs RCL8rCL)>;
- def PdWriteRCR8ri : SchedWriteRes<[PdEX01]> {
- let Latency = 12;
- let ResourceCycles = [23];
- let NumMicroOps = 23;
- }
- def : InstRW<[PdWriteRCR8ri], (instrs RCR8ri)>;
- def PdWriteRCR8rCL : SchedWriteRes<[PdEX01]> {
- let Latency = 11;
- let ResourceCycles = [22];
- let NumMicroOps = 24;
- }
- def : InstRW<[PdWriteRCR8rCL], (instrs RCR8rCL)>;
- def PdWriteRCL16rCL : SchedWriteRes<[PdEX01]> {
- let Latency = 10;
- let ResourceCycles = [20];
- let NumMicroOps = 22;
- }
- def : InstRW<[PdWriteRCL16rCL], (instrs RCL16rCL)>;
- def PdWriteRCR16ri : SchedWriteRes<[PdEX01]> {
- let Latency = 10;
- let ResourceCycles = [19];
- let NumMicroOps = 19;
- }
- def : InstRW<[PdWriteRCR16ri], (instrs RCR16ri)>;
- def PdWriteRCL3264rCL : SchedWriteRes<[PdEX01]> {
- let Latency = 7;
- let ResourceCycles = [14];
- let NumMicroOps = 17;
- }
- def : InstRW<[PdWriteRCL3264rCL], (instrs RCL32rCL, RCL64rCL)>;
- def PdWriteRCR3264rCL : SchedWriteRes<[PdEX01]> {
- let Latency = 7;
- let ResourceCycles = [13];
- let NumMicroOps = 16;
- }
- def : InstRW<[PdWriteRCR3264rCL], (instrs RCR32rCL, RCR64rCL)>;
- def PdWriteRCR32riRCR64ri : SchedWriteRes<[PdEX01]> {
- let Latency = 7;
- let ResourceCycles = [14];
- let NumMicroOps = 15;
- }
- def : InstRW<[PdWriteRCR32riRCR64ri], (instrs RCR32ri, RCR64ri)>;
- def PdWriteRCR16rCL : SchedWriteRes<[PdEX01]> {
- let Latency = 9;
- let ResourceCycles = [18];
- let NumMicroOps = 20;
- }
- def : InstRW<[PdWriteRCR16rCL], (instrs RCR16rCL)>;
- def PdWriteRCL16ri : SchedWriteRes<[PdEX01]> {
- let Latency = 11;
- let ResourceCycles = [21];
- let NumMicroOps = 21;
- }
- def : InstRW<[PdWriteRCL16ri], (instrs RCL16ri)>;
- def PdWriteRCL3264ri : SchedWriteRes<[PdEX01]> {
- let Latency = 8;
- let ResourceCycles = [15];
- let NumMicroOps = 16;
- }
- def : InstRW<[PdWriteRCL3264ri], (instrs RCL32ri, RCL64ri)>;
- def PdWriteRCL8ri : SchedWriteRes<[PdEX01]> {
- let Latency = 13;
- let ResourceCycles = [25];
- let NumMicroOps = 25;
- }
- def : InstRW<[PdWriteRCL8ri], (instrs RCL8ri)>;
- // SHLD/SHRD.
- defm : PdWriteRes<WriteSHDrri, [PdEX01], 3, [6], 6>;
- defm : PdWriteRes<WriteSHDrrcl, [PdEX01], 3, [8], 7>;
- def PdWriteSHLD32rri8SHRD16rri8 : SchedWriteRes<[PdEX01]> {
- let Latency = 3;
- let ResourceCycles = [6];
- let NumMicroOps = 6;
- }
- def : InstRW<[PdWriteSHLD32rri8SHRD16rri8 ], (instrs SHLD32rri8, SHRD16rri8)>;
- def PdWriteSHLD16rrCLSHLD32rrCLSHRD32rrCL : SchedWriteRes<[PdEX01]> {
- let Latency = 3;
- let ResourceCycles = [6];
- let NumMicroOps = 7;
- }
- def : InstRW<[PdWriteSHLD16rrCLSHLD32rrCLSHRD32rrCL], (instrs SHLD16rrCL,
- SHLD32rrCL,
- SHRD32rrCL)>;
- defm : PdWriteRes<WriteSHDmri, [PdLoad, PdEX01], 4, [1, 22], 8>;
- defm : PdWriteRes<WriteSHDmrcl, [PdLoad, PdEX01], 4, [1, 22], 8>;
- ////////////////////////////////////////////////////////////////////////////////
- // Floating point. This covers both scalar and vector operations.
- ////////////////////////////////////////////////////////////////////////////////
- defm : PdWriteRes<WriteFLD0, [PdFPU1, PdFPSTO], 3>;
- defm : PdWriteRes<WriteFLD1, [PdFPU1, PdFPSTO], 3>;
- defm : PdWriteRes<WriteFLDC, [PdFPU1, PdFPSTO], 3>;
- defm : PdWriteRes<WriteFLoad, [PdLoad, PdFPU01, PdFPFMA], 5, [3, 1, 3]>;
- defm : PdWriteRes<WriteFLoadX, [PdLoad, PdFPU01, PdFPFMA], 5, [3, 1, 3]>;
- defm : PdWriteRes<WriteFLoadY, [PdLoad, PdFPU01, PdFPFMA], 5, [3, 1, 3], 2>;
- defm : PdWriteRes<WriteFMaskedLoad, [PdLoad, PdFPU01, PdFPFMA], 6, [3, 1, 4]>;
- defm : PdWriteRes<WriteFMaskedLoadY, [PdLoad, PdFPU01, PdFPFMA], 6, [3, 2, 4], 2>;
- defm : PdWriteRes<WriteFStore, [PdStore, PdFPU23, PdFPSTO], 2, [1, 3, 1]>;
- defm : PdWriteRes<WriteFStoreX, [PdStore, PdFPU23, PdFPSTO], 1, [1, 3, 1]>;
- defm : PdWriteRes<WriteFStoreY, [PdStore, PdFPU23, PdFPSTO], 1, [1, 36, 2], 4>;
- def PdWriteMOVHPm : SchedWriteRes<[PdStore, PdFPU23, PdFPSTO]> {
- let Latency = 2;
- let ResourceCycles = [1, 3, 1];
- let NumMicroOps = 2;
- }
- def : InstRW<[PdWriteMOVHPm], (instrs MOVHPDmr, MOVHPSmr, VMOVHPDmr, VMOVHPSmr)>;
- def PdWriteVMOVUPDYmrVMOVUPSYmr : SchedWriteRes<[PdStore, PdFPU1, PdFPSTO]> {
- let NumMicroOps = 8;
- }
- def : InstRW<[PdWriteVMOVUPDYmrVMOVUPSYmr], (instrs VMOVUPDYmr, VMOVUPSYmr)>;
- defm : PdWriteRes<WriteFStoreNT, [PdStore, PdFPU1, PdFPSTO], 3>; // NOTE(review): arguments look like <write, resources, latency, per-resource cycles, micro-ops>; confirm against PdWriteRes earlier in this file.
- defm : PdWriteRes<WriteFStoreNTX, [PdStore, PdFPU1, PdFPSTO], 3>;
- defm : PdWriteRes<WriteFStoreNTY, [PdStore, PdFPU1, PdFPSTO], 3, [2, 2, 2], 4>;
- defm : PdWriteRes<WriteFMaskedStore32, [PdStore, PdFPU01, PdFPFMA], 6, [1, 1, 188], 18>; // 32- and 64-bit masked stores carry identical numbers.
- defm : PdWriteRes<WriteFMaskedStore64, [PdStore, PdFPU01, PdFPFMA], 6, [1, 1, 188], 18>;
- defm : PdWriteRes<WriteFMaskedStore32Y, [PdStore, PdFPU01, PdFPFMA], 6, [2, 2, 376], 34>; // Y entries: every list element is exactly double the non-Y entry ([1, 1, 188] -> [2, 2, 376]).
- defm : PdWriteRes<WriteFMaskedStore64Y, [PdStore, PdFPU01, PdFPFMA], 6, [2, 2, 376], 34>;
- defm : PdWriteRes<WriteFMove, [PdFPU01, PdFPFMA]>; // Only resources given; every other argument takes the multiclass default.
- defm : PdWriteRes<WriteFMoveX, [PdFPU01, PdFPFMA], 1, [1, 2]>;
- defm : PdWriteRes<WriteFMoveY, [PdFPU01, PdFPFMA], 2, [2, 2], 2>;
- defm : X86WriteResUnsupported<WriteFMoveZ>; // The Z write class is declared unsupported for this model.
- defm : PdWriteRes<WriteEMMS, [PdFPU01, PdFPFMA], 2>;
- defm : PdWriteResXMMPair<WriteFAdd, [PdFPU0, PdFPFMA], 5>; // *Pair multiclasses: presumably define both the register write and its load-folded variant; confirm against their definitions.
- defm : PdWriteResXMMPair<WriteFAddX, [PdFPU0, PdFPFMA], 5>;
- defm : PdWriteResYMMPair<WriteFAddY, [PdFPU0, PdFPFMA], 5, [1, 2]>;
- defm : X86WriteResPairUnsupported<WriteFAddZ>;
- // x87 ADD/SUB/SUBR with a memory operand (FI16m/FI32m/F32m/F64m forms):
- // latency 5, resource cycles [3, 1, 10] for PdLoad, PdFPU0 and PdFPFMA.
- def PdWriteX87Add : SchedWriteRes<[PdLoad, PdFPU0, PdFPFMA]> {
-   let Latency = 5;
-   let ResourceCycles = [3, 1, 10];
- }
- def : InstRW<[PdWriteX87Add], (instrs
-     ADD_FI16m, ADD_FI32m, ADD_F32m, ADD_F64m,
-     SUB_FI16m, SUB_FI32m, SUB_F32m, SUB_F64m,
-     SUBR_FI16m, SUBR_FI32m, SUBR_F32m, SUBR_F64m)>;
- defm : PdWriteResXMMPair<WriteFAdd64, [PdFPU0, PdFPFMA], 5>; // The FAdd64 group repeats the numbers of the WriteFAdd group.
- defm : PdWriteResXMMPair<WriteFAdd64X, [PdFPU0, PdFPFMA], 5>;
- defm : PdWriteResYMMPair<WriteFAdd64Y, [PdFPU0, PdFPFMA], 5, [1, 2]>;
- defm : X86WriteResPairUnsupported<WriteFAdd64Z>;
- defm : PdWriteResXMMPair<WriteFCmp, [PdFPU0, PdFPFMA], 2>; // Compares use the same resources as the adds; third argument is 2 instead of 5.
- defm : PdWriteResXMMPair<WriteFCmpX, [PdFPU0, PdFPFMA], 2>;
- defm : PdWriteResYMMPair<WriteFCmpY, [PdFPU0, PdFPFMA], 2, [1, 2]>;
- defm : X86WriteResPairUnsupported<WriteFCmpZ>;
- defm : PdWriteResXMMPair<WriteFCmp64, [PdFPU0, PdFPFMA], 2>;
- defm : PdWriteResXMMPair<WriteFCmp64X, [PdFPU0, PdFPFMA], 2>;
- defm : PdWriteResYMMPair<WriteFCmp64Y, [PdFPU0, PdFPFMA], 2, [1, 2]>;
- defm : X86WriteResPairUnsupported<WriteFCmp64Z>;
- defm : PdWriteResXMMPair<WriteFCom, [PdFPU0, PdFPFMA, PdEX0], 1, [], 2>; // Also lists PdEX0; the empty [] presumably selects the default per-resource cycles (confirm against PdWriteResXMMPair).
- defm : PdWriteResXMMPair<WriteFComX, [PdFPU0, PdFPFMA, PdEX0], 1, [], 2>;
- // FCOM/FCOMP with a 32m or 64m operand: latency 6 on PdFPU1 + PdFPFMA.
- def PdWriteFCOMPm : SchedWriteRes<[PdFPU1, PdFPFMA]> {
-   let Latency = 6;
- }
- def : InstRW<[PdWriteFCOMPm], (instrs
-     FCOM32m, FCOM64m,
-     FCOMP32m, FCOMP64m)>;
- // TST_F and UCOM_FPPr: same resources, every field left at its
- // SchedWriteRes default.
- def PdWriteTST_F_UCOM_FPPr : SchedWriteRes<[PdFPU1, PdFPFMA]>;
- def : InstRW<[PdWriteTST_F_UCOM_FPPr], (instrs
-     TST_F, UCOM_FPPr)>;
- defm : PdWriteResXMMPair<WriteFMul, [PdFPU1, PdFPFMA], 5>; // Multiplies are bound to PdFPU1 (the WriteFAdd entries use PdFPU0); third argument is 5, as for the adds.
- defm : PdWriteResXMMPair<WriteFMulX, [PdFPU1, PdFPFMA], 5>;
- defm : PdWriteResYMMPair<WriteFMulY, [PdFPU1, PdFPFMA], 5, [1, 2]>;
- defm : X86WriteResPairUnsupported<WriteFMulZ>;
- // x87 MUL with a memory operand (FI16m/FI32m/F32m/F64m forms):
- // latency 5, resource cycles [3, 1, 10] for PdLoad, PdFPU1 and PdFPFMA.
- def PdWriteX87Mul : SchedWriteRes<[PdLoad, PdFPU1, PdFPFMA]> {
-   let Latency = 5;
-   let ResourceCycles = [3, 1, 10];
- }
- def : InstRW<[PdWriteX87Mul], (instrs
-     MUL_FI16m, MUL_FI32m, MUL_F32m, MUL_F64m)>;
- defm : PdWriteResXMMPair<WriteFMul64, [PdFPU1, PdFPFMA], 5>;
- defm : PdWriteResXMMPair<WriteFMul64X, [PdFPU1, PdFPFMA], 5>;
- defm : PdWriteResYMMPair<WriteFMul64Y, [PdFPU1, PdFPFMA], 5, [1, 2]>;
- defm : X86WriteResPairUnsupported<WriteFMul64Z>;
- defm : PdWriteResXMMPair<WriteFMA, [PdFPU, PdFPFMA], 5, [1, 3]>; // FMA entries name PdFPU rather than PdFPU0/PdFPU1; XMM and YMM entries share identical numbers.
- defm : PdWriteResXMMPair<WriteFMAX, [PdFPU, PdFPFMA], 5, [1, 3]>;
- defm : PdWriteResYMMPair<WriteFMAY, [PdFPU, PdFPFMA], 5, [1, 3]>;
- defm : X86WriteResPairUnsupported<WriteFMAZ>;
- defm : PdWriteResXMMPair<WriteDPPD, [PdFPU1, PdFPFMA], 15, [1, 10], 15, 2>; // Six-argument form; NOTE(review): the last two values are presumably micro-ops and extra micro-ops for the load-folded variant; confirm against PdWriteResXMMPair.
- defm : PdWriteResXMMPair<WriteDPPS, [PdFPU1, PdFPFMA], 25, [1, 14], 16, 2>;
- defm : PdWriteResYMMPair<WriteDPPSY, [PdFPU1, PdFPFMA], 27, [2, 25], /*or 29*/ 25, 4>; // The inline "or 29" records an alternative value for the argument that follows it.
- defm : X86WriteResPairUnsupported<WriteDPPSZ>;
- // VDPPSrri override: latency 27 and 17 micro-ops, with resource cycles
- // [1, 14] for PdFPU1 and PdFPFMA.
- def PdWriteVDPPSrri : SchedWriteRes<[PdFPU1, PdFPFMA]> {
-   let Latency = 27;
-   let ResourceCycles = [1, 14];
-   let NumMicroOps = 17;
- }
- def : InstRW<[PdWriteVDPPSrri], (instrs
-     VDPPSrri)>;
- defm : PdWriteResXMMPair<WriteFRcp, [PdFPU1, PdFPFMA], 5>; // NOTE(review): arguments look like <write, resources, latency, per-resource cycles, ...>; confirm against PdWriteResXMMPair earlier in this file.
- defm : PdWriteResXMMPair<WriteFRcpX, [PdFPU1, PdFPFMA], 5>;
- defm : PdWriteResYMMPair<WriteFRcpY, [PdFPU1, PdFPFMA], 5, [2, 1]>;
- defm : X86WriteResPairUnsupported<WriteFRcpZ>;
- defm : PdWriteResXMMPair<WriteFRsqrt, [PdFPU1, PdFPFMA], 5, [1, 2]>; // Unlike WriteFRsqrtX below, this entry gives an explicit [1, 2] list.
- defm : PdWriteResXMMPair<WriteFRsqrtX, [PdFPU1, PdFPFMA], 5>;
- defm : PdWriteResYMMPair<WriteFRsqrtY, [PdFPU1, PdFPFMA], 5, [2, 2]>;
- defm : X86WriteResPairUnsupported<WriteFRsqrtZ>;
- defm : PdWriteResXMMPair<WriteFDiv, [PdFPU1, PdFPFMA], 9, [1, 9]>; // Divide: the Y entry doubles both list elements of the XMM entries ([1, 9] -> [2, 18]).
- defm : PdWriteResXMMPair<WriteFDivX, [PdFPU1, PdFPFMA], 9, [1, 9]>;
- defm : PdWriteResYMMPair<WriteFDivY, [PdFPU1, PdFPFMA], 9, [2, 18]>;
- defm : X86WriteResPairUnsupported<WriteFDivZ>;
- // x87 DIV/DIVR with a memory operand (FI16m/FI32m/F32m/F64m forms):
- // latency 9, resource cycles [3, 1, 18] for PdLoad, PdFPU0 and PdFPFMA.
- def PdWriteX87Div : SchedWriteRes<[PdLoad, PdFPU0, PdFPFMA]> {
-   let Latency = 9;
-   let ResourceCycles = [3, 1, 18];
- }
- def : InstRW<[PdWriteX87Div], (instrs
-     DIV_FI16m, DIV_FI32m,
-     DIVR_FI16m, DIVR_FI32m,
-     DIV_F32m, DIV_F64m,
-     DIVR_F32m, DIVR_F64m)>;
- defm : PdWriteResXMMPair<WriteFDiv64, [PdFPU1, PdFPFMA], 9, [1, 9]>; // FDiv64, FSqrt and FSqrt64 all use one shape: 9 / [1, 9] for XMM and 9 / [2, 18] for YMM.
- defm : PdWriteResXMMPair<WriteFDiv64X, [PdFPU1, PdFPFMA], 9, [1, 9]>;
- defm : PdWriteResYMMPair<WriteFDiv64Y, [PdFPU1, PdFPFMA], 9, [2, 18]>;
- defm : X86WriteResPairUnsupported<WriteFDiv64Z>;
- defm : PdWriteResXMMPair<WriteFSqrt, [PdFPU1, PdFPFMA], 9, [1, 9]>;
- defm : PdWriteResXMMPair<WriteFSqrtX, [PdFPU1, PdFPFMA], 9, [1, 9]>;
- defm : PdWriteResYMMPair<WriteFSqrtY, [PdFPU1, PdFPFMA], 9, [2, 18]>;
- defm : X86WriteResPairUnsupported<WriteFSqrtZ>;
- defm : PdWriteResXMMPair<WriteFSqrt64, [PdFPU1, PdFPFMA], 9, [1, 9]>;
- defm : PdWriteResXMMPair<WriteFSqrt64X, [PdFPU1, PdFPFMA], 9, [1, 9]>;
- defm : PdWriteResYMMPair<WriteFSqrt64Y, [PdFPU1, PdFPFMA], 9, [2, 18]>;
- defm : X86WriteResPairUnsupported<WriteFSqrt64Z>;
- defm : PdWriteResXMMPair<WriteFSqrt80, [PdFPU1, PdFPFMA], 1, [1, 18]>; // NOTE(review): third argument is 1 here versus 9 for the other sqrt entries; confirm this is intended.
- defm : PdWriteResXMMPair<WriteFSign, [PdFPU1, PdFPFMA], 1, [1, 4]>;
- defm : PdWriteResXMMPair<WriteFRnd, [PdFPU1, PdFPSTO], 4, []>; // The explicit empty list presumably equals the multiclass default (confirm).
- defm : PdWriteResYMMPair<WriteFRndY, [PdFPU1, PdFPSTO], 4, [2, 1], 2>;
- defm : X86WriteResPairUnsupported<WriteFRndZ>;
- // VFRCZ family. All five writes use PdFPU1 + PdFPSTO; register forms have
- // latency 10 and memory forms latency 15.
- //
- // Packed XMM, register source: 2 micro-ops.
- def PdWriteVFRCZP : SchedWriteRes<[PdFPU1, PdFPSTO]> {
-   let Latency = 10;
-   let ResourceCycles = [2, 1];
-   let NumMicroOps = 2;
- }
- def : InstRW<[PdWriteVFRCZP], (instrs
-     VFRCZPDrr, VFRCZPSrr)>;
- // Scalar, register source: 2 micro-ops, PdFPU1 held for 10 cycles.
- def PdWriteVFRCZS : SchedWriteRes<[PdFPU1, PdFPSTO]> {
-   let Latency = 10;
-   let ResourceCycles = [10, 1];
-   let NumMicroOps = 2;
- }
- def : InstRW<[PdWriteVFRCZS], (instrs
-     VFRCZSDrr, VFRCZSSrr)>;
- // Packed XMM and scalar, memory source: 3 micro-ops.
- def PdWriteVFRCZm : SchedWriteRes<[PdFPU1, PdFPSTO]> {
-   let Latency = 15;
-   let ResourceCycles = [2, 1];
-   let NumMicroOps = 3;
- }
- def : InstRW<[PdWriteVFRCZm], (instrs
-     VFRCZPDrm, VFRCZPSrm,
-     VFRCZSDrm, VFRCZSSrm)>;
- // Packed YMM, register source: 4 micro-ops.
- def PdWriteVFRCZY : SchedWriteRes<[PdFPU1, PdFPSTO]> {
-   let Latency = 10;
-   let ResourceCycles = [3, 1];
-   let NumMicroOps = 4;
- }
- def : InstRW<[PdWriteVFRCZY], (instrs
-     VFRCZPSYrr, VFRCZPDYrr)>;
- // Packed YMM, memory source: 8 micro-ops.
- def PdWriteVFRCZYm : SchedWriteRes<[PdFPU1, PdFPSTO]> {
-   let Latency = 15;
-   let ResourceCycles = [4, 1];
-   let NumMicroOps = 8;
- }
- def : InstRW<[PdWriteVFRCZYm], (instrs
-     VFRCZPSYrm, VFRCZPDYrm)>;
- defm : PdWriteResXMMPair<WriteFLogic, [PdFPU01, PdFPFMA], 2, [1, 2]>; // NOTE(review): arguments look like <write, resources, latency, per-resource cycles, micro-ops, ...>; confirm against PdWriteResXMMPair.
- defm : PdWriteResYMMPair<WriteFLogicY, [PdFPU01, PdFPFMA], 2, [2, 2]>;
- defm : X86WriteResPairUnsupported<WriteFLogicZ>;
- defm : PdWriteResXMMPair<WriteFTest, [PdFPU0, PdFPFMA, PdEX0], 1, [], 2>; // Test entries also list PdEX0.
- defm : PdWriteResYMMPair<WriteFTestY, [PdFPU01, PdFPFMA, PdEX0], 1, [4, 4, 1], 4, 2>; // The Y variant names PdFPU01 where the XMM variant names PdFPU0.
- defm : X86WriteResPairUnsupported<WriteFTestZ>;
- defm : PdWriteResXMMPair<WriteFShuffle, [PdFPU01, PdFPFMA], 2, [1, 2]>;
- defm : PdWriteResYMMPair<WriteFShuffleY, [PdFPU01, PdFPFMA], 2, [2, 4], 2>; // Doubles both list elements of the XMM entry.
- defm : X86WriteResPairUnsupported<WriteFShuffleZ>;
- // VBROADCASTF128 override: latency 7, 2 micro-ops, resource cycles
- // [1, 3] for PdFPU01 and PdFPFMA.
- def PdWriteVBROADCASTF128 : SchedWriteRes<[PdFPU01, PdFPFMA]> {
-   let Latency = 7;
-   let ResourceCycles = [1, 3];
-   let NumMicroOps = 2;
- }
- def : InstRW<[PdWriteVBROADCASTF128], (instrs
-     VBROADCASTF128)>;
- defm : PdWriteResXMMPair<WriteFVarShuffle, [PdFPU01, PdFPFMA], 3, [1, 2]>; // Third argument is 3, versus 2 for WriteFShuffle.
- defm : PdWriteResYMMPair<WriteFVarShuffleY, [PdFPU01, PdFPFMA], 3, [2, 4], 2>;
- defm : X86WriteResPairUnsupported<WriteFVarShuffleZ>;
- defm : PdWriteResXMMPair<WriteFBlend, [PdFPU01, PdFPFMA], 2, [1, 3]>;
- defm : PdWriteResYMMPair<WriteFBlendY, [PdFPU01, PdFPFMA], 2, [2, 3], 2>;
- defm : X86WriteResPairUnsupported<WriteFBlendZ>;
- defm : PdWriteResXMMPair<WriteFVarBlend, [PdFPU01, PdFPFMA], 2, [1, 3]>;
- defm : PdWriteResYMMPair<WriteFVarBlendY, [PdFPU01, PdFPFMA], 2, [2, 4], 2>; // Second list element is 4 here, versus 3 for WriteFBlendY.
- defm : X86WriteResPairUnsupported<WriteFVarBlendZ>;
- defm : PdWriteResXMMPair<WriteFShuffle256, [PdFPU01, PdFPFMA], 2, [1, 3], 2>; // Defined through the XMM pair multiclass despite the "256" in the write name.
- defm : X86WriteResPairUnsupported<WriteFVarShuffle256>;
- // Overrides for the VEXTRACTF128 and VPERM2F128 instructions. All four
- // writes use PdFPU01 + PdFPFMA.
- //
- // VEXTRACTF128rr: latency 2; NumMicroOps left at its default.
- def PdWriteVEXTRACTF128rr : SchedWriteRes<[PdFPU01, PdFPFMA]> {
-   let Latency = 2;
-   let ResourceCycles = [1, 2];
- }
- def : InstRW<[PdWriteVEXTRACTF128rr], (instrs
-     VEXTRACTF128rr)>;
- // VEXTRACTF128mr: latency 7, 2 micro-ops.
- def PdWriteVEXTRACTF128mr : SchedWriteRes<[PdFPU01, PdFPFMA]> {
-   let Latency = 7;
-   let ResourceCycles = [1, 4];
-   let NumMicroOps = 2;
- }
- def : InstRW<[PdWriteVEXTRACTF128mr], (instrs
-     VEXTRACTF128mr)>;
- // VPERM2F128rr: latency 4, 8 micro-ops.
- def PdWriteVPERM2F128rr : SchedWriteRes<[PdFPU01, PdFPFMA]> {
-   let Latency = 4;
-   let ResourceCycles = [1, 6];
-   let NumMicroOps = 8;
- }
- def : InstRW<[PdWriteVPERM2F128rr], (instrs
-     VPERM2F128rr)>;
- // VPERM2F128rm: 10 micro-ops; latency is the rr latency plus 4.
- def PdWriteVPERM2F128rm : SchedWriteRes<[PdFPU01, PdFPFMA]> {
-   let Latency = 8; // 4 + 4
-   let ResourceCycles = [1, 8];
-   let NumMicroOps = 10;
- }
- def : InstRW<[PdWriteVPERM2F128rm], (instrs
-     VPERM2F128rm)>;
- ////////////////////////////////////////////////////////////////////////////////
- // Conversions.
- ////////////////////////////////////////////////////////////////////////////////
- defm : PdWriteResXMMPair<WriteCvtSS2I, [PdFPU0, PdFPCVT, PdFPSTO, PdFPFMA, PdEX0], 13, [], 2>; // The SS2I and SD2I entries list five resources, including PdEX0, and carry identical numbers.
- defm : PdWriteResXMMPair<WriteCvtPS2I, [PdFPU0, PdFPCVT, PdFPSTO], 4>;
- defm : PdWriteResYMMPair<WriteCvtPS2IY, [PdFPU0, PdFPCVT, PdFPSTO], 4, [1, 2, 1]>;
- defm : X86WriteResPairUnsupported<WriteCvtPS2IZ>;
- defm : PdWriteResXMMPair<WriteCvtSD2I, [PdFPU0, PdFPCVT, PdFPSTO, PdFPFMA, PdEX0], 13, [], 2>;
- defm : PdWriteResXMMPair<WriteCvtPD2I, [PdFPU0, PdFPCVT, PdFPSTO], 8, [], 2>;
- defm : PdWriteResYMMPair<WriteCvtPD2IY, [PdFPU0, PdFPCVT, PdFPSTO, PdFPFMA], 8, [1, 2, 1, 1], 4>; // The Y variant adds PdFPFMA to the resources used by WriteCvtPD2I.
- defm : X86WriteResPairUnsupported<WriteCvtPD2IZ>;
- // MMX_CVTTPD2PIrr override: latency 6, 2 micro-ops; ResourceCycles is
- // left at its default.
- def PdWriteMMX_CVTTPD2PIrr : SchedWriteRes<[PdFPU0, PdFPCVT, PdFPSTO]> {
-   let Latency = 6;
-   let NumMicroOps = 2;
- }
- def : InstRW<[PdWriteMMX_CVTTPD2PIrr], (instrs
-     MMX_CVTTPD2PIrr)>;
- // FIXME: f+3 ST, LD+STC latency
- defm : PdWriteResXMMPair<WriteCvtI2SS, [PdFPU0, PdFPCVT, PdFPSTO], 4, [], 2>;
- // FIXME: .Folded version is one NumMicroOp *less*.. (NOTE(review): placement suggests this refers to the WriteCvtI2SS entry directly above; confirm.)
- defm : PdWriteResXMMPair<WriteCvtI2PS, [PdFPU0, PdFPCVT, PdFPSTO], 4>;
- defm : PdWriteResYMMPair<WriteCvtI2PSY, [PdFPU0, PdFPCVT, PdFPSTO], 4, [1, 2, 1]>;
- defm : X86WriteResPairUnsupported<WriteCvtI2PSZ>;
- defm : PdWriteResXMMPair<WriteCvtI2SD, [PdFPU0, PdFPCVT, PdFPSTO], 4, [], 2>; // Same numbers as the WriteCvtI2SS entry.
- // FIXME: .Folded version is one NumMicroOp *less*.. (NOTE(review): placement suggests this refers to the WriteCvtI2SD entry directly above; confirm.)
- // Register forms of the scalar integer-to-FP conversions listed below:
- // latency 13, 2 micro-ops, resource cycles [1, 3, 1] for PdFPU0, PdFPCVT
- // and PdFPSTO.
- def PdWriteCVTSI642SDrr_CVTSI642SSrr_CVTSI2SDr_CVTSI2SSrr
-     : SchedWriteRes<[PdFPU0, PdFPCVT, PdFPSTO]> {
-   let Latency = 13;
-   let ResourceCycles = [1, 3, 1];
-   let NumMicroOps = 2;
- }
- def : InstRW<[PdWriteCVTSI642SDrr_CVTSI642SSrr_CVTSI2SDr_CVTSI2SSrr], (instrs
-     CVTSI642SDrr, CVTSI642SSrr,
-     CVTSI2SDrr, CVTSI2SSrr)>;
- defm : PdWriteResXMMPair<WriteCvtI2PD, [PdFPU0, PdFPCVT, PdFPSTO], 8, [], 2>; // I2PD, PS2PD and PD2PS share this XMM shape: 8, [], 2.
- defm : PdWriteResYMMPair<WriteCvtI2PDY, [PdFPU0, PdFPCVT, PdFPSTO], 8, [1, 2, 1], 4, 1>; // NOTE(review): the sixth argument is presumably extra micro-ops for the load-folded variant; confirm against PdWriteResYMMPair.
- defm : X86WriteResPairUnsupported<WriteCvtI2PDZ>;
- defm : PdWriteResXMMPair<WriteCvtSS2SD, [PdFPU0, PdFPCVT, PdFPSTO], 4, [1, 2, 1]>;
- defm : PdWriteResXMMPair<WriteCvtPS2PD, [PdFPU0, PdFPCVT, PdFPSTO], 8, [], 2>;
- defm : PdWriteResYMMPair<WriteCvtPS2PDY, [PdFPU0, PdFPCVT, PdFPSTO], 8, [1, 2, 1], 4, 1>;
- defm : X86WriteResPairUnsupported<WriteCvtPS2PDZ>;
- defm : PdWriteResXMMPair<WriteCvtSD2SS, [PdFPU0, PdFPCVT, PdFPSTO], 4, [1, 2, 1]>; // Same numbers as WriteCvtSS2SD.
- defm : PdWriteResXMMPair<WriteCvtPD2PS, [PdFPU0, PdFPCVT, PdFPSTO], 8, [], 2>;
- defm : PdWriteResYMMPair<WriteCvtPD2PSY, [PdFPU0, PdFPCVT, PdFPSTO, PdFPFMA], 8, [1, 2, 1, 1], 4>; // Adds PdFPFMA to the resource list, like WriteCvtPD2IY.
- defm : X86WriteResPairUnsupported<WriteCvtPD2PSZ>;
- // MMX_CVTPD2PIrr / MMX_CVTPI2PDrr: latency 6, 2 micro-ops.
- def PdWriteMMX_CVTPD2PIrrMMX_CVTPI2PDrr
-     : SchedWriteRes<[PdFPU0, PdFPCVT, PdFPSTO]> {
-   let Latency = 6;
-   let NumMicroOps = 2;
- }
- def : InstRW<[PdWriteMMX_CVTPD2PIrrMMX_CVTPI2PDrr], (instrs
-     MMX_CVTPD2PIrr,
-     MMX_CVTPI2PDrr)>;
- // MMX_CVTPI2PSrr: same resources and micro-op count, latency 4.
- def PdWriteMMX_CVTPI2PSrr : SchedWriteRes<[PdFPU0, PdFPCVT, PdFPSTO]> {
-   let Latency = 4;
-   let NumMicroOps = 2;
- }
- def : InstRW<[PdWriteMMX_CVTPI2PSrr], (instrs
-     MMX_CVTPI2PSrr)>;
- defm : PdWriteResXMMPair<WriteCvtPH2PS, [PdFPU0, PdFPCVT, PdFPSTO], 8, [1, 2, 1], 2, 1>; // PH2PS uses the *Pair multiclasses; PS2PH below uses plain PdWriteRes with separate ...St entries.
- defm : PdWriteResYMMPair<WriteCvtPH2PSY, [PdFPU0, PdFPCVT, PdFPSTO], 8, [1, 2, 1], 4, 3>;
- defm : X86WriteResPairUnsupported<WriteCvtPH2PSZ>;
- defm : PdWriteRes<WriteCvtPS2PH, [PdFPU0, PdFPCVT, PdFPSTO], 8, [1, 2, 1], 2>;
- defm : PdWriteRes<WriteCvtPS2PHY, [PdFPU0, PdFPCVT, PdFPSTO, PdFPFMA], 8, [1, 2, 1, 1], 4>;
- defm : X86WriteResUnsupported<WriteCvtPS2PHZ>;
- defm : PdWriteRes<WriteCvtPS2PHSt, [PdFPU0, PdFPCVT, PdFPSTO, PdStore], 4, [1, 2, 1, 1], 3>; // The ...St entries append PdStore to the resource list of the matching non-St entry.
- defm : PdWriteRes<WriteCvtPS2PHYSt, [PdFPU0, PdFPCVT, PdFPSTO, PdFPFMA, PdStore], 4, [1, 2, 1, 1, 1], 4>;
- defm : X86WriteResUnsupported<WriteCvtPS2PHZSt>;
- ////////////////////////////////////////////////////////////////////////////////
- // Vector integer operations.
- ////////////////////////////////////////////////////////////////////////////////
- defm : PdWriteRes<WriteVecLoad, [PdLoad, PdFPU01, PdFPMAL], 5, [3, 1, 3]>; // Vector-integer loads name PdFPMAL where the WriteFLoad entries name PdFPFMA; the numbers match WriteFLoad.
- defm : PdWriteRes<WriteVecLoadX, [PdLoad, PdFPU01, PdFPMAL], 5, [3, 1, 3]>;
- defm : PdWriteRes<WriteVecLoadY, [PdLoad, PdFPU01, PdFPMAL], 5, [3, 2, 3], 2>;
- defm : PdWriteRes<WriteVecLoadNT, [PdLoad, PdFPU01, PdFPMAL], 5, [3, 1, 4]>;
- defm : PdWriteRes<WriteVecLoadNTY, [PdLoad, PdFPU01, PdFPMAL], 5, [3, 2, 4]>; // NOTE(review): no trailing micro-op argument here, unlike WriteVecLoadY; confirm this is intended.
- defm : PdWriteRes<WriteVecMaskedLoad, [PdLoad, PdFPU01, PdFPMAL], 6, [3, 1, 2]>;
- defm : PdWriteRes<WriteVecMaskedLoadY, [PdLoad, PdFPU01, PdFPMAL], 6, [3, 2, 4], 2>;
- defm : PdWriteRes<WriteVecStore, [PdStore, PdFPU23, PdFPSTO], 2, [1, 3, 1]>;
- defm : PdWriteRes<WriteVecStoreX, [PdStore, PdFPU23, PdFPSTO], 1, [1, 3, 1]>;
- defm : PdWriteRes<WriteVecStoreY, [PdStore, PdFPU23, PdFPSTO], 1, [2, 36, 2], 4>; // First list element is 2 here, whereas WriteFStoreY uses [1, 36, 2].
- // VMOVDQUYmr gets its own write with 8 micro-ops. Latency and
- // ResourceCycles are not overridden, so the SchedWriteRes defaults apply.
- def PdWriteVMOVDQUYmr : SchedWriteRes<[PdStore, PdFPU1, PdFPSTO]> {
-   let NumMicroOps = 8;
- }
- def : InstRW<[PdWriteVMOVDQUYmr], (instrs
-     VMOVDQUYmr)>;
- defm : PdWriteRes<WriteVecStoreNT, [PdStore, PdFPU1, PdFPSTO], 2>; // Third argument is 2 here; the WriteFStoreNT entries use 3.
- defm : PdWriteRes<WriteVecStoreNTY, [PdStore, PdFPU1, PdFPSTO], 2, [2, 2, 2], 4>;
- defm : X86WriteResUnsupported<WriteVecMaskedStore32>; // All four vector-integer masked-store write classes are declared unsupported.
- defm : X86WriteResUnsupported<WriteVecMaskedStore32Y>;
- defm : X86WriteResUnsupported<WriteVecMaskedStore64>;
- defm : X86WriteResUnsupported<WriteVecMaskedStore64Y>;
- defm : PdWriteRes<WriteVecMove, [PdFPU01, PdFPMAL], 2>;
- defm : PdWriteRes<WriteVecMoveX, [PdFPU01, PdFPMAL], 1, [1, 2]>;
- defm : PdWriteRes<WriteVecMoveY, [PdFPU01, PdFPMAL], 2, [2, 2], 2>;
- defm : X86WriteResUnsupported<WriteVecMoveZ>;
- // MOVDQArr: only the resources are specified. Every other field keeps its
- // SchedWriteRes default, so the record needs no body.
- def PdWriteMOVDQArr : SchedWriteRes<[PdFPU01, PdFPMAL]>;
- def : InstRW<[PdWriteMOVDQArr], (instrs
-     MOVDQArr)>;
- // MMX_MOVQ2DQrr: same resources, latency 4.
- def PdWriteMOVQ2DQrr : SchedWriteRes<[PdFPU01, PdFPMAL]> {
-   let Latency = 4;
- }
- def : InstRW<[PdWriteMOVQ2DQrr], (instrs
-     MMX_MOVQ2DQrr)>;
- defm : PdWriteRes<WriteVecMoveToGpr, [PdFPU0, PdFPFMA, PdEX0], 11>; // Both GPR<->vector move entries use 11 as the third argument (presumably latency; confirm against PdWriteRes).
- defm : PdWriteRes<WriteVecMoveFromGpr, [PdFPU01, PdFPFMA], 11, [1, 2], 2>;
- defm : PdWriteResXMMPair<WriteVecALU, [PdFPU01, PdFPMAL], 2>;
- defm : PdWriteResXMMPair<WriteVecALUX, [PdFPU01, PdFPMAL], 2, [1, 2]>;
- defm : X86WriteResPairUnsupported<WriteVecALUY>; // In this group both the Y and Z write classes are declared unsupported.
- defm : X86WriteResPairUnsupported<WriteVecALUZ>;
- defm : PdWriteResXMMPair<WriteVecShift, [PdFPU01, PdFPMAL], 3, [1, 2]>;
- defm : PdWriteResXMMPair<WriteVecShiftX, [PdFPU01, PdFPMAL], 3, [1, 2]>;
- defm : X86WriteResPairUnsupported<WriteVecShiftY>;
- defm : X86WriteResPairUnsupported<WriteVecShiftZ>;
- defm : PdWriteResXMMPair<WriteVecShiftImm, [PdFPU01, PdFPMAL], 2, [1, 2]>;
- defm : PdWriteResXMMPair<WriteVecShiftImmX, [PdFPU01, PdFPMAL], 2, [1, 2]>;
- defm : X86WriteResPairUnsupported<WriteVecShiftImmY>;
- defm : X86WriteResPairUnsupported<WriteVecShiftImmZ>;
- defm : PdWriteResXMMPair<WriteVecIMul, [PdFPU0, PdFPMMA], 4>; // Integer multiplies use PdFPU0 + PdFPMMA.
- defm : PdWriteResXMMPair<WriteVecIMulX, [PdFPU0, PdFPMMA], 4>;
- defm : X86WriteResPairUnsupported<WriteVecIMulY>;
- defm : X86WriteResPairUnsupported<WriteVecIMulZ>;
- defm : PdWriteResXMMPair<WritePMULLD, [PdFPU0, PdFPU01, PdFPMMA, PdFPMAL], 5, [2, 1, 2, 1]>; // Lists both PdFPU0 and PdFPU01, plus both PdFPMMA and PdFPMAL.
- defm : X86WriteResPairUnsupported<WritePMULLDY>;
- defm : X86WriteResPairUnsupported<WritePMULLDZ>;
- // VPMACS(S)DQ{H,L}rr override: latency 4 on PdFPU0, PdFPMMA and PdFPMAL;
- // the remaining fields keep their defaults.
- def PdWriteVPMACS : SchedWriteRes<[PdFPU0, PdFPMMA, PdFPMAL]> {
-   let Latency = 4;
- }
- def : InstRW<[PdWriteVPMACS], (instrs
-     VPMACSDQHrr, VPMACSDQLrr,
-     VPMACSSDQHrr, VPMACSSDQLrr)>;
- defm : PdWriteResXMMPair<WriteMPSAD, [PdFPU0, PdFPMMA], 9, [1, 4], 8>; // NOTE(review): arguments look like <write, resources, latency, per-resource cycles, micro-ops>; confirm against PdWriteResXMMPair.
- defm : X86WriteResPairUnsupported<WriteMPSADY>;
- defm : X86WriteResPairUnsupported<WriteMPSADZ>;
- // VMPSADBWrri override: latency 8, 10 micro-ops, resource cycles [1, 4]
- // for PdFPU0 and PdFPMMA.
- def PdWriteVMPSADBW : SchedWriteRes<[PdFPU0, PdFPMMA]> {
-   let Latency = 8;
-   let ResourceCycles = [1, 4];
-   let NumMicroOps = 10;
- }
- def : InstRW<[PdWriteVMPSADBW], (instrs
-     VMPSADBWrri)>;
- defm : PdWriteResXMMPair<WritePSADBW, [PdFPU01, PdFPMAL], 4, [1, 2], 2>; // WritePSADBW and WritePSADBWX carry identical numbers.
- defm : PdWriteResXMMPair<WritePSADBWX, [PdFPU01, PdFPMAL], 4, [1, 2], 2>;
- defm : X86WriteResPairUnsupported<WritePSADBWY>;
- defm : X86WriteResPairUnsupported<WritePSADBWZ>;
- defm : PdWriteResXMMPair<WritePHMINPOS, [PdFPU0, PdFPMAL], 4, [], 2>;
- defm : PdWriteResXMMPair<WriteShuffle, [PdFPU01, PdFPMAL], 2, [1, 2]>;
- defm : PdWriteResXMMPair<WriteShuffleX, [PdFPU01, PdFPMAL], 2, [1, 2]>;
- defm : PdWriteResYMMPair<WriteShuffleY, [PdFPU01, PdFPMAL], 2, [1, 4]>; // Unlike most vector-integer Y classes in this file, WriteShuffleY is defined rather than marked unsupported.
- defm : X86WriteResPairUnsupported<WriteShuffleZ>;
- defm : PdWriteResXMMPair<WriteVarShuffle, [PdFPU01, PdFPMAL], 3, [1, 2]>;
- defm : PdWriteResXMMPair<WriteVarShuffleX, [PdFPU01, PdFPMAL], 3, [1, 3]>; // Second list element is 3 here, versus 2 for WriteVarShuffle.
- defm : X86WriteResPairUnsupported<WriteVarShuffleY>;
- defm : X86WriteResPairUnsupported<WriteVarShuffleZ>;
- // VPPERMrrr (and its _REV twin) override: latency 2, resource cycles
- // [1, 3] for PdFPU01 and PdFPMAL.
- def PdWriteVPPERM : SchedWriteRes<[PdFPU01, PdFPMAL]> {
-   let Latency = 2;
-   let ResourceCycles = [1, 3];
- }
- def : InstRW<[PdWriteVPPERM], (instrs
-     VPPERMrrr, VPPERMrrr_REV)>;
- defm : PdWriteResXMMPair<WriteBlend, [PdFPU01, PdFPMAL], 2>; // NOTE(review): arguments look like <write, resources, latency, per-resource cycles, micro-ops, ...>; confirm against PdWriteResXMMPair.
- defm : X86WriteResPairUnsupported<WriteBlendY>;
- defm : X86WriteResPairUnsupported<WriteBlendZ>;
- defm : PdWriteResXMMPair<WriteVarBlend, [PdFPU01, PdFPMAL], 2, [1, 2]>;
- defm : X86WriteResPairUnsupported<WriteVarBlendY>;
- defm : X86WriteResPairUnsupported<WriteVarBlendZ>;
- defm : PdWriteResXMMPair<WriteVecLogic, [PdFPU01, PdFPMAL], 2>;
- defm : PdWriteResXMMPair<WriteVecLogicX, [PdFPU01, PdFPMAL], 2, [1, 2]>;
- defm : X86WriteResPairUnsupported<WriteVecLogicY>;
- defm : X86WriteResPairUnsupported<WriteVecLogicZ>;
- defm : PdWriteResXMMPair<WriteVecTest, [PdFPU0, PdFPFMA, PdEX0], 1, [], 2>; // Same arguments as the WriteFTest entry.
- defm : PdWriteResYMMPair<WriteVecTestY, [PdFPU01, PdFPFMA, PdEX0], 1, [2, 4, 1], 4, 2>; // First list element is 2 here; WriteFTestY uses [4, 4, 1].
- defm : X86WriteResPairUnsupported<WriteVecTestZ>;
- defm : PdWriteResXMMPair<WriteShuffle256, [PdFPU01, PdFPMAL]>; // These three "256" classes get all-default entries rather than being marked unsupported. NOTE(review): presumably placeholders; confirm.
- defm : PdWriteResXMMPair<WriteVPMOV256, [PdFPU01, PdFPMAL]>;
- defm : PdWriteResXMMPair<WriteVarShuffle256, [PdFPU01, PdFPMAL]>;
- defm : PdWriteResXMMPair<WriteVarVecShift, [PdFPU01, PdFPMAL], 3, [1, 2]>;
- defm : X86WriteResPairUnsupported<WriteVarVecShiftY>;
- defm : X86WriteResPairUnsupported<WriteVarVecShiftZ>;
- ////////////////////////////////////////////////////////////////////////////////
- // Vector insert/extract operations.
- ////////////////////////////////////////////////////////////////////////////////
- defm : PdWriteRes<WriteVecInsert, [PdFPU01, PdFPMAL], 2, [1, 3], 2>; // NOTE(review): arguments look like <write, resources, latency, per-resource cycles, micro-ops>; confirm against PdWriteRes.
- defm : PdWriteRes<WriteVecInsertLd, [PdFPU01, PdFPMAL, PdLoad], 6, [1, 4, 3], 2>; // The Ld variant appends PdLoad to the resource list.
- defm : PdWriteRes<WriteVecExtract, [PdFPU0, PdFPFMA, PdEX0], 12, [1, 3, 1], 2>;
- defm : PdWriteRes<WriteVecExtractSt, [PdFPU1, PdFPSTO, PdStore], 13, [2, 1, 1], 2>; // The St variant uses a different resource set (PdFPU1, PdFPSTO, PdStore) from WriteVecExtract.
- // EXTRQ / EXTRQI override: latency 3, resource cycles [1, 3] for PdFPU01
- // and PdFPMAL.
- def PdWriteEXTRQ : SchedWriteRes<[PdFPU01, PdFPMAL]> {
-   let Latency = 3;
-   let ResourceCycles = [1, 3];
- }
- def : InstRW<[PdWriteEXTRQ], (instrs
-     EXTRQ, EXTRQI)>;
- ////////////////////////////////////////////////////////////////////////////////
- // SSE42 String instructions.
- ////////////////////////////////////////////////////////////////////////////////
- defm : PdWriteResXMMPair<WritePCmpIStrI, [PdFPU1, PdFPFMA, PdEX0], 11, [1, 6, 1], 7, 1>; // NOTE(review): six-argument form; the last two values are presumably micro-ops and extra micro-ops for the load-folded variant; confirm against PdWriteResXMMPair.
- defm : PdWriteResXMMPair<WritePCmpIStrM, [PdFPU1, PdFPFMA, PdEX0], 7, [1, 8, 1], 7, 2>;
- defm : PdWriteResXMMPair<WritePCmpEStrI, [PdFPU1, PdStore, PdLoad, PdFPMAL, PdFPFMA, PdEX0], 14, [1, 10, 10, 10, 1, 1], 27, 1>; // The EStr variants list six resources, including PdStore and PdLoad.
- defm : PdWriteResXMMPair<WritePCmpEStrM, [PdFPU1, PdStore, PdLoad, PdFPMAL, PdFPFMA, PdEX0], 10, [1, 10, 10, 10, 1, 1], 27, 1>; // Differs from WritePCmpEStrI only in the third argument (10 vs 14).
- ////////////////////////////////////////////////////////////////////////////////
- // MOVMSK Instructions.
- ////////////////////////////////////////////////////////////////////////////////
- defm : PdWriteRes<WriteFMOVMSK, [PdFPU0, PdFPFMA, PdEX0], 12, [], 2>; // The FP and vector MOVMSK entries carry identical arguments.
- defm : PdWriteRes<WriteVecMOVMSK, [PdFPU0, PdFPFMA, PdEX0], 12, [], 2>;
- defm : X86WriteResUnsupported<WriteVecMOVMSKY>; // The corresponding Z line below is commented out in the source.
- // defm : X86WriteResUnsupported<WriteVecMOVMSKZ>;
- defm : PdWriteRes<WriteMMXMOVMSK, [PdFPU0, PdFPFMA, PdEX0], 10, [], 2>; // Same resources; third argument is 10 instead of 12.
- ////////////////////////////////////////////////////////////////////////////////
- // AES Instructions.
- ////////////////////////////////////////////////////////////////////////////////
- defm : PdWriteResXMMPair<WriteAESIMC, [PdFPU0, PdFPMMA], 5>; // All AES entries use PdFPU0 + PdFPMMA.
- defm : PdWriteResXMMPair<WriteAESKeyGen, [PdFPU0, PdFPMMA], 5>;
- defm : PdWriteResXMMPair<WriteAESDecEnc, [PdFPU0, PdFPMMA], 9, [], 2>; // Third argument is 9 (5 above), with an explicit trailing 2.
- ////////////////////////////////////////////////////////////////////////////////
- // Horizontal add/sub instructions.
- ////////////////////////////////////////////////////////////////////////////////
- defm : PdWriteResXMMPair<WriteFHAdd, [PdFPU0, PdFPFMA], 11, [1, 5], 3, 1>; // NOTE(review): six-argument form; the last two values are presumably micro-ops and extra micro-ops for the load-folded variant; confirm against PdWriteResXMMPair.
- defm : PdWriteResYMMPair<WriteFHAddY, [PdFPU0, PdFPFMA], 11, [1, 8], 8, 2>;
- defm : X86WriteResPairUnsupported<WriteFHAddZ>;
- defm : PdWriteResXMMPair<WritePHAdd, [PdFPU01, PdFPMAL], 5, [1, 4], 3, 1>; // WritePHAdd and WritePHAddX use different numbers; the InstRW overrides that follow this group assign WritePHAdd to an explicit instruction list.
- defm : PdWriteResXMMPair<WritePHAddX, [PdFPU01, PdFPMAL], 2, [1, 2]>;
- defm : X86WriteResPairUnsupported<WritePHAddY>;
- defm : X86WriteResPairUnsupported<WritePHAddZ>;
- // Explicitly bind the listed integer horizontal add/sub instructions to
- // WritePHAdd (register forms) and WritePHAdd.Folded (memory forms).
- // NOTE(review): presumably these would otherwise pick up WritePHAddX;
- // confirm against the instruction definitions.
- def : InstRW<[WritePHAdd], (instrs
-     PHADDDrr, PHSUBDrr,
-     PHADDWrr, PHSUBWrr,
-     PHADDSWrr, PHSUBSWrr,
-     VPHADDDrr, VPHSUBDrr,
-     VPHADDWrr, VPHSUBWrr,
-     VPHADDSWrr, VPHSUBSWrr)>;
- def : InstRW<[WritePHAdd.Folded], (instrs
-     PHADDDrm, PHSUBDrm,
-     PHADDWrm, PHSUBWrm,
-     PHADDSWrm, PHSUBSWrm,
-     VPHADDDrm, VPHSUBDrm,
-     VPHADDWrm, VPHSUBWrm,
-     VPHADDSWrm, VPHSUBSWrm)>;
- ////////////////////////////////////////////////////////////////////////////////
- // Carry-less multiplication instructions.
- ////////////////////////////////////////////////////////////////////////////////
- // Generic carry-less multiply write.
- defm : PdWriteResXMMPair<WriteCLMul, [PdFPU0, PdFPMMA], 12, [1, 7], 5, 1>;
- // VPCLMULQDQrr override: same latency (12) and resource cycles ([1, 7])
- // as the WriteCLMul entry above, but 6 micro-ops.
- def PdWriteVPCLMULQDQrr : SchedWriteRes<[PdFPU0, PdFPMMA]> {
-   let Latency = 12;
-   let ResourceCycles = [1, 7];
-   let NumMicroOps = 6;
- }
- def : InstRW<[PdWriteVPCLMULQDQrr], (instrs
-     VPCLMULQDQrr)>;
- ////////////////////////////////////////////////////////////////////////////////
- // SSE4A instructions.
- ////////////////////////////////////////////////////////////////////////////////
- // INSERTQ: latency 3, resource cycles [1, 2] for PdFPU01 and PdFPMAL.
- def PdWriteINSERTQ : SchedWriteRes<[PdFPU01, PdFPMAL]> {
-   let Latency = 3;
-   let ResourceCycles = [1, 2];
- }
- def : InstRW<[PdWriteINSERTQ], (instrs
-     INSERTQ)>;
- // INSERTQI: same latency, but PdFPMAL is held for 3 cycles instead of 2.
- def PdWriteINSERTQI : SchedWriteRes<[PdFPU01, PdFPMAL]> {
-   let Latency = 3;
-   let ResourceCycles = [1, 3];
- }
- def : InstRW<[PdWriteINSERTQI], (instrs
-     INSERTQI)>;
- ////////////////////////////////////////////////////////////////////////////////
- // AVX instructions.
- ////////////////////////////////////////////////////////////////////////////////
- // VBROADCASTSDYrm / VBROADCASTSSYrm: latency 6, 2 micro-ops, resource
- // cycles [1, 2, 4] for PdLoad, PdFPU01 and PdFPFMA. The InstRW also
- // attaches ReadAfterLd as the second entry of its list.
- def PdWriteVBROADCASTYLd : SchedWriteRes<[PdLoad, PdFPU01, PdFPFMA]> {
-   let Latency = 6;
-   let ResourceCycles = [1, 2, 4];
-   let NumMicroOps = 2;
- }
- def : InstRW<[PdWriteVBROADCASTYLd, ReadAfterLd], (instrs
-     VBROADCASTSDYrm,
-     VBROADCASTSSYrm)>;
- // VZEROALL: modelled with an empty resource list, latency 90 and
- // 32 micro-ops.
- def PdWriteVZEROALL : SchedWriteRes<[]> {
-   let Latency = 90;
-   let NumMicroOps = 32;
- }
- def : InstRW<[PdWriteVZEROALL], (instrs
-     VZEROALL)>;
- // VZEROUPPER: empty resource list, latency 46 and 16 micro-ops.
- def PdWriteVZEROUPPER : SchedWriteRes<[]> {
-   let Latency = 46;
-   let NumMicroOps = 16;
- }
- def : InstRW<[PdWriteVZEROUPPER], (instrs
-     VZEROUPPER)>;
- ///////////////////////////////////////////////////////////////////////////////
- // SchedWriteVariant definitions.
- ///////////////////////////////////////////////////////////////////////////////
- // A write with an empty resource list and zero latency. It is the
- // "zero idiom" alternative in the SchedWriteVariants of this section.
- def PdWriteZeroLatency : SchedWriteRes<[]> {
-   let Latency = 0;
- }
- // GPR zero idioms: when ZeroIdiomPredicate matches, the instruction is
- // modelled as PdWriteZeroLatency; otherwise (TruePred) as WriteALU.
- def PdWriteZeroIdiom : SchedWriteVariant<[
-   SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [PdWriteZeroLatency]>,
-   SchedVar<MCSchedPredicate<TruePred>, [WriteALU]>
- ]>;
- def : InstRW<[PdWriteZeroIdiom], (instrs
-     SUB32rr, SUB64rr,
-     XOR32rr, XOR64rr)>;
- // FP-logic zero idioms: PdWriteZeroLatency when ZeroIdiomPredicate
- // matches, otherwise WriteFLogic. Only XMM register forms are listed.
- def PdWriteFZeroIdiom : SchedWriteVariant<[
-   SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [PdWriteZeroLatency]>,
-   SchedVar<MCSchedPredicate<TruePred>, [WriteFLogic]>
- ]>;
- def : InstRW<[PdWriteFZeroIdiom], (instrs
-     XORPSrr, VXORPSrr,
-     XORPDrr, VXORPDrr,
-     ANDNPSrr, VANDNPSrr,
-     ANDNPDrr, VANDNPDrr)>;
- // VXORPSYrr, VXORPDYrr, VANDNPSYrr, VANDNPDYrr "zero-idioms" have latency of 1
- // (presumably why the YMM forms are absent from the zero-latency list above).
- // MMX logic zero idioms: PdWriteZeroLatency when ZeroIdiomPredicate
- // matches, otherwise WriteVecLogic.
- def PdWriteVZeroIdiomLogic : SchedWriteVariant<[
-   SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [PdWriteZeroLatency]>,
-   SchedVar<MCSchedPredicate<TruePred>, [WriteVecLogic]>
- ]>;
- def : InstRW<[PdWriteVZeroIdiomLogic], (instrs
-     MMX_PXORrr, MMX_PANDNrr)>;
- // XMM logic zero idioms (plain and V-prefixed forms): same scheme, with
- // WriteVecLogicX as the fallback.
- def PdWriteVZeroIdiomLogicX : SchedWriteVariant<[
-   SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [PdWriteZeroLatency]>,
-   SchedVar<MCSchedPredicate<TruePred>, [WriteVecLogicX]>
- ]>;
- def : InstRW<[PdWriteVZeroIdiomLogicX], (instrs
-     PXORrr, VPXORrr,
-     PANDNrr, VPANDNrr)>;
- // MMX ALU zero idioms (PSUB* and PCMPGT*): PdWriteZeroLatency when
- // ZeroIdiomPredicate matches, otherwise WriteVecALU.
- def PdWriteVZeroIdiomALU : SchedWriteVariant<[
-   SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [PdWriteZeroLatency]>,
-   SchedVar<MCSchedPredicate<TruePred>, [WriteVecALU]>
- ]>;
- def : InstRW<[PdWriteVZeroIdiomALU], (instrs
-     MMX_PSUBBrr, MMX_PSUBDrr,
-     MMX_PSUBQrr, MMX_PSUBWrr,
-     MMX_PCMPGTBrr,
-     MMX_PCMPGTDrr,
-     MMX_PCMPGTWrr)>;
- // XMM ALU zero idioms (plain and V-prefixed forms): same scheme, with
- // WriteVecALUX as the fallback.
- def PdWriteVZeroIdiomALUX : SchedWriteVariant<[
-   SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [PdWriteZeroLatency]>,
-   SchedVar<MCSchedPredicate<TruePred>, [WriteVecALUX]>
- ]>;
- def : InstRW<[PdWriteVZeroIdiomALUX], (instrs
-     PSUBBrr, VPSUBBrr,
-     PSUBDrr, VPSUBDrr,
-     PSUBQrr, VPSUBQrr,
-     PSUBWrr, VPSUBWrr,
-     PCMPGTBrr, VPCMPGTBrr,
-     PCMPGTDrr, VPCMPGTDrr,
-     PCMPGTWrr, VPCMPGTWrr)>;
- ///////////////////////////////////////////////////////////////////////////////
- // Dependency breaking instructions.
- ///////////////////////////////////////////////////////////////////////////////
- // Instructions treated as zero idioms when ZeroIdiomPredicate holds,
- // grouped by register class / encoding.
- // Note: VPCMPGTQ, but not PCMPGTQ! (VPCMPGTQrr is in the AVX class below;
- // the SSE class has no PCMPGTQrr.)
- def : IsZeroIdiomFunction<[
-   // GPR Zero-idioms.
-   DepBreakingClass<[ SUB32rr, SUB64rr, XOR32rr, XOR64rr ], ZeroIdiomPredicate>,
-   // MMX Zero-idioms.
-   DepBreakingClass<[
-     MMX_PXORrr, MMX_PANDNrr, MMX_PSUBBrr,
-     MMX_PSUBDrr, MMX_PSUBQrr, MMX_PSUBWrr,
-     MMX_PSUBSBrr, MMX_PSUBSWrr, MMX_PSUBUSBrr, MMX_PSUBUSWrr,
-     MMX_PCMPGTBrr, MMX_PCMPGTDrr, MMX_PCMPGTWrr
-   ], ZeroIdiomPredicate>,
-   // SSE Zero-idioms.
-   DepBreakingClass<[
-     // fp variants.
-     XORPSrr, XORPDrr, ANDNPSrr, ANDNPDrr,
-     // int variants.
-     PXORrr, PANDNrr,
-     PSUBBrr, PSUBWrr, PSUBDrr, PSUBQrr,
-     PSUBSBrr, PSUBSWrr, PSUBUSBrr, PSUBUSWrr,
-     PCMPGTBrr, PCMPGTDrr, PCMPGTWrr
-   ], ZeroIdiomPredicate>,
-   // AVX Zero-idioms.
-   DepBreakingClass<[
-     // xmm fp variants.
-     VXORPSrr, VXORPDrr, VANDNPSrr, VANDNPDrr,
-     // xmm int variants.
-     VPXORrr, VPANDNrr,
-     VPSUBBrr, VPSUBWrr, VPSUBDrr, VPSUBQrr,
-     VPSUBSBrr, VPSUBSWrr, VPSUBUSBrr, VPSUBUSWrr,
-     VPCMPGTBrr, VPCMPGTWrr, VPCMPGTDrr, VPCMPGTQrr,
-     // ymm variants.
-     VXORPSYrr, VXORPDYrr, VANDNPSYrr, VANDNPDYrr
-   ], ZeroIdiomPredicate>
- ]>;
- // Instructions registered as dependency-breaking. Every class uses
- // ZeroIdiomPredicate except CMP32rr/CMP64rr, which use
- // CheckSameRegOperand<0, 1>.
- def : IsDepBreakingFunction<[
-   // GPR
-   DepBreakingClass<[ SBB32rr, SBB64rr ], ZeroIdiomPredicate>,
-   DepBreakingClass<[ CMP32rr, CMP64rr ], CheckSameRegOperand<0, 1> >,
-   // MMX
-   DepBreakingClass<[
-     MMX_PCMPEQBrr, MMX_PCMPEQDrr, MMX_PCMPEQWrr
-   ], ZeroIdiomPredicate>,
-   // SSE
-   DepBreakingClass<[
-     PCMPEQBrr, PCMPEQWrr, PCMPEQDrr
-     // But not PCMPEQQrr.
-   ], ZeroIdiomPredicate>,
-   // AVX
-   DepBreakingClass<[
-     VPCMPEQBrr, VPCMPEQWrr, VPCMPEQDrr
-     // But not VPCMPEQQrr.
-   ], ZeroIdiomPredicate>
- ]>;
- } // SchedModel
|