12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
7127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439 |
- //===- P9InstrResources.td - P9 Instruction Resource Defs -*- tablegen -*-==//
- //
- // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
- // See https://llvm.org/LICENSE.txt for license information.
- // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
- //
- //===----------------------------------------------------------------------===//
- //
- // This file defines the resources required by P9 instructions. This is part of
- // the P9 processor model used for instruction scheduling. This file should
- // contain all the instructions that may be used on Power 9. This is not
- // just instructions that are new on Power 9 but also instructions that were
- // available on earlier architectures and are still used in Power 9.
- //
- // The makeup of the P9 CPU is modeled as follows:
- // - Each CPU is made up of two superslices.
- // - Each superslice is made up of two slices. Therefore, there are 4 slices
- // for each CPU.
- // - Up to 6 instructions can be dispatched to each CPU. Three per superslice.
- // - Each CPU has:
- // - One CY (Crypto) unit P9_CY_*
- // - One DFU (Decimal Floating Point and Quad Precision) unit P9_DFU_*
- // - Two PM (Permute) units. One on each superslice. P9_PM_*
- // - Two DIV (Fixed Point Divide) units. One on each superslice. P9_DIV_*
- // - Four ALU (Fixed Point Arithmetic) units. One on each slice. P9_ALU_*
- // - Four DP (Floating Point) units. One on each slice. P9_DP_*
- // This also includes fixed point multiply add.
- // - Four AGEN (Address Generation) units. One for each slice. P9_AGEN_*
- // - Four Load/Store Queues. P9_LS_*
- // - Each set of instructions will require a number of these resources.
- //===----------------------------------------------------------------------===//
- // Vector ALU operation with a two cycle latency that occupies a whole
- // superslice: both ALU units (even ALUE and odd ALUO), both pipelines
- // (EXECE, EXECO) and one dispatch (DISP) of that superslice.
- def : InstRW<
-   [P9_ALUE_2C, P9_ALUO_2C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
-   (instrs
-     // Opcode families selected by regular expression.
-     (instregex "VADDU(B|H|W|D)M$"), (instregex "VAND(C)?$"),
-     (instregex "VEXTS(B|H|W)2(D|W)(s)?$"), (instregex "V_SET0(B|H)?$"),
-     (instregex "VS(R|L)(B|H|W|D)$"), (instregex "VSUBU(B|H|W|D)M$"),
-     (instregex "VPOPCNT(B|H)$"), (instregex "VRL(B|H|W|D)$"),
-     (instregex "VSRA(B|H|W|D)$"), (instregex "XV(N)?ABS(D|S)P$"),
-     (instregex "XVCPSGN(D|S)P$"), (instregex "XV(I|X)EXP(D|S)P$"),
-     (instregex "VRL(D|W)(MI|NM)$"), (instregex "VMRG(E|O)W$"),
-     // Individually named opcodes.
-     MTVSRDD, VEQV, VNAND, VNEGD, VNEGW, VNOR, VOR, VORC, VSEL, VXOR,
-     XVNEGDP, XVNEGSP,
-     XXLAND, XXLANDC, XXLEQV, XXLEQVOnes, XXLNAND, XXLNOR, XXLOR, XXLORf,
-     XXLORC, XXLXOR, XXLXORdpz, XXLXORspz, XXLXORz, XXSEL,
-     XSABSQP, XSCPSGNQP, XSIEXPQP, XSNABSQP, XSNEGQP, XSXEXPQP)>;
- // Three cycle ALU operation with Restricted dispatch. It executes on a single
- // slice but, because it is Restricted, it takes all 3 dispatches (DISP) of
- // that superslice.
- def : InstRW<
-   [P9_ALU_3C, IP_EXEC_1C, DISP_3SLOTS_1C],
-   (instrs
-     (instregex "TABORT(D|W)C(I)?$"), (instregex "MTFSB(0|1)$"),
-     (instregex "MFFSC(D)?RN(I)?$"), (instregex "CMPRB(8)?$"),
-     (instregex "TD(I)?$"), (instregex "TW(I)?$"),
-     (instregex "FCMP(O|U)(S|D)$"), (instregex "XSTSTDC(S|D)P$"),
-     FTDIV, FTSQRT, CMPEQB)>;
- // Three cycle ALU operation with Standard dispatch; a single slice is used.
- def : InstRW<
-   [P9_ALU_3C, IP_EXEC_1C, DISP_1C],
-   (instrs
-     (instregex "XSMAX(C|J)?DP$"), (instregex "XSMIN(C|J)?DP$"),
-     (instregex "XSCMP(EQ|EXP|GE|GT|O|U)DP$"),
-     (instregex "CNT(L|T)Z(D|W)(8)?(_rec)?$"),
-     (instregex "POPCNT(D|W)$"), (instregex "CMPB(8)?$"),
-     (instregex "SETB(8)?$"),
-     XSTDIVDP, XSTSQRTDP, XSXSIGDP, XSCVSPDPN, BPERMD)>;
- // Two cycle ALU operation with Standard dispatch; a single slice is used.
- def : InstRW<
-   [P9_ALU_2C, IP_EXEC_1C, DISP_1C],
-   (instrs
-     // Opcode families selected by regular expression.
-     (instregex "S(L|R)D$"), (instregex "SRAD(I)?$"),
-     (instregex "EXTSWSLI_32_64$"),
-     (instregex "MFV(S)?RD$"), (instregex "MTV(S)?RD$"),
-     (instregex "MTV(S)?RW(A|Z)$"),
-     (instregex "CMP(WI|LWI|W|LW)(8)?$"), (instregex "CMP(L)?D(I)?$"),
-     (instregex "SUBF(I)?C(8)?(O)?$"), (instregex "ANDI(S)?(8)?(_rec)?$"),
-     (instregex "ADDC(8)?(O)?$"), (instregex "ADDIC(8)?(_rec)?$"),
-     (instregex "ADD(8|4)(O)?(_rec)?$"),
-     (instregex "ADD(E|ME|ZE)(8)?(O)?(_rec)?$"),
-     (instregex "SUBF(E|ME|ZE)?(8)?(O)?(_rec)?$"),
-     (instregex "NEG(8)?(O)?(_rec)?$"),
-     (instregex "POPCNTB$"), (instregex "POPCNTB8$"),
-     (instregex "ADD(I|IS)?(8)?$"), (instregex "LI(S)?(8)?$"),
-     (instregex "(X)?OR(I|IS)?(8)?(_rec)?$"),
-     (instregex "NAND(8)?(_rec)?$"), (instregex "AND(C)?(8)?(_rec)?$"),
-     (instregex "NOR(8)?(_rec)?$"), (instregex "OR(C)?(8)?(_rec)?$"),
-     (instregex "EQV(8)?(_rec)?$"),
-     (instregex "EXTS(B|H|W)(8)?(_32)?(_64)?(_rec)?$"),
-     (instregex "ADD(4|8)(TLS)?(_)?$"), (instregex "NEG(8)?(O)?$"),
-     (instregex "ADDI(S)?toc(HA|L)(8)?$"), (instregex "LA(8)?$"),
-     // Individually named opcodes.
-     COPY, MCRF, MCRXRX,
-     XSNABSDP, XSXEXPDP, XSABSDP, XSNEGDP, XSCPSGNDP,
-     MFVSRWZ, MFVRWZ, EXTSWSLI, SRADI_32, RLDIC,
-     RFEBB, TBEGIN, TRECHKPT, NOP, WAIT)>;
- // Two cycle ALU operation with Restricted dispatch. It executes on a single
- // slice but, because it is Restricted, it takes all 3 dispatches (DISP) of
- // that superslice.
- def : InstRW<
-   [P9_ALU_2C, IP_EXEC_1C, DISP_3SLOTS_1C],
-   (instrs
-     (instregex "RLDC(L|R)$"), (instregex "RLWIMI(8)?$"),
-     (instregex "RLDIC(L|R)(_32)?(_64)?$"), (instregex "M(F|T)OCRF(8)?$"),
-     (instregex "CR(6)?(UN)?SET$"), (instregex "CR(N)?(OR|AND)(C)?$"),
-     (instregex "S(L|R)W(8)?$"), (instregex "RLW(INM|NM)(8)?$"),
-     (instregex "F(N)?ABS(D|S)$"), (instregex "FNEG(D|S)$"),
-     (instregex "FCPSGN(D|S)$"), (instregex "SRAW(I)?$"),
-     (instregex "ISEL(8)?$"),
-     RLDIMI, XSIEXPDP, FMR, CREQV, CRXOR, TRECLAIM, TSR, TABORT)>;
- // Vector ALU operation with a three cycle latency that occupies a whole
- // superslice: both ALU units (even ALUE and odd ALUO), both pipelines
- // (EXECE, EXECO) and one dispatch (DISP) of that superslice.
- def : InstRW<
-   [P9_ALUE_3C, P9_ALUO_3C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
-   (instrs
-     // Opcode families selected by regular expression.
-     (instregex "M(T|F)VSCR$"), (instregex "VCMPNEZ(B|H|W)$"),
-     (instregex "VCMPEQU(B|H|W|D)$"), (instregex "VCMPNE(B|H|W)$"),
-     (instregex "VABSDU(B|H|W)$"), (instregex "VADDU(B|H|W)S$"),
-     (instregex "VAVG(S|U)(B|H|W)$"),
-     (instregex "VCMP(EQ|GE|GT)FP(_rec)?$"), (instregex "VCMPBFP(_rec)?$"),
-     (instregex "VC(L|T)Z(B|H|W|D)$"), (instregex "VADDS(B|H|W)S$"),
-     (instregex "V(MIN|MAX)FP$"), (instregex "V(MIN|MAX)(S|U)(B|H|W|D)$"),
-     // Individually named opcodes.
-     VBPERMD, VADDCUW, VPOPCNTW, VPOPCNTD, VPRTYBD, VPRTYBW,
-     VSHASIGMAD, VSHASIGMAW,
-     VSUBSBS, VSUBSHS, VSUBSWS, VSUBUBS, VSUBUHS, VSUBUWS, VSUBCUW,
-     VCMPGTSB, VCMPGTSB_rec, VCMPGTSD, VCMPGTSD_rec,
-     VCMPGTSH, VCMPGTSH_rec, VCMPGTSW, VCMPGTSW_rec,
-     VCMPGTUB, VCMPGTUB_rec, VCMPGTUD, VCMPGTUD_rec,
-     VCMPGTUH, VCMPGTUH_rec, VCMPGTUW, VCMPGTUW_rec,
-     VCMPNEB_rec, VCMPNEH_rec, VCMPNEW_rec,
-     VCMPNEZB_rec, VCMPNEZH_rec, VCMPNEZW_rec,
-     VCMPEQUB_rec, VCMPEQUD_rec, VCMPEQUH_rec, VCMPEQUW_rec,
-     XVCMPEQDP, XVCMPEQDP_rec, XVCMPEQSP, XVCMPEQSP_rec,
-     XVCMPGEDP, XVCMPGEDP_rec, XVCMPGESP, XVCMPGESP_rec,
-     XVCMPGTDP, XVCMPGTDP_rec, XVCMPGTSP, XVCMPGTSP_rec,
-     XVMAXDP, XVMAXSP, XVMINDP, XVMINSP,
-     XVTDIVDP, XVTDIVSP, XVTSQRTDP, XVTSQRTSP,
-     XVTSTDCDP, XVTSTDCSP, XVXSIGDP, XVXSIGSP)>;
- // 7 cycle DP vector operation that uses an entire superslice.
- // Uses both DP units (the even DPE and odd DPO units), two pipelines (EXECE,
- // EXECO) and 1 dispatch (DISP) to the given superslice.
- def : InstRW<[P9_DPE_7C, P9_DPO_7C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
- (instrs
- VADDFP,
- VCTSXS,
- VCTSXS_0,
- VCTUXS,
- VCTUXS_0,
- VEXPTEFP,
- VLOGEFP,
- VMADDFP,
- VMHADDSHS,
- VNMSUBFP,
- VREFP,
- VRFIM,
- VRFIN,
- VRFIP,
- VRFIZ,
- VRSQRTEFP,
- VSUBFP,
- XVADDDP,
- XVADDSP,
- XVCVDPSP,
- XVCVDPSXDS,
- XVCVDPSXWS,
- XVCVDPUXDS,
- XVCVDPUXWS,
- XVCVHPSP,
- XVCVSPDP,
- XVCVSPHP,
- XVCVSPSXDS,
- XVCVSPSXWS,
- XVCVSPUXDS,
- XVCVSPUXWS,
- XVCVSXDDP,
- XVCVSXDSP,
- XVCVSXWDP,
- XVCVSXWSP,
- XVCVUXDDP,
- XVCVUXDSP,
- XVCVUXWDP,
- XVCVUXWSP,
- XVMADDADP,
- XVMADDASP,
- XVMADDMDP,
- XVMADDMSP,
- XVMSUBADP,
- XVMSUBASP,
- XVMSUBMDP,
- XVMSUBMSP,
- XVMULDP,
- XVMULSP,
- XVNMADDADP,
- XVNMADDASP,
- XVNMADDMDP,
- XVNMADDMSP,
- XVNMSUBADP,
- XVNMSUBASP,
- XVNMSUBMDP,
- XVNMSUBMSP,
- XVRDPI,
- XVRDPIC,
- XVRDPIM,
- XVRDPIP,
- XVRDPIZ,
- XVREDP,
- XVRESP,
- XVRSPI,
- XVRSPIC,
- XVRSPIM,
- XVRSPIP,
- XVRSPIZ,
- XVRSQRTEDP,
- XVRSQRTESP,
- XVSUBDP,
- XVSUBSP,
- VCFSX,
- VCFSX_0,
- VCFUX,
- VCFUX_0,
- VMHRADDSHS,
- VMLADDUHM,
- VMSUMMBM,
- VMSUMSHM,
- VMSUMSHS,
- VMSUMUBM,
- VMSUMUHM,
- VMSUMUDM,
- VMSUMUHS,
- VMULESB,
- VMULESH,
- VMULESW,
- VMULEUB,
- VMULEUH,
- VMULEUW,
- VMULOSB,
- VMULOSH,
- VMULOSW,
- VMULOUB,
- VMULOUH,
- VMULOUW,
- VMULUWM,
- VSUM2SWS,
- VSUM4SBS,
- VSUM4SHS,
- VSUM4UBS,
- VSUMSWS
- )>;
- // Restricted DP operation with a 5 cycle latency. Takes one DP unit, one EXEC
- // pipeline and all three dispatch units of the superslice.
- def : InstRW<
-   [P9_DP_5C, IP_EXEC_1C, DISP_3SLOTS_1C],
-   (instrs
-     (instregex "MADD(HD|HDU|LD|LD8)$"),
-     (instregex "MUL(HD|HW|LD|LI|LI8|LW)(U)?(O)?$"))>;
- // Restricted DP operation with a 7 cycle latency. Takes one DP unit, one EXEC
- // pipeline and all three dispatch units of the superslice.
- def : InstRW<
-   [P9_DP_7C, IP_EXEC_1C, DISP_3SLOTS_1C],
-   (instrs
-     FRSP,
-     (instregex "FRI(N|P|Z|M)(D|S)$"), (instregex "FRE(S)?$"),
-     (instregex "FADD(S)?$"), (instregex "FMSUB(S)?$"),
-     (instregex "FMADD(S)?$"), (instregex "FSUB(S)?$"),
-     (instregex "FCFID(U)?(S)?$"), (instregex "FCTID(U)?(Z)?$"),
-     (instregex "FCTIW(U)?(Z)?$"), (instregex "FRSQRTE(S)?$"),
-     FNMADDS, FNMADD, FNMSUBS, FNMSUB, FSELD, FSELS, FMULS, FMUL,
-     XSMADDADP, XSMADDASP, XSMADDMDP, XSMADDMSP,
-     XSMSUBADP, XSMSUBASP, XSMSUBMDP, XSMSUBMSP,
-     XSMULDP, XSMULSP,
-     XSNMADDADP, XSNMADDASP, XSNMADDMDP, XSNMADDMSP,
-     XSNMSUBADP, XSNMSUBASP, XSNMSUBMDP, XSNMSUBMSP)>;
- // A 7 cycle Restricted DP operation paired with a 3 cycle ALU operation.
- // The two operations can be done in parallel. Because the DP part is
- // restricted, a full 4 dispatches are needed.
- def : InstRW<
-   [P9_DP_7C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C, DISP_3SLOTS_1C, DISP_1C],
-   (instrs (instregex "FSEL(D|S)_rec$"))>;
- // A 5 cycle Restricted DP operation together with a 2 cycle ALU operation.
- def : InstRW<
-   [P9_DPOpAndALUOp_7C, IP_EXEC_1C, IP_EXEC_1C, DISP_3SLOTS_1C, DISP_1C],
-   (instrs (instregex "MUL(H|L)(D|W)(U)?(O)?_rec$"))>;
- // A 7 cycle Restricted DP operation followed by a 3 cycle ALU operation.
- // The two operations must be done sequentially. Because the DP part is
- // restricted, a full 4 dispatches are needed.
- def : InstRW<
-   [P9_DPOpAndALU2Op_10C, IP_EXEC_1C, IP_EXEC_1C, DISP_3SLOTS_1C, DISP_1C],
-   (instrs
-     (instregex "FRI(N|P|Z|M)(D|S)_rec$"), (instregex "FRE(S)?_rec$"),
-     (instregex "FADD(S)?_rec$"), (instregex "FSUB(S)?_rec$"),
-     (instregex "F(N)?MSUB(S)?_rec$"), (instregex "F(N)?MADD(S)?_rec$"),
-     (instregex "FCFID(U)?(S)?_rec$"), (instregex "FCTID(U)?(Z)?_rec$"),
-     (instregex "FCTIW(U)?(Z)?_rec$"), (instregex "FMUL(S)?_rec$"),
-     (instregex "FRSQRTE(S)?_rec$"),
-     FRSP_rec)>;
- // DP operation with a 7 cycle latency. Takes one DP unit, one EXEC pipeline
- // and 1 dispatch unit.
- def : InstRW<
-   [P9_DP_7C, IP_EXEC_1C, DISP_1C],
-   (instrs
-     XSADDDP, XSADDSP,
-     XSCVDPHP, XSCVDPSP,
-     XSCVDPSXDS, XSCVDPSXDSs, XSCVDPSXWS,
-     XSCVDPUXDS, XSCVDPUXDSs, XSCVDPUXWS,
-     XSCVDPSXWSs, XSCVDPUXWSs,
-     XSCVHPDP, XSCVSPDP,
-     XSCVSXDDP, XSCVSXDSP, XSCVUXDDP, XSCVUXDSP,
-     XSRDPI, XSRDPIC, XSRDPIM, XSRDPIP, XSRDPIZ,
-     XSREDP, XSRESP, XSRSQRTEDP, XSRSQRTESP,
-     XSSUBDP, XSSUBSP,
-     XSCVDPSPN, XSRSP)>;
- // PM operation with a three cycle latency. There is only one PM unit per
- // superslice, so the whole superslice is used: both exec pipelines (EXECO,
- // EXECE) and one dispatch.
- def : InstRW<
-   [P9_PM_3C, IP_EXECO_1C, IP_EXECE_1C, DISP_1C],
-   (instrs
-     // Opcode families selected by regular expression.
-     (instregex "LVS(L|R)$"), (instregex "VSPLTIS(W|H|B)$"),
-     (instregex "VSPLT(W|H|B)(s)?$"), (instregex "V_SETALLONES(B|H)?$"),
-     (instregex "VEXTRACTU(B|H|W)$"), (instregex "VINSERT(B|H|W|D)$"),
-     // Individually named opcodes.
-     MFVSRLD, MTVSRWS, VBPERMQ, VCLZLSBB, VCTZLSBB, VEXTRACTD,
-     VEXTUBLX, VEXTUBRX, VEXTUHLX, VEXTUHRX, VEXTUWLX, VEXTUWRX,
-     VGBBD,
-     VMRGHB, VMRGHH, VMRGHW, VMRGLB, VMRGLH, VMRGLW,
-     VPERM, VPERMR, VPERMXOR,
-     VPKPX,
-     VPKSDSS, VPKSDUS, VPKSHSS, VPKSHUS, VPKSWSS, VPKSWUS,
-     VPKUDUM, VPKUDUS, VPKUHUM, VPKUHUS, VPKUWUM, VPKUWUS,
-     VPRTYBQ,
-     VSL, VSLDOI, VSLO, VSLV, VSR, VSRO, VSRV,
-     VUPKHPX, VUPKHSB, VUPKHSH, VUPKHSW,
-     VUPKLPX, VUPKLSB, VUPKLSH, VUPKLSW,
-     XXBRD, XXBRH, XXBRQ, XXBRW,
-     XXEXTRACTUW, XXINSERTW, XXMRGHW, XXMRGLW, XXPERM, XXPERMR,
-     XXSLDWI, XXSLDWIs, XXSPLTIB, XXSPLTW, XXSPLTWs,
-     XXPERMDI, XXPERMDIs,
-     VADDCUQ, VADDECUQ, VADDEUQM, VADDUQM,
-     VMUL10CUQ, VMUL10ECUQ, VMUL10EUQ, VMUL10UQ,
-     VSUBCUQ, VSUBECUQ, VSUBEUQM, VSUBUQM,
-     XSCMPEXPQP, XSCMPOQP, XSCMPUQP, XSTSTDCQP, XSXSIGQP,
-     BCDCFN_rec, BCDCFZ_rec, BCDCPSGN_rec, BCDCTN_rec, BCDCTZ_rec,
-     BCDSETSGN_rec, BCDS_rec, BCDTRUNC_rec, BCDUS_rec, BCDUTRUNC_rec,
-     BCDADD_rec, BCDSUB_rec)>;
- // DFU operation with a 12 cycle latency. There is only one DFU unit per CPU,
- // so a whole superslice is used: both exec pipelines (EXECO, EXECE) and one
- // dispatch.
- def : InstRW<
-   [P9_DFU_12C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
-   (instrs
-     BCDSR_rec,
-     XSADDQP, XSADDQPO,
-     XSCVDPQP, XSCVQPDP, XSCVQPDPO,
-     XSCVQPSDZ, XSCVQPSWZ, XSCVQPUDZ, XSCVQPUWZ,
-     XSCVSDQP, XSCVUDQP,
-     XSRQPI, XSRQPIX, XSRQPXP,
-     XSSUBQP, XSSUBQPO)>;
- // DFU operation with a 23 cycle latency. There is only one DFU unit per CPU,
- // so a whole superslice is used: both exec pipelines (EXECO, EXECE) and one
- // dispatch.
- def : InstRW<
-   [P9_DFU_23C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
-   (instrs BCDCTSQ_rec)>;
- // DFU operation with a 24 cycle latency. There is only one DFU unit per CPU,
- // so a whole superslice is used: both exec pipelines (EXECO, EXECE) and one
- // dispatch.
- def : InstRW<
-   [P9_DFU_24C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
-   (instrs
-     XSMADDQP, XSMADDQPO, XSMSUBQP, XSMSUBQPO,
-     XSMULQP, XSMULQPO,
-     XSNMADDQP, XSNMADDQPO, XSNMSUBQP, XSNMSUBQPO)>;
- // DFU operation with a 37 cycle latency. There is only one DFU unit per CPU,
- // so a whole superslice is used: both exec pipelines (EXECO, EXECE) and one
- // dispatch.
- def : InstRW<
-   [P9_DFU_37C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
-   (instrs BCDCFSQ_rec)>;
- // DFU operation with a 58 cycle latency. There is only one DFU unit per CPU,
- // so a whole superslice is used: both exec pipelines (EXECO, EXECE) and one
- // dispatch.
- def : InstRW<
-   [P9_DFU_58C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
-   (instrs XSDIVQP, XSDIVQPO)>;
- // 76 Cycle DFU operation. Only one DFU unit per CPU so we use a whole
- // superslice. That includes both exec pipelines (EXECO, EXECE) and one
- // dispatch.
- def : InstRW<[P9_DFU_76C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
- (instrs
- XSSQRTQP,
- XSSQRTQPO
- )>;
- // 6 Cycle Load uses a single slice.
- // NOTE(review): unlike most patterns in this file, the regex below has no
- // trailing `$`, so it is presumably not pinned to the end of the opcode name
- // and would also accept longer names that begin with LXVL - confirm that
- // this is intended.
- def : InstRW<[P9_LS_6C, IP_AGEN_1C, DISP_1C],
- (instrs
- (instregex "LXVL(L)?")
- )>;
- // 5 Cycle Load uses a single slice.
- // NOTE(review): the "LVX(L)?" regex below has no trailing `$`, unlike its
- // neighbours, so it is presumably not pinned to the end of the opcode name
- // and would also accept longer names that begin with LVX - confirm that
- // this is intended.
- def : InstRW<[P9_LS_5C, IP_AGEN_1C, DISP_1C],
- (instrs
- (instregex "LVE(B|H|W)X$"),
- (instregex "LVX(L)?"),
- (instregex "LXSI(B|H)ZX$"),
- LXSDX,
- LXVB16X,
- LXVD2X,
- LXVWSX,
- LXSIWZX,
- LXV,
- LXVX,
- LXSD,
- DFLOADf64,
- XFLOADf64,
- LIWZX
- )>;
- // Load with a 4 cycle latency; a single slice is used.
- def : InstRW<
-   [P9_LS_4C, IP_AGEN_1C, DISP_1C],
-   (instrs
-     (instregex "DCB(F|T|ST)(EP)?$"), (instregex "DCBZ(L)?(EP)?$"),
-     (instregex "DCBTST(EP)?$"), (instregex "CP_COPY(8)?$"),
-     (instregex "ICBI(EP)?$"), (instregex "ICBT(LS)?$"),
-     (instregex "LBARX(L)?$"),
-     (instregex "LBZ(CIX|8|X|X8|XTLS|XTLS_32)?(_)?$"),
-     (instregex "LD(ARX|ARXL|BRX|CIX|X|XTLS)?(_)?$"),
-     (instregex "LH(A|B)RX(L)?(8)?$"),
-     (instregex "LHZ(8|CIX|X|X8|XTLS|XTLS_32)?(_)?$"),
-     (instregex "LWARX(L)?$"), (instregex "LWBRX(8)?$"),
-     (instregex "LWZ(8|CIX|X|X8|XTLS|XTLS_32)?(_)?$"),
-     CP_ABORT, DARN, EnforceIEIO, ISYNC, MSGSYNC, TLBSYNC, SYNC,
-     LMW, LSWI)>;
- // Restricted load with a 4 cycle latency. It uses a single slice but takes
- // the dispatch for the whole superslice.
- def : InstRW<
-   [P9_LS_4C, IP_AGEN_1C, DISP_3SLOTS_1C],
-   (instrs LFIWZX, LFDX, LFD)>;
- // Cracked Load Instructions.
- // The load pieces of these instructions can be done in parallel.
- def : InstRW<
-   [P9_LS_4C, P9_LS_4C, IP_AGEN_1C, IP_AGEN_1C, DISP_PAIR_1C],
-   (instrs SLBIA, SLBIE, SLBMFEE, SLBMFEV, SLBMTE, TLBIEL)>;
- // Cracked Load Instruction.
- // Requires a Load piece (P9_LS_4C) and an ALU piece (P9_ALU_2C). The Load and
- // ALU operations can be run in parallel.
- // NOTE(review): 4 + 2 only totals 6 cycles if the pieces run back to back;
- // since they are described as parallel and are listed as two separate
- // writes, confirm the intended overall latency.
- def : InstRW<[P9_LS_4C, P9_ALU_2C, IP_EXEC_1C, IP_AGEN_1C,
- DISP_PAIR_1C, DISP_PAIR_1C],
- (instrs
- (instregex "L(W|H)ZU(X)?(8)?$")
- )>;
- // Cracked TEND Instruction.
- // Needs a Load piece and an ALU piece totaling 6 cycles; the Load and ALU
- // operations can be run in parallel.
- def : InstRW<
-   [P9_LS_4C, P9_ALU_2C, IP_EXEC_1C, IP_AGEN_1C, DISP_1C, DISP_1C],
-   (instrs TEND)>;
- // Cracked Store Instruction.
- // A Store and an ALU instruction executed consecutively. The store is
- // restricted and needs three dispatches.
- def : InstRW<
-   [P9_StoreAndALUOp_3C, IP_EXEC_1C, IP_EXEC_1C, IP_AGEN_1C,
-    DISP_3SLOTS_1C, DISP_1C],
-   (instrs (instregex "ST(B|H|W|D)CX$"))>;
- // Cracked Load Instruction.
- // Two load operations executed back to back, 8 cycles in total.
- def : InstRW<
-   [P9_LoadAndLoadOp_8C, IP_AGEN_1C, IP_AGEN_1C, DISP_1C, DISP_1C],
-   (instrs LDMX)>;
- // Cracked Load instruction.
- // Consecutive Load and ALU pieces totaling 6 cycles. The Load and the ALU
- // operation cannot overlap, so their latencies are added.
- def : InstRW<
-   [P9_LoadAndALUOp_6C, IP_EXEC_1C, IP_AGEN_1C, DISP_1C, DISP_1C],
-   (instrs
-     (instregex "LHA(X)?(8)?$"), (instregex "CP_PASTE(8)?_rec$"),
-     (instregex "LWA(X)?(_32)?$"),
-     TCHECK)>;
- // Cracked Restricted Load instruction.
- // Consecutive Load and ALU pieces totaling 6 cycles. The Load and the ALU
- // operation cannot overlap, so their latencies are added. Being both cracked
- // and restricted, it needs the full 6 dispatches.
- def : InstRW<
-   [P9_LoadAndALUOp_6C, IP_EXEC_1C, IP_AGEN_1C,
-    DISP_3SLOTS_1C, DISP_3SLOTS_1C],
-   (instrs LFIWAX)>;
- // Cracked Load instruction.
- // Requires consecutive Load and ALU pieces totaling 7 cycles. The Load and ALU
- // operations cannot be done at the same time and so their latencies are added.
- // Two dispatches are required as this is a cracked instruction.
- def : InstRW<[P9_LoadAndALUOp_7C, IP_AGEN_1C, IP_EXEC_1C, DISP_1C, DISP_1C],
- (instrs
- LXSIWAX,
- LIWAX
- )>;
- // Cracked Load instruction.
- // Consecutive Load (4 cycles) and ALU (3 cycles) pieces, 7 cycles in total.
- // The Load and the ALU operation cannot overlap, so their latencies are
- // added. Being a restricted instruction, it needs the full 6 dispatches.
- def : InstRW<
-   [P9_LoadAndALU2Op_7C, IP_AGEN_1C, IP_EXEC_1C,
-    DISP_3SLOTS_1C, DISP_3SLOTS_1C],
-   (instrs LFSX, LFS)>;
- // Cracked Load instruction.
- // Requires consecutive Load and ALU pieces totaling 8 cycles. The Load and ALU
- // operations cannot be done at the same time and so their latencies are added.
- // Two dispatches are required as this is a cracked instruction.
- def : InstRW<[P9_LoadAndALU2Op_8C, IP_AGEN_1C, IP_EXEC_1C, DISP_1C, DISP_1C],
- (instrs
- LXSSP,
- LXSSPX,
- XFLOADf32,
- DFLOADf32
- )>;
- // Cracked 3-Way Load Instruction.
- // A load plus two ALU operations that depend on each other.
- def : InstRW<
-   [P9_LoadAndALUOp_6C, P9_ALU_2C, IP_AGEN_1C, IP_EXEC_1C, IP_EXEC_1C,
-    DISP_PAIR_1C, DISP_PAIR_1C, DISP_1C],
-   (instrs
-     (instregex "LHAU(X)?(8)?$"),
-     LWAUX)>;
- // Cracked Load that also needs the PM resource.
- // The Load and the PM operation cannot overlap, so the latencies are added
- // for a total of 8 cycles. The PM needs the full superslice, i.e. both the
- // EXECE and EXECO pipelines plus 1 dispatch; the Load takes the remaining
- // 1 dispatch.
- def : InstRW<
-   [P9_LoadAndPMOp_8C, IP_AGEN_1C, IP_EXECE_1C, IP_EXECO_1C,
-    DISP_1C, DISP_1C],
-   (instrs LXVH8X, LXVDSX, LXVW4X)>;
- // Restricted store operation on a single slice. Because it is restricted it
- // needs all three dispatches of the superslice.
- def : InstRW<
-   [P9_LS_1C, IP_EXEC_1C, IP_AGEN_1C, DISP_3SLOTS_1C],
-   (instrs
-     (instregex "STF(S|D|IWX|SX|DX)$"),
-     (instregex "STXS(D|DX|SPX|IWX|IBX|IHX|SP)(v)?$"),
-     (instregex "STW(8)?$"),
-     (instregex "(D|X)FSTORE(f32|f64)$"),
-     (instregex "ST(W|H|D)BRX$"),
-     (instregex "ST(B|H|D)(8)?$"),
-     (instregex "ST(B|W|H|D)(CI)?X(TLS|TLS_32)?(8)?(_)?$"),
-     STIWX, SLBIEG, STMW, STSWI, TLBIE)>;
- // Vector Store Instruction.
- // Needs the whole superslice, so it takes one dispatch together with both
- // the Even and the Odd exec pipelines.
- def : InstRW<
-   [P9_LS_1C, IP_EXECE_1C, IP_EXECO_1C, IP_AGEN_1C, DISP_1C],
-   (instrs
-     (instregex "STVE(B|H|W)X$"),
-     (instregex "STVX(L)?$"),
-     (instregex "STXV(B16X|H8X|W4X|D2X|L|LL|X)?$"))>;
- // DIV operation with a 5 cycle latency. There is only one DIV unit per
- // superslice, so the whole superslice is used: both exec pipelines (EXECO,
- // EXECE) and two dispatches.
- def : InstRW<
-   [P9_DIV_5C, IP_EXECE_1C, IP_EXECO_1C, DISP_EVEN_1C],
-   (instrs
-     (instregex "MTCTR(8)?(loop)?$"),
-     (instregex "MTLR(8)?$"))>;
- // DIV operation with a 12 cycle latency. There is only one DIV unit per
- // superslice, so the whole superslice is used: both exec pipelines (EXECO,
- // EXECE) and two dispatches.
- def : InstRW<
-   [P9_DIV_12C, IP_EXECE_1C, IP_EXECO_1C, DISP_EVEN_1C],
-   (instrs
-     (instregex "M(T|F)VRSAVE(v)?$"), (instregex "M(T|F)PMR$"),
-     (instregex "M(T|F)TB(8)?$"), (instregex "MF(SPR|CTR|LR)(8)?$"),
-     (instregex "M(T|F)MSR(D)?$"), (instregex "MTSPR(8)?$"))>;
- // DIV operation with a 16 cycle latency. There is only one DIV unit per
- // superslice, so the whole superslice is used: both exec pipelines (EXECO,
- // EXECE) and two dispatches.
- def : InstRW<
-   [P9_DIV_16C_8, IP_EXECO_1C, IP_EXECE_1C, DISP_EVEN_1C],
-   (instrs DIVW, DIVWO, DIVWU, DIVWUO, MODSW)>;
- // DIV operation with a 24 cycle latency. There is only one DIV unit per
- // superslice, so the whole superslice is used: both exec pipelines (EXECO,
- // EXECE) and two dispatches.
- def : InstRW<
-   [P9_DIV_24C_8, IP_EXECO_1C, IP_EXECE_1C, DISP_EVEN_1C],
-   (instrs
-     DIVWE, DIVWEO, DIVD, DIVDO,
-     DIVWEU, DIVWEUO, DIVDU, DIVDUO,
-     MODSD, MODUD, MODUW)>;
- // 40 Cycle DIV operation. Only one DIV unit per superslice so we use the whole
- // superslice. That includes both exec pipelines (EXECO, EXECE) and two
- // dispatches (same DISP_EVEN_1C as the other DIV entries - TODO confirm).
- def : InstRW<[P9_DIV_40C_8, IP_EXECO_1C, IP_EXECE_1C, DISP_EVEN_1C],
- (instrs
- DIVDE,
- DIVDEO,
- DIVDEU,
- DIVDEUO
- )>;
- // Cracked DIV and ALU operation. Requires one full slice for the ALU operation
- // and one full superslice for the DIV operation since there is only one DIV per
- // superslice. Latency of DIV plus ALU is 18.
- def : InstRW<[P9_IntDivAndALUOp_18C_8, IP_EXECE_1C, IP_EXECO_1C, IP_EXEC_1C,
- DISP_EVEN_1C, DISP_1C],
- (instrs
- (instregex "DIVW(U)?(O)?_rec$")
- )>;
- // Cracked DIV and ALU operation. The ALU operation needs one full slice and
- // the DIV operation one full superslice, as there is only one DIV per
- // superslice. DIV plus ALU latency is 26.
- def : InstRW<
-   [P9_IntDivAndALUOp_26C_8, IP_EXECE_1C, IP_EXECO_1C, IP_EXEC_1C,
-    DISP_EVEN_1C, DISP_1C],
-   (instrs
-     DIVD_rec, DIVDO_rec, DIVDU_rec, DIVDUO_rec,
-     DIVWE_rec, DIVWEO_rec, DIVWEU_rec, DIVWEUO_rec)>;
- // Cracked DIV and ALU operation. The ALU operation needs one full slice and
- // the DIV operation one full superslice, as there is only one DIV per
- // superslice. DIV plus ALU latency is 42.
- def : InstRW<
-   [P9_IntDivAndALUOp_42C_8, IP_EXECE_1C, IP_EXECO_1C, IP_EXEC_1C,
-    DISP_EVEN_1C, DISP_1C],
-   (instrs DIVDE_rec, DIVDEO_rec, DIVDEU_rec, DIVDEUO_rec)>;
- // CR access instructions in _BrMCR, IIC_BrMCRX.
- // Cracked, restricted, ALU operations.
- // The two ALU ops (2 cycles each) can actually run in parallel, so their
- // latencies are not added together. Apart from that, this behaves like two
- // instructions running together on two pipelines with 6 dispatches.
- def : InstRW<
-   [P9_ALU_2C, P9_ALU_2C, IP_EXEC_1C, IP_EXEC_1C,
-    DISP_3SLOTS_1C, DISP_3SLOTS_1C],
-   (instrs MTCRF, MTCRF8)>;
- // Cracked ALU operations.
- // The two ALU ops (2 cycles each) can actually run in parallel, so their
- // latencies are not added together. Apart from that, this behaves like two
- // instructions running together on two pipelines with 2 dispatches.
- def : InstRW<
-   [P9_ALU_2C, P9_ALU_2C, IP_EXEC_1C, IP_EXEC_1C, DISP_1C, DISP_1C],
-   (instrs
-     (instregex "ADDC(8)?(O)?_rec$"),
-     (instregex "SUBFC(8)?(O)?_rec$"))>;
- // Cracked ALU operations.
- // Two ALU ops can be done in parallel.
- // One is a three cycle ALU, the other is a two cycle ALU.
- // One of the ALU ops is restricted, the other is not, so we have a total of
- // 4 dispatches.
- def : InstRW<[P9_ALU_2C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C,
- DISP_3SLOTS_1C, DISP_1C],
- (instrs
- (instregex "F(N)?ABS(D|S)_rec$"),
- (instregex "FCPSGN(D|S)_rec$"),
- (instregex "FNEG(D|S)_rec$"),
- FMR_rec
- )>;
- // Cracked ALU operations.
- // The two ALU ops (3 cycles each) can actually run in parallel, so their
- // latencies are not added together. Apart from that, this behaves like two
- // instructions running together on two pipelines with 2 dispatches.
- def : InstRW<
-   [P9_ALU_3C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C, DISP_1C, DISP_1C],
-   (instrs MCRFS)>;
- // Cracked Restricted ALU operations.
- // The two ALU ops (3 cycles each) can actually run in parallel, so their
- // latencies are not added together. Apart from that, this behaves like two
- // instructions running together on two pipelines with 6 dispatches.
- def : InstRW<
-   [P9_ALU_3C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C,
-    DISP_3SLOTS_1C, DISP_3SLOTS_1C],
-   (instrs
-     (instregex "MTFSF(b|_rec)?$"),
-     (instregex "MTFSFI(_rec)?$"),
-     MTFSFIb)>;
- // Cracked instruction built from two ALU ops that cannot run in parallel.
- // One of the two ALU ops is restricted and takes 3 dispatches.
- def : InstRW<
-   [P9_ALUOpAndALUOp_4C, IP_EXEC_1C, IP_EXEC_1C, DISP_3SLOTS_1C, DISP_1C],
-   (instrs
-     (instregex "RLD(I)?C(R|L)_rec$"),
-     (instregex "RLW(IMI|INM|NM)(8)?_rec$"),
-     (instregex "SLW(8)?_rec$"),
-     (instregex "SRAW(I)?_rec$"),
-     (instregex "SRW(8)?_rec$"),
-     RLDICL_32_rec, RLDIMI_rec)>;
- // Cracked instruction built from two ALU ops that cannot run in parallel.
- // Both ALU ops are restricted and each takes 3 dispatches.
- def : InstRW<
-   [P9_ALU2OpAndALU2Op_6C, IP_EXEC_1C, IP_EXEC_1C,
-    DISP_3SLOTS_1C, DISP_3SLOTS_1C],
-   (instrs (instregex "MFFS(L|CE|_rec)?$"))>;
- // Cracked ALU instruction composed of three consecutive 2 cycle ALU ops for a
- // total of 6 cycles. All of the ALU operations are also restricted so each
- // takes 3 dispatches for a total of 9.
- def : InstRW<[P9_ALUOpAndALUOpAndALUOp_6C, IP_EXEC_1C, IP_EXEC_1C, IP_EXEC_1C,
- DISP_3SLOTS_1C, DISP_3SLOTS_1C, DISP_3SLOTS_1C],
- (instrs
- (instregex "MFCR(8)?$")
- )>;
- // Cracked instruction made of two ALU ops.
- // The two ops cannot be done in parallel.
- def : InstRW<[P9_ALUOpAndALUOp_4C, IP_EXEC_1C, IP_EXEC_1C, DISP_1C, DISP_1C],
- (instrs
- (instregex "EXTSWSLI_32_64_rec$"),
- (instregex "SRAD(I)?_rec$"),
- EXTSWSLI_rec,
- SLD_rec,
- SRD_rec,
- RLDIC_rec
- )>;
- // 33 Cycle DP Instruction Restricted. Takes one slice and 3 dispatches.
- def : InstRW<[P9_DP_33C_8, IP_EXEC_1C, DISP_3SLOTS_1C],
- (instrs
- FDIV
- )>;
- // 33 Cycle DP Instruction Restricted and Cracked with 3 Cycle ALU.
- def : InstRW<[P9_DPOpAndALU2Op_36C_8, IP_EXEC_1C, IP_EXEC_1C,
- DISP_3SLOTS_1C, DISP_1C],
- (instrs
- FDIV_rec
- )>;
- // 36 Cycle DP Instruction.
- // Instruction can be done on a single slice.
- def : InstRW<[P9_DP_36C_10, IP_EXEC_1C, DISP_1C],
- (instrs
- XSSQRTDP
- )>;
- // 36 Cycle DP Instruction Restricted. Takes one slice and 3 dispatches.
- def : InstRW<[P9_DP_36C_10, IP_EXEC_1C, DISP_3SLOTS_1C],
- (instrs
- FSQRT
- )>;
- // 36 Cycle DP Vector Instruction.
- def : InstRW<[P9_DPE_36C_10, P9_DPO_36C_10, IP_EXECE_1C, IP_EXECO_1C,
- DISP_1C],
- (instrs
- XVSQRTDP
- )>;
- // 27 Cycle DP Vector Instruction.
- def : InstRW<[P9_DPE_27C_10, P9_DPO_27C_10, IP_EXECE_1C, IP_EXECO_1C,
- DISP_1C],
- (instrs
- XVSQRTSP
- )>;
- // 36 Cycle DP Instruction Restricted and Cracked with 3 Cycle ALU.
- def : InstRW<[P9_DPOpAndALU2Op_39C_10, IP_EXEC_1C, IP_EXEC_1C,
- DISP_3SLOTS_1C, DISP_1C],
- (instrs
- FSQRT_rec
- )>;
- // 26 Cycle DP Instruction.
- def : InstRW<[P9_DP_26C_5, IP_EXEC_1C, DISP_1C],
- (instrs
- XSSQRTSP
- )>;
- // 26 Cycle DP Instruction Restricted. Takes one slice and 3 dispatches.
- def : InstRW<[P9_DP_26C_5, IP_EXEC_1C, DISP_3SLOTS_1C],
- (instrs
- FSQRTS
- )>;
- // 26 Cycle DP Instruction Restricted and Cracked with 3 Cycle ALU.
- def : InstRW<[P9_DPOpAndALU2Op_29C_5, IP_EXEC_1C, IP_EXEC_1C,
- DISP_3SLOTS_1C, DISP_1C],
- (instrs
- FSQRTS_rec
- )>;
- // 33 Cycle DP Instruction. Takes one slice and 1 dispatch.
- def : InstRW<[P9_DP_33C_8, IP_EXEC_1C, DISP_1C],
- (instrs
- XSDIVDP
- )>;
- // 22 Cycle DP Instruction Restricted. Takes one slice and 3 dispatches.
- def : InstRW<[P9_DP_22C_5, IP_EXEC_1C, DISP_3SLOTS_1C],
- (instrs
- FDIVS
- )>;
- // 22 Cycle DP Instruction Restricted and Cracked with 3 Cycle ALU.
- def : InstRW<[P9_DPOpAndALU2Op_25C_5, IP_EXEC_1C, IP_EXEC_1C,
- DISP_3SLOTS_1C, DISP_1C],
- (instrs
- FDIVS_rec
- )>;
- // 22 Cycle DP Instruction. Takes one slice and 1 dispatch.
- def : InstRW<[P9_DP_22C_5, IP_EXEC_1C, DISP_1C],
- (instrs
- XSDIVSP
- )>;
- // 24 Cycle DP Vector Instruction. Takes one full superslice.
- // Includes both EXECE, EXECO pipelines and 1 dispatch for the given
- // superslice.
- def : InstRW<[P9_DPE_24C_8, P9_DPO_24C_8, IP_EXECE_1C, IP_EXECO_1C,
- DISP_1C],
- (instrs
- XVDIVSP
- )>;
- // 33 Cycle DP Vector Instruction. Takes one full superslice.
- // Includes both EXECE, EXECO pipelines and 1 dispatch for the given
- // superslice.
- def : InstRW<[P9_DPE_33C_8, P9_DPO_33C_8, IP_EXECE_1C, IP_EXECO_1C,
- DISP_1C],
- (instrs
- XVDIVDP
- )>;
- // Instruction cracked into three pieces. One Load and two ALU operations.
- // The Load and one of the ALU ops cannot be run at the same time and so the
- // latencies are added together for 7 cycles. The remaining ALU is 2 cycles.
- // Both the load and the ALU that depends on it are restricted and so they take
- // a total of 6 dispatches. The final dispatch comes from the second ALU op.
- // The two EXEC pipelines are for the 2 ALUs while the AGEN is for the load.
- def : InstRW<[P9_LoadAndALU2Op_7C, P9_ALU_2C,
- IP_AGEN_1C, IP_EXEC_1C, IP_EXEC_1C,
- DISP_3SLOTS_1C, DISP_3SLOTS_1C, DISP_1C],
- (instrs
- (instregex "LF(SU|SUX)$")
- )>;
- // Cracked instruction made up of a Store and an ALU. The ALU does not depend on
- // the store and so it can be run at the same time as the store. The store is
- // also restricted.
- def : InstRW<[P9_LS_1C, P9_ALU_2C, IP_AGEN_1C, IP_EXEC_1C, IP_EXEC_1C,
- DISP_3SLOTS_1C, DISP_1C],
- (instrs
- (instregex "STF(S|D)U(X)?$"),
- (instregex "ST(B|H|W|D)U(X)?(8)?$")
- )>;
- // Cracked instruction made up of a Load and an ALU. The ALU does not depend on
- // the load and so it can be run at the same time as the load.
- def : InstRW<[P9_LS_4C, P9_ALU_2C, IP_AGEN_1C, IP_EXEC_1C,
- DISP_PAIR_1C, DISP_PAIR_1C],
- (instrs
- (instregex "LBZU(X)?(8)?$"),
- (instregex "LDU(X)?$")
- )>;
- // Cracked instruction made up of a Load and an ALU. The ALU does not depend on
- // the load and so it can be run at the same time as the load. The load is also
- // restricted. 3 dispatches are from the restricted load while the other one
- // is from the ALU. The AGEN pipeline is from the load and the EXEC pipeline
- // is required for the ALU.
- def : InstRW<[P9_LS_4C, P9_ALU_2C, IP_AGEN_1C, IP_EXEC_1C,
- DISP_3SLOTS_1C, DISP_1C],
- (instrs
- (instregex "LF(DU|DUX)$")
- )>;
- // Crypto Instructions
- // 6 Cycle CY operation. Only one CY unit per CPU so we use a whole
- // superslice. That includes both exec pipelines (EXECO, EXECE) and one
- // dispatch.
- def : InstRW<[P9_CY_6C, IP_EXECO_1C, IP_EXECE_1C, DISP_1C],
- (instrs
- (instregex "VPMSUM(B|H|W|D)$"),
- (instregex "V(N)?CIPHER(LAST)?$"),
- VSBOX
- )>;
- // Branch Instructions
- // Two Cycle Branch
- def : InstRW<[P9_BR_2C, DISP_BR_1C],
- (instrs
- (instregex "BCCCTR(L)?(8)?$"),
- (instregex "BCCL(A|R|RL)?$"),
- (instregex "BCCTR(L)?(8)?(n)?$"),
- (instregex "BD(N)?Z(8|A|Am|Ap|m|p)?$"),
- (instregex "BD(N)?ZL(A|Am|Ap|R|R8|RL|RLm|RLp|Rm|Rp|m|p)?$"),
- (instregex "BL(_TLS|_NOP)?(_RM)?$"),
- (instregex "BL8(_TLS|_NOP|_NOP_TLS|_TLS_)?(_RM)?$"),
- (instregex "BLA(8|8_NOP)?(_RM)?$"),
- (instregex "BLR(8|L)?$"),
- (instregex "TAILB(A)?(8)?$"),
- (instregex "TAILBCTR(8)?$"),
- (instregex "gBC(A|Aat|CTR|CTRL|L|LA|LAat|LR|LRL|Lat|at)?$"),
- (instregex "BCLR(L)?(n)?$"),
- (instregex "BCTR(L)?(8)?(_RM)?$"),
- B,
- BA,
- BC,
- BCC,
- BCCA,
- BCL,
- BCLalways,
- BCLn,
- BCTRL8_LDinto_toc,
- BCTRL_LWZinto_toc,
- BCTRL8_LDinto_toc_RM,
- BCTRL_LWZinto_toc_RM,
- BCn,
- CTRL_DEP
- )>;
- // Five Cycle Branch with a 2 Cycle ALU Op
- // Operations must be done consecutively and not in parallel.
- def : InstRW<[P9_BROpAndALUOp_7C, IP_EXEC_1C, DISP_BR_1C, DISP_1C],
- (instrs
- ADDPCIS
- )>;
- // Special Extracted Instructions For Atomics
- // Atomic Load
- def : InstRW<[P9_LS_1C, P9_LS_1C, P9_LS_4C, P9_LS_4C, P9_LS_4C,
- IP_EXEC_1C, IP_EXEC_1C, IP_AGEN_1C, IP_AGEN_1C, IP_AGEN_1C,
- IP_AGEN_1C, IP_AGEN_1C, DISP_1C, DISP_3SLOTS_1C,
- DISP_3SLOTS_1C, DISP_1C, DISP_1C, DISP_1C],
- (instrs
- (instregex "L(D|W)AT$")
- )>;
- // Atomic Store
- def : InstRW<[P9_LS_1C, P9_LS_4C, P9_LS_4C, IP_EXEC_1C, IP_AGEN_1C, IP_AGEN_1C,
- IP_AGEN_1C, DISP_1C, DISP_3SLOTS_1C, DISP_1C],
- (instrs
- (instregex "ST(D|W)AT$")
- )>;
- // Signal Processing Engine (SPE) Instructions
- // These instructions are not supported on Power 9
- def : InstRW<[],
- (instrs
- BRINC,
- EVABS,
- EVEQV,
- EVMRA,
- EVNAND,
- EVNEG,
- (instregex "EVADD(I)?W$"),
- (instregex "EVADD(SM|SS|UM|US)IAAW$"),
- (instregex "EVAND(C)?$"),
- (instregex "EVCMP(EQ|GTS|GTU|LTS|LTU)$"),
- (instregex "EVCNTL(S|Z)W$"),
- (instregex "EVDIVW(S|U)$"),
- (instregex "EVEXTS(B|H)$"),
- (instregex "EVLD(H|W|D)(X)?$"),
- (instregex "EVLHH(E|OS|OU)SPLAT(X)?$"),
- (instregex "EVLWHE(X)?$"),
- (instregex "EVLWHO(S|U)(X)?$"),
- (instregex "EVLW(H|W)SPLAT(X)?$"),
- (instregex "EVMERGE(HI|LO|HILO|LOHI)$"),
- (instregex "EVMHEG(S|U)M(F|I)A(A|N)$"),
- (instregex "EVMHES(M|S)(F|I)(A|AA|AAW|ANW)?$"),
- (instregex "EVMHEU(M|S)I(A|AA|AAW|ANW)?$"),
- (instregex "EVMHOG(U|S)M(F|I)A(A|N)$"),
- (instregex "EVMHOS(M|S)(F|I)(A|AA|AAW|ANW)?$"),
- (instregex "EVMHOU(M|S)I(A|AA|ANW|AAW)?$"),
- (instregex "EVMWHS(M|S)(F|FA|I|IA)$"),
- (instregex "EVMWHUMI(A)?$"),
- (instregex "EVMWLS(M|S)IA(A|N)W$"),
- (instregex "EVMWLU(M|S)I(A|AA|AAW|ANW)?$"),
- (instregex "EVMWSM(F|I)(A|AA|AN)?$"),
- (instregex "EVMWSSF(A|AA|AN)?$"),
- (instregex "EVMWUMI(A|AA|AN)?$"),
- (instregex "EV(N|X)?OR(C)?$"),
- (instregex "EVR(LW|LWI|NDW)$"),
- (instregex "EVSLW(I)?$"),
- (instregex "EVSPLAT(F)?I$"),
- (instregex "EVSRW(I)?(S|U)$"),
- (instregex "EVST(DD|DH|DW|WHE|WHO|WWE|WWO)(X)?$"),
- (instregex "EVSUBF(S|U)(M|S)IAAW$"),
- (instregex "EVSUB(I)?FW$")
- )> { let Unsupported = 1; }
- // General Instructions without scheduling support.
- def : InstRW<[],
- (instrs
- (instregex "(H)?RFI(D)?$"),
- (instregex "DSS(ALL)?$"),
- (instregex "DST(ST)?(T)?(64)?$"),
- (instregex "ICBL(C|Q)$"),
- (instregex "L(W|H|B)EPX$"),
- (instregex "ST(W|H|B)EPX$"),
- (instregex "(L|ST)FDEPX$"),
- (instregex "M(T|F)SR(IN)?$"),
- (instregex "M(T|F)DCR$"),
- (instregex "NOP_GT_PWR(6|7)$"),
- (instregex "TLB(IA|IVAX|SX|SX2|SX2D|LD|LI|RE|RE2|WE|WE2)$"),
- (instregex "WRTEE(I)?$"),
- (instregex "HASH(ST|STP|CHK|CHKP)(8)?$"),
- ATTN,
- CLRBHRB,
- MFBHRBE,
- MBAR,
- MSYNC,
- SLBSYNC,
- SLBFEE_rec,
- NAP,
- STOP,
- TRAP,
- RFCI,
- RFDI,
- RFMCI,
- SC,
- DCBA,
- DCBI,
- DCCCI,
- ICCCI,
- ADDEX,
- ADDEX8
- )> { let Unsupported = 1; }
|