P9InstrResources.td 38 KB


  1. //===- P9InstrResources.td - P9 Instruction Resource Defs -*- tablegen -*-==//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. // This file defines the resources required by P9 instructions. This is part of
  10. // the P9 processor model used for instruction scheduling. This file should
  11. // contain all the instructions that may be used on Power 9. This is not
  12. // just instructions that are new on Power 9 but also instructions that were
  13. // available on earlier architectures and are still used in Power 9.
  14. //
  15. // The makeup of the P9 CPU is modeled as follows:
  16. // - Each CPU is made up of two superslices.
  17. // - Each superslice is made up of two slices. Therefore, there are 4 slices
  18. // for each CPU.
  19. // - Up to 6 instructions can be dispatched to each CPU. Three per superslice.
  20. // - Each CPU has:
  21. // - One CY (Crypto) unit P9_CY_*
  22. // - One DFU (Decimal Floating Point and Quad Precision) unit P9_DFU_*
  23. // - Two PM (Permute) units. One on each superslice. P9_PM_*
  24. // - Two DIV (Fixed Point Divide) units. One on each superslice. P9_DIV_*
  25. // - Four ALU (Fixed Point Arithmetic) units. One on each slice. P9_ALU_*
  26. // - Four DP (Floating Point) units. One on each slice. P9_DP_*
  27. // This also includes fixed point multiply add.
  28. // - Four AGEN (Address Generation) units. One for each slice. P9_AGEN_*
  29. // - Four Load/Store Queues. P9_LS_*
  30. // - Each set of instructions will require a number of these resources.
  31. //===----------------------------------------------------------------------===//
  32. // Two cycle ALU vector operation that uses an entire superslice.
  33. // Uses both ALU units (the even ALUE and odd ALUO units), two pipelines
  34. // (EXECE, EXECO) and 1 dispatch (DISP) to the given superslice.
  35. def : InstRW<[P9_ALUE_2C, P9_ALUO_2C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
  36. (instrs
  37. (instregex "VADDU(B|H|W|D)M$"),
  38. (instregex "VAND(C)?$"),
  39. (instregex "VEXTS(B|H|W)2(D|W)(s)?$"),
  40. (instregex "V_SET0(B|H)?$"),
  41. (instregex "VS(R|L)(B|H|W|D)$"),
  42. (instregex "VSUBU(B|H|W|D)M$"),
  43. (instregex "VPOPCNT(B|H)$"),
  44. (instregex "VRL(B|H|W|D)$"),
  45. (instregex "VSRA(B|H|W|D)$"),
  46. (instregex "XV(N)?ABS(D|S)P$"),
  47. (instregex "XVCPSGN(D|S)P$"),
  48. (instregex "XV(I|X)EXP(D|S)P$"),
  49. (instregex "VRL(D|W)(MI|NM)$"),
  50. (instregex "VMRG(E|O)W$"),
  51. MTVSRDD,
  52. VEQV,
  53. VNAND,
  54. VNEGD,
  55. VNEGW,
  56. VNOR,
  57. VOR,
  58. VORC,
  59. VSEL,
  60. VXOR,
  61. XVNEGDP,
  62. XVNEGSP,
  63. XXLAND,
  64. XXLANDC,
  65. XXLEQV,
  66. XXLEQVOnes,
  67. XXLNAND,
  68. XXLNOR,
  69. XXLOR,
  70. XXLORf,
  71. XXLORC,
  72. XXLXOR,
  73. XXLXORdpz,
  74. XXLXORspz,
  75. XXLXORz,
  76. XXSEL,
  77. XSABSQP,
  78. XSCPSGNQP,
  79. XSIEXPQP,
  80. XSNABSQP,
  81. XSNEGQP,
  82. XSXEXPQP
  83. )>;
  84. // Restricted Dispatch ALU operation for 3 cycles. The operation runs on a
  85. // single slice. However, since it is Restricted, it requires all 3 dispatches
  86. // (DISP) for that superslice.
  87. def : InstRW<[P9_ALU_3C, IP_EXEC_1C, DISP_3SLOTS_1C],
  88. (instrs
  89. (instregex "TABORT(D|W)C(I)?$"),
  90. (instregex "MTFSB(0|1)$"),
  91. (instregex "MFFSC(D)?RN(I)?$"),
  92. (instregex "CMPRB(8)?$"),
  93. (instregex "TD(I)?$"),
  94. (instregex "TW(I)?$"),
  95. (instregex "FCMP(O|U)(S|D)$"),
  96. (instregex "XSTSTDC(S|D)P$"),
  97. FTDIV,
  98. FTSQRT,
  99. CMPEQB
  100. )>;
  101. // Standard Dispatch ALU operation for 3 cycles. Only one slice used.
  102. def : InstRW<[P9_ALU_3C, IP_EXEC_1C, DISP_1C],
  103. (instrs
  104. (instregex "XSMAX(C|J)?DP$"),
  105. (instregex "XSMIN(C|J)?DP$"),
  106. (instregex "XSCMP(EQ|EXP|GE|GT|O|U)DP$"),
  107. (instregex "CNT(L|T)Z(D|W)(8)?(_rec)?$"),
  108. (instregex "POPCNT(D|W)$"),
  109. (instregex "CMPB(8)?$"),
  110. (instregex "SETB(8)?$"),
  111. XSTDIVDP,
  112. XSTSQRTDP,
  113. XSXSIGDP,
  114. XSCVSPDPN,
  115. BPERMD
  116. )>;
  117. // Standard Dispatch ALU operation for 2 cycles. Only one slice used.
  118. def : InstRW<[P9_ALU_2C, IP_EXEC_1C, DISP_1C],
  119. (instrs
  120. (instregex "S(L|R)D$"),
  121. (instregex "SRAD(I)?$"),
  122. (instregex "EXTSWSLI_32_64$"),
  123. (instregex "MFV(S)?RD$"),
  124. (instregex "MTV(S)?RD$"),
  125. (instregex "MTV(S)?RW(A|Z)$"),
  126. (instregex "CMP(WI|LWI|W|LW)(8)?$"),
  127. (instregex "CMP(L)?D(I)?$"),
  128. (instregex "SUBF(I)?C(8)?(O)?$"),
  129. (instregex "ANDI(S)?(8)?(_rec)?$"),
  130. (instregex "ADDC(8)?(O)?$"),
  131. (instregex "ADDIC(8)?(_rec)?$"),
  132. (instregex "ADD(8|4)(O)?(_rec)?$"),
  133. (instregex "ADD(E|ME|ZE)(8)?(O)?(_rec)?$"),
  134. (instregex "SUBF(E|ME|ZE)?(8)?(O)?(_rec)?$"),
  135. (instregex "NEG(8)?(O)?(_rec)?$"),
  136. (instregex "POPCNTB$"),
  137. (instregex "POPCNTB8$"),
  138. (instregex "ADD(I|IS)?(8)?$"),
  139. (instregex "LI(S)?(8)?$"),
  140. (instregex "(X)?OR(I|IS)?(8)?(_rec)?$"),
  141. (instregex "NAND(8)?(_rec)?$"),
  142. (instregex "AND(C)?(8)?(_rec)?$"),
  143. (instregex "NOR(8)?(_rec)?$"),
  144. (instregex "OR(C)?(8)?(_rec)?$"),
  145. (instregex "EQV(8)?(_rec)?$"),
  146. (instregex "EXTS(B|H|W)(8)?(_32)?(_64)?(_rec)?$"),
  147. (instregex "ADD(4|8)(TLS)?(_)?$"),
  148. (instregex "NEG(8)?(O)?$"),
  149. (instregex "ADDI(S)?toc(HA|L)(8)?$"),
  150. (instregex "LA(8)?$"),
  151. COPY,
  152. MCRF,
  153. MCRXRX,
  154. XSNABSDP,
  155. XSXEXPDP,
  156. XSABSDP,
  157. XSNEGDP,
  158. XSCPSGNDP,
  159. MFVSRWZ,
  160. MFVRWZ,
  161. EXTSWSLI,
  162. SRADI_32,
  163. RLDIC,
  164. RFEBB,
  165. TBEGIN,
  166. TRECHKPT,
  167. NOP,
  168. WAIT
  169. )>;
  170. // Restricted Dispatch ALU operation for 2 cycles. The operation runs on a
  171. // single slice. However, since it is Restricted, it requires all 3 dispatches
  172. // (DISP) for that superslice.
  173. def : InstRW<[P9_ALU_2C, IP_EXEC_1C, DISP_3SLOTS_1C],
  174. (instrs
  175. (instregex "RLDC(L|R)$"),
  176. (instregex "RLWIMI(8)?$"),
  177. (instregex "RLDIC(L|R)(_32)?(_64)?$"),
  178. (instregex "M(F|T)OCRF(8)?$"),
  179. (instregex "CR(6)?(UN)?SET$"),
  180. (instregex "CR(N)?(OR|AND)(C)?$"),
  181. (instregex "S(L|R)W(8)?$"),
  182. (instregex "RLW(INM|NM)(8)?$"),
  183. (instregex "F(N)?ABS(D|S)$"),
  184. (instregex "FNEG(D|S)$"),
  185. (instregex "FCPSGN(D|S)$"),
  186. (instregex "SRAW(I)?$"),
  187. (instregex "ISEL(8)?$"),
  188. RLDIMI,
  189. XSIEXPDP,
  190. FMR,
  191. CREQV,
  192. CRXOR,
  193. TRECLAIM,
  194. TSR,
  195. TABORT
  196. )>;
  197. // Three cycle ALU vector operation that uses an entire superslice.
  198. // Uses both ALU units (the even ALUE and odd ALUO units), two pipelines
  199. // (EXECE, EXECO) and 1 dispatch (DISP) to the given superslice.
  200. def : InstRW<[P9_ALUE_3C, P9_ALUO_3C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
  201. (instrs
  202. (instregex "M(T|F)VSCR$"),
  203. (instregex "VCMPNEZ(B|H|W)$"),
  204. (instregex "VCMPEQU(B|H|W|D)$"),
  205. (instregex "VCMPNE(B|H|W)$"),
  206. (instregex "VABSDU(B|H|W)$"),
  207. (instregex "VADDU(B|H|W)S$"),
  208. (instregex "VAVG(S|U)(B|H|W)$"),
  209. (instregex "VCMP(EQ|GE|GT)FP(_rec)?$"),
  210. (instregex "VCMPBFP(_rec)?$"),
  211. (instregex "VC(L|T)Z(B|H|W|D)$"),
  212. (instregex "VADDS(B|H|W)S$"),
  213. (instregex "V(MIN|MAX)FP$"),
  214. (instregex "V(MIN|MAX)(S|U)(B|H|W|D)$"),
  215. VBPERMD,
  216. VADDCUW,
  217. VPOPCNTW,
  218. VPOPCNTD,
  219. VPRTYBD,
  220. VPRTYBW,
  221. VSHASIGMAD,
  222. VSHASIGMAW,
  223. VSUBSBS,
  224. VSUBSHS,
  225. VSUBSWS,
  226. VSUBUBS,
  227. VSUBUHS,
  228. VSUBUWS,
  229. VSUBCUW,
  230. VCMPGTSB,
  231. VCMPGTSB_rec,
  232. VCMPGTSD,
  233. VCMPGTSD_rec,
  234. VCMPGTSH,
  235. VCMPGTSH_rec,
  236. VCMPGTSW,
  237. VCMPGTSW_rec,
  238. VCMPGTUB,
  239. VCMPGTUB_rec,
  240. VCMPGTUD,
  241. VCMPGTUD_rec,
  242. VCMPGTUH,
  243. VCMPGTUH_rec,
  244. VCMPGTUW,
  245. VCMPGTUW_rec,
  246. VCMPNEB_rec,
  247. VCMPNEH_rec,
  248. VCMPNEW_rec,
  249. VCMPNEZB_rec,
  250. VCMPNEZH_rec,
  251. VCMPNEZW_rec,
  252. VCMPEQUB_rec,
  253. VCMPEQUD_rec,
  254. VCMPEQUH_rec,
  255. VCMPEQUW_rec,
  256. XVCMPEQDP,
  257. XVCMPEQDP_rec,
  258. XVCMPEQSP,
  259. XVCMPEQSP_rec,
  260. XVCMPGEDP,
  261. XVCMPGEDP_rec,
  262. XVCMPGESP,
  263. XVCMPGESP_rec,
  264. XVCMPGTDP,
  265. XVCMPGTDP_rec,
  266. XVCMPGTSP,
  267. XVCMPGTSP_rec,
  268. XVMAXDP,
  269. XVMAXSP,
  270. XVMINDP,
  271. XVMINSP,
  272. XVTDIVDP,
  273. XVTDIVSP,
  274. XVTSQRTDP,
  275. XVTSQRTSP,
  276. XVTSTDCDP,
  277. XVTSTDCSP,
  278. XVXSIGDP,
  279. XVXSIGSP
  280. )>;
  281. // 7 cycle DP vector operation that uses an entire superslice.
  282. // Uses both DP units (the even DPE and odd DPO units), two pipelines (EXECE,
  283. // EXECO) and 1 dispatch (DISP) to the given superslice.
  284. def : InstRW<[P9_DPE_7C, P9_DPO_7C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
  285. (instrs
  286. VADDFP,
  287. VCTSXS,
  288. VCTSXS_0,
  289. VCTUXS,
  290. VCTUXS_0,
  291. VEXPTEFP,
  292. VLOGEFP,
  293. VMADDFP,
  294. VMHADDSHS,
  295. VNMSUBFP,
  296. VREFP,
  297. VRFIM,
  298. VRFIN,
  299. VRFIP,
  300. VRFIZ,
  301. VRSQRTEFP,
  302. VSUBFP,
  303. XVADDDP,
  304. XVADDSP,
  305. XVCVDPSP,
  306. XVCVDPSXDS,
  307. XVCVDPSXWS,
  308. XVCVDPUXDS,
  309. XVCVDPUXWS,
  310. XVCVHPSP,
  311. XVCVSPDP,
  312. XVCVSPHP,
  313. XVCVSPSXDS,
  314. XVCVSPSXWS,
  315. XVCVSPUXDS,
  316. XVCVSPUXWS,
  317. XVCVSXDDP,
  318. XVCVSXDSP,
  319. XVCVSXWDP,
  320. XVCVSXWSP,
  321. XVCVUXDDP,
  322. XVCVUXDSP,
  323. XVCVUXWDP,
  324. XVCVUXWSP,
  325. XVMADDADP,
  326. XVMADDASP,
  327. XVMADDMDP,
  328. XVMADDMSP,
  329. XVMSUBADP,
  330. XVMSUBASP,
  331. XVMSUBMDP,
  332. XVMSUBMSP,
  333. XVMULDP,
  334. XVMULSP,
  335. XVNMADDADP,
  336. XVNMADDASP,
  337. XVNMADDMDP,
  338. XVNMADDMSP,
  339. XVNMSUBADP,
  340. XVNMSUBASP,
  341. XVNMSUBMDP,
  342. XVNMSUBMSP,
  343. XVRDPI,
  344. XVRDPIC,
  345. XVRDPIM,
  346. XVRDPIP,
  347. XVRDPIZ,
  348. XVREDP,
  349. XVRESP,
  350. XVRSPI,
  351. XVRSPIC,
  352. XVRSPIM,
  353. XVRSPIP,
  354. XVRSPIZ,
  355. XVRSQRTEDP,
  356. XVRSQRTESP,
  357. XVSUBDP,
  358. XVSUBSP,
  359. VCFSX,
  360. VCFSX_0,
  361. VCFUX,
  362. VCFUX_0,
  363. VMHRADDSHS,
  364. VMLADDUHM,
  365. VMSUMMBM,
  366. VMSUMSHM,
  367. VMSUMSHS,
  368. VMSUMUBM,
  369. VMSUMUHM,
  370. VMSUMUDM,
  371. VMSUMUHS,
  372. VMULESB,
  373. VMULESH,
  374. VMULESW,
  375. VMULEUB,
  376. VMULEUH,
  377. VMULEUW,
  378. VMULOSB,
  379. VMULOSH,
  380. VMULOSW,
  381. VMULOUB,
  382. VMULOUH,
  383. VMULOUW,
  384. VMULUWM,
  385. VSUM2SWS,
  386. VSUM4SBS,
  387. VSUM4SHS,
  388. VSUM4UBS,
  389. VSUMSWS
  390. )>;
  391. // 5 cycle Restricted DP operation. One DP unit, one EXEC pipeline and all three
  392. // dispatch units for the superslice.
  393. def : InstRW<[P9_DP_5C, IP_EXEC_1C, DISP_3SLOTS_1C],
  394. (instrs
  395. (instregex "MADD(HD|HDU|LD|LD8)$"),
  396. (instregex "MUL(HD|HW|LD|LI|LI8|LW)(U)?(O)?$")
  397. )>;
  398. // 7 cycle Restricted DP operation. One DP unit, one EXEC pipeline and all three
  399. // dispatch units for the superslice.
  400. def : InstRW<[P9_DP_7C, IP_EXEC_1C, DISP_3SLOTS_1C],
  401. (instrs
  402. FRSP,
  403. (instregex "FRI(N|P|Z|M)(D|S)$"),
  404. (instregex "FRE(S)?$"),
  405. (instregex "FADD(S)?$"),
  406. (instregex "FMSUB(S)?$"),
  407. (instregex "FMADD(S)?$"),
  408. (instregex "FSUB(S)?$"),
  409. (instregex "FCFID(U)?(S)?$"),
  410. (instregex "FCTID(U)?(Z)?$"),
  411. (instregex "FCTIW(U)?(Z)?$"),
  412. (instregex "FRSQRTE(S)?$"),
  413. FNMADDS,
  414. FNMADD,
  415. FNMSUBS,
  416. FNMSUB,
  417. FSELD,
  418. FSELS,
  419. FMULS,
  420. FMUL,
  421. XSMADDADP,
  422. XSMADDASP,
  423. XSMADDMDP,
  424. XSMADDMSP,
  425. XSMSUBADP,
  426. XSMSUBASP,
  427. XSMSUBMDP,
  428. XSMSUBMSP,
  429. XSMULDP,
  430. XSMULSP,
  431. XSNMADDADP,
  432. XSNMADDASP,
  433. XSNMADDMDP,
  434. XSNMADDMSP,
  435. XSNMSUBADP,
  436. XSNMSUBASP,
  437. XSNMSUBMDP,
  438. XSNMSUBMSP
  439. )>;
  440. // 7 cycle Restricted DP operation and one 3 cycle ALU operation.
  441. // These operations can be done in parallel. The DP is restricted so we need a
  442. // full 4 dispatches.
  443. def : InstRW<[P9_DP_7C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C,
  444. DISP_3SLOTS_1C, DISP_1C],
  445. (instrs
  446. (instregex "FSEL(D|S)_rec$")
  447. )>;
  448. // 5 Cycle Restricted DP operation and one 2 cycle ALU operation.
  449. def : InstRW<[P9_DPOpAndALUOp_7C, IP_EXEC_1C, IP_EXEC_1C,
  450. DISP_3SLOTS_1C, DISP_1C],
  451. (instrs
  452. (instregex "MUL(H|L)(D|W)(U)?(O)?_rec$")
  453. )>;
  454. // 7 cycle Restricted DP operation and one 3 cycle ALU operation.
  455. // These operations must be done sequentially. The DP is restricted so we need a
  456. // full 4 dispatches.
  457. def : InstRW<[P9_DPOpAndALU2Op_10C, IP_EXEC_1C, IP_EXEC_1C,
  458. DISP_3SLOTS_1C, DISP_1C],
  459. (instrs
  460. (instregex "FRI(N|P|Z|M)(D|S)_rec$"),
  461. (instregex "FRE(S)?_rec$"),
  462. (instregex "FADD(S)?_rec$"),
  463. (instregex "FSUB(S)?_rec$"),
  464. (instregex "F(N)?MSUB(S)?_rec$"),
  465. (instregex "F(N)?MADD(S)?_rec$"),
  466. (instregex "FCFID(U)?(S)?_rec$"),
  467. (instregex "FCTID(U)?(Z)?_rec$"),
  468. (instregex "FCTIW(U)?(Z)?_rec$"),
  469. (instregex "FMUL(S)?_rec$"),
  470. (instregex "FRSQRTE(S)?_rec$"),
  471. FRSP_rec
  472. )>;
  473. // 7 cycle DP operation. One DP unit, one EXEC pipeline and 1 dispatch unit.
  474. def : InstRW<[P9_DP_7C, IP_EXEC_1C, DISP_1C],
  475. (instrs
  476. XSADDDP,
  477. XSADDSP,
  478. XSCVDPHP,
  479. XSCVDPSP,
  480. XSCVDPSXDS,
  481. XSCVDPSXDSs,
  482. XSCVDPSXWS,
  483. XSCVDPUXDS,
  484. XSCVDPUXDSs,
  485. XSCVDPUXWS,
  486. XSCVDPSXWSs,
  487. XSCVDPUXWSs,
  488. XSCVHPDP,
  489. XSCVSPDP,
  490. XSCVSXDDP,
  491. XSCVSXDSP,
  492. XSCVUXDDP,
  493. XSCVUXDSP,
  494. XSRDPI,
  495. XSRDPIC,
  496. XSRDPIM,
  497. XSRDPIP,
  498. XSRDPIZ,
  499. XSREDP,
  500. XSRESP,
  501. XSRSQRTEDP,
  502. XSRSQRTESP,
  503. XSSUBDP,
  504. XSSUBSP,
  505. XSCVDPSPN,
  506. XSRSP
  507. )>;
  508. // Three Cycle PM operation. Only one PM unit per superslice so we use the whole
  509. // superslice. That includes both exec pipelines (EXECO, EXECE) and one
  510. // dispatch.
  511. def : InstRW<[P9_PM_3C, IP_EXECO_1C, IP_EXECE_1C, DISP_1C],
  512. (instrs
  513. (instregex "LVS(L|R)$"),
  514. (instregex "VSPLTIS(W|H|B)$"),
  515. (instregex "VSPLT(W|H|B)(s)?$"),
  516. (instregex "V_SETALLONES(B|H)?$"),
  517. (instregex "VEXTRACTU(B|H|W)$"),
  518. (instregex "VINSERT(B|H|W|D)$"),
  519. MFVSRLD,
  520. MTVSRWS,
  521. VBPERMQ,
  522. VCLZLSBB,
  523. VCTZLSBB,
  524. VEXTRACTD,
  525. VEXTUBLX,
  526. VEXTUBRX,
  527. VEXTUHLX,
  528. VEXTUHRX,
  529. VEXTUWLX,
  530. VEXTUWRX,
  531. VGBBD,
  532. VMRGHB,
  533. VMRGHH,
  534. VMRGHW,
  535. VMRGLB,
  536. VMRGLH,
  537. VMRGLW,
  538. VPERM,
  539. VPERMR,
  540. VPERMXOR,
  541. VPKPX,
  542. VPKSDSS,
  543. VPKSDUS,
  544. VPKSHSS,
  545. VPKSHUS,
  546. VPKSWSS,
  547. VPKSWUS,
  548. VPKUDUM,
  549. VPKUDUS,
  550. VPKUHUM,
  551. VPKUHUS,
  552. VPKUWUM,
  553. VPKUWUS,
  554. VPRTYBQ,
  555. VSL,
  556. VSLDOI,
  557. VSLO,
  558. VSLV,
  559. VSR,
  560. VSRO,
  561. VSRV,
  562. VUPKHPX,
  563. VUPKHSB,
  564. VUPKHSH,
  565. VUPKHSW,
  566. VUPKLPX,
  567. VUPKLSB,
  568. VUPKLSH,
  569. VUPKLSW,
  570. XXBRD,
  571. XXBRH,
  572. XXBRQ,
  573. XXBRW,
  574. XXEXTRACTUW,
  575. XXINSERTW,
  576. XXMRGHW,
  577. XXMRGLW,
  578. XXPERM,
  579. XXPERMR,
  580. XXSLDWI,
  581. XXSLDWIs,
  582. XXSPLTIB,
  583. XXSPLTW,
  584. XXSPLTWs,
  585. XXPERMDI,
  586. XXPERMDIs,
  587. VADDCUQ,
  588. VADDECUQ,
  589. VADDEUQM,
  590. VADDUQM,
  591. VMUL10CUQ,
  592. VMUL10ECUQ,
  593. VMUL10EUQ,
  594. VMUL10UQ,
  595. VSUBCUQ,
  596. VSUBECUQ,
  597. VSUBEUQM,
  598. VSUBUQM,
  599. XSCMPEXPQP,
  600. XSCMPOQP,
  601. XSCMPUQP,
  602. XSTSTDCQP,
  603. XSXSIGQP,
  604. BCDCFN_rec,
  605. BCDCFZ_rec,
  606. BCDCPSGN_rec,
  607. BCDCTN_rec,
  608. BCDCTZ_rec,
  609. BCDSETSGN_rec,
  610. BCDS_rec,
  611. BCDTRUNC_rec,
  612. BCDUS_rec,
  613. BCDUTRUNC_rec,
  614. BCDADD_rec,
  615. BCDSUB_rec
  616. )>;
  617. // 12 Cycle DFU operation. Only one DFU unit per CPU so we use a whole
  618. // superslice. That includes both exec pipelines (EXECO, EXECE) and one
  619. // dispatch.
  620. def : InstRW<[P9_DFU_12C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
  621. (instrs
  622. BCDSR_rec,
  623. XSADDQP,
  624. XSADDQPO,
  625. XSCVDPQP,
  626. XSCVQPDP,
  627. XSCVQPDPO,
  628. XSCVQPSDZ,
  629. XSCVQPSWZ,
  630. XSCVQPUDZ,
  631. XSCVQPUWZ,
  632. XSCVSDQP,
  633. XSCVUDQP,
  634. XSRQPI,
  635. XSRQPIX,
  636. XSRQPXP,
  637. XSSUBQP,
  638. XSSUBQPO
  639. )>;
  640. // 23 Cycle DFU operation. Only one DFU unit per CPU so we use a whole
  641. // superslice. That includes both exec pipelines (EXECO, EXECE) and one
  642. // dispatch.
  643. def : InstRW<[P9_DFU_23C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
  644. (instrs
  645. BCDCTSQ_rec
  646. )>;
  647. // 24 Cycle DFU operation. Only one DFU unit per CPU so we use a whole
  648. // superslice. That includes both exec pipelines (EXECO, EXECE) and one
  649. // dispatch.
  650. def : InstRW<[P9_DFU_24C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
  651. (instrs
  652. XSMADDQP,
  653. XSMADDQPO,
  654. XSMSUBQP,
  655. XSMSUBQPO,
  656. XSMULQP,
  657. XSMULQPO,
  658. XSNMADDQP,
  659. XSNMADDQPO,
  660. XSNMSUBQP,
  661. XSNMSUBQPO
  662. )>;
  663. // 37 Cycle DFU operation. Only one DFU unit per CPU so we use a whole
  664. // superslice. That includes both exec pipelines (EXECO, EXECE) and one
  665. // dispatch.
  666. def : InstRW<[P9_DFU_37C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
  667. (instrs
  668. BCDCFSQ_rec
  669. )>;
  670. // 58 Cycle DFU operation. Only one DFU unit per CPU so we use a whole
  671. // superslice. That includes both exec pipelines (EXECO, EXECE) and one
  672. // dispatch.
  673. def : InstRW<[P9_DFU_58C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
  674. (instrs
  675. XSDIVQP,
  676. XSDIVQPO
  677. )>;
  678. // 76 Cycle DFU operation. Only one DFU unit per CPU so we use a whole
  679. // superslice. That includes both exec pipelines (EXECO, EXECE) and one
  680. // dispatch.
  681. def : InstRW<[P9_DFU_76C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
  682. (instrs
  683. XSSQRTQP,
  684. XSSQRTQPO
  685. )>;
  686. // 6 Cycle Load uses a single slice. NOTE(review): unlike nearly every other pattern in this file, the regex below has no trailing '$' anchor, so it may also match longer names starting with LXVL - confirm this is intended.
  687. def : InstRW<[P9_LS_6C, IP_AGEN_1C, DISP_1C],
  688. (instrs
  689. (instregex "LXVL(L)?")
  690. )>;
  691. // 5 Cycle Load uses a single slice. NOTE(review): the "LVX(L)?" regex below has no trailing '$' anchor, unlike nearly every other pattern in this file, so it may also match longer names starting with LVX - confirm this is intended.
  692. def : InstRW<[P9_LS_5C, IP_AGEN_1C, DISP_1C],
  693. (instrs
  694. (instregex "LVE(B|H|W)X$"),
  695. (instregex "LVX(L)?"),
  696. (instregex "LXSI(B|H)ZX$"),
  697. LXSDX,
  698. LXVB16X,
  699. LXVD2X,
  700. LXVWSX,
  701. LXSIWZX,
  702. LXV,
  703. LXVX,
  704. LXSD,
  705. DFLOADf64,
  706. XFLOADf64,
  707. LIWZX
  708. )>;
  709. // 4 Cycle Load uses a single slice.
  710. def : InstRW<[P9_LS_4C, IP_AGEN_1C, DISP_1C],
  711. (instrs
  712. (instregex "DCB(F|T|ST)(EP)?$"),
  713. (instregex "DCBZ(L)?(EP)?$"),
  714. (instregex "DCBTST(EP)?$"),
  715. (instregex "CP_COPY(8)?$"),
  716. (instregex "ICBI(EP)?$"),
  717. (instregex "ICBT(LS)?$"),
  718. (instregex "LBARX(L)?$"),
  719. (instregex "LBZ(CIX|8|X|X8|XTLS|XTLS_32)?(_)?$"),
  720. (instregex "LD(ARX|ARXL|BRX|CIX|X|XTLS)?(_)?$"),
  721. (instregex "LH(A|B)RX(L)?(8)?$"),
  722. (instregex "LHZ(8|CIX|X|X8|XTLS|XTLS_32)?(_)?$"),
  723. (instregex "LWARX(L)?$"),
  724. (instregex "LWBRX(8)?$"),
  725. (instregex "LWZ(8|CIX|X|X8|XTLS|XTLS_32)?(_)?$"),
  726. CP_ABORT,
  727. DARN,
  728. EnforceIEIO,
  729. ISYNC,
  730. MSGSYNC,
  731. TLBSYNC,
  732. SYNC,
  733. LMW,
  734. LSWI
  735. )>;
  736. // 4 Cycle Restricted load uses a single slice but the dispatch for the whole
  737. // superslice.
  738. def : InstRW<[P9_LS_4C, IP_AGEN_1C, DISP_3SLOTS_1C],
  739. (instrs
  740. LFIWZX,
  741. LFDX,
  742. LFD
  743. )>;
  744. // Cracked Load Instructions.
  745. // Load instructions that can be done in parallel.
  746. def : InstRW<[P9_LS_4C, P9_LS_4C, IP_AGEN_1C, IP_AGEN_1C,
  747. DISP_PAIR_1C],
  748. (instrs
  749. SLBIA,
  750. SLBIE,
  751. SLBMFEE,
  752. SLBMFEV,
  753. SLBMTE,
  754. TLBIEL
  755. )>;
  756. // Cracked Load Instruction.
  757. // Requires Load and ALU pieces totaling 6 cycles. The Load and ALU
  758. // operations can be run in parallel.
  759. def : InstRW<[P9_LS_4C, P9_ALU_2C, IP_EXEC_1C, IP_AGEN_1C,
  760. DISP_PAIR_1C, DISP_PAIR_1C],
  761. (instrs
  762. (instregex "L(W|H)ZU(X)?(8)?$")
  763. )>;
  764. // Cracked TEND Instruction.
  765. // Requires Load and ALU pieces totaling 6 cycles. The Load and ALU
  766. // operations can be run in parallel.
  767. def : InstRW<[P9_LS_4C, P9_ALU_2C, IP_EXEC_1C, IP_AGEN_1C,
  768. DISP_1C, DISP_1C],
  769. (instrs
  770. TEND
  771. )>;
  772. // Cracked Store Instruction
  773. // Consecutive Store and ALU instructions. The store is restricted and requires
  774. // three dispatches.
  775. def : InstRW<[P9_StoreAndALUOp_3C, IP_EXEC_1C, IP_EXEC_1C, IP_AGEN_1C,
  776. DISP_3SLOTS_1C, DISP_1C],
  777. (instrs
  778. (instregex "ST(B|H|W|D)CX$")
  779. )>;
  780. // Cracked Load Instruction.
  781. // Two consecutive load operations for a total of 8 cycles.
  782. def : InstRW<[P9_LoadAndLoadOp_8C, IP_AGEN_1C, IP_AGEN_1C,
  783. DISP_1C, DISP_1C],
  784. (instrs
  785. LDMX
  786. )>;
  787. // Cracked Load instruction.
  788. // Requires consecutive Load and ALU pieces totaling 6 cycles. The Load and ALU
  789. // operations cannot be done at the same time and so their latencies are added.
  790. def : InstRW<[P9_LoadAndALUOp_6C, IP_EXEC_1C, IP_AGEN_1C,
  791. DISP_1C, DISP_1C],
  792. (instrs
  793. (instregex "LHA(X)?(8)?$"),
  794. (instregex "CP_PASTE(8)?_rec$"),
  795. (instregex "LWA(X)?(_32)?$"),
  796. TCHECK
  797. )>;
  798. // Cracked Restricted Load instruction.
  799. // Requires consecutive Load and ALU pieces totaling 6 cycles. The Load and ALU
  800. // operations cannot be done at the same time and so their latencies are added.
  801. // Full 6 dispatches are required as this is both cracked and restricted.
  802. def : InstRW<[P9_LoadAndALUOp_6C, IP_EXEC_1C, IP_AGEN_1C,
  803. DISP_3SLOTS_1C, DISP_3SLOTS_1C],
  804. (instrs
  805. LFIWAX
  806. )>;
  807. // Cracked Load instruction.
  808. // Requires consecutive Load and ALU pieces totaling 7 cycles. The Load and ALU
  809. // operations cannot be done at the same time and so their latencies are added.
  810. // Two dispatches (DISP_1C twice) are required as this is a cracked instruction.
  811. def : InstRW<[P9_LoadAndALUOp_7C, IP_AGEN_1C, IP_EXEC_1C, DISP_1C, DISP_1C],
  812. (instrs
  813. LXSIWAX,
  814. LIWAX
  815. )>;
  816. // Cracked Load instruction.
  817. // Requires consecutive Load (4 cycles) and ALU (3 cycles) pieces totaling 7
  818. // cycles. The Load and ALU operations cannot be done at the same time and so
  819. // their latencies are added.
  820. // Full 6 dispatches are required as this is a restricted instruction.
  821. def : InstRW<[P9_LoadAndALU2Op_7C, IP_AGEN_1C, IP_EXEC_1C,
  822. DISP_3SLOTS_1C, DISP_3SLOTS_1C],
  823. (instrs
  824. LFSX,
  825. LFS
  826. )>;
  827. // Cracked Load instruction.
  828. // Requires consecutive Load and ALU pieces totaling 8 cycles. The Load and ALU
  829. // operations cannot be done at the same time and so their latencies are added.
  830. // Two dispatches (DISP_1C twice) are required as this is a cracked instruction.
  831. def : InstRW<[P9_LoadAndALU2Op_8C, IP_AGEN_1C, IP_EXEC_1C, DISP_1C, DISP_1C],
  832. (instrs
  833. LXSSP,
  834. LXSSPX,
  835. XFLOADf32,
  836. DFLOADf32
  837. )>;
  838. // Cracked 3-Way Load Instruction
  839. // Load with two ALU operations that depend on each other
  840. def : InstRW<[P9_LoadAndALUOp_6C, P9_ALU_2C, IP_AGEN_1C, IP_EXEC_1C, IP_EXEC_1C,
  841. DISP_PAIR_1C, DISP_PAIR_1C, DISP_1C],
  842. (instrs
  843. (instregex "LHAU(X)?(8)?$"),
  844. LWAUX
  845. )>;
  846. // Cracked Load that requires the PM resource.
  847. // Since the Load and the PM cannot be done at the same time the latencies are
  848. // added. Requires 8 cycles. Since the PM requires the full superslice we need
  849. // both EXECE, EXECO pipelines as well as 1 dispatch for the PM. The Load
  850. // requires the remaining 1 dispatch.
  851. def : InstRW<[P9_LoadAndPMOp_8C, IP_AGEN_1C, IP_EXECE_1C, IP_EXECO_1C,
  852. DISP_1C, DISP_1C],
  853. (instrs
  854. LXVH8X,
  855. LXVDSX,
  856. LXVW4X
  857. )>;
  858. // Single slice Restricted store operation. The restricted operation requires
  859. // all three dispatches for the superslice.
  860. def : InstRW<[P9_LS_1C, IP_EXEC_1C, IP_AGEN_1C, DISP_3SLOTS_1C],
  861. (instrs
  862. (instregex "STF(S|D|IWX|SX|DX)$"),
  863. (instregex "STXS(D|DX|SPX|IWX|IBX|IHX|SP)(v)?$"),
  864. (instregex "STW(8)?$"),
  865. (instregex "(D|X)FSTORE(f32|f64)$"),
  866. (instregex "ST(W|H|D)BRX$"),
  867. (instregex "ST(B|H|D)(8)?$"),
  868. (instregex "ST(B|W|H|D)(CI)?X(TLS|TLS_32)?(8)?(_)?$"),
  869. STIWX,
  870. SLBIEG,
  871. STMW,
  872. STSWI,
  873. TLBIE
  874. )>;
  875. // Vector Store Instruction
  876. // Requires the whole superslice and therefore requires one dispatch
  877. // as well as both the Even and Odd exec pipelines.
  878. def : InstRW<[P9_LS_1C, IP_EXECE_1C, IP_EXECO_1C, IP_AGEN_1C, DISP_1C],
  879. (instrs
  880. (instregex "STVE(B|H|W)X$"),
  881. (instregex "STVX(L)?$"),
  882. (instregex "STXV(B16X|H8X|W4X|D2X|L|LL|X)?$")
  883. )>;
  884. // 5 Cycle DIV operation. Only one DIV unit per superslice so we use the whole
  885. // superslice. That includes both exec pipelines (EXECO, EXECE) and two
  886. // dispatches.
  887. def : InstRW<[P9_DIV_5C, IP_EXECE_1C, IP_EXECO_1C, DISP_EVEN_1C],
  888. (instrs
  889. (instregex "MTCTR(8)?(loop)?$"),
  890. (instregex "MTLR(8)?$")
  891. )>;
  892. // 12 Cycle DIV operation. Only one DIV unit per superslice so we use the whole
  893. // superslice. That includes both exec pipelines (EXECO, EXECE) and two
  894. // dispatches.
  895. def : InstRW<[P9_DIV_12C, IP_EXECE_1C, IP_EXECO_1C, DISP_EVEN_1C],
  896. (instrs
  897. (instregex "M(T|F)VRSAVE(v)?$"),
  898. (instregex "M(T|F)PMR$"),
  899. (instregex "M(T|F)TB(8)?$"),
  900. (instregex "MF(SPR|CTR|LR)(8)?$"),
  901. (instregex "M(T|F)MSR(D)?$"),
  902. (instregex "MTSPR(8)?$")
  903. )>;
  904. // 16 Cycle DIV operation. Only one DIV unit per superslice so we use the whole
  905. // superslice. That includes both exec pipelines (EXECO, EXECE) and two
  906. // dispatches.
  907. def : InstRW<[P9_DIV_16C_8, IP_EXECO_1C, IP_EXECE_1C, DISP_EVEN_1C],
  908. (instrs
  909. DIVW,
  910. DIVWO,
  911. DIVWU,
  912. DIVWUO,
  913. MODSW
  914. )>;
  915. // 24 Cycle DIV operation. Only one DIV unit per superslice so we use the whole
  916. // superslice. That includes both exec pipelines (EXECO, EXECE) and two
  917. // dispatches.
  918. def : InstRW<[P9_DIV_24C_8, IP_EXECO_1C, IP_EXECE_1C, DISP_EVEN_1C],
  919. (instrs
  920. DIVWE,
  921. DIVWEO,
  922. DIVD,
  923. DIVDO,
  924. DIVWEU,
  925. DIVWEUO,
  926. DIVDU,
  927. DIVDUO,
  928. MODSD,
  929. MODUD,
  930. MODUW
  931. )>;
  932. // 40 Cycle DIV operation. Only one DIV unit per superslice so we use the whole
  933. // superslice. That includes both exec pipelines (EXECO, EXECE) and the same
  934. // DISP_EVEN_1C dispatch resource that the other DIV operations use.
  935. def : InstRW<[P9_DIV_40C_8, IP_EXECO_1C, IP_EXECE_1C, DISP_EVEN_1C],
  936. (instrs
  937. DIVDE,
  938. DIVDEO,
  939. DIVDEU,
  940. DIVDEUO
  941. )>;
  942. // Cracked DIV and ALU operation. Requires one full slice for the ALU operation
  943. // and one full superslice for the DIV operation since there is only one DIV per
  944. // superslice. Latency of DIV plus ALU is 18.
  945. def : InstRW<[P9_IntDivAndALUOp_18C_8, IP_EXECE_1C, IP_EXECO_1C, IP_EXEC_1C,
  946. DISP_EVEN_1C, DISP_1C],
  947. (instrs
  948. (instregex "DIVW(U)?(O)?_rec$")
  949. )>;
  950. // Cracked DIV and ALU operation. Requires one full slice for the ALU operation
  951. // and one full superslice for the DIV operation since there is only one DIV per
  952. // superslice. Latency of DIV plus ALU is 26.
  953. def : InstRW<[P9_IntDivAndALUOp_26C_8, IP_EXECE_1C, IP_EXECO_1C, IP_EXEC_1C,
  954. DISP_EVEN_1C, DISP_1C],
  955. (instrs
  956. DIVD_rec,
  957. DIVDO_rec,
  958. DIVDU_rec,
  959. DIVDUO_rec,
  960. DIVWE_rec,
  961. DIVWEO_rec,
  962. DIVWEU_rec,
  963. DIVWEUO_rec
  964. )>;
  965. // Cracked DIV and ALU operation. Requires one full slice for the ALU operation
  966. // and one full superslice for the DIV operation since there is only one DIV per
  967. // superslice. Latency of DIV plus ALU is 42.
  968. def : InstRW<[P9_IntDivAndALUOp_42C_8, IP_EXECE_1C, IP_EXECO_1C, IP_EXEC_1C,
  969. DISP_EVEN_1C, DISP_1C],
  970. (instrs
  971. DIVDE_rec,
  972. DIVDEO_rec,
  973. DIVDEU_rec,
  974. DIVDEUO_rec
  975. )>;
  976. // CR access instructions in IIC_BrMCR, IIC_BrMCRX.
  977. // Cracked, restricted, ALU operations.
  978. // Here the two ALU ops can actually be done in parallel and therefore the
  979. // latencies are not added together. Otherwise this is like having two
  980. // instructions running together on two pipelines and 6 dispatches. ALU ops are
  981. // 2 cycles each.
  982. def : InstRW<[P9_ALU_2C, P9_ALU_2C, IP_EXEC_1C, IP_EXEC_1C,
  983. DISP_3SLOTS_1C, DISP_3SLOTS_1C],
  984. (instrs
  985. MTCRF,
  986. MTCRF8
  987. )>;
  988. // Cracked ALU operations.
  989. // Here the two ALU ops can actually be done in parallel and therefore the
  990. // latencies are not added together. Otherwise this is like having two
  991. // instructions running together on two pipelines and 2 dispatches. ALU ops are
  992. // 2 cycles each.
  993. def : InstRW<[P9_ALU_2C, P9_ALU_2C, IP_EXEC_1C, IP_EXEC_1C,
  994. DISP_1C, DISP_1C],
  995. (instrs
  996. (instregex "ADDC(8)?(O)?_rec$"),
  997. (instregex "SUBFC(8)?(O)?_rec$")
  998. )>;
  999. // Cracked ALU operations.
  1000. // Two ALU ops can be done in parallel.
  1001. // One is a three cycle ALU op, the other is a two cycle ALU op.
  1002. // One of the ALU ops is restricted, the other is not, so we have a total of
  1003. // 4 dispatches (3 for the restricted op plus 1).
  1004. def : InstRW<[P9_ALU_2C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C,
  1005. DISP_3SLOTS_1C, DISP_1C],
  1006. (instrs
  1007. (instregex "F(N)?ABS(D|S)_rec$"),
  1008. (instregex "FCPSGN(D|S)_rec$"),
  1009. (instregex "FNEG(D|S)_rec$"),
  1010. FMR_rec
  1011. )>;
  1012. // Cracked ALU operations.
  1013. // Here the two ALU ops can actually be done in parallel and therefore the
  1014. // latencies are not added together. Otherwise this is like having two
  1015. // instructions running together on two pipelines and 2 dispatches.
  1016. // ALU ops are 3 cycles each.
  1017. def : InstRW<[P9_ALU_3C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C,
  1018. DISP_1C, DISP_1C],
  1019. (instrs
  1020. MCRFS
  1021. )>;
  1022. // Cracked Restricted ALU operations.
  1023. // Here the two ALU ops can actually be done in parallel and therefore the
  1024. // latencies are not added together. Otherwise this is like having two
  1025. // instructions running together on two pipelines and 6 dispatches.
  1026. // ALU ops are 3 cycles each.
  1027. def : InstRW<[P9_ALU_3C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C,
  1028. DISP_3SLOTS_1C, DISP_3SLOTS_1C],
  1029. (instrs
  1030. (instregex "MTFSF(b|_rec)?$"),
  1031. (instregex "MTFSFI(_rec)?$"),
  1032. MTFSFIb
  1033. )>;
  1034. // Cracked instruction made of two ALU ops.
  1035. // The two ops cannot be done in parallel, so one combined write is used.
  1036. // One of the ALU ops is restricted and takes 3 dispatches (DISP_3SLOTS_1C).
  1037. def : InstRW<[P9_ALUOpAndALUOp_4C, IP_EXEC_1C, IP_EXEC_1C,
  1038. DISP_3SLOTS_1C, DISP_1C],
  1039. (instrs
  1040. (instregex "RLD(I)?C(R|L)_rec$"),
  1041. (instregex "RLW(IMI|INM|NM)(8)?_rec$"),
  1042. (instregex "SLW(8)?_rec$"),
  1043. (instregex "SRAW(I)?_rec$"),
  1044. (instregex "SRW(8)?_rec$"),
  1045. RLDICL_32_rec,
  1046. RLDIMI_rec
  1047. )>;
  1048. // Cracked instruction made of two ALU ops.
  1049. // The two ops cannot be done in parallel, so one combined write is used.
  1050. // Both of the ALU ops are restricted and each takes 3 dispatches.
  1051. def : InstRW<[P9_ALU2OpAndALU2Op_6C, IP_EXEC_1C, IP_EXEC_1C,
  1052. DISP_3SLOTS_1C, DISP_3SLOTS_1C],
  1053. (instrs
  1054. (instregex "MFFS(L|CE|_rec)?$")
  1055. )>;
  1056. // Cracked ALU instruction composed of three consecutive 2 cycle ALU ops for a
  1057. // total of 6 cycles. All of the ALU operations are also restricted so each
  1058. // takes 3 dispatches for a total of 9.
  1059. def : InstRW<[P9_ALUOpAndALUOpAndALUOp_6C, IP_EXEC_1C, IP_EXEC_1C, IP_EXEC_1C,
  1060. DISP_3SLOTS_1C, DISP_3SLOTS_1C, DISP_3SLOTS_1C],
  1061. (instrs
  1062. (instregex "MFCR(8)?$")
  1063. )>;
  1064. // Cracked instruction made of two ALU ops.
  1065. // The two ops cannot be done in parallel. Each op uses a DISP_1C dispatch.
  1066. def : InstRW<[P9_ALUOpAndALUOp_4C, IP_EXEC_1C, IP_EXEC_1C, DISP_1C, DISP_1C],
  1067. (instrs
  1068. (instregex "EXTSWSLI_32_64_rec$"),
  1069. (instregex "SRAD(I)?_rec$"),
  1070. EXTSWSLI_rec,
  1071. SLD_rec,
  1072. SRD_rec,
  1073. RLDIC_rec
  1074. )>;
  1075. // 33 Cycle DP Instruction Restricted. Takes one slice and 3 dispatches (DISP_3SLOTS_1C).
  1076. def : InstRW<[P9_DP_33C_8, IP_EXEC_1C, DISP_3SLOTS_1C],
  1077. (instrs
  1078. FDIV
  1079. )>;
  1080. // 33 Cycle DP Instruction Restricted and Cracked with 3 Cycle ALU (33 + 3 = 36 cycles).
  1081. def : InstRW<[P9_DPOpAndALU2Op_36C_8, IP_EXEC_1C, IP_EXEC_1C,
  1082. DISP_3SLOTS_1C, DISP_1C],
  1083. (instrs
  1084. FDIV_rec
  1085. )>;
  1086. // 36 Cycle DP Instruction.
  1087. // Instruction can be done on a single slice with 1 dispatch (DISP_1C).
  1088. def : InstRW<[P9_DP_36C_10, IP_EXEC_1C, DISP_1C],
  1089. (instrs
  1090. XSSQRTDP
  1091. )>;
  1092. // 36 Cycle DP Instruction Restricted. Takes one slice and 3 dispatches (DISP_3SLOTS_1C).
  1093. def : InstRW<[P9_DP_36C_10, IP_EXEC_1C, DISP_3SLOTS_1C],
  1094. (instrs
  1095. FSQRT
  1096. )>;
  1097. // 36 Cycle DP Vector Instruction. Uses both the EXECE and EXECO pipelines and 1 dispatch.
  1098. def : InstRW<[P9_DPE_36C_10, P9_DPO_36C_10, IP_EXECE_1C, IP_EXECO_1C,
  1099. DISP_1C],
  1100. (instrs
  1101. XVSQRTDP
  1102. )>;
  1103. // 27 Cycle DP Vector Instruction. Uses both the EXECE and EXECO pipelines and 1 dispatch.
  1104. def : InstRW<[P9_DPE_27C_10, P9_DPO_27C_10, IP_EXECE_1C, IP_EXECO_1C,
  1105. DISP_1C],
  1106. (instrs
  1107. XVSQRTSP
  1108. )>;
  1109. // 36 Cycle DP Instruction Restricted and Cracked with 3 Cycle ALU (36 + 3 = 39 cycles).
  1110. def : InstRW<[P9_DPOpAndALU2Op_39C_10, IP_EXEC_1C, IP_EXEC_1C,
  1111. DISP_3SLOTS_1C, DISP_1C],
  1112. (instrs
  1113. FSQRT_rec
  1114. )>;
  1115. // 26 Cycle DP Instruction. Takes one slice and 1 dispatch (DISP_1C).
  1116. def : InstRW<[P9_DP_26C_5, IP_EXEC_1C, DISP_1C],
  1117. (instrs
  1118. XSSQRTSP
  1119. )>;
  1120. // 26 Cycle DP Instruction Restricted. Takes one slice and 3 dispatches (DISP_3SLOTS_1C).
  1121. def : InstRW<[P9_DP_26C_5, IP_EXEC_1C, DISP_3SLOTS_1C],
  1122. (instrs
  1123. FSQRTS
  1124. )>;
  1125. // 26 Cycle DP Instruction Restricted and Cracked with 3 Cycle ALU (26 + 3 = 29 cycles).
  1126. def : InstRW<[P9_DPOpAndALU2Op_29C_5, IP_EXEC_1C, IP_EXEC_1C,
  1127. DISP_3SLOTS_1C, DISP_1C],
  1128. (instrs
  1129. FSQRTS_rec
  1130. )>;
  1131. // 33 Cycle DP Instruction. Takes one slice and 1 dispatch (DISP_1C).
  1132. def : InstRW<[P9_DP_33C_8, IP_EXEC_1C, DISP_1C],
  1133. (instrs
  1134. XSDIVDP
  1135. )>;
  1136. // 22 Cycle DP Instruction Restricted. Takes one slice and 3 dispatches (DISP_3SLOTS_1C).
  1137. def : InstRW<[P9_DP_22C_5, IP_EXEC_1C, DISP_3SLOTS_1C],
  1138. (instrs
  1139. FDIVS
  1140. )>;
  1141. // 22 Cycle DP Instruction Restricted and Cracked with 3 Cycle ALU (22 + 3 = 25 cycles, per the _25C write).
  1142. def : InstRW<[P9_DPOpAndALU2Op_25C_5, IP_EXEC_1C, IP_EXEC_1C,
  1143. DISP_3SLOTS_1C, DISP_1C],
  1144. (instrs
  1145. FDIVS_rec
  1146. )>;
  1147. // 22 Cycle DP Instruction. Takes one slice and 1 dispatch (DISP_1C).
  1148. def : InstRW<[P9_DP_22C_5, IP_EXEC_1C, DISP_1C],
  1149. (instrs
  1150. XSDIVSP
  1151. )>;
  1152. // 24 Cycle DP Vector Instruction. Takes one full superslice.
  1153. // This includes both the EXECE and EXECO pipelines and 1 dispatch for the
  1154. // given superslice.
  1155. def : InstRW<[P9_DPE_24C_8, P9_DPO_24C_8, IP_EXECE_1C, IP_EXECO_1C,
  1156. DISP_1C],
  1157. (instrs
  1158. XVDIVSP
  1159. )>;
  1160. // 33 Cycle DP Vector Instruction. Takes one full superslice.
  1161. // This includes both the EXECE and EXECO pipelines and 1 dispatch for the
  1162. // given superslice.
  1163. def : InstRW<[P9_DPE_33C_8, P9_DPO_33C_8, IP_EXECE_1C, IP_EXECO_1C,
  1164. DISP_1C],
  1165. (instrs
  1166. XVDIVDP
  1167. )>;
  1168. // Instruction cracked into three pieces. One Load and two ALU operations.
  1169. // The Load and one of the ALU ops cannot be run at the same time and so the
  1170. // latencies are added together (P9_LoadAndALU2Op_7C). The remaining ALU is 2 cycles.
  1171. // Both the load and the ALU that depends on it are restricted, so each uses
  1172. // DISP_3SLOTS_1C; the second ALU op uses DISP_1C. NOTE(review): the older totals here (6 cycles, 7 + 2 dispatches) look stale - confirm.
  1173. // The two EXEC pipelines are for the 2 ALUs while the AGEN is for the load.
  1174. def : InstRW<[P9_LoadAndALU2Op_7C, P9_ALU_2C,
  1175. IP_AGEN_1C, IP_EXEC_1C, IP_EXEC_1C,
  1176. DISP_3SLOTS_1C, DISP_3SLOTS_1C, DISP_1C],
  1177. (instrs
  1178. (instregex "LF(SU|SUX)$")
  1179. )>;
  1180. // Cracked instruction made up of a Store and an ALU. The ALU does not depend on
  1181. // the store and so it can be run at the same time as the store. The store is
  1182. // also restricted (DISP_3SLOTS_1C); the ALU uses DISP_1C.
  1183. def : InstRW<[P9_LS_1C, P9_ALU_2C, IP_AGEN_1C, IP_EXEC_1C, IP_EXEC_1C,
  1184. DISP_3SLOTS_1C, DISP_1C],
  1185. (instrs
  1186. (instregex "STF(S|D)U(X)?$"),
  1187. (instregex "ST(B|H|W|D)U(X)?(8)?$")
  1188. )>;
  1189. // Cracked instruction made up of a Load and an ALU. The ALU does not depend on
  1190. // the load and so it can be run at the same time as the load. Both use DISP_PAIR_1C.
  1191. def : InstRW<[P9_LS_4C, P9_ALU_2C, IP_AGEN_1C, IP_EXEC_1C,
  1192. DISP_PAIR_1C, DISP_PAIR_1C],
  1193. (instrs
  1194. (instregex "LBZU(X)?(8)?$"),
  1195. (instregex "LDU(X)?$")
  1196. )>;
  1197. // Cracked instruction made up of a Load and an ALU. The ALU does not depend on
  1198. // the load and so it can be run at the same time as the load. The load is also
  1199. // restricted, so it uses DISP_3SLOTS_1C, while the ALU uses DISP_1C.
  1200. // NOTE(review): this comment used to attribute two dispatches to the ALU - confirm.
  1201. // The AGEN pipeline is from the load and the EXEC pipeline is required for the ALU.
  1202. def : InstRW<[P9_LS_4C, P9_ALU_2C, IP_AGEN_1C, IP_EXEC_1C,
  1203. DISP_3SLOTS_1C, DISP_1C],
  1204. (instrs
  1205. (instregex "LF(DU|DUX)$")
  1206. )>;
  1207. // Crypto Instructions.
  1208. // 6 Cycle CY operation. There is only one CY unit per CPU, so a whole
  1209. // superslice is used. That includes both exec pipelines (EXECO, EXECE) and one
  1210. // dispatch (DISP_1C).
  1211. def : InstRW<[P9_CY_6C, IP_EXECO_1C, IP_EXECE_1C, DISP_1C],
  1212. (instrs
  1213. (instregex "VPMSUM(B|H|W|D)$"),
  1214. (instregex "V(N)?CIPHER(LAST)?$"),
  1215. VSBOX
  1216. )>;
  1217. // Branch Instructions.
  1218. // Two Cycle Branch. Written as P9_BR_2C with a single DISP_BR_1C dispatch entry.
  1219. def : InstRW<[P9_BR_2C, DISP_BR_1C],
  1220. (instrs
  1221. (instregex "BCCCTR(L)?(8)?$"),
  1222. (instregex "BCCL(A|R|RL)?$"),
  1223. (instregex "BCCTR(L)?(8)?(n)?$"),
  1224. (instregex "BD(N)?Z(8|A|Am|Ap|m|p)?$"),
  1225. (instregex "BD(N)?ZL(A|Am|Ap|R|R8|RL|RLm|RLp|Rm|Rp|m|p)?$"),
  1226. (instregex "BL(_TLS|_NOP)?(_RM)?$"),
  1227. (instregex "BL8(_TLS|_NOP|_NOP_TLS|_TLS_)?(_RM)?$"),
  1228. (instregex "BLA(8|8_NOP)?(_RM)?$"),
  1229. (instregex "BLR(8|L)?$"),
  1230. (instregex "TAILB(A)?(8)?$"),
  1231. (instregex "TAILBCTR(8)?$"),
  1232. (instregex "gBC(A|Aat|CTR|CTRL|L|LA|LAat|LR|LRL|Lat|at)?$"),
  1233. (instregex "BCLR(L)?(n)?$"),
  1234. (instregex "BCTR(L)?(8)?(_RM)?$"),
  1235. B,
  1236. BA,
  1237. BC,
  1238. BCC,
  1239. BCCA,
  1240. BCL,
  1241. BCLalways,
  1242. BCLn,
  1243. BCTRL8_LDinto_toc,
  1244. BCTRL_LWZinto_toc,
  1245. BCTRL8_LDinto_toc_RM,
  1246. BCTRL_LWZinto_toc_RM,
  1247. BCn,
  1248. CTRL_DEP
  1249. )>;
  1250. // Five Cycle Branch with a 2 Cycle ALU Op (5 + 2 = 7 cycles).
  1251. // The operations must be done consecutively and not in parallel.
  1252. def : InstRW<[P9_BROpAndALUOp_7C, IP_EXEC_1C, DISP_BR_1C, DISP_1C],
  1253. (instrs
  1254. ADDPCIS
  1255. )>;
  1256. // Special Extracted Instructions For Atomics.
  1257. // Atomic Load (the regex below matches LDAT and LWAT).
  1258. def : InstRW<[P9_LS_1C, P9_LS_1C, P9_LS_4C, P9_LS_4C, P9_LS_4C,
  1259. IP_EXEC_1C, IP_EXEC_1C, IP_AGEN_1C, IP_AGEN_1C, IP_AGEN_1C,
  1260. IP_AGEN_1C, IP_AGEN_1C, DISP_1C, DISP_3SLOTS_1C,
  1261. DISP_3SLOTS_1C, DISP_1C, DISP_1C, DISP_1C],
  1262. (instrs
  1263. (instregex "L(D|W)AT$")
  1264. )>;
  1265. // Atomic Store (the regex below matches STDAT and STWAT).
  1266. def : InstRW<[P9_LS_1C, P9_LS_4C, P9_LS_4C, IP_EXEC_1C, IP_AGEN_1C, IP_AGEN_1C,
  1267. IP_AGEN_1C, DISP_1C, DISP_3SLOTS_1C, DISP_1C],
  1268. (instrs
  1269. (instregex "ST(D|W)AT$")
  1270. )>;
  1271. // Signal Processing Engine (SPE) Instructions.
  1272. // These instructions are not supported on Power 9: no resources are listed and Unsupported is set to 1.
  1273. def : InstRW<[],
  1274. (instrs
  1275. BRINC,
  1276. EVABS,
  1277. EVEQV,
  1278. EVMRA,
  1279. EVNAND,
  1280. EVNEG,
  1281. (instregex "EVADD(I)?W$"),
  1282. (instregex "EVADD(SM|SS|UM|US)IAAW$"),
  1283. (instregex "EVAND(C)?$"),
  1284. (instregex "EVCMP(EQ|GTS|GTU|LTS|LTU)$"),
  1285. (instregex "EVCNTL(S|Z)W$"),
  1286. (instregex "EVDIVW(S|U)$"),
  1287. (instregex "EVEXTS(B|H)$"),
  1288. (instregex "EVLD(H|W|D)(X)?$"),
  1289. (instregex "EVLHH(E|OS|OU)SPLAT(X)?$"),
  1290. (instregex "EVLWHE(X)?$"),
  1291. (instregex "EVLWHO(S|U)(X)?$"),
  1292. (instregex "EVLW(H|W)SPLAT(X)?$"),
  1293. (instregex "EVMERGE(HI|LO|HILO|LOHI)$"),
  1294. (instregex "EVMHEG(S|U)M(F|I)A(A|N)$"),
  1295. (instregex "EVMHES(M|S)(F|I)(A|AA|AAW|ANW)?$"),
  1296. (instregex "EVMHEU(M|S)I(A|AA|AAW|ANW)?$"),
  1297. (instregex "EVMHOG(U|S)M(F|I)A(A|N)$"),
  1298. (instregex "EVMHOS(M|S)(F|I)(A|AA|AAW|ANW)?$"),
  1299. (instregex "EVMHOU(M|S)I(A|AA|ANW|AAW)?$"),
  1300. (instregex "EVMWHS(M|S)(F|FA|I|IA)$"),
  1301. (instregex "EVMWHUMI(A)?$"),
  1302. (instregex "EVMWLS(M|S)IA(A|N)W$"),
  1303. (instregex "EVMWLU(M|S)I(A|AA|AAW|ANW)?$"),
  1304. (instregex "EVMWSM(F|I)(A|AA|AN)?$"),
  1305. (instregex "EVMWSSF(A|AA|AN)?$"),
  1306. (instregex "EVMWUMI(A|AA|AN)?$"),
  1307. (instregex "EV(N|X)?OR(C)?$"),
  1308. (instregex "EVR(LW|LWI|NDW)$"),
  1309. (instregex "EVSLW(I)?$"),
  1310. (instregex "EVSPLAT(F)?I$"),
  1311. (instregex "EVSRW(I)?(S|U)$"),
  1312. (instregex "EVST(DD|DH|DW|WHE|WHO|WWE|WWO)(X)?$"),
  1313. (instregex "EVSUBF(S|U)(M|S)IAAW$"),
  1314. (instregex "EVSUB(I)?FW$")
  1315. )> { let Unsupported = 1; }
  1316. // General Instructions without scheduling support: no resources are listed and Unsupported is set to 1.
  1317. def : InstRW<[],
  1318. (instrs
  1319. (instregex "(H)?RFI(D)?$"),
  1320. (instregex "DSS(ALL)?$"),
  1321. (instregex "DST(ST)?(T)?(64)?$"),
  1322. (instregex "ICBL(C|Q)$"),
  1323. (instregex "L(W|H|B)EPX$"),
  1324. (instregex "ST(W|H|B)EPX$"),
  1325. (instregex "(L|ST)FDEPX$"),
  1326. (instregex "M(T|F)SR(IN)?$"),
  1327. (instregex "M(T|F)DCR$"),
  1328. (instregex "NOP_GT_PWR(6|7)$"),
  1329. (instregex "TLB(IA|IVAX|SX|SX2|SX2D|LD|LI|RE|RE2|WE|WE2)$"),
  1330. (instregex "WRTEE(I)?$"),
  1331. (instregex "HASH(ST|STP|CHK|CHKP)(8)?$"),
  1332. ATTN,
  1333. CLRBHRB,
  1334. MFBHRBE,
  1335. MBAR,
  1336. MSYNC,
  1337. SLBSYNC,
  1338. SLBFEE_rec,
  1339. NAP,
  1340. STOP,
  1341. TRAP,
  1342. RFCI,
  1343. RFDI,
  1344. RFMCI,
  1345. SC,
  1346. DCBA,
  1347. DCBI,
  1348. DCCCI,
  1349. ICCCI,
  1350. ADDEX,
  1351. ADDEX8
  1352. )> { let Unsupported = 1; }