P9InstrResources.td 37 KB


  1. //===- P9InstrResources.td - P9 Instruction Resource Defs -*- tablegen -*-==//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. // This file defines the resources required by P9 instructions. This is part of
  10. // the P9 processor model used for instruction scheduling. This file should
  11. // contain all the instructions that may be used on Power 9. This is not
  12. // just instructions that are new on Power 9 but also instructions that were
  13. // available on earlier architectures and are still used in Power 9.
  14. //
  15. // The makeup of the P9 CPU is modeled as follows:
  16. // - Each CPU is made up of two superslices.
  17. // - Each superslice is made up of two slices. Therefore, there are 4 slices
  18. // for each CPU.
  19. // - Up to 6 instructions can be dispatched to each CPU. Three per superslice.
  20. // - Each CPU has:
  21. // - One CY (Crypto) unit P9_CY_*
  22. // - One DFU (Decimal Floating Point and Quad Precision) unit P9_DFU_*
  23. // - Two PM (Permute) units. One on each superslice. P9_PM_*
  24. // - Two DIV (Fixed Point Divide) units. One on each superslice. P9_DIV_*
  25. // - Four ALU (Fixed Point Arithmetic) units. One on each slice. P9_ALU_*
  26. // - Four DP (Floating Point) units. One on each slice. P9_DP_*
  27. // This also includes fixed point multiply add.
  28. // - Four AGEN (Address Generation) units. One for each slice. P9_AGEN_*
  29. // - Four Load/Store Queues. P9_LS_*
  30. // - Each set of instructions will require a number of these resources.
  31. //===----------------------------------------------------------------------===//
  32. // Two cycle ALU vector operations. Each one occupies an entire superslice:
  33. // both ALU units (even P9_ALUE_2C and odd P9_ALUO_2C), both pipelines
  34. // (IP_EXECE_1C, IP_EXECO_1C) and 1 dispatch (DISP_1C) to that superslice.
  35. def : InstRW<[P9_ALUE_2C, P9_ALUO_2C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
  36. (instrs
  37. (instregex "VADDU(B|H|W|D)M$"),
  38. (instregex "VAND(C)?$"),
  39. (instregex "VEXTS(B|H|W)2(D|W)(s)?$"),
  40. (instregex "V_SET0(B|H)?$"),
  41. (instregex "VS(R|L)(B|H|W|D)$"),
  42. (instregex "VSUBU(B|H|W|D)M$"),
  43. (instregex "VPOPCNT(B|H)$"),
  44. (instregex "VRL(B|H|W|D)$"),
  45. (instregex "VSRA(B|H|W|D)$"),
  46. (instregex "XV(N)?ABS(D|S)P$"),
  47. (instregex "XVCPSGN(D|S)P$"),
  48. (instregex "XV(I|X)EXP(D|S)P$"),
  49. (instregex "VRL(D|W)(MI|NM)$"),
  50. (instregex "VMRG(E|O)W$"),
  51. MTVSRDD,
  52. VEQV,
  53. VNAND,
  54. VNEGD,
  55. VNEGW,
  56. VNOR,
  57. VOR,
  58. VORC,
  59. VSEL,
  60. VXOR,
  61. XVNEGDP,
  62. XVNEGSP,
  63. XXLAND,
  64. XXLANDC,
  65. XXLEQV,
  66. XXLEQVOnes,
  67. XXLNAND,
  68. XXLNOR,
  69. XXLOR,
  70. XXLORf,
  71. XXLORC,
  72. XXLXOR,
  73. XXLXORdpz,
  74. XXLXORspz,
  75. XXLXORz,
  76. XXSEL,
  77. XSABSQP,
  78. XSCPSGNQP,
  79. XSIEXPQP,
  80. XSNABSQP,
  81. XSNEGQP,
  82. XSXEXPQP
  83. )>;
  84. // Restricted dispatch, 3 cycle ALU operations (P9_ALU_3C). Each runs on a
  85. // single slice (IP_EXEC_1C) but, being restricted, takes all 3 dispatches
  86. // (DISP_3SLOTS_1C) of that superslice.
  87. def : InstRW<[P9_ALU_3C, IP_EXEC_1C, DISP_3SLOTS_1C],
  88. (instrs
  89. (instregex "TABORT(D|W)C(I)?$"),
  90. (instregex "MTFSB(0|1)$"),
  91. (instregex "MFFSC(D)?RN(I)?$"),
  92. (instregex "CMPRB(8)?$"),
  93. (instregex "TD(I)?$"),
  94. (instregex "TW(I)?$"),
  95. (instregex "FCMP(O|U)(S|D)$"),
  96. (instregex "XSTSTDC(S|D)P$"),
  97. FTDIV,
  98. FTSQRT,
  99. CMPEQB
  100. )>;
  101. // Standard dispatch, 3 cycle ALU operations. One slice and 1 dispatch each.
  102. def : InstRW<[P9_ALU_3C, IP_EXEC_1C, DISP_1C],
  103. (instrs
  104. (instregex "XSMAX(C|J)?DP$"),
  105. (instregex "XSMIN(C|J)?DP$"),
  106. (instregex "XSCMP(EQ|EXP|GE|GT|O|U)DP$"),
  107. (instregex "CNT(L|T)Z(D|W)(8)?(_rec)?$"),
  108. (instregex "POPCNT(D|W)$"),
  109. (instregex "CMPB(8)?$"),
  110. (instregex "SETB(8)?$"),
  111. XSTDIVDP,
  112. XSTSQRTDP,
  113. XSXSIGDP,
  114. XSCVSPDPN,
  115. BPERMD
  116. )>;
  117. // Standard dispatch, 2 cycle ALU operations. One slice and 1 dispatch each.
  118. def : InstRW<[P9_ALU_2C, IP_EXEC_1C, DISP_1C],
  119. (instrs
  120. (instregex "S(L|R)D$"),
  121. (instregex "SRAD(I)?$"),
  122. (instregex "EXTSWSLI_32_64$"),
  123. (instregex "MFV(S)?RD$"),
  124. (instregex "MTV(S)?RD$"),
  125. (instregex "MTV(S)?RW(A|Z)$"),
  126. (instregex "CMP(WI|LWI|W|LW)(8)?$"),
  127. (instregex "CMP(L)?D(I)?$"),
  128. (instregex "SUBF(I)?C(8)?(O)?$"),
  129. (instregex "ANDI(S)?(8)?(_rec)?$"),
  130. (instregex "ADDC(8)?(O)?$"),
  131. (instregex "ADDIC(8)?(_rec)?$"),
  132. (instregex "ADD(8|4)(O)?(_rec)?$"),
  133. (instregex "ADD(E|ME|ZE)(8)?(O)?(_rec)?$"),
  134. (instregex "SUBF(E|ME|ZE)?(8)?(O)?(_rec)?$"),
  135. (instregex "NEG(8)?(O)?(_rec)?$"),
  136. (instregex "POPCNTB$"),
  137. (instregex "POPCNTB8$"),
  138. (instregex "ADD(I|IS)?(8)?$"),
  139. (instregex "LI(S)?(8)?$"),
  140. (instregex "(X)?OR(I|IS)?(8)?(_rec)?$"),
  141. (instregex "NAND(8)?(_rec)?$"),
  142. (instregex "AND(C)?(8)?(_rec)?$"),
  143. (instregex "NOR(8)?(_rec)?$"),
  144. (instregex "OR(C)?(8)?(_rec)?$"),
  145. (instregex "EQV(8)?(_rec)?$"),
  146. (instregex "EXTS(B|H|W)(8)?(_32)?(_64)?(_rec)?$"),
  147. (instregex "ADD(4|8)(TLS)?(_)?$"),
  148. (instregex "NEG(8)?(O)?$"),
  149. (instregex "ADDI(S)?toc(HA|L)(8)?$"),
  150. (instregex "LA(8)?$"),
  151. COPY,
  152. MCRF,
  153. MCRXRX,
  154. XSNABSDP,
  155. XSNABSDPs,
  156. XSXEXPDP,
  157. XSABSDP,
  158. XSNEGDP,
  159. XSCPSGNDP,
  160. MFVSRWZ,
  161. MFVRWZ,
  162. EXTSWSLI,
  163. SRADI_32,
  164. RLDIC,
  165. RFEBB,
  166. TBEGIN,
  167. TRECHKPT,
  168. NOP,
  169. WAIT
  170. )>;
  171. // Restricted dispatch, 2 cycle ALU operations (P9_ALU_2C). Each runs on a
  172. // single slice (IP_EXEC_1C) but, being restricted, takes all 3 dispatches
  173. // (DISP_3SLOTS_1C) of that superslice.
  174. def : InstRW<[P9_ALU_2C, IP_EXEC_1C, DISP_3SLOTS_1C],
  175. (instrs
  176. (instregex "RLDC(L|R)$"),
  177. (instregex "RLWIMI(8)?$"),
  178. (instregex "RLDIC(L|R)(_32)?(_64)?$"),
  179. (instregex "M(F|T)OCRF(8)?$"),
  180. (instregex "CR(6)?(UN)?SET$"),
  181. (instregex "CR(N)?(OR|AND)(C)?$"),
  182. (instregex "S(L|R)W(8)?$"),
  183. (instregex "RLW(INM|NM)(8)?$"),
  184. (instregex "F(N)?ABS(D|S)$"),
  185. (instregex "FNEG(D|S)$"),
  186. (instregex "FCPSGN(D|S)$"),
  187. (instregex "SRAW(I)?$"),
  188. (instregex "ISEL(8)?$"),
  189. RLDIMI,
  190. XSIEXPDP,
  191. FMR,
  192. CREQV,
  193. CRNOT,
  194. CRXOR,
  195. TRECLAIM,
  196. TSR,
  197. TABORT
  198. )>;
  199. // Three cycle ALU vector operations. Each one occupies an entire superslice:
  200. // both ALU units (even P9_ALUE_3C and odd P9_ALUO_3C), both pipelines
  201. // (IP_EXECE_1C, IP_EXECO_1C) and 1 dispatch (DISP_1C) to that superslice.
  202. def : InstRW<[P9_ALUE_3C, P9_ALUO_3C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
  203. (instrs
  204. (instregex "M(T|F)VSCR$"),
  205. (instregex "VCMPNEZ(B|H|W)$"),
  206. (instregex "VCMPEQU(B|H|W|D)$"),
  207. (instregex "VCMPNE(B|H|W)$"),
  208. (instregex "VABSDU(B|H|W)$"),
  209. (instregex "VADDU(B|H|W)S$"),
  210. (instregex "VAVG(S|U)(B|H|W)$"),
  211. (instregex "VCMP(EQ|GE|GT)FP(_rec)?$"),
  212. (instregex "VCMPBFP(_rec)?$"),
  213. (instregex "VC(L|T)Z(B|H|W|D)$"),
  214. (instregex "VADDS(B|H|W)S$"),
  215. (instregex "V(MIN|MAX)FP$"),
  216. (instregex "V(MIN|MAX)(S|U)(B|H|W|D)$"),
  217. VBPERMD,
  218. VADDCUW,
  219. VPOPCNTW,
  220. VPOPCNTD,
  221. VPRTYBD,
  222. VPRTYBW,
  223. VSHASIGMAD,
  224. VSHASIGMAW,
  225. VSUBSBS,
  226. VSUBSHS,
  227. VSUBSWS,
  228. VSUBUBS,
  229. VSUBUHS,
  230. VSUBUWS,
  231. VSUBCUW,
  232. VCMPGTSB,
  233. VCMPGTSB_rec,
  234. VCMPGTSD,
  235. VCMPGTSD_rec,
  236. VCMPGTSH,
  237. VCMPGTSH_rec,
  238. VCMPGTSW,
  239. VCMPGTSW_rec,
  240. VCMPGTUB,
  241. VCMPGTUB_rec,
  242. VCMPGTUD,
  243. VCMPGTUD_rec,
  244. VCMPGTUH,
  245. VCMPGTUH_rec,
  246. VCMPGTUW,
  247. VCMPGTUW_rec,
  248. VCMPNEB_rec,
  249. VCMPNEH_rec,
  250. VCMPNEW_rec,
  251. VCMPNEZB_rec,
  252. VCMPNEZH_rec,
  253. VCMPNEZW_rec,
  254. VCMPEQUB_rec,
  255. VCMPEQUD_rec,
  256. VCMPEQUH_rec,
  257. VCMPEQUW_rec,
  258. XVCMPEQDP,
  259. XVCMPEQDP_rec,
  260. XVCMPEQSP,
  261. XVCMPEQSP_rec,
  262. XVCMPGEDP,
  263. XVCMPGEDP_rec,
  264. XVCMPGESP,
  265. XVCMPGESP_rec,
  266. XVCMPGTDP,
  267. XVCMPGTDP_rec,
  268. XVCMPGTSP,
  269. XVCMPGTSP_rec,
  270. XVMAXDP,
  271. XVMAXSP,
  272. XVMINDP,
  273. XVMINSP,
  274. XVTDIVDP,
  275. XVTDIVSP,
  276. XVTSQRTDP,
  277. XVTSQRTSP,
  278. XVTSTDCDP,
  279. XVTSTDCSP,
  280. XVXSIGDP,
  281. XVXSIGSP
  282. )>;
  283. // 7 cycle DP vector operation that uses an entire superslice.
  284. // Uses both DP units (the even DPE and odd DPO units), two pipelines (EXECE,
  285. // EXECO) and 1 dispatch (DISP_1C) to the given superslice.
  286. def : InstRW<[P9_DPE_7C, P9_DPO_7C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
  287. (instrs
  288. VADDFP,
  289. VCTSXS,
  290. VCTSXS_0,
  291. VCTUXS,
  292. VCTUXS_0,
  293. VEXPTEFP,
  294. VLOGEFP,
  295. VMADDFP,
  296. VMHADDSHS,
  297. VNMSUBFP,
  298. VREFP,
  299. VRFIM,
  300. VRFIN,
  301. VRFIP,
  302. VRFIZ,
  303. VRSQRTEFP,
  304. VSUBFP,
  305. XVADDDP,
  306. XVADDSP,
  307. XVCVDPSP,
  308. XVCVDPSXDS,
  309. XVCVDPSXWS,
  310. XVCVDPUXDS,
  311. XVCVDPUXWS,
  312. XVCVHPSP,
  313. XVCVSPDP,
  314. XVCVSPHP,
  315. XVCVSPSXDS,
  316. XVCVSPSXWS,
  317. XVCVSPUXDS,
  318. XVCVSPUXWS,
  319. XVCVSXDDP,
  320. XVCVSXDSP,
  321. XVCVSXWDP,
  322. XVCVSXWSP,
  323. XVCVUXDDP,
  324. XVCVUXDSP,
  325. XVCVUXWDP,
  326. XVCVUXWSP,
  327. XVMADDADP,
  328. XVMADDASP,
  329. XVMADDMDP,
  330. XVMADDMSP,
  331. XVMSUBADP,
  332. XVMSUBASP,
  333. XVMSUBMDP,
  334. XVMSUBMSP,
  335. XVMULDP,
  336. XVMULSP,
  337. XVNMADDADP,
  338. XVNMADDASP,
  339. XVNMADDMDP,
  340. XVNMADDMSP,
  341. XVNMSUBADP,
  342. XVNMSUBASP,
  343. XVNMSUBMDP,
  344. XVNMSUBMSP,
  345. XVRDPI,
  346. XVRDPIC,
  347. XVRDPIM,
  348. XVRDPIP,
  349. XVRDPIZ,
  350. XVREDP,
  351. XVRESP,
  352. XVRSPI,
  353. XVRSPIC,
  354. XVRSPIM,
  355. XVRSPIP,
  356. XVRSPIZ,
  357. XVRSQRTEDP,
  358. XVRSQRTESP,
  359. XVSUBDP,
  360. XVSUBSP,
  361. VCFSX,
  362. VCFSX_0,
  363. VCFUX,
  364. VCFUX_0,
  365. VMHRADDSHS,
  366. VMLADDUHM,
  367. VMSUMMBM,
  368. VMSUMSHM,
  369. VMSUMSHS,
  370. VMSUMUBM,
  371. VMSUMUHM,
  372. VMSUMUDM,
  373. VMSUMUHS,
  374. VMULESB,
  375. VMULESH,
  376. VMULESW,
  377. VMULEUB,
  378. VMULEUH,
  379. VMULEUW,
  380. VMULOSB,
  381. VMULOSH,
  382. VMULOSW,
  383. VMULOUB,
  384. VMULOUH,
  385. VMULOUW,
  386. VMULUWM,
  387. VSUM2SWS,
  388. VSUM4SBS,
  389. VSUM4SHS,
  390. VSUM4UBS,
  391. VSUMSWS
  392. )>;
  393. // 5 cycle Restricted DP operation. One DP unit (P9_DP_5C), one EXEC pipeline
  394. // and all three dispatches (DISP_3SLOTS_1C) for the superslice.
  395. def : InstRW<[P9_DP_5C, IP_EXEC_1C, DISP_3SLOTS_1C],
  396. (instrs
  397. (instregex "MADD(HD|HDU|LD|LD8)$"),
  398. (instregex "MUL(HD|HW|LD|LI|LI8|LW)(U)?(O)?$")
  399. )>;
  400. // 7 cycle Restricted DP operation. One DP unit (P9_DP_7C), one EXEC pipeline
  401. // and all three dispatches (DISP_3SLOTS_1C) for the superslice.
  402. def : InstRW<[P9_DP_7C, IP_EXEC_1C, DISP_3SLOTS_1C],
  403. (instrs
  404. FRSP,
  405. (instregex "FRI(N|P|Z|M)(D|S)$"),
  406. (instregex "FRE(S)?$"),
  407. (instregex "FADD(S)?$"),
  408. (instregex "FMSUB(S)?$"),
  409. (instregex "FMADD(S)?$"),
  410. (instregex "FSUB(S)?$"),
  411. (instregex "FCFID(U)?(S)?$"),
  412. (instregex "FCTID(U)?(Z)?$"),
  413. (instregex "FCTIW(U)?(Z)?$"),
  414. (instregex "FRSQRTE(S)?$"),
  415. FNMADDS,
  416. FNMADD,
  417. FNMSUBS,
  418. FNMSUB,
  419. FSELD,
  420. FSELS,
  421. FMULS,
  422. FMUL,
  423. XSMADDADP,
  424. XSMADDASP,
  425. XSMADDMDP,
  426. XSMADDMSP,
  427. XSMSUBADP,
  428. XSMSUBASP,
  429. XSMSUBMDP,
  430. XSMSUBMSP,
  431. XSMULDP,
  432. XSMULSP,
  433. XSNMADDADP,
  434. XSNMADDASP,
  435. XSNMADDMDP,
  436. XSNMADDMSP,
  437. XSNMSUBADP,
  438. XSNMSUBASP,
  439. XSNMSUBMDP,
  440. XSNMSUBMSP
  441. )>;
  442. // 7 cycle Restricted DP operation and one 3 cycle ALU operation.
  443. // These operations can be done in parallel. The DP is restricted so we need a
  444. // full 4 dispatches: 3 for the DP op (DISP_3SLOTS_1C), 1 for the ALU op.
  445. def : InstRW<[P9_DP_7C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C,
  446. DISP_3SLOTS_1C, DISP_1C],
  447. (instrs
  448. (instregex "FSEL(D|S)_rec$")
  449. )>;
  450. // 5 Cycle Restricted DP operation and one 2 cycle ALU operation (4 dispatches).
  451. def : InstRW<[P9_DPOpAndALUOp_7C, IP_EXEC_1C, IP_EXEC_1C,
  452. DISP_3SLOTS_1C, DISP_1C],
  453. (instrs
  454. (instregex "MUL(H|L)(D|W)(U)?(O)?_rec$")
  455. )>;
  456. // 7 cycle Restricted DP operation and one 3 cycle ALU operation.
  457. // These operations must be done sequentially. The DP is restricted so we need
  458. // a full 4 dispatches.
  459. def : InstRW<[P9_DPOpAndALU2Op_10C, IP_EXEC_1C, IP_EXEC_1C,
  460. DISP_3SLOTS_1C, DISP_1C],
  461. (instrs
  462. (instregex "FRI(N|P|Z|M)(D|S)_rec$"),
  463. (instregex "FRE(S)?_rec$"),
  464. (instregex "FADD(S)?_rec$"),
  465. (instregex "FSUB(S)?_rec$"),
  466. (instregex "F(N)?MSUB(S)?_rec$"),
  467. (instregex "F(N)?MADD(S)?_rec$"),
  468. (instregex "FCFID(U)?(S)?_rec$"),
  469. (instregex "FCTID(U)?(Z)?_rec$"),
  470. (instregex "FCTIW(U)?(Z)?_rec$"),
  471. (instregex "FMUL(S)?_rec$"),
  472. (instregex "FRSQRTE(S)?_rec$"),
  473. FRSP_rec
  474. )>;
  475. // 7 cycle DP operation. One DP unit, one EXEC pipeline and 1 dispatch.
  476. def : InstRW<[P9_DP_7C, IP_EXEC_1C, DISP_1C],
  477. (instrs
  478. XSADDDP,
  479. XSADDSP,
  480. XSCVDPHP,
  481. XSCVDPSP,
  482. XSCVDPSXDS,
  483. XSCVDPSXDSs,
  484. XSCVDPSXWS,
  485. XSCVDPUXDS,
  486. XSCVDPUXDSs,
  487. XSCVDPUXWS,
  488. XSCVDPSXWSs,
  489. XSCVDPUXWSs,
  490. XSCVHPDP,
  491. XSCVSPDP,
  492. XSCVSXDDP,
  493. XSCVSXDSP,
  494. XSCVUXDDP,
  495. XSCVUXDSP,
  496. XSRDPI,
  497. XSRDPIC,
  498. XSRDPIM,
  499. XSRDPIP,
  500. XSRDPIZ,
  501. XSREDP,
  502. XSRESP,
  503. XSRSQRTEDP,
  504. XSRSQRTESP,
  505. XSSUBDP,
  506. XSSUBSP,
  507. XSCVDPSPN,
  508. XSRSP
  509. )>;
  510. // Three Cycle PM operation. Only one PM unit per superslice so we use the whole
  511. // superslice. That includes both exec pipelines (EXECO, EXECE) and one
  512. // dispatch (DISP_1C).
  513. def : InstRW<[P9_PM_3C, IP_EXECO_1C, IP_EXECE_1C, DISP_1C],
  514. (instrs
  515. (instregex "LVS(L|R)$"),
  516. (instregex "VSPLTIS(W|H|B)$"),
  517. (instregex "VSPLT(W|H|B)(s)?$"),
  518. (instregex "V_SETALLONES(B|H)?$"),
  519. (instregex "VEXTRACTU(B|H|W)$"),
  520. (instregex "VINSERT(B|H|W|D)$"),
  521. MFVSRLD,
  522. MTVSRWS,
  523. VBPERMQ,
  524. VCLZLSBB,
  525. VCTZLSBB,
  526. VEXTRACTD,
  527. VEXTUBLX,
  528. VEXTUBRX,
  529. VEXTUHLX,
  530. VEXTUHRX,
  531. VEXTUWLX,
  532. VEXTUWRX,
  533. VGBBD,
  534. VMRGHB,
  535. VMRGHH,
  536. VMRGHW,
  537. VMRGLB,
  538. VMRGLH,
  539. VMRGLW,
  540. VPERM,
  541. VPERMR,
  542. VPERMXOR,
  543. VPKPX,
  544. VPKSDSS,
  545. VPKSDUS,
  546. VPKSHSS,
  547. VPKSHUS,
  548. VPKSWSS,
  549. VPKSWUS,
  550. VPKUDUM,
  551. VPKUDUS,
  552. VPKUHUM,
  553. VPKUHUS,
  554. VPKUWUM,
  555. VPKUWUS,
  556. VPRTYBQ,
  557. VSL,
  558. VSLDOI,
  559. VSLO,
  560. VSLV,
  561. VSR,
  562. VSRO,
  563. VSRV,
  564. VUPKHPX,
  565. VUPKHSB,
  566. VUPKHSH,
  567. VUPKHSW,
  568. VUPKLPX,
  569. VUPKLSB,
  570. VUPKLSH,
  571. VUPKLSW,
  572. XXBRD,
  573. XXBRH,
  574. XXBRQ,
  575. XXBRW,
  576. XXEXTRACTUW,
  577. XXINSERTW,
  578. XXMRGHW,
  579. XXMRGLW,
  580. XXPERM,
  581. XXPERMR,
  582. XXSLDWI,
  583. XXSLDWIs,
  584. XXSPLTIB,
  585. XXSPLTW,
  586. XXSPLTWs,
  587. XXPERMDI,
  588. XXPERMDIs,
  589. VADDCUQ,
  590. VADDECUQ,
  591. VADDEUQM,
  592. VADDUQM,
  593. VMUL10CUQ,
  594. VMUL10ECUQ,
  595. VMUL10EUQ,
  596. VMUL10UQ,
  597. VSUBCUQ,
  598. VSUBECUQ,
  599. VSUBEUQM,
  600. VSUBUQM,
  601. XSCMPEXPQP,
  602. XSCMPOQP,
  603. XSCMPUQP,
  604. XSTSTDCQP,
  605. XSXSIGQP,
  606. BCDCFN_rec,
  607. BCDCFZ_rec,
  608. BCDCPSGN_rec,
  609. BCDCTN_rec,
  610. BCDCTZ_rec,
  611. BCDSETSGN_rec,
  612. BCDS_rec,
  613. BCDTRUNC_rec,
  614. BCDUS_rec,
  615. BCDUTRUNC_rec,
  616. BCDADD_rec,
  617. BCDSUB_rec
  618. )>;
  619. // 12 Cycle DFU operation. Only one DFU unit per CPU so we use a whole
  620. // superslice. That includes both exec pipelines (EXECO, EXECE) and one
  621. // dispatch (DISP_1C).
  622. def : InstRW<[P9_DFU_12C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
  623. (instrs
  624. BCDSR_rec,
  625. XSADDQP,
  626. XSADDQPO,
  627. XSCVDPQP,
  628. XSCVQPDP,
  629. XSCVQPDPO,
  630. XSCVQPSDZ,
  631. XSCVQPSWZ,
  632. XSCVQPUDZ,
  633. XSCVQPUWZ,
  634. XSCVSDQP,
  635. XSCVUDQP,
  636. XSRQPI,
  637. XSRQPIX,
  638. XSRQPXP,
  639. XSSUBQP,
  640. XSSUBQPO
  641. )>;
  642. // 23 Cycle DFU operation. Only one DFU unit per CPU so we use a whole
  643. // superslice. That includes both exec pipelines (EXECO, EXECE) and one
  644. // dispatch (DISP_1C).
  645. def : InstRW<[P9_DFU_23C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
  646. (instrs
  647. BCDCTSQ_rec
  648. )>;
  649. // 24 Cycle DFU operation. Only one DFU unit per CPU so we use a whole
  650. // superslice. That includes both exec pipelines (EXECO, EXECE) and one
  651. // dispatch (DISP_1C).
  652. def : InstRW<[P9_DFU_24C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
  653. (instrs
  654. XSMADDQP,
  655. XSMADDQPO,
  656. XSMSUBQP,
  657. XSMSUBQPO,
  658. XSMULQP,
  659. XSMULQPO,
  660. XSNMADDQP,
  661. XSNMADDQPO,
  662. XSNMSUBQP,
  663. XSNMSUBQPO
  664. )>;
  665. // 37 Cycle DFU operation. Only one DFU unit per CPU so we use a whole
  666. // superslice. That includes both exec pipelines (EXECO, EXECE) and one
  667. // dispatch (DISP_1C).
  668. def : InstRW<[P9_DFU_37C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
  669. (instrs
  670. BCDCFSQ_rec
  671. )>;
  672. // 58 Cycle DFU operation. Only one DFU unit per CPU so we use a whole
  673. // superslice. That includes both exec pipelines (EXECO, EXECE) and one
  674. // dispatch (DISP_1C).
  675. def : InstRW<[P9_DFU_58C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
  676. (instrs
  677. XSDIVQP,
  678. XSDIVQPO
  679. )>;
  680. // 76 Cycle DFU operation. Only one DFU unit per CPU so we use a whole
  681. // superslice. That includes both exec pipelines (EXECO, EXECE) and one
  682. // dispatch (DISP_1C).
  683. def : InstRW<[P9_DFU_76C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
  684. (instrs
  685. XSSQRTQP,
  686. XSSQRTQPO
  687. )>;
  688. // 6 Cycle Load (P9_LS_6C). Uses a single slice: one AGEN pipeline, 1 dispatch.
  689. def : InstRW<[P9_LS_6C, IP_AGEN_1C, DISP_1C],
  690. (instrs
  691. (instregex "LXVL(L)?")
  692. )>;
  693. // 5 Cycle Load (P9_LS_5C). Uses a single slice: one AGEN pipeline, 1 dispatch.
  694. def : InstRW<[P9_LS_5C, IP_AGEN_1C, DISP_1C],
  695. (instrs
  696. (instregex "LVE(B|H|W)X$"),
  697. (instregex "LVX(L)?"),
  698. (instregex "LXSI(B|H)ZX$"),
  699. LXSDX,
  700. LXVB16X,
  701. LXVD2X,
  702. LXVWSX,
  703. LXSIWZX,
  704. LXV,
  705. LXVX,
  706. LXSD,
  707. DFLOADf64,
  708. XFLOADf64,
  709. LIWZX
  710. )>;
  711. // 4 Cycle LS operation (loads, cache block and sync ops). Uses a single slice.
  712. def : InstRW<[P9_LS_4C, IP_AGEN_1C, DISP_1C],
  713. (instrs
  714. (instregex "DCB(F|T|ST)(EP)?$"),
  715. (instregex "DCBZ(L)?(EP)?$"),
  716. (instregex "DCBTST(EP)?$"),
  717. (instregex "CP_COPY(8)?$"),
  718. (instregex "ICBI(EP)?$"),
  719. (instregex "ICBT(LS)?$"),
  720. (instregex "LBARX(L)?$"),
  721. (instregex "LBZ(CIX|8|X|X8|XTLS|XTLS_32)?(_)?$"),
  722. (instregex "LD(ARX|ARXL|BRX|CIX|X|XTLS)?(_)?$"),
  723. (instregex "LH(A|B)RX(L)?(8)?$"),
  724. (instregex "LHZ(8|CIX|X|X8|XTLS|XTLS_32)?(_)?$"),
  725. (instregex "LWARX(L)?$"),
  726. (instregex "LWBRX(8)?$"),
  727. (instregex "LWZ(8|CIX|X|X8|XTLS|XTLS_32)?(_)?$"),
  728. CP_ABORT,
  729. DARN,
  730. EnforceIEIO,
  731. ISYNC,
  732. MSGSYNC,
  733. TLBSYNC,
  734. SYNC,
  735. LMW,
  736. LSWI
  737. )>;
  738. // 4 Cycle Restricted load. Uses a single slice but takes the dispatch for the
  739. // whole superslice (DISP_3SLOTS_1C).
  740. def : InstRW<[P9_LS_4C, IP_AGEN_1C, DISP_3SLOTS_1C],
  741. (instrs
  742. LFIWZX,
  743. LFDX,
  744. LFD
  745. )>;
  746. // Cracked Load Instructions.
  747. // Two 4 cycle load pieces (P9_LS_4C) that can be done in parallel.
  748. def : InstRW<[P9_LS_4C, P9_LS_4C, IP_AGEN_1C, IP_AGEN_1C,
  749. DISP_PAIR_1C],
  750. (instrs
  751. SLBIA,
  752. SLBIE,
  753. SLBMFEE,
  754. SLBMFEV,
  755. SLBMTE,
  756. TLBIEL
  757. )>;
  758. // Cracked Load Instruction.
  759. // Requires a 4 cycle Load piece and a 2 cycle ALU piece. The Load and ALU
  760. // operations can be run in parallel.
  761. def : InstRW<[P9_LS_4C, P9_ALU_2C, IP_EXEC_1C, IP_AGEN_1C,
  762. DISP_PAIR_1C, DISP_PAIR_1C],
  763. (instrs
  764. (instregex "L(W|H)ZU(X)?(8)?$")
  765. )>;
  766. // Cracked TEND Instruction.
  767. // Requires a 4 cycle Load piece and a 2 cycle ALU piece. The Load and ALU
  768. // operations can be run in parallel.
  769. def : InstRW<[P9_LS_4C, P9_ALU_2C, IP_EXEC_1C, IP_AGEN_1C,
  770. DISP_1C, DISP_1C],
  771. (instrs
  772. TEND
  773. )>;
  774. // Cracked Store Instruction
  775. // Consecutive Store and ALU instructions. The store is restricted and requires
  776. // three dispatches (DISP_3SLOTS_1C); the ALU piece takes one more (DISP_1C).
  777. def : InstRW<[P9_StoreAndALUOp_3C, IP_EXEC_1C, IP_EXEC_1C, IP_AGEN_1C,
  778. DISP_3SLOTS_1C, DISP_1C],
  779. (instrs
  780. (instregex "ST(B|H|W|D)CX$")
  781. )>;
  782. // Cracked Load instruction.
  783. // Requires consecutive Load and ALU pieces totaling 6 cycles. The Load and ALU
  784. // operations cannot be done at the same time, so their latencies are added.
  785. def : InstRW<[P9_LoadAndALUOp_6C, IP_EXEC_1C, IP_AGEN_1C,
  786. DISP_1C, DISP_1C],
  787. (instrs
  788. (instregex "LHA(X)?(8)?$"),
  789. (instregex "CP_PASTE(8)?_rec$"),
  790. (instregex "LWA(X)?(_32)?$"),
  791. TCHECK
  792. )>;
  793. // Cracked Restricted Load instruction.
  794. // Requires consecutive Load and ALU pieces totaling 6 cycles. The Load and ALU
  795. // operations cannot be done at the same time, so their latencies are added.
  796. // Full 6 dispatches (2 x DISP_3SLOTS_1C): this is both cracked and restricted.
  797. def : InstRW<[P9_LoadAndALUOp_6C, IP_EXEC_1C, IP_AGEN_1C,
  798. DISP_3SLOTS_1C, DISP_3SLOTS_1C],
  799. (instrs
  800. LFIWAX
  801. )>;
  802. // Cracked Load instruction.
  803. // Requires consecutive Load and ALU pieces totaling 7 cycles. The Load and ALU
  804. // operations cannot be done at the same time and so their latencies are added.
  805. // Two dispatches (DISP_1C each) are required as this is a cracked instruction.
  806. def : InstRW<[P9_LoadAndALUOp_7C, IP_AGEN_1C, IP_EXEC_1C, DISP_1C, DISP_1C],
  807. (instrs
  808. LXSIWAX,
  809. LIWAX
  810. )>;
  811. // Cracked Load instruction.
  812. // Requires consecutive Load (4 cycles) and ALU (3 cycles) pieces totaling 7
  813. // cycles. The Load and ALU operations cannot be done at the same time and so
  814. // their latencies are added.
  815. // Full 6 dispatches (2 x DISP_3SLOTS_1C): this is a restricted instruction.
  816. def : InstRW<[P9_LoadAndALU2Op_7C, IP_AGEN_1C, IP_EXEC_1C,
  817. DISP_3SLOTS_1C, DISP_3SLOTS_1C],
  818. (instrs
  819. LFSX,
  820. LFS
  821. )>;
  822. // Cracked Load instruction.
  823. // Requires consecutive Load and ALU pieces totaling 8 cycles. The Load and ALU
  824. // operations cannot be done at the same time and so their latencies are added.
  825. // Two dispatches (DISP_1C each) are required as this is a cracked instruction.
  826. def : InstRW<[P9_LoadAndALU2Op_8C, IP_AGEN_1C, IP_EXEC_1C, DISP_1C, DISP_1C],
  827. (instrs
  828. LXSSP,
  829. LXSSPX,
  830. XFLOADf32,
  831. DFLOADf32
  832. )>;
  833. // Cracked 3-Way Load Instruction
  834. // Load with two ALU operations that depend on each other (three dispatch writes).
  835. def : InstRW<[P9_LoadAndALUOp_6C, P9_ALU_2C, IP_AGEN_1C, IP_EXEC_1C, IP_EXEC_1C,
  836. DISP_PAIR_1C, DISP_PAIR_1C, DISP_1C],
  837. (instrs
  838. (instregex "LHAU(X)?(8)?$"),
  839. LWAUX
  840. )>;
  841. // Cracked Load that requires the PM resource.
  842. // Since the Load and the PM cannot be done at the same time the latencies are
  843. // added, giving 8 cycles. Since the PM requires the full superslice we need
  844. // both EXECE, EXECO pipelines as well as 1 dispatch for the PM. The Load
  845. // requires the remaining 1 dispatch (two DISP_1C in total).
  846. def : InstRW<[P9_LoadAndPMOp_8C, IP_AGEN_1C, IP_EXECE_1C, IP_EXECO_1C,
  847. DISP_1C, DISP_1C],
  848. (instrs
  849. LXVH8X,
  850. LXVDSX,
  851. LXVW4X
  852. )>;
  853. // Single slice Restricted store operation (P9_LS_1C, one EXEC and one AGEN
  854. // pipeline). Being restricted, it takes all three dispatches of the superslice.
  855. def : InstRW<[P9_LS_1C, IP_EXEC_1C, IP_AGEN_1C, DISP_3SLOTS_1C],
  856. (instrs
  857. (instregex "STF(S|D|IWX|SX|DX)$"),
  858. (instregex "STXS(D|DX|SPX|IWX|IBX|IHX|SP)(v)?$"),
  859. (instregex "STW(8)?$"),
  860. (instregex "(D|X)FSTORE(f32|f64)$"),
  861. (instregex "ST(W|H|D)BRX$"),
  862. (instregex "ST(B|H|D)(8)?$"),
  863. (instregex "ST(B|W|H|D)(CI)?X(TLS|TLS_32)?(8)?(_)?$"),
  864. STIWX,
  865. SLBIEG,
  866. STMW,
  867. STSWI,
  868. TLBIE
  869. )>;
  870. // Vector Store Instruction
  871. // Requires the whole superslice: one dispatch (DISP_1C), both the Even and
  872. // Odd exec pipelines, and one AGEN pipeline.
  873. def : InstRW<[P9_LS_1C, IP_EXECE_1C, IP_EXECO_1C, IP_AGEN_1C, DISP_1C],
  874. (instrs
  875. (instregex "STVE(B|H|W)X$"),
  876. (instregex "STVX(L)?$"),
  877. (instregex "STXV(B16X|H8X|W4X|D2X|L|LL|X)?$")
  878. )>;
  879. // 5 Cycle DIV operation. Only one DIV unit per superslice so we use the whole
  880. // superslice. That includes both exec pipelines (EXECO, EXECE) and two
  881. // dispatches (modeled as DISP_EVEN_1C).
  882. def : InstRW<[P9_DIV_5C, IP_EXECE_1C, IP_EXECO_1C, DISP_EVEN_1C],
  883. (instrs
  884. (instregex "MTCTR(8)?(loop)?$"),
  885. (instregex "MTLR(8)?$")
  886. )>;
  887. // 12 Cycle DIV operation. Only one DIV unit per superslice so we use the whole
  888. // superslice. That includes both exec pipelines (EXECO, EXECE) and two
  889. // dispatches (modeled as DISP_EVEN_1C).
  890. def : InstRW<[P9_DIV_12C, IP_EXECE_1C, IP_EXECO_1C, DISP_EVEN_1C],
  891. (instrs
  892. (instregex "M(T|F)VRSAVE(v)?$"),
  893. (instregex "M(T|F)PMR$"),
  894. (instregex "M(T|F)TB(8)?$"),
  895. (instregex "MF(SPR|CTR|LR)(8)?$"),
  896. (instregex "M(T|F)MSR(D)?$"),
  897. (instregex "M(T|F)(U)?DSCR$"),
  898. (instregex "MTSPR(8)?$")
  899. )>;
  900. // 16 Cycle DIV operation. Only one DIV unit per superslice so we use the whole
  901. // superslice. That includes both exec pipelines (EXECO, EXECE) and two
  902. // dispatches (modeled as DISP_EVEN_1C).
  903. def : InstRW<[P9_DIV_16C_8, IP_EXECO_1C, IP_EXECE_1C, DISP_EVEN_1C],
  904. (instrs
  905. DIVW,
  906. DIVWO,
  907. DIVWU,
  908. DIVWUO,
  909. MODSW
  910. )>;
  911. // 24 Cycle DIV operation. Only one DIV unit per superslice so we use the whole
  912. // superslice. That includes both exec pipelines (EXECO, EXECE) and two
  913. // dispatches (modeled as DISP_EVEN_1C).
  914. def : InstRW<[P9_DIV_24C_8, IP_EXECO_1C, IP_EXECE_1C, DISP_EVEN_1C],
  915. (instrs
  916. DIVWE,
  917. DIVWEO,
  918. DIVD,
  919. DIVDO,
  920. DIVWEU,
  921. DIVWEUO,
  922. DIVDU,
  923. DIVDUO,
  924. MODSD,
  925. MODUD,
  926. MODUW
  927. )>;
  928. // 40 Cycle DIV operation. Only one DIV unit per superslice so we use the whole
  929. // superslice. That includes both exec pipelines (EXECO, EXECE) and two
  930. // dispatches (modeled as DISP_EVEN_1C, like the other DIV operations).
  931. def : InstRW<[P9_DIV_40C_8, IP_EXECO_1C, IP_EXECE_1C, DISP_EVEN_1C],
  932. (instrs
  933. DIVDE,
  934. DIVDEO,
  935. DIVDEU,
  936. DIVDEUO
  937. )>;
  938. // Cracked DIV and ALU operation. Requires one full slice for the ALU operation
  939. // and one full superslice for the DIV operation since there is only one DIV per
  940. // superslice. Latency of DIV plus ALU is 18.
  941. def : InstRW<[P9_IntDivAndALUOp_18C_8, IP_EXECE_1C, IP_EXECO_1C, IP_EXEC_1C,
  942. DISP_EVEN_1C, DISP_1C],
  943. (instrs
  944. (instregex "DIVW(U)?(O)?_rec$")
  945. )>;
  946. // Cracked DIV and ALU operation. Requires one full slice for the ALU operation
  947. // and one full superslice for the DIV operation since there is only one DIV per
  948. // superslice. Latency of DIV plus ALU is 26 (P9_IntDivAndALUOp_26C_8).
  949. def : InstRW<[P9_IntDivAndALUOp_26C_8, IP_EXECE_1C, IP_EXECO_1C, IP_EXEC_1C,
  950. DISP_EVEN_1C, DISP_1C],
  951. (instrs
  952. DIVD_rec,
  953. DIVDO_rec,
  954. DIVDU_rec,
  955. DIVDUO_rec,
  956. DIVWE_rec,
  957. DIVWEO_rec,
  958. DIVWEU_rec,
  959. DIVWEUO_rec
  960. )>;
  961. // Cracked DIV and ALU operation. Requires one full slice for the ALU operation
  962. // and one full superslice for the DIV operation since there is only one DIV per
  963. // superslice. Latency of DIV plus ALU is 42 (P9_IntDivAndALUOp_42C_8).
  964. def : InstRW<[P9_IntDivAndALUOp_42C_8, IP_EXECE_1C, IP_EXECO_1C, IP_EXEC_1C,
  965. DISP_EVEN_1C, DISP_1C],
  966. (instrs
  967. DIVDE_rec,
  968. DIVDEO_rec,
  969. DIVDEU_rec,
  970. DIVDEUO_rec
  971. )>;
  972. // CR access instructions in _BrMCR, IIC_BrMCRX.
  973. // Cracked, restricted, ALU operations.
  974. // Here the two ALU ops can actually be done in parallel and therefore the
  975. // latencies are not added together. Otherwise this is like having two
  976. // instructions running together on two pipelines and 6 dispatches. ALU ops are
  977. // 2 cycles each (P9_ALU_2C).
  978. def : InstRW<[P9_ALU_2C, P9_ALU_2C, IP_EXEC_1C, IP_EXEC_1C,
  979. DISP_3SLOTS_1C, DISP_3SLOTS_1C],
  980. (instrs
  981. MTCRF,
  982. MTCRF8
  983. )>;
  984. // Cracked ALU operations.
  985. // Here the two ALU ops can actually be done in parallel and therefore the
  986. // latencies are not added together. Otherwise this is like having two
  987. // instructions running together on two pipelines and 2 dispatches. ALU ops are
  988. // 2 cycles each (P9_ALU_2C).
  989. def : InstRW<[P9_ALU_2C, P9_ALU_2C, IP_EXEC_1C, IP_EXEC_1C,
  990. DISP_1C, DISP_1C],
  991. (instrs
  992. (instregex "ADDC(8)?(O)?_rec$"),
  993. (instregex "SUBFC(8)?(O)?_rec$")
  994. )>;
  995. // Cracked ALU operations.
  996. // Two ALU ops can be done in parallel.
  997. // One is a three cycle ALU op, the other is a two cycle ALU op.
  998. // One of the ALU ops is restricted the other is not so we have a total of
  999. // 4 dispatches (DISP_3SLOTS_1C plus DISP_1C).
  1000. def : InstRW<[P9_ALU_2C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C,
  1001. DISP_3SLOTS_1C, DISP_1C],
  1002. (instrs
  1003. (instregex "F(N)?ABS(D|S)_rec$"),
  1004. (instregex "FCPSGN(D|S)_rec$"),
  1005. (instregex "FNEG(D|S)_rec$"),
  1006. FMR_rec
  1007. )>;
  1008. // Cracked ALU operations.
  1009. // Here the two ALU ops can actually be done in parallel and therefore the
  1010. // latencies are not added together. Otherwise this is like having two
  1011. // instructions running together on two pipelines and 2 dispatches.
  1012. // ALU ops are 3 cycles each (P9_ALU_3C).
  1013. def : InstRW<[P9_ALU_3C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C,
  1014. DISP_1C, DISP_1C],
  1015. (instrs
  1016. MCRFS
  1017. )>;
  1018. // Cracked Restricted ALU operations.
  1019. // Here the two ALU ops can actually be done in parallel and therefore the
  1020. // latencies are not added together. Otherwise this is like having two
  1021. // instructions running together on two pipelines and 6 dispatches.
  1022. // ALU ops are 3 cycles each (P9_ALU_3C).
  1023. def : InstRW<[P9_ALU_3C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C,
  1024. DISP_3SLOTS_1C, DISP_3SLOTS_1C],
  1025. (instrs
  1026. (instregex "MTFSF(b|_rec)?$"),
  1027. (instregex "MTFSFI(_rec)?$"),
  1028. MTFSFIb
  1029. )>;
  1030. // Cracked instruction made of two ALU ops.
  1031. // The two ops cannot be done in parallel.
  1032. // One of the ALU ops is restricted and takes 3 dispatches; the other takes 1.
  1033. def : InstRW<[P9_ALUOpAndALUOp_4C, IP_EXEC_1C, IP_EXEC_1C,
  1034. DISP_3SLOTS_1C, DISP_1C],
  1035. (instrs
  1036. (instregex "RLD(I)?C(R|L)_rec$"),
  1037. (instregex "RLW(IMI|INM|NM)(8)?_rec$"),
  1038. (instregex "SLW(8)?_rec$"),
  1039. (instregex "SRAW(I)?_rec$"),
  1040. (instregex "SRW(8)?_rec$"),
  1041. RLDICL_32_rec,
  1042. RLDIMI_rec
  1043. )>;
  1044. // Cracked instruction made of two ALU ops.
  1045. // The two ops cannot be done in parallel.
  1046. // Both of the ALU ops are restricted and take 3 dispatches.
  1047. def : InstRW<[P9_ALU2OpAndALU2Op_6C, IP_EXEC_1C, IP_EXEC_1C,
  1048. DISP_3SLOTS_1C, DISP_3SLOTS_1C],
  1049. (instrs
  1050. (instregex "MFFS(L|CE|_rec)?$")
  1051. )>;
  1052. // Cracked ALU instruction composed of three consecutive 2 cycle loads for a
  1053. // total of 6 cycles. All of the ALU operations are also restricted so each
  1054. // takes 3 dispatches for a total of 9.
  1055. def : InstRW<[P9_ALUOpAndALUOpAndALUOp_6C, IP_EXEC_1C, IP_EXEC_1C, IP_EXEC_1C,
  1056. DISP_3SLOTS_1C, DISP_3SLOTS_1C, DISP_3SLOTS_1C],
  1057. (instrs
  1058. (instregex "MFCR(8)?$")
  1059. )>;
  1060. // Cracked instruction made of two ALU ops.
  1061. // The two ops cannot be done in parallel.
  1062. def : InstRW<[P9_ALUOpAndALUOp_4C, IP_EXEC_1C, IP_EXEC_1C, DISP_1C, DISP_1C],
  1063. (instrs
  1064. (instregex "EXTSWSLI_32_64_rec$"),
  1065. (instregex "SRAD(I)?_rec$"),
  1066. EXTSWSLI_rec,
  1067. SLD_rec,
  1068. SRD_rec,
  1069. RLDIC_rec
  1070. )>;
  1071. // 33 Cycle DP Instruction Restricted. Takes one slice and 3 dispatches.
  1072. def : InstRW<[P9_DP_33C_8, IP_EXEC_1C, DISP_3SLOTS_1C],
  1073. (instrs
  1074. FDIV
  1075. )>;
  1076. // 33 Cycle DP Instruction Restricted and Cracked with 3 Cycle ALU.
  1077. def : InstRW<[P9_DPOpAndALU2Op_36C_8, IP_EXEC_1C, IP_EXEC_1C,
  1078. DISP_3SLOTS_1C, DISP_1C],
  1079. (instrs
  1080. FDIV_rec
  1081. )>;
  1082. // 36 Cycle DP Instruction.
  1083. // Instruction can be done on a single slice.
  1084. def : InstRW<[P9_DP_36C_10, IP_EXEC_1C, DISP_1C],
  1085. (instrs
  1086. XSSQRTDP
  1087. )>;
  1088. // 36 Cycle DP Instruction Restricted. Takes one slice and 3 dispatches.
  1089. def : InstRW<[P9_DP_36C_10, IP_EXEC_1C, DISP_3SLOTS_1C],
  1090. (instrs
  1091. FSQRT
  1092. )>;
  1093. // 36 Cycle DP Vector Instruction.
  1094. def : InstRW<[P9_DPE_36C_10, P9_DPO_36C_10, IP_EXECE_1C, IP_EXECO_1C,
  1095. DISP_1C],
  1096. (instrs
  1097. XVSQRTDP
  1098. )>;
  1099. // 27 Cycle DP Vector Instruction.
  1100. def : InstRW<[P9_DPE_27C_10, P9_DPO_27C_10, IP_EXECE_1C, IP_EXECO_1C,
  1101. DISP_1C],
  1102. (instrs
  1103. XVSQRTSP
  1104. )>;
  1105. // 36 Cycle DP Instruction Restricted and Cracked with 3 Cycle ALU.
  1106. def : InstRW<[P9_DPOpAndALU2Op_39C_10, IP_EXEC_1C, IP_EXEC_1C,
  1107. DISP_3SLOTS_1C, DISP_1C],
  1108. (instrs
  1109. FSQRT_rec
  1110. )>;
  1111. // 26 Cycle DP Instruction.
  1112. def : InstRW<[P9_DP_26C_5, IP_EXEC_1C, DISP_1C],
  1113. (instrs
  1114. XSSQRTSP
  1115. )>;
  1116. // 26 Cycle DP Instruction Restricted. Takes one slice and 3 dispatches.
  1117. def : InstRW<[P9_DP_26C_5, IP_EXEC_1C, DISP_3SLOTS_1C],
  1118. (instrs
  1119. FSQRTS
  1120. )>;
  1121. // 26 Cycle DP Instruction Restricted and Cracked with 3 Cycle ALU.
  1122. def : InstRW<[P9_DPOpAndALU2Op_29C_5, IP_EXEC_1C, IP_EXEC_1C,
  1123. DISP_3SLOTS_1C, DISP_1C],
  1124. (instrs
  1125. FSQRTS_rec
  1126. )>;
  1127. // 33 Cycle DP Instruction. Takes one slice and 1 dispatch.
  1128. def : InstRW<[P9_DP_33C_8, IP_EXEC_1C, DISP_1C],
  1129. (instrs
  1130. XSDIVDP
  1131. )>;
  1132. // 22 Cycle DP Instruction Restricted. Takes one slice and 3 dispatches.
  1133. def : InstRW<[P9_DP_22C_5, IP_EXEC_1C, DISP_3SLOTS_1C],
  1134. (instrs
  1135. FDIVS
  1136. )>;
  1137. // 22 Cycle DP Instruction Restricted and Cracked with 2 Cycle ALU.
  1138. def : InstRW<[P9_DPOpAndALU2Op_25C_5, IP_EXEC_1C, IP_EXEC_1C,
  1139. DISP_3SLOTS_1C, DISP_1C],
  1140. (instrs
  1141. FDIVS_rec
  1142. )>;
  1143. // 22 Cycle DP Instruction. Takes one slice and 1 dispatch.
  1144. def : InstRW<[P9_DP_22C_5, IP_EXEC_1C, DISP_1C],
  1145. (instrs
  1146. XSDIVSP
  1147. )>;
  1148. // 24 Cycle DP Vector Instruction. Takes one full superslice.
  1149. // Includes both EXECE, EXECO pipelines and 1 dispatch for the given
  1150. // superslice.
  1151. def : InstRW<[P9_DPE_24C_8, P9_DPO_24C_8, IP_EXECE_1C, IP_EXECO_1C,
  1152. DISP_1C],
  1153. (instrs
  1154. XVDIVSP
  1155. )>;
  1156. // 33 Cycle DP Vector Instruction. Takes one full superslice.
  1157. // Includes both EXECE, EXECO pipelines and 1 dispatch for the given
  1158. // superslice.
  1159. def : InstRW<[P9_DPE_33C_8, P9_DPO_33C_8, IP_EXECE_1C, IP_EXECO_1C,
  1160. DISP_1C],
  1161. (instrs
  1162. XVDIVDP
  1163. )>;
  1164. // Instruction cracked into three pieces. One Load and two ALU operations.
  1165. // The Load and one of the ALU ops cannot be run at the same time and so the
  1166. // latencies are added together for 6 cycles. The remainaing ALU is 2 cycles.
  1167. // Both the load and the ALU that depends on it are restricted and so they take
  1168. // a total of 7 dispatches. The final 2 dispatches come from the second ALU op.
  1169. // The two EXEC pipelines are for the 2 ALUs while the AGEN is for the load.
  1170. def : InstRW<[P9_LoadAndALU2Op_7C, P9_ALU_2C,
  1171. IP_AGEN_1C, IP_EXEC_1C, IP_EXEC_1C,
  1172. DISP_3SLOTS_1C, DISP_3SLOTS_1C, DISP_1C],
  1173. (instrs
  1174. (instregex "LF(SU|SUX)$")
  1175. )>;
  1176. // Cracked instruction made up of a Store and an ALU. The ALU does not depend on
  1177. // the store and so it can be run at the same time as the store. The store is
  1178. // also restricted.
  1179. def : InstRW<[P9_LS_1C, P9_ALU_2C, IP_AGEN_1C, IP_EXEC_1C, IP_EXEC_1C,
  1180. DISP_3SLOTS_1C, DISP_1C],
  1181. (instrs
  1182. (instregex "STF(S|D)U(X)?$"),
  1183. (instregex "ST(B|H|W|D)U(X)?(8)?$")
  1184. )>;
  1185. // Cracked instruction made up of a Load and an ALU. The ALU does not depend on
  1186. // the load and so it can be run at the same time as the load.
  1187. def : InstRW<[P9_LS_4C, P9_ALU_2C, IP_AGEN_1C, IP_EXEC_1C,
  1188. DISP_PAIR_1C, DISP_PAIR_1C],
  1189. (instrs
  1190. (instregex "LBZU(X)?(8)?$"),
  1191. (instregex "LDU(X)?$")
  1192. )>;
  1193. // Cracked instruction made up of a Load and an ALU. The ALU does not depend on
  1194. // the load and so it can be run at the same time as the load. The load is also
  1195. // restricted. 3 dispatches are from the restricted load while the other two
  1196. // are from the ALU. The AGEN pipeline is from the load and the EXEC pipeline
  1197. // is required for the ALU.
  1198. def : InstRW<[P9_LS_4C, P9_ALU_2C, IP_AGEN_1C, IP_EXEC_1C,
  1199. DISP_3SLOTS_1C, DISP_1C],
  1200. (instrs
  1201. (instregex "LF(DU|DUX)$")
  1202. )>;
  1203. // Crypto Instructions
  1204. // 6 Cycle CY operation. Only one CY unit per CPU so we use a whole
  1205. // superslice. That includes both exec pipelines (EXECO, EXECE) and one
  1206. // dispatch.
  1207. def : InstRW<[P9_CY_6C, IP_EXECO_1C, IP_EXECE_1C, DISP_1C],
  1208. (instrs
  1209. (instregex "VPMSUM(B|H|W|D)$"),
  1210. (instregex "V(N)?CIPHER(LAST)?$"),
  1211. VSBOX
  1212. )>;
  1213. // Branch Instructions
  1214. // Two Cycle Branch
  1215. def : InstRW<[P9_BR_2C, DISP_BR_1C],
  1216. (instrs
  1217. (instregex "BCCCTR(L)?(8)?$"),
  1218. (instregex "BCCL(A|R|RL)?$"),
  1219. (instregex "BCCTR(L)?(8)?(n)?$"),
  1220. (instregex "BD(N)?Z(8|A|Am|Ap|m|p)?$"),
  1221. (instregex "BD(N)?ZL(A|Am|Ap|R|R8|RL|RLm|RLp|Rm|Rp|m|p)?$"),
  1222. (instregex "BL(_TLS|_NOP)?(_RM)?$"),
  1223. (instregex "BL8(_TLS|_NOP|_NOP_TLS|_TLS_)?(_RM)?$"),
  1224. (instregex "BLA(8|8_NOP)?(_RM)?$"),
  1225. (instregex "BLR(8|L)?$"),
  1226. (instregex "TAILB(A)?(8)?$"),
  1227. (instregex "TAILBCTR(8)?$"),
  1228. (instregex "gBC(A|Aat|CTR|CTRL|L|LA|LAat|LR|LRL|Lat|at)?$"),
  1229. (instregex "BCLR(L)?(n)?$"),
  1230. (instregex "BCTR(L)?(8)?(_RM)?$"),
  1231. B,
  1232. BA,
  1233. BC,
  1234. BCC,
  1235. BCCA,
  1236. BCL,
  1237. BCLalways,
  1238. BCLn,
  1239. BCTRL8_LDinto_toc,
  1240. BCTRL_LWZinto_toc,
  1241. BCTRL8_LDinto_toc_RM,
  1242. BCTRL_LWZinto_toc_RM,
  1243. BCn,
  1244. CTRL_DEP
  1245. )>;
  1246. // Five Cycle Branch with a 2 Cycle ALU Op
  1247. // Operations must be done consecutively and not in parallel.
  1248. def : InstRW<[P9_BROpAndALUOp_7C, IP_EXEC_1C, DISP_BR_1C, DISP_1C],
  1249. (instrs
  1250. ADDPCIS
  1251. )>;
  1252. // Special Extracted Instructions For Atomics
  1253. // Atomic Load
  1254. def : InstRW<[P9_LS_1C, P9_LS_1C, P9_LS_4C, P9_LS_4C, P9_LS_4C,
  1255. IP_EXEC_1C, IP_EXEC_1C, IP_AGEN_1C, IP_AGEN_1C, IP_AGEN_1C,
  1256. IP_AGEN_1C, IP_AGEN_1C, DISP_1C, DISP_3SLOTS_1C,
  1257. DISP_3SLOTS_1C, DISP_1C, DISP_1C, DISP_1C],
  1258. (instrs
  1259. (instregex "L(D|W)AT$")
  1260. )>;
  1261. // Atomic Store
  1262. def : InstRW<[P9_LS_1C, P9_LS_4C, P9_LS_4C, IP_EXEC_1C, IP_AGEN_1C, IP_AGEN_1C,
  1263. IP_AGEN_1C, DISP_1C, DISP_3SLOTS_1C, DISP_1C],
  1264. (instrs
  1265. (instregex "ST(D|W)AT$")
  1266. )>;
  1267. // Signal Processing Engine (SPE) Instructions
  1268. // These instructions are not supported on Power 9
  1269. def : InstRW<[],
  1270. (instrs
  1271. BRINC,
  1272. EVABS,
  1273. EVEQV,
  1274. EVMRA,
  1275. EVNAND,
  1276. EVNEG,
  1277. (instregex "EVADD(I)?W$"),
  1278. (instregex "EVADD(SM|SS|UM|US)IAAW$"),
  1279. (instregex "EVAND(C)?$"),
  1280. (instregex "EVCMP(EQ|GTS|GTU|LTS|LTU)$"),
  1281. (instregex "EVCNTL(S|Z)W$"),
  1282. (instregex "EVDIVW(S|U)$"),
  1283. (instregex "EVEXTS(B|H)$"),
  1284. (instregex "EVLD(H|W|D)(X)?$"),
  1285. (instregex "EVLHH(E|OS|OU)SPLAT(X)?$"),
  1286. (instregex "EVLWHE(X)?$"),
  1287. (instregex "EVLWHO(S|U)(X)?$"),
  1288. (instregex "EVLW(H|W)SPLAT(X)?$"),
  1289. (instregex "EVMERGE(HI|LO|HILO|LOHI)$"),
  1290. (instregex "EVMHEG(S|U)M(F|I)A(A|N)$"),
  1291. (instregex "EVMHES(M|S)(F|I)(A|AA|AAW|ANW)?$"),
  1292. (instregex "EVMHEU(M|S)I(A|AA|AAW|ANW)?$"),
  1293. (instregex "EVMHOG(U|S)M(F|I)A(A|N)$"),
  1294. (instregex "EVMHOS(M|S)(F|I)(A|AA|AAW|ANW)?$"),
  1295. (instregex "EVMHOU(M|S)I(A|AA|ANW|AAW)?$"),
  1296. (instregex "EVMWHS(M|S)(F|FA|I|IA)$"),
  1297. (instregex "EVMWHUMI(A)?$"),
  1298. (instregex "EVMWLS(M|S)IA(A|N)W$"),
  1299. (instregex "EVMWLU(M|S)I(A|AA|AAW|ANW)?$"),
  1300. (instregex "EVMWSM(F|I)(A|AA|AN)?$"),
  1301. (instregex "EVMWSSF(A|AA|AN)?$"),
  1302. (instregex "EVMWUMI(A|AA|AN)?$"),
  1303. (instregex "EV(N|X)?OR(C)?$"),
  1304. (instregex "EVR(LW|LWI|NDW)$"),
  1305. (instregex "EVSLW(I)?$"),
  1306. (instregex "EVSPLAT(F)?I$"),
  1307. (instregex "EVSRW(I)?(S|U)$"),
  1308. (instregex "EVST(DD|DH|DW|WHE|WHO|WWE|WWO)(X)?$"),
  1309. (instregex "EVSUBF(S|U)(M|S)IAAW$"),
  1310. (instregex "EVSUB(I)?FW$")
  1311. )> { let Unsupported = 1; }
  1312. // General Instructions without scheduling support.
  1313. def : InstRW<[],
  1314. (instrs
  1315. (instregex "(H)?RFI(D)?$"),
  1316. (instregex "DSS(ALL)?$"),
  1317. (instregex "DST(ST)?(T)?(64)?$"),
  1318. (instregex "ICBL(C|Q)$"),
  1319. (instregex "L(W|H|B)EPX$"),
  1320. (instregex "ST(W|H|B)EPX$"),
  1321. (instregex "(L|ST)FDEPX$"),
  1322. (instregex "M(T|F)SR(IN)?$"),
  1323. (instregex "M(T|F)DCR$"),
  1324. (instregex "NOP_GT_PWR(6|7)$"),
  1325. (instregex "TLB(IA|IVAX|SX|SX2|SX2D|LD|LI|RE|RE2|WE|WE2)$"),
  1326. (instregex "WRTEE(I)?$"),
  1327. (instregex "HASH(ST|STP|CHK|CHKP)(8)?$"),
  1328. ATTN,
  1329. CLRBHRB,
  1330. MFBHRBE,
  1331. MBAR,
  1332. MSYNC,
  1333. SLBSYNC,
  1334. SLBFEE_rec,
  1335. NAP,
  1336. STOP,
  1337. TRAP,
  1338. RFCI,
  1339. RFDI,
  1340. RFMCI,
  1341. SC,
  1342. DCBA,
  1343. DCBI,
  1344. DCCCI,
  1345. ICCCI,
  1346. ADDEX,
  1347. ADDEX8
  1348. )> { let Unsupported = 1; }