1//===- P9InstrResources.td - P9 Instruction Resource Defs  -*- tablegen -*-==//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the resources required by P9 instructions. This is part of
10// the P9 processor model used for instruction scheduling. This file should
11// contain all the instructions that may be used on Power 9. This is not
12// just instructions that are new on Power 9 but also instructions that were
13// available on earlier architectures and are still used in Power 9.
14//
15// The makeup of the P9 CPU is modeled as follows:
16//   - Each CPU is made up of two superslices.
17//   - Each superslice is made up of two slices. Therefore, there are 4 slices
18//   for each CPU.
19//   - Up to 6 instructions can be dispatched to each CPU. Three per superslice.
20//   - Each CPU has:
21//     - One CY (Crypto) unit P9_CY_*
22//     - One DFU (Decimal Floating Point and Quad Precision) unit P9_DFU_*
23//     - Two PM (Permute) units. One on each superslice. P9_PM_*
//     - Two DIV (Fixed Point Divide) units. One on each superslice. P9_DIV_*
25//     - Four ALU (Fixed Point Arithmetic) units. One on each slice. P9_ALU_*
26//     - Four DP (Floating Point) units. One on each slice. P9_DP_*
27//       This also includes fixed point multiply add.
28//     - Four AGEN (Address Generation) units. One for each slice. P9_AGEN_*
29//     - Four Load/Store Queues. P9_LS_*
30//   - Each set of instructions will require a number of these resources.
31//===----------------------------------------------------------------------===//
32
// Superslice-wide vector ALU work with a two cycle latency.
// Both ALUs of the superslice are occupied (even ALUE and odd ALUO), along
// with both execution pipelines (EXECE, EXECO) and a single dispatch (DISP)
// of that superslice.
def : InstRW<
  [P9_ALUE_2C, P9_ALUO_2C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
  (instrs
    // Opcodes selected by pattern.
    (instregex "VADDU(B|H|W|D)M$"),
    (instregex "VAND(C)?$"),
    (instregex "VEXTS(B|H|W)2(D|W)(s)?$"),
    (instregex "V_SET0(B|H)?$"),
    (instregex "VS(R|L)(B|H|W|D)$"),
    (instregex "VSUBU(B|H|W|D)M$"),
    (instregex "VPOPCNT(B|H)$"),
    (instregex "VRL(B|H|W|D)$"),
    (instregex "VSRA(B|H|W|D)$"),
    (instregex "XV(N)?ABS(D|S)P$"),
    (instregex "XVCPSGN(D|S)P$"),
    (instregex "XV(I|X)EXP(D|S)P$"),
    (instregex "VRL(D|W)(MI|NM)$"),
    (instregex "VMRG(E|O)W$"),
    // Opcodes listed by name.
    MTVSRDD,
    VEQV, VNAND, VNEGD, VNEGW, VNOR, VOR, VORC, VSEL, VXOR,
    XVNEGDP, XVNEGSP,
    XXLAND, XXLANDC, XXLEQV, XXLEQVOnes, XXLNAND, XXLNOR,
    XXLOR, XXLORf, XXLORC,
    XXLXOR, XXLXORdpz, XXLXORspz, XXLXORz,
    XXSEL,
    XSABSQP, XSCPSGNQP, XSIEXPQP, XSNABSQP, XSNEGQP, XSXEXPQP
  )>;
85
// Three cycle ALU operation with Restricted dispatch.
// Execution needs only one slice, but because the operation is Restricted it
// claims all 3 dispatches (DISP) of the superslice it is issued to.
def : InstRW<
  [P9_ALU_3C, IP_EXEC_1C, DISP_3SLOTS_1C],
  (instrs
    // Opcodes selected by pattern.
    (instregex "TABORT(D|W)C(I)?$"),
    (instregex "MTFSB(0|1)$"),
    (instregex "MFFSC(D)?RN(I)?$"),
    (instregex "CMPRB(8)?$"),
    (instregex "TD(I)?$"),
    (instregex "TW(I)?$"),
    (instregex "FCMP(O|U)(S|D)$"),
    (instregex "XSTSTDC(S|D)P$"),
    // Opcodes listed by name.
    FTDIV, FTSQRT, CMPEQB
  )>;
103
// Three cycle ALU operation with Standard dispatch; a single slice is used.
def : InstRW<
  [P9_ALU_3C, IP_EXEC_1C, DISP_1C],
  (instrs
    // Opcodes selected by pattern.
    (instregex "XSMAX(C|J)?DP$"),
    (instregex "XSMIN(C|J)?DP$"),
    (instregex "XSCMP(EQ|EXP|GE|GT|O|U)DP$"),
    (instregex "CNT(L|T)Z(D|W)(8)?(_rec)?$"),
    (instregex "POPCNT(D|W)$"),
    (instregex "CMPB(8)?$"),
    (instregex "SETB(8)?$"),
    // Opcodes listed by name.
    XSTDIVDP, XSTSQRTDP, XSXSIGDP, XSCVSPDPN, BPERMD
  )>;
120
// Two cycle ALU operation with Standard dispatch; a single slice is used.
//
// The entries below form a set, so a pattern whose matches are all produced
// by another pattern in the list contributes nothing. The NEG forms are
// therefore named by one pattern only, and the POPCNTB / POPCNTB8 pair is
// expressed with a single optional "8".
def : InstRW<
  [P9_ALU_2C, IP_EXEC_1C, DISP_1C],
  (instrs
    // Opcodes selected by pattern.
    (instregex "S(L|R)D$"),
    (instregex "SRAD(I)?$"),
    (instregex "EXTSWSLI_32_64$"),
    (instregex "MFV(S)?RD$"),
    (instregex "MTV(S)?RD$"),
    (instregex "MTV(S)?RW(A|Z)$"),
    (instregex "CMP(WI|LWI|W|LW)(8)?$"),
    (instregex "CMP(L)?D(I)?$"),
    (instregex "SUBF(I)?C(8)?(O)?$"),
    (instregex "ANDI(S)?(8)?(_rec)?$"),
    (instregex "ADDC(8)?(O)?$"),
    (instregex "ADDIC(8)?(_rec)?$"),
    (instregex "ADD(8|4)(O)?(_rec)?$"),
    (instregex "ADD(E|ME|ZE)(8)?(O)?(_rec)?$"),
    (instregex "SUBF(E|ME|ZE)?(8)?(O)?(_rec)?$"),
    // Matches the plain forms as well as the _rec forms, so no separate
    // "NEG(8)?(O)?$" entry is needed.
    (instregex "NEG(8)?(O)?(_rec)?$"),
    (instregex "POPCNTB(8)?$"),
    (instregex "ADD(I|IS)?(8)?$"),
    (instregex "LI(S)?(8)?$"),
    (instregex "(X)?OR(I|IS)?(8)?(_rec)?$"),
    (instregex "NAND(8)?(_rec)?$"),
    (instregex "AND(C)?(8)?(_rec)?$"),
    (instregex "NOR(8)?(_rec)?$"),
    (instregex "OR(C)?(8)?(_rec)?$"),
    (instregex "EQV(8)?(_rec)?$"),
    (instregex "EXTS(B|H|W)(8)?(_32)?(_64)?(_rec)?$"),
    (instregex "ADD(4|8)(TLS)?(_)?$"),
    (instregex "ADDI(S)?toc(HA|L)(8)?$"),
    (instregex "LA(8)?$"),
    // Opcodes listed by name.
    COPY, MCRF, MCRXRX,
    XSNABSDP, XSNABSDPs, XSXEXPDP, XSABSDP, XSNEGDP, XSCPSGNDP,
    MFVSRWZ, MFVRWZ,
    EXTSWSLI, SRADI_32, RLDIC,
    RFEBB, TBEGIN, TRECHKPT, NOP, WAIT
  )>;
175
// Two cycle ALU operation with Restricted dispatch.
// Execution needs only one slice, but because the operation is Restricted it
// claims all 3 dispatches (DISP) of the superslice it is issued to.
def : InstRW<
  [P9_ALU_2C, IP_EXEC_1C, DISP_3SLOTS_1C],
  (instrs
    // Opcodes selected by pattern.
    (instregex "RLDC(L|R)$"),
    (instregex "RLWIMI(8)?$"),
    (instregex "RLDIC(L|R)(_32)?(_64)?$"),
    (instregex "M(F|T)OCRF(8)?$"),
    (instregex "CR(6)?(UN)?SET$"),
    (instregex "CR(N)?(OR|AND)(C)?$"),
    (instregex "S(L|R)W(8)?$"),
    (instregex "RLW(INM|NM)(8)?$"),
    (instregex "F(N)?ABS(D|S)$"),
    (instregex "FNEG(D|S)$"),
    (instregex "FCPSGN(D|S)$"),
    (instregex "SRAW(I)?$"),
    (instregex "ISEL(8)?$"),
    // Opcodes listed by name.
    RLDIMI, XSIEXPDP, FMR, CREQV, CRXOR, TRECLAIM, TSR, TABORT
  )>;
203
// Superslice-wide vector ALU work with a three cycle latency.
// Both ALUs of the superslice are occupied (even ALUE and odd ALUO), along
// with both execution pipelines (EXECE, EXECO) and a single dispatch (DISP)
// of that superslice.
def : InstRW<
  [P9_ALUE_3C, P9_ALUO_3C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
  (instrs
    // Opcodes selected by pattern.
    (instregex "M(T|F)VSCR$"),
    (instregex "VCMPNEZ(B|H|W)$"),
    (instregex "VCMPEQU(B|H|W|D)$"),
    (instregex "VCMPNE(B|H|W)$"),
    (instregex "VABSDU(B|H|W)$"),
    (instregex "VADDU(B|H|W)S$"),
    (instregex "VAVG(S|U)(B|H|W)$"),
    (instregex "VCMP(EQ|GE|GT)FP(_rec)?$"),
    (instregex "VCMPBFP(_rec)?$"),
    (instregex "VC(L|T)Z(B|H|W|D)$"),
    (instregex "VADDS(B|H|W)S$"),
    (instregex "V(MIN|MAX)FP$"),
    (instregex "V(MIN|MAX)(S|U)(B|H|W|D)$"),
    // Opcodes listed by name.
    VBPERMD, VADDCUW, VPOPCNTW, VPOPCNTD, VPRTYBD, VPRTYBW,
    VSHASIGMAD, VSHASIGMAW,
    VSUBSBS, VSUBSHS, VSUBSWS, VSUBUBS, VSUBUHS, VSUBUWS, VSUBCUW,
    VCMPGTSB, VCMPGTSB_rec, VCMPGTSD, VCMPGTSD_rec,
    VCMPGTSH, VCMPGTSH_rec, VCMPGTSW, VCMPGTSW_rec,
    VCMPGTUB, VCMPGTUB_rec, VCMPGTUD, VCMPGTUD_rec,
    VCMPGTUH, VCMPGTUH_rec, VCMPGTUW, VCMPGTUW_rec,
    VCMPNEB_rec, VCMPNEH_rec, VCMPNEW_rec,
    VCMPNEZB_rec, VCMPNEZH_rec, VCMPNEZW_rec,
    VCMPEQUB_rec, VCMPEQUD_rec, VCMPEQUH_rec, VCMPEQUW_rec,
    XVCMPEQDP, XVCMPEQDP_rec, XVCMPEQSP, XVCMPEQSP_rec,
    XVCMPGEDP, XVCMPGEDP_rec, XVCMPGESP, XVCMPGESP_rec,
    XVCMPGTDP, XVCMPGTDP_rec, XVCMPGTSP, XVCMPGTSP_rec,
    XVMAXDP, XVMAXSP, XVMINDP, XVMINSP,
    XVTDIVDP, XVTDIVSP, XVTSQRTDP, XVTSQRTSP,
    XVTSTDCDP, XVTSTDCSP,
    XVXSIGDP, XVXSIGSP
  )>;
288
// Superslice-wide vector DP work with a seven cycle latency.
// Both DP units of the superslice are occupied (even DPE and odd DPO), along
// with both execution pipelines (EXECE, EXECO). Dispatch is modeled with a
// single DISP_1C entry for the superslice.
def : InstRW<
  [P9_DPE_7C, P9_DPO_7C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
  (instrs
    // All opcodes in this group are listed by name.
    VADDFP, VCTSXS, VCTSXS_0, VCTUXS, VCTUXS_0, VEXPTEFP, VLOGEFP,
    VMADDFP, VMHADDSHS, VNMSUBFP, VREFP,
    VRFIM, VRFIN, VRFIP, VRFIZ,
    VRSQRTEFP, VSUBFP,
    XVADDDP, XVADDSP,
    XVCVDPSP, XVCVDPSXDS, XVCVDPSXWS, XVCVDPUXDS, XVCVDPUXWS,
    XVCVHPSP, XVCVSPDP, XVCVSPHP,
    XVCVSPSXDS, XVCVSPSXWS, XVCVSPUXDS, XVCVSPUXWS,
    XVCVSXDDP, XVCVSXDSP, XVCVSXWDP, XVCVSXWSP,
    XVCVUXDDP, XVCVUXDSP, XVCVUXWDP, XVCVUXWSP,
    XVMADDADP, XVMADDASP, XVMADDMDP, XVMADDMSP,
    XVMSUBADP, XVMSUBASP, XVMSUBMDP, XVMSUBMSP,
    XVMULDP, XVMULSP,
    XVNMADDADP, XVNMADDASP, XVNMADDMDP, XVNMADDMSP,
    XVNMSUBADP, XVNMSUBASP, XVNMSUBMDP, XVNMSUBMSP,
    XVRDPI, XVRDPIC, XVRDPIM, XVRDPIP, XVRDPIZ,
    XVREDP, XVRESP,
    XVRSPI, XVRSPIC, XVRSPIM, XVRSPIP, XVRSPIZ,
    XVRSQRTEDP, XVRSQRTESP,
    XVSUBDP, XVSUBSP,
    VCFSX, VCFSX_0, VCFUX, VCFUX_0,
    VMHRADDSHS, VMLADDUHM,
    VMSUMMBM, VMSUMSHM, VMSUMSHS, VMSUMUBM, VMSUMUHM, VMSUMUDM, VMSUMUHS,
    VMULESB, VMULESH, VMULESW, VMULEUB, VMULEUH, VMULEUW,
    VMULOSB, VMULOSH, VMULOSW, VMULOUB, VMULOUH, VMULOUW,
    VMULUWM,
    VSUM2SWS, VSUM4SBS, VSUM4SHS, VSUM4UBS, VSUMSWS
  )>;
399
// Restricted DP operation with a five cycle latency.
// Occupies one DP unit and one EXEC pipeline, and takes all three dispatch
// units of the superslice.
def : InstRW<
  [P9_DP_5C, IP_EXEC_1C, DISP_3SLOTS_1C],
  (instrs
    // Opcodes selected by pattern.
    (instregex "MADD(HD|HDU|LD|LD8)$"),
    (instregex "MUL(HD|HW|LD|LI|LI8|LW)(U)?(O)?$")
  )>;
407
// Restricted DP operation with a seven cycle latency.
// Occupies one DP unit and one EXEC pipeline, and takes all three dispatch
// units of the superslice.
def : InstRW<
  [P9_DP_7C, IP_EXEC_1C, DISP_3SLOTS_1C],
  (instrs
    // Opcodes selected by pattern.
    (instregex "FRI(N|P|Z|M)(D|S)$"),
    (instregex "FRE(S)?$"),
    (instregex "FADD(S)?$"),
    (instregex "FMSUB(S)?$"),
    (instregex "FMADD(S)?$"),
    (instregex "FSUB(S)?$"),
    (instregex "FCFID(U)?(S)?$"),
    (instregex "FCTID(U)?(Z)?$"),
    (instregex "FCTIW(U)?(Z)?$"),
    (instregex "FRSQRTE(S)?$"),
    // Opcodes listed by name.
    FRSP,
    FNMADDS, FNMADD, FNMSUBS, FNMSUB,
    FSELD, FSELS,
    FMULS, FMUL,
    XSMADDADP, XSMADDASP, XSMADDMDP, XSMADDMSP,
    XSMSUBADP, XSMSUBASP, XSMSUBMDP, XSMSUBMSP,
    XSMULDP, XSMULSP,
    XSNMADDADP, XSNMADDASP, XSNMADDMDP, XSNMADDMSP,
    XSNMSUBADP, XSNMSUBASP, XSNMSUBMDP, XSNMSUBMSP
  )>;
450
// A seven cycle Restricted DP operation paired with a three cycle ALU
// operation. The two pieces can execute in parallel. Because the DP piece is
// Restricted, a full 4 dispatches are needed (three for it, one for the ALU).
def : InstRW<
  [P9_DP_7C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C, DISP_3SLOTS_1C, DISP_1C],
  (instrs
    (instregex "FSEL(D|S)_rec$")
  )>;
459
// A five cycle Restricted DP operation combined with a two cycle ALU
// operation, modeled with the P9_DPOpAndALUOp_7C resource.
def : InstRW<
  [P9_DPOpAndALUOp_7C, IP_EXEC_1C, IP_EXEC_1C, DISP_3SLOTS_1C, DISP_1C],
  (instrs
    (instregex "MUL(H|L)(D|W)(U)?(O)?_rec$")
  )>;
466
// A seven cycle Restricted DP operation followed by a three cycle ALU
// operation. The two pieces must run sequentially. Because the DP piece is
// Restricted, a full 4 dispatches are needed (three for it, one for the ALU).
def : InstRW<
  [P9_DPOpAndALU2Op_10C, IP_EXEC_1C, IP_EXEC_1C, DISP_3SLOTS_1C, DISP_1C],
  (instrs
    // Opcodes selected by pattern.
    (instregex "FRI(N|P|Z|M)(D|S)_rec$"),
    (instregex "FRE(S)?_rec$"),
    (instregex "FADD(S)?_rec$"),
    (instregex "FSUB(S)?_rec$"),
    (instregex "F(N)?MSUB(S)?_rec$"),
    (instregex "F(N)?MADD(S)?_rec$"),
    (instregex "FCFID(U)?(S)?_rec$"),
    (instregex "FCTID(U)?(Z)?_rec$"),
    (instregex "FCTIW(U)?(Z)?_rec$"),
    (instregex "FMUL(S)?_rec$"),
    (instregex "FRSQRTE(S)?_rec$"),
    // Opcodes listed by name.
    FRSP_rec
  )>;
486
// DP operation with a seven cycle latency.
// Occupies one DP unit, one EXEC pipeline and one dispatch unit.
def : InstRW<
  [P9_DP_7C, IP_EXEC_1C, DISP_1C],
  (instrs
    // All opcodes in this group are listed by name.
    XSADDDP, XSADDSP,
    XSCVDPHP, XSCVDPSP,
    XSCVDPSXDS, XSCVDPSXDSs, XSCVDPSXWS,
    XSCVDPUXDS, XSCVDPUXDSs, XSCVDPUXWS,
    XSCVDPSXWSs, XSCVDPUXWSs,
    XSCVHPDP, XSCVSPDP,
    XSCVSXDDP, XSCVSXDSP, XSCVUXDDP, XSCVUXDSP,
    XSRDPI, XSRDPIC, XSRDPIM, XSRDPIP, XSRDPIZ,
    XSREDP, XSRESP,
    XSRSQRTEDP, XSRSQRTESP,
    XSSUBDP, XSSUBSP,
    XSCVDPSPN, XSRSP
  )>;
522
// PM operation with a three cycle latency.
// A superslice has only one PM unit, so the whole superslice is used: both
// exec pipelines (EXECO, EXECE) and one dispatch.
def : InstRW<
  [P9_PM_3C, IP_EXECO_1C, IP_EXECE_1C, DISP_1C],
  (instrs
    // Opcodes selected by pattern.
    (instregex "LVS(L|R)$"),
    (instregex "VSPLTIS(W|H|B)$"),
    (instregex "VSPLT(W|H|B)(s)?$"),
    (instregex "V_SETALLONES(B|H)?$"),
    (instregex "VEXTRACTU(B|H|W)$"),
    (instregex "VINSERT(B|H|W|D)$"),
    // Opcodes listed by name.
    MFVSRLD, MTVSRWS,
    VBPERMQ, VCLZLSBB, VCTZLSBB, VEXTRACTD,
    VEXTUBLX, VEXTUBRX, VEXTUHLX, VEXTUHRX, VEXTUWLX, VEXTUWRX,
    VGBBD,
    VMRGHB, VMRGHH, VMRGHW, VMRGLB, VMRGLH, VMRGLW,
    VPERM, VPERMR, VPERMXOR,
    VPKPX,
    VPKSDSS, VPKSDUS, VPKSHSS, VPKSHUS, VPKSWSS, VPKSWUS,
    VPKUDUM, VPKUDUS, VPKUHUM, VPKUHUS, VPKUWUM, VPKUWUS,
    VPRTYBQ,
    VSL, VSLDOI, VSLO, VSLV, VSR, VSRO, VSRV,
    VUPKHPX, VUPKHSB, VUPKHSH, VUPKHSW,
    VUPKLPX, VUPKLSB, VUPKLSH, VUPKLSW,
    XXBRD, XXBRH, XXBRQ, XXBRW,
    XXEXTRACTUW, XXINSERTW,
    XXMRGHW, XXMRGLW,
    XXPERM, XXPERMR,
    XXSLDWI, XXSLDWIs,
    XXSPLTIB, XXSPLTW, XXSPLTWs,
    XXPERMDI, XXPERMDIs,
    VADDCUQ, VADDECUQ, VADDEUQM, VADDUQM,
    VMUL10CUQ, VMUL10ECUQ, VMUL10EUQ, VMUL10UQ,
    VSUBCUQ, VSUBECUQ, VSUBEUQM, VSUBUQM,
    XSCMPEXPQP, XSCMPOQP, XSCMPUQP, XSTSTDCQP, XSXSIGQP,
    BCDCFN_rec, BCDCFZ_rec, BCDCPSGN_rec, BCDCTN_rec, BCDCTZ_rec,
    BCDSETSGN_rec, BCDS_rec, BCDTRUNC_rec, BCDUS_rec, BCDUTRUNC_rec,
    BCDADD_rec, BCDSUB_rec
  )>;
632
// DFU operation with a 12 cycle latency.
// There is only one DFU unit per CPU, so a whole superslice is used: both
// exec pipelines (EXECO, EXECE) and one dispatch.
def : InstRW<
  [P9_DFU_12C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
  (instrs
    // All opcodes in this group are listed by name.
    BCDSR_rec,
    XSADDQP, XSADDQPO,
    XSCVDPQP, XSCVQPDP, XSCVQPDPO,
    XSCVQPSDZ, XSCVQPSWZ, XSCVQPUDZ, XSCVQPUWZ,
    XSCVSDQP, XSCVUDQP,
    XSRQPI, XSRQPIX, XSRQPXP,
    XSSUBQP, XSSUBQPO
  )>;
656
// DFU operation with a 23 cycle latency.
// There is only one DFU unit per CPU, so a whole superslice is used: both
// exec pipelines (EXECO, EXECE) and one dispatch.
def : InstRW<
  [P9_DFU_23C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
  (instrs
    BCDCTSQ_rec
  )>;
664
// DFU operation with a 24 cycle latency.
// There is only one DFU unit per CPU, so a whole superslice is used: both
// exec pipelines (EXECO, EXECE) and one dispatch.
def : InstRW<
  [P9_DFU_24C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
  (instrs
    // All opcodes in this group are listed by name.
    XSMADDQP, XSMADDQPO,
    XSMSUBQP, XSMSUBQPO,
    XSMULQP, XSMULQPO,
    XSNMADDQP, XSNMADDQPO,
    XSNMSUBQP, XSNMSUBQPO
  )>;
681
// DFU operation with a 37 cycle latency.
// There is only one DFU unit per CPU, so a whole superslice is used: both
// exec pipelines (EXECO, EXECE) and one dispatch.
def : InstRW<
  [P9_DFU_37C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
  (instrs
    BCDCFSQ_rec
  )>;
689
// DFU operation with a 58 cycle latency.
// There is only one DFU unit per CPU, so a whole superslice is used: both
// exec pipelines (EXECO, EXECE) and one dispatch.
def : InstRW<
  [P9_DFU_58C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
  (instrs
    XSDIVQP, XSDIVQPO
  )>;
698
// DFU operation with a 76 cycle latency.
// There is only one DFU unit per CPU, so a whole superslice is used: both
// exec pipelines (EXECO, EXECE). Dispatch is modeled with a single DISP_1C
// entry, the same as the other DFU groups.
def : InstRW<
  [P9_DFU_76C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
  (instrs
    XSSQRTQP, XSSQRTQPO
  )>;
707
// Load with a six cycle latency; a single slice is used.
// NOTE(review): unlike most patterns in this file, the pattern below has no
// trailing "$" - confirm that matching beyond the listed suffix is intended.
def : InstRW<
  [P9_LS_6C, IP_AGEN_1C, DISP_1C],
  (instrs
    (instregex "LXVL(L)?")
  )>;
713
// Load with a five cycle latency; a single slice is used.
def : InstRW<
  [P9_LS_5C, IP_AGEN_1C, DISP_1C],
  (instrs
    // Opcodes selected by pattern.
    (instregex "LVE(B|H|W)X$"),
    // NOTE(review): no trailing "$" on this pattern - confirm it is intended.
    (instregex "LVX(L)?"),
    (instregex "LXSI(B|H)ZX$"),
    // Opcodes listed by name.
    LXSDX, LXVB16X, LXVD2X, LXVWSX, LXSIWZX,
    LXV, LXVX, LXSD,
    DFLOADf64, XFLOADf64,
    LIWZX
  )>;
732
// Load with a four cycle latency; a single slice is used.
def : InstRW<
  [P9_LS_4C, IP_AGEN_1C, DISP_1C],
  (instrs
    // Opcodes selected by pattern.
    (instregex "DCB(F|T|ST)(EP)?$"),
    (instregex "DCBZ(L)?(EP)?$"),
    (instregex "DCBTST(EP)?$"),
    (instregex "CP_COPY(8)?$"),
    (instregex "ICBI(EP)?$"),
    (instregex "ICBT(LS)?$"),
    (instregex "LBARX(L)?$"),
    (instregex "LBZ(CIX|8|X|X8|XTLS|XTLS_32)?(_)?$"),
    (instregex "LD(ARX|ARXL|BRX|CIX|X|XTLS)?(_)?$"),
    (instregex "LH(A|B)RX(L)?(8)?$"),
    (instregex "LHZ(8|CIX|X|X8|XTLS|XTLS_32)?(_)?$"),
    (instregex "LWARX(L)?$"),
    (instregex "LWBRX(8)?$"),
    (instregex "LWZ(8|CIX|X|X8|XTLS|XTLS_32)?(_)?$"),
    // Opcodes listed by name.
    CP_ABORT, DARN, EnforceIEIO,
    ISYNC, MSGSYNC, TLBSYNC, SYNC,
    LMW, LSWI
  )>;
760
// Restricted load with a four cycle latency.
// Uses a single slice, but takes the dispatch for the whole superslice.
def : InstRW<
  [P9_LS_4C, IP_AGEN_1C, DISP_3SLOTS_1C],
  (instrs
    LFIWZX, LFDX, LFD
  )>;
769
// Cracked load instructions.
// The two load pieces can be done in parallel.
def : InstRW<
  [P9_LS_4C, P9_LS_4C, IP_AGEN_1C, IP_AGEN_1C, DISP_PAIR_1C],
  (instrs
    SLBIA, SLBIE, SLBMFEE, SLBMFEV, SLBMTE, TLBIEL
  )>;
782
// Cracked load instruction.
// Made of a Load piece (P9_LS_4C) and an ALU piece (P9_ALU_2C), 6 cycles of
// work in total. The two pieces can be run in parallel.
def : InstRW<
  [P9_LS_4C, P9_ALU_2C, IP_EXEC_1C, IP_AGEN_1C, DISP_PAIR_1C, DISP_PAIR_1C],
  (instrs
    (instregex "L(W|H)ZU(X)?(8)?$")
  )>;
791
// Cracked TEND instruction.
// Made of a Load piece (P9_LS_4C) and an ALU piece (P9_ALU_2C), 6 cycles of
// work in total. The two pieces can be run in parallel.
def : InstRW<
  [P9_LS_4C, P9_ALU_2C, IP_EXEC_1C, IP_AGEN_1C, DISP_1C, DISP_1C],
  (instrs
    TEND
  )>;
800
801
// Cracked store instruction.
// A Store followed by an ALU operation. The store is Restricted and so
// requires three dispatches; the ALU piece takes one more.
def : InstRW<
  [P9_StoreAndALUOp_3C, IP_EXEC_1C, IP_EXEC_1C, IP_AGEN_1C,
   DISP_3SLOTS_1C, DISP_1C],
  (instrs
    (instregex "ST(B|H|W|D)CX$")
  )>;
810
// Cracked load instruction.
// A Load piece followed by an ALU piece, 6 cycles in total. The two pieces
// cannot overlap, so their latencies are added.
def : InstRW<
  [P9_LoadAndALUOp_6C, IP_EXEC_1C, IP_AGEN_1C, DISP_1C, DISP_1C],
  (instrs
    // Opcodes selected by pattern.
    (instregex "LHA(X)?(8)?$"),
    (instregex "CP_PASTE(8)?_rec$"),
    (instregex "LWA(X)?(_32)?$"),
    // Opcodes listed by name.
    TCHECK
  )>;
822
// Cracked, Restricted load instruction.
// A Load piece followed by an ALU piece, 6 cycles in total. The two pieces
// cannot overlap, so their latencies are added. Being both cracked and
// Restricted, the instruction needs a full 6 dispatches.
def : InstRW<
  [P9_LoadAndALUOp_6C, IP_EXEC_1C, IP_AGEN_1C,
   DISP_3SLOTS_1C, DISP_3SLOTS_1C],
  (instrs
    LFIWAX
  )>;
832
// Cracked load instruction.
// A Load piece followed by an ALU piece, 7 cycles in total. The two pieces
// cannot overlap, so their latencies are added. As a cracked instruction it
// is given one DISP_1C entry per piece.
def : InstRW<
  [P9_LoadAndALUOp_7C, IP_AGEN_1C, IP_EXEC_1C, DISP_1C, DISP_1C],
  (instrs
    LXSIWAX, LIWAX
  )>;
842
// Cracked load instruction.
// A Load piece (4 cycles) followed by an ALU piece (3 cycles), 7 cycles in
// total. The two pieces cannot overlap, so their latencies are added.
// This is a Restricted instruction, so a full 6 dispatches are required.
def : InstRW<
  [P9_LoadAndALU2Op_7C, IP_AGEN_1C, IP_EXEC_1C,
   DISP_3SLOTS_1C, DISP_3SLOTS_1C],
  (instrs
    LFSX, LFS
  )>;
854
// Cracked load instruction.
// A Load piece followed by an ALU piece, 8 cycles in total. The two pieces
// cannot overlap, so their latencies are added. As a cracked instruction it
// is given one DISP_1C entry per piece.
def : InstRW<
  [P9_LoadAndALU2Op_8C, IP_AGEN_1C, IP_EXEC_1C, DISP_1C, DISP_1C],
  (instrs
    LXSSP, LXSSPX, XFLOADf32, DFLOADf32
  )>;
866
// Cracked three-way load instruction.
// A Load plus two ALU operations that depend on each other.
def : InstRW<
  [P9_LoadAndALUOp_6C, P9_ALU_2C, IP_AGEN_1C, IP_EXEC_1C, IP_EXEC_1C,
   DISP_PAIR_1C, DISP_PAIR_1C, DISP_1C],
  (instrs
    // Opcodes selected by pattern.
    (instregex "LHAU(X)?(8)?$"),
    // Opcodes listed by name.
    LWAUX
  )>;
875
// Cracked load that also needs the PM resource.
// The Load and the PM piece cannot overlap, so their latencies are added for
// a total of 8 cycles. The PM piece needs the full superslice: both the
// EXECE and EXECO pipelines plus 1 dispatch. The Load piece takes the
// remaining 1 dispatch.
def : InstRW<
  [P9_LoadAndPMOp_8C, IP_AGEN_1C, IP_EXECE_1C, IP_EXECO_1C,
   DISP_1C, DISP_1C],
  (instrs
    LXVH8X, LXVDSX, LXVW4X
  )>;
888
// Restricted store operation on a single slice.
// Being Restricted, it requires all three dispatches of the superslice.
def : InstRW<
  [P9_LS_1C, IP_EXEC_1C, IP_AGEN_1C, DISP_3SLOTS_1C],
  (instrs
    // Opcodes selected by pattern.
    (instregex "STF(S|D|IWX|SX|DX)$"),
    (instregex "STXS(D|DX|SPX|IWX|IBX|IHX|SP)(v)?$"),
    (instregex "STW(8)?$"),
    (instregex "(D|X)FSTORE(f32|f64)$"),
    (instregex "ST(W|H|D)BRX$"),
    (instregex "ST(B|H|D)(8)?$"),
    (instregex "ST(B|W|H|D)(CI)?X(TLS|TLS_32)?(8)?(_)?$"),
    // Opcodes listed by name.
    STIWX, SLBIEG, STMW, STSWI, TLBIE
  )>;
906
// Vector store instruction.
// Needs the whole superslice: one dispatch plus both the Even and the Odd
// exec pipelines.
def : InstRW<
  [P9_LS_1C, IP_EXECE_1C, IP_EXECO_1C, IP_AGEN_1C, DISP_1C],
  (instrs
    (instregex "STVE(B|H|W)X$"),
    (instregex "STVX(L)?$"),
    (instregex "STXV(B16X|H8X|W4X|D2X|L|LL|X)?$")
  )>;
916
// DIV operation with a 5 cycle latency.
// A superslice has only one DIV unit, so the whole superslice is used: both
// exec pipelines (EXECO, EXECE). Dispatch is modeled with DISP_EVEN_1C.
def : InstRW<
  [P9_DIV_5C, IP_EXECE_1C, IP_EXECO_1C, DISP_EVEN_1C],
  (instrs
    (instregex "MTCTR(8)?(loop)?$"),
    (instregex "MTLR(8)?$")
  )>;
925
// DIV operation with a 12 cycle latency.
// A superslice has only one DIV unit, so the whole superslice is used: both
// exec pipelines (EXECO, EXECE). Dispatch is modeled with DISP_EVEN_1C.
def : InstRW<
  [P9_DIV_12C, IP_EXECE_1C, IP_EXECO_1C, DISP_EVEN_1C],
  (instrs
    (instregex "M(T|F)VRSAVE(v)?$"),
    (instregex "M(T|F)PMR$"),
    (instregex "M(T|F)TB(8)?$"),
    (instregex "MF(SPR|CTR|LR)(8)?$"),
    (instregex "M(T|F)MSR(D)?$"),
    (instregex "M(T|F)(U)?DSCR$"),
    (instregex "MTSPR(8)?$")
  )>;
939
// DIV operation with a 16 cycle latency.
// A superslice has only one DIV unit, so the whole superslice is used: both
// exec pipelines (EXECO, EXECE). Dispatch is modeled with DISP_EVEN_1C.
def : InstRW<
  [P9_DIV_16C_8, IP_EXECO_1C, IP_EXECE_1C, DISP_EVEN_1C],
  (instrs
    DIVW, DIVWO, DIVWU, DIVWUO, MODSW
  )>;
951
// DIV operation with a 24 cycle latency.
// A superslice has only one DIV unit, so the whole superslice is used: both
// exec pipelines (EXECO, EXECE). Dispatch is modeled with DISP_EVEN_1C.
def : InstRW<
  [P9_DIV_24C_8, IP_EXECO_1C, IP_EXECE_1C, DISP_EVEN_1C],
  (instrs
    DIVWE, DIVWEO, DIVD, DIVDO,
    DIVWEU, DIVWEUO, DIVDU, DIVDUO,
    MODSD, MODUD, MODUW
  )>;
969
// DIV operation with a 40 cycle latency.
// A superslice has only one DIV unit, so the whole superslice is used: both
// exec pipelines (EXECO, EXECE). Dispatch is modeled with DISP_EVEN_1C, the
// same as the other DIV groups.
def : InstRW<
  [P9_DIV_40C_8, IP_EXECO_1C, IP_EXECE_1C, DISP_EVEN_1C],
  (instrs
    DIVDE, DIVDEO, DIVDEU, DIVDEUO
  )>;
980
// Cracked DIV plus ALU operation.
// The ALU piece needs one full slice; the DIV piece needs one full
// superslice, because each superslice has only one DIV unit. The combined
// DIV plus ALU latency is 18, matching the P9_IntDivAndALUOp_18C_8 resource.
def : InstRW<
  [P9_IntDivAndALUOp_18C_8, IP_EXECE_1C, IP_EXECO_1C, IP_EXEC_1C,
   DISP_EVEN_1C, DISP_1C],
  (instrs
    (instregex "DIVW(U)?(O)?_rec$")
  )>;
989
// Cracked DIV plus ALU operation.
// The ALU piece needs one full slice; the DIV piece needs one full
// superslice, because each superslice has only one DIV unit. The combined
// DIV plus ALU latency is 26.
def : InstRW<
  [P9_IntDivAndALUOp_26C_8, IP_EXECE_1C, IP_EXECO_1C, IP_EXEC_1C,
   DISP_EVEN_1C, DISP_1C],
  (instrs
    DIVD_rec, DIVDO_rec, DIVDU_rec, DIVDUO_rec,
    DIVWE_rec, DIVWEO_rec, DIVWEU_rec, DIVWEUO_rec
  )>;
1005
// Cracked DIV plus ALU operation.
// The ALU piece needs one full slice; the DIV piece needs one full
// superslice, because each superslice has only one DIV unit. The combined
// DIV plus ALU latency is 42.
def : InstRW<
  [P9_IntDivAndALUOp_42C_8, IP_EXECE_1C, IP_EXECO_1C, IP_EXEC_1C,
   DISP_EVEN_1C, DISP_1C],
  (instrs
    DIVDE_rec, DIVDEO_rec, DIVDEU_rec, DIVDEUO_rec
  )>;
1017
1018// CR access instructions in _BrMCR, IIC_BrMCRX.
1019
// Cracked, Restricted ALU operations.
// The two ALU ops (2 cycles each) can run in parallel, so their latencies
// are not added together. In effect this is two instructions running side by
// side on two pipelines, using 6 dispatches.
def : InstRW<
  [P9_ALU_2C, P9_ALU_2C, IP_EXEC_1C, IP_EXEC_1C,
   DISP_3SLOTS_1C, DISP_3SLOTS_1C],
  (instrs
    MTCRF, MTCRF8
  )>;
1031
// Cracked ALU operations.
// The two ALU ops (2 cycles each) can run in parallel, so their latencies
// are not added together. In effect this is two instructions running side by
// side on two pipelines, using 2 dispatches.
def : InstRW<
  [P9_ALU_2C, P9_ALU_2C, IP_EXEC_1C, IP_EXEC_1C, DISP_1C, DISP_1C],
  (instrs
    (instregex "ADDC(8)?(O)?_rec$"),
    (instregex "SUBFC(8)?(O)?_rec$")
  )>;
1043
// Cracked ALU operations.
// The two ALU ops can run in parallel. One is a three cycle ALU op, the
// other is a two cycle ALU op. Only one of them is Restricted, which is
// modeled as DISP_3SLOTS_1C for that op plus DISP_1C for the other.
def : InstRW<
  [P9_ALU_2C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C, DISP_3SLOTS_1C, DISP_1C],
  (instrs
    // Opcodes selected by pattern.
    (instregex "F(N)?ABS(D|S)_rec$"),
    (instregex "FCPSGN(D|S)_rec$"),
    (instregex "FNEG(D|S)_rec$"),
    // Opcodes listed by name.
    FMR_rec
  )>;
1057
// Cracked ALU operations.
// The two ALU ops (3 cycles each) can run in parallel, so their latencies
// are not added together. In effect this is two instructions running side by
// side on two pipelines, using 2 dispatches.
def : InstRW<
  [P9_ALU_3C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C, DISP_1C, DISP_1C],
  (instrs
    MCRFS
  )>;
1068
// Cracked, Restricted ALU operations.
// The two ALU ops (3 cycles each) can run in parallel, so their latencies
// are not added together. In effect this is two instructions running side by
// side on two pipelines, using 6 dispatches.
def : InstRW<
  [P9_ALU_3C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C,
   DISP_3SLOTS_1C, DISP_3SLOTS_1C],
  (instrs
    // Opcodes selected by pattern.
    (instregex "MTFSF(b|_rec)?$"),
    (instregex "MTFSFI(_rec)?$"),
    // Opcodes listed by name.
    MTFSFIb
  )>;
1081
// Cracked instruction built from two ALU ops.
// The two ops cannot run in parallel. One of them is Restricted and takes
// 3 dispatches.
def : InstRW<
  [P9_ALUOpAndALUOp_4C, IP_EXEC_1C, IP_EXEC_1C, DISP_3SLOTS_1C, DISP_1C],
  (instrs
    // Opcodes selected by pattern.
    (instregex "RLD(I)?C(R|L)_rec$"),
    (instregex "RLW(IMI|INM|NM)(8)?_rec$"),
    (instregex "SLW(8)?_rec$"),
    (instregex "SRAW(I)?_rec$"),
    (instregex "SRW(8)?_rec$"),
    // Opcodes listed by name.
    RLDICL_32_rec, RLDIMI_rec
  )>;
1096
// Cracked instruction built from two ALU ops.
// The two ops cannot run in parallel. Both are Restricted and each takes
// 3 dispatches.
def : InstRW<
  [P9_ALU2OpAndALU2Op_6C, IP_EXEC_1C, IP_EXEC_1C,
   DISP_3SLOTS_1C, DISP_3SLOTS_1C],
  (instrs
    (instregex "MFFS(L|CE|_rec)?$")
  )>;
1105
// Cracked ALU instruction composed of three consecutive 2 cycle ALU operations
// for a total of 6 cycles. All of the ALU operations are also restricted so
// each takes 3 dispatches for a total of 9.
def : InstRW<
    [// Execution resources.
     P9_ALUOpAndALUOpAndALUOp_6C,
     // Pipelines.
     IP_EXEC_1C, IP_EXEC_1C, IP_EXEC_1C,
     // Dispatches (3 slots each).
     DISP_3SLOTS_1C, DISP_3SLOTS_1C, DISP_3SLOTS_1C],
    (instrs (instregex "MFCR(8)?$"))>;
1114
1115// Cracked instruction made of two ALU ops.
1116// The two ops cannot be done in parallel.
def : InstRW<
    [P9_ALUOpAndALUOp_4C,     // Execution resources.
     IP_EXEC_1C, IP_EXEC_1C,  // Pipelines.
     DISP_1C, DISP_1C],       // Dispatches.
    (instrs (instregex "EXTSWSLI_32_64_rec$"),
            (instregex "SRAD(I)?_rec$"),
            EXTSWSLI_rec, SLD_rec, SRD_rec, RLDIC_rec)>;
1126
1127// 33 Cycle DP Instruction Restricted. Takes one slice and 3 dispatches.
def : InstRW<[P9_DP_33C_8, IP_EXEC_1C, DISP_3SLOTS_1C], (instrs FDIV)>;
1132
1133// 33 Cycle DP Instruction Restricted and Cracked with 3 Cycle ALU.
def : InstRW<
    [P9_DPOpAndALU2Op_36C_8,    // Execution resources.
     IP_EXEC_1C, IP_EXEC_1C,    // Pipelines.
     DISP_3SLOTS_1C, DISP_1C],  // Dispatches: one 3-slot, one single.
    (instrs FDIV_rec)>;
1139
1140// 36 Cycle DP Instruction.
1141// Instruction can be done on a single slice.
def : InstRW<[P9_DP_36C_10, IP_EXEC_1C, DISP_1C], (instrs XSSQRTDP)>;
1146
1147// 36 Cycle DP Instruction Restricted. Takes one slice and 3 dispatches.
def : InstRW<[P9_DP_36C_10, IP_EXEC_1C, DISP_3SLOTS_1C], (instrs FSQRT)>;
1152
1153// 36 Cycle DP Vector Instruction.
def : InstRW<
    [P9_DPE_36C_10, P9_DPO_36C_10,  // Execution resources.
     IP_EXECE_1C, IP_EXECO_1C,      // EXECE and EXECO pipelines.
     DISP_1C],                      // Single dispatch.
    (instrs XVSQRTDP)>;
1159
1160// 27 Cycle DP Vector Instruction.
def : InstRW<
    [P9_DPE_27C_10, P9_DPO_27C_10,  // Execution resources.
     IP_EXECE_1C, IP_EXECO_1C,      // EXECE and EXECO pipelines.
     DISP_1C],                      // Single dispatch.
    (instrs XVSQRTSP)>;
1166
1167// 36 Cycle DP Instruction Restricted and Cracked with 3 Cycle ALU.
def : InstRW<
    [P9_DPOpAndALU2Op_39C_10,   // Execution resources.
     IP_EXEC_1C, IP_EXEC_1C,    // Pipelines.
     DISP_3SLOTS_1C, DISP_1C],  // Dispatches: one 3-slot, one single.
    (instrs FSQRT_rec)>;
1173
1174// 26 Cycle DP Instruction.
def : InstRW<[P9_DP_26C_5, IP_EXEC_1C, DISP_1C], (instrs XSSQRTSP)>;
1179
1180// 26 Cycle DP Instruction Restricted. Takes one slice and 3 dispatches.
def : InstRW<[P9_DP_26C_5, IP_EXEC_1C, DISP_3SLOTS_1C], (instrs FSQRTS)>;
1185
1186// 26 Cycle DP Instruction Restricted and Cracked with 3 Cycle ALU.
def : InstRW<
    [P9_DPOpAndALU2Op_29C_5,    // Execution resources.
     IP_EXEC_1C, IP_EXEC_1C,    // Pipelines.
     DISP_3SLOTS_1C, DISP_1C],  // Dispatches: one 3-slot, one single.
    (instrs FSQRTS_rec)>;
1192
1193// 33 Cycle DP Instruction. Takes one slice and 1 dispatch.
def : InstRW<[P9_DP_33C_8, IP_EXEC_1C, DISP_1C], (instrs XSDIVDP)>;
1198
1199// 22 Cycle DP Instruction Restricted. Takes one slice and 3 dispatches.
def : InstRW<[P9_DP_22C_5, IP_EXEC_1C, DISP_3SLOTS_1C], (instrs FDIVS)>;
1204
// 22 Cycle DP Instruction Restricted and Cracked with 3 Cycle ALU.
def : InstRW<
    [P9_DPOpAndALU2Op_25C_5,    // Execution resources.
     IP_EXEC_1C, IP_EXEC_1C,    // Pipelines.
     DISP_3SLOTS_1C, DISP_1C],  // Dispatches: one 3-slot, one single.
    (instrs FDIVS_rec)>;
1211
1212// 22 Cycle DP Instruction. Takes one slice and 1 dispatch.
def : InstRW<[P9_DP_22C_5, IP_EXEC_1C, DISP_1C], (instrs XSDIVSP)>;
1217
1218// 24 Cycle DP Vector Instruction. Takes one full superslice.
1219// Includes both EXECE, EXECO pipelines and 1 dispatch for the given
1220// superslice.
def : InstRW<
    [P9_DPE_24C_8, P9_DPO_24C_8,  // Execution resources.
     IP_EXECE_1C, IP_EXECO_1C,    // EXECE and EXECO pipelines.
     DISP_1C],                    // Single dispatch.
    (instrs XVDIVSP)>;
1226
1227// 33 Cycle DP Vector Instruction. Takes one full superslice.
1228// Includes both EXECE, EXECO pipelines and 1 dispatch for the given
1229// superslice.
def : InstRW<
    [P9_DPE_33C_8, P9_DPO_33C_8,  // Execution resources.
     IP_EXECE_1C, IP_EXECO_1C,    // EXECE and EXECO pipelines.
     DISP_1C],                    // Single dispatch.
    (instrs XVDIVDP)>;
1235
// Instruction cracked into three pieces. One Load and two ALU operations.
// The Load and one of the ALU ops cannot be run at the same time and so the
// latencies are added together for 7 cycles. The remaining ALU is 2 cycles.
// Both the load and the ALU that depends on it are restricted and so they take
// a total of 6 dispatches. The final dispatch comes from the second ALU op.
// The two EXEC pipelines are for the 2 ALUs while the AGEN is for the load.
def : InstRW<
    [// Execution resources.
     P9_LoadAndALU2Op_7C, P9_ALU_2C,
     // Pipelines.
     IP_AGEN_1C, IP_EXEC_1C, IP_EXEC_1C,
     // Dispatches: two 3-slot, one single.
     DISP_3SLOTS_1C, DISP_3SLOTS_1C, DISP_1C],
    (instrs (instregex "LF(SU|SUX)$"))>;
1248
1249// Cracked instruction made up of a Store and an ALU. The ALU does not depend on
1250// the store and so it can be run at the same time as the store. The store is
1251// also restricted.
def : InstRW<
    [// Execution resources.
     P9_LS_1C, P9_ALU_2C,
     // Pipelines.
     IP_AGEN_1C, IP_EXEC_1C, IP_EXEC_1C,
     // Dispatches: one 3-slot, one single.
     DISP_3SLOTS_1C, DISP_1C],
    (instrs (instregex "STF(S|D)U(X)?$"),
            (instregex "ST(B|H|W|D)U(X)?(8)?$"))>;
1258
1259// Cracked instruction made up of a Load and an ALU. The ALU does not depend on
1260// the load and so it can be run at the same time as the load.
def : InstRW<
    [P9_LS_4C, P9_ALU_2C,          // Execution resources.
     IP_AGEN_1C, IP_EXEC_1C,       // Pipelines.
     DISP_PAIR_1C, DISP_PAIR_1C],  // Dispatches.
    (instrs (instregex "LBZU(X)?(8)?$"),
            (instregex "LDU(X)?$"))>;
1267
// Cracked instruction made up of a Load and an ALU. The ALU does not depend on
// the load and so it can be run at the same time as the load. The load is also
// restricted. 3 dispatches are from the restricted load while the remaining
// one is from the ALU. The AGEN pipeline is from the load and the EXEC
// pipeline is required for the ALU.
def : InstRW<
    [P9_LS_4C, P9_ALU_2C,       // Execution resources.
     IP_AGEN_1C, IP_EXEC_1C,    // Pipelines.
     DISP_3SLOTS_1C, DISP_1C],  // Dispatches: one 3-slot, one single.
    (instrs (instregex "LF(DU|DUX)$"))>;
1278
1279// Crypto Instructions
1280
1281// 6 Cycle CY operation. Only one CY unit per CPU so we use a whole
1282// superslice. That includes both exec pipelines (EXECO, EXECE) and one
1283// dispatch.
def : InstRW<
    [P9_CY_6C,                  // Execution resource.
     IP_EXECO_1C, IP_EXECE_1C,  // EXECO and EXECE pipelines.
     DISP_1C],                  // Single dispatch.
    (instrs (instregex "VPMSUM(B|H|W|D)$"),
            (instregex "V(N)?CIPHER(LAST)?$"),
            VSBOX)>;
1290
1291// Branch Instructions
1292
1293// Two Cycle Branch
def : InstRW<[P9_BR_2C, DISP_BR_1C], (instrs
    // Opcode families selected by regular expression.
    (instregex "BCCCTR(L)?(8)?$"),
    (instregex "BCCL(A|R|RL)?$"),
    (instregex "BCCTR(L)?(8)?(n)?$"),
    (instregex "BD(N)?Z(8|A|Am|Ap|m|p)?$"),
    (instregex "BD(N)?ZL(A|Am|Ap|R|R8|RL|RLm|RLp|Rm|Rp|m|p)?$"),
    (instregex "BL(_TLS|_NOP)?(_RM)?$"),
    (instregex "BL8(_TLS|_NOP|_NOP_TLS|_TLS_)?(_RM)?$"),
    (instregex "BLA(8|8_NOP)?(_RM)?$"),
    (instregex "BLR(8|L)?$"),
    (instregex "TAILB(A)?(8)?$"),
    (instregex "TAILBCTR(8)?$"),
    (instregex "gBC(A|Aat|CTR|CTRL|L|LA|LAat|LR|LRL|Lat|at)?$"),
    (instregex "BCLR(L)?(n)?$"),
    (instregex "BCTR(L)?(8)?(_RM)?$"),
    // Individually named opcodes.
    B, BA, BC, BCC, BCCA, BCL, BCLalways, BCLn,
    BCTRL8_LDinto_toc, BCTRL_LWZinto_toc,
    BCTRL8_LDinto_toc_RM, BCTRL_LWZinto_toc_RM,
    BCn, CTRL_DEP)>;
1325
1326// Five Cycle Branch with a 2 Cycle ALU Op
1327// Operations must be done consecutively and not in parallel.
def : InstRW<
    [P9_BROpAndALUOp_7C,    // Execution resources.
     IP_EXEC_1C,            // Pipeline.
     DISP_BR_1C, DISP_1C],  // Dispatches.
    (instrs ADDPCIS)>;
1332
1333// Special Extracted Instructions For Atomics
1334
1335// Atomic Load
def : InstRW<
    [// Execution resources.
     P9_LS_1C, P9_LS_1C, P9_LS_4C, P9_LS_4C, P9_LS_4C,
     // Pipelines: two EXEC followed by five AGEN.
     IP_EXEC_1C, IP_EXEC_1C,
     IP_AGEN_1C, IP_AGEN_1C, IP_AGEN_1C, IP_AGEN_1C, IP_AGEN_1C,
     // Dispatches.
     DISP_1C, DISP_3SLOTS_1C, DISP_3SLOTS_1C, DISP_1C, DISP_1C, DISP_1C],
    (instrs (instregex "L(D|W)AT$"))>;
1343
1344// Atomic Store
def : InstRW<
    [// Execution resources.
     P9_LS_1C, P9_LS_4C, P9_LS_4C,
     // Pipelines: one EXEC followed by three AGEN.
     IP_EXEC_1C, IP_AGEN_1C, IP_AGEN_1C, IP_AGEN_1C,
     // Dispatches.
     DISP_1C, DISP_3SLOTS_1C, DISP_1C],
    (instrs (instregex "ST(D|W)AT$"))>;
1350
1351// Signal Processing Engine (SPE) Instructions
1352// These instructions are not supported on Power 9
// No resources are listed; the record only flags these opcodes as Unsupported.
let Unsupported = 1 in
def : InstRW<[], (instrs
    // Individually named opcodes.
    BRINC, EVABS, EVEQV, EVMRA, EVNAND, EVNEG,
    // Opcode families selected by regular expression.
    (instregex "EVADD(I)?W$"),
    (instregex "EVADD(SM|SS|UM|US)IAAW$"),
    (instregex "EVAND(C)?$"),
    (instregex "EVCMP(EQ|GTS|GTU|LTS|LTU)$"),
    (instregex "EVCNTL(S|Z)W$"),
    (instregex "EVDIVW(S|U)$"),
    (instregex "EVEXTS(B|H)$"),
    (instregex "EVLD(H|W|D)(X)?$"),
    (instregex "EVLHH(E|OS|OU)SPLAT(X)?$"),
    (instregex "EVLWHE(X)?$"),
    (instregex "EVLWHO(S|U)(X)?$"),
    (instregex "EVLW(H|W)SPLAT(X)?$"),
    (instregex "EVMERGE(HI|LO|HILO|LOHI)$"),
    (instregex "EVMHEG(S|U)M(F|I)A(A|N)$"),
    (instregex "EVMHES(M|S)(F|I)(A|AA|AAW|ANW)?$"),
    (instregex "EVMHEU(M|S)I(A|AA|AAW|ANW)?$"),
    (instregex "EVMHOG(U|S)M(F|I)A(A|N)$"),
    (instregex "EVMHOS(M|S)(F|I)(A|AA|AAW|ANW)?$"),
    (instregex "EVMHOU(M|S)I(A|AA|ANW|AAW)?$"),
    (instregex "EVMWHS(M|S)(F|FA|I|IA)$"),
    (instregex "EVMWHUMI(A)?$"),
    (instregex "EVMWLS(M|S)IA(A|N)W$"),
    (instregex "EVMWLU(M|S)I(A|AA|AAW|ANW)?$"),
    (instregex "EVMWSM(F|I)(A|AA|AN)?$"),
    (instregex "EVMWSSF(A|AA|AN)?$"),
    (instregex "EVMWUMI(A|AA|AN)?$"),
    (instregex "EV(N|X)?OR(C)?$"),
    (instregex "EVR(LW|LWI|NDW)$"),
    (instregex "EVSLW(I)?$"),
    (instregex "EVSPLAT(F)?I$"),
    (instregex "EVSRW(I)?(S|U)$"),
    (instregex "EVST(DD|DH|DW|WHE|WHO|WWE|WWO)(X)?$"),
    (instregex "EVSUBF(S|U)(M|S)IAAW$"),
    (instregex "EVSUB(I)?FW$"))>;
1396
1397// General Instructions without scheduling support.
// No resources are listed; the record only flags these opcodes as Unsupported.
let Unsupported = 1 in
def : InstRW<[], (instrs
    // Opcode families selected by regular expression.
    (instregex "(H)?RFI(D)?$"),
    (instregex "DSS(ALL)?$"),
    (instregex "DST(ST)?(T)?(64)?$"),
    (instregex "ICBL(C|Q)$"),
    (instregex "L(W|H|B)EPX$"),
    (instregex "ST(W|H|B)EPX$"),
    (instregex "(L|ST)FDEPX$"),
    (instregex "M(T|F)SR(IN)?$"),
    (instregex "M(T|F)DCR$"),
    (instregex "NOP_GT_PWR(6|7)$"),
    (instregex "TLB(IA|IVAX|SX|SX2|SX2D|LD|LI|RE|RE2|WE|WE2)$"),
    (instregex "WRTEE(I)?$"),
    (instregex "HASH(ST|STP|CHK|CHKP)(8)?$"),
    // Individually named opcodes.
    ATTN, CLRBHRB, MFBHRBE, MBAR, MSYNC, SLBSYNC, SLBFEE_rec,
    NAP, STOP, TRAP, RFCI, RFDI, RFMCI, SC,
    DCBA, DCBI, DCCCI, ICCCI, ADDEX, ADDEX8)>;
1434