1//===- P9InstrResources.td - P9 Instruction Resource Defs  -*- tablegen -*-==//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the resources required by P9 instructions. This is part of
10// the P9 processor model used for instruction scheduling. This file should
11// contain all the instructions that may be used on Power 9. This is not
12// just instructions that are new on Power 9 but also instructions that were
13// available on earlier architectures and are still used in Power 9.
14//
15// The makeup of the P9 CPU is modeled as follows:
16//   - Each CPU is made up of two superslices.
17//   - Each superslice is made up of two slices. Therefore, there are 4 slices
18//   for each CPU.
19//   - Up to 6 instructions can be dispatched to each CPU. Three per superslice.
20//   - Each CPU has:
21//     - One CY (Crypto) unit P9_CY_*
22//     - One DFU (Decimal Floating Point and Quad Precision) unit P9_DFU_*
23//     - Two PM (Permute) units. One on each superslice. P9_PM_*
//     - Two DIV (Fixed Point Divide) units. One on each superslice. P9_DIV_*
25//     - Four ALU (Fixed Point Arithmetic) units. One on each slice. P9_ALU_*
26//     - Four DP (Floating Point) units. One on each slice. P9_DP_*
27//       This also includes fixed point multiply add.
28//     - Four AGEN (Address Generation) units. One for each slice. P9_AGEN_*
29//     - Four Load/Store Queues. P9_LS_*
30//   - Each set of instructions will require a number of these resources.
31//===----------------------------------------------------------------------===//
32
// ALU vector operations with a two cycle latency that occupy a whole
// superslice: the even and odd ALU units (ALUE, ALUO), both execution
// pipelines (EXECE, EXECO) and a single dispatch (DISP) on that superslice.
def : InstRW<
  [P9_ALUE_2C, P9_ALUO_2C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
  (instrs
    // Instruction families selected by opcode-name pattern.
    (instregex "VADDU(B|H|W|D)M$"),
    (instregex "VAND(C)?$"),
    (instregex "VEXTS(B|H|W)2(D|W)(s)?$"),
    (instregex "V_SET0(B|H)?$"),
    (instregex "VS(R|L)(B|H|W|D)$"),
    (instregex "VSUBU(B|H|W|D)M$"),
    (instregex "VPOPCNT(B|H)$"),
    (instregex "VRL(B|H|W|D)$"),
    (instregex "VSRA(B|H|W|D)$"),
    (instregex "XV(N)?ABS(D|S)P$"),
    (instregex "XVCPSGN(D|S)P$"),
    (instregex "XV(I|X)EXP(D|S)P$"),
    (instregex "VRL(D|W)(MI|NM)$"),
    (instregex "VMRG(E|O)W$"),
    // Individually listed opcodes.
    MTVSRDD, VEQV, VNAND, VNEGD, VNEGW, VNOR, VOR, VORC, VSEL, VXOR,
    XVNEGDP, XVNEGSP,
    XXLAND, XXLANDC, XXLEQV, XXLEQVOnes, XXLNAND, XXLNOR, XXLOR, XXLORf,
    XXLORC, XXLXOR, XXLXORdpz, XXLXORspz, XXLXORz, XXSEL,
    XSABSQP, XSCPSGNQP, XSIEXPQP, XSNABSQP, XSNEGQP, XSXEXPQP
  )>;
85
// Three cycle ALU operations with Restricted dispatch. Execution needs only a
// single slice, but a Restricted operation takes all 3 dispatches (DISP) of
// its superslice.
def : InstRW<
  [P9_ALU_3C, IP_EXEC_1C, DISP_3SLOTS_1C],
  (instrs
    (instregex "TABORT(D|W)C(I)?$"),
    (instregex "MTFSB(0|1)$"),
    (instregex "MFFSC(D)?RN(I)?$"),
    (instregex "CMPRB(8)?$"),
    (instregex "TD(I)?$"),
    (instregex "TW(I)?$"),
    (instregex "FCMP(O|U)(S|D)$"),
    (instregex "XSTSTDC(S|D)P$"),
    FTDIV, FTSQRT, CMPEQB
  )>;
103
// Three cycle ALU operations with Standard dispatch. A single slice is used.
def : InstRW<
  [P9_ALU_3C, IP_EXEC_1C, DISP_1C],
  (instrs
    (instregex "XSMAX(C|J)?DP$"),
    (instregex "XSMIN(C|J)?DP$"),
    (instregex "XSCMP(EQ|EXP|GE|GT|O|U)DP$"),
    (instregex "CNT(L|T)Z(D|W)(8)?(_rec)?$"),
    (instregex "POPCNT(D|W)$"),
    (instregex "CMPB(8)?$"),
    (instregex "SETB(8)?$"),
    XSTDIVDP, XSTSQRTDP, XSXSIGDP, XSCVSPDPN, BPERMD
  )>;
120
// Two cycle ALU operations with Standard dispatch. A single slice is used.
def : InstRW<
  [P9_ALU_2C, IP_EXEC_1C, DISP_1C],
  (instrs
    // Instruction families selected by opcode-name pattern. Several of these
    // patterns overlap; the result is the union of everything they match.
    (instregex "S(L|R)D$"),
    (instregex "SRAD(I)?$"),
    (instregex "EXTSWSLI_32_64$"),
    (instregex "MFV(S)?RD$"),
    (instregex "MTV(S)?RD$"),
    (instregex "MTV(S)?RW(A|Z)$"),
    (instregex "CMP(WI|LWI|W|LW)(8)?$"),
    (instregex "CMP(L)?D(I)?$"),
    (instregex "SUBF(I)?C(8)?(O)?$"),
    (instregex "ANDI(S)?(8)?(_rec)?$"),
    (instregex "ADDC(8)?(O)?$"),
    (instregex "ADDIC(8)?(_rec)?$"),
    (instregex "ADD(8|4)(O)?(_rec)?$"),
    (instregex "ADD(E|ME|ZE)(8)?(O)?(_rec)?$"),
    (instregex "SUBF(E|ME|ZE)?(8)?(O)?(_rec)?$"),
    (instregex "NEG(8)?(O)?(_rec)?$"),
    (instregex "POPCNTB$"),
    (instregex "POPCNTB8$"),
    (instregex "ADD(I|IS)?(8)?$"),
    (instregex "LI(S)?(8)?$"),
    (instregex "(X)?OR(I|IS)?(8)?(_rec)?$"),
    (instregex "NAND(8)?(_rec)?$"),
    (instregex "AND(C)?(8)?(_rec)?$"),
    (instregex "NOR(8)?(_rec)?$"),
    (instregex "OR(C)?(8)?(_rec)?$"),
    (instregex "EQV(8)?(_rec)?$"),
    (instregex "EXTS(B|H|W)(8)?(_32)?(_64)?(_rec)?$"),
    (instregex "ADD(4|8)(TLS)?(_)?$"),
    // Every name this pattern accepts is also accepted by the earlier
    // "NEG(8)?(O)?(_rec)?$" pattern in this list.
    (instregex "NEG(8)?(O)?$"),
    (instregex "ADDI(S)?toc(HA|L)(8)?$"),
    // Individually listed opcodes.
    COPY, MCRF, MCRXRX,
    XSNABSDP, XSXEXPDP, XSABSDP, XSNEGDP, XSCPSGNDP,
    MFVSRWZ, MFVRWZ, EXTSWSLI, SRADI_32, RLDIC, RFEBB, LA,
    TBEGIN, TRECHKPT, NOP, WAIT
  )>;
174
// Two cycle ALU operations with Restricted dispatch. Execution needs only a
// single slice, but a Restricted operation takes all 3 dispatches (DISP) of
// its superslice.
def : InstRW<
  [P9_ALU_2C, IP_EXEC_1C, DISP_3SLOTS_1C],
  (instrs
    (instregex "RLDC(L|R)$"),
    (instregex "RLWIMI(8)?$"),
    (instregex "RLDIC(L|R)(_32)?(_64)?$"),
    (instregex "M(F|T)OCRF(8)?$"),
    (instregex "CR(6)?(UN)?SET$"),
    (instregex "CR(N)?(OR|AND)(C)?$"),
    (instregex "S(L|R)W(8)?$"),
    (instregex "RLW(INM|NM)(8)?$"),
    (instregex "F(N)?ABS(D|S)$"),
    (instregex "FNEG(D|S)$"),
    (instregex "FCPSGN(D|S)$"),
    (instregex "SRAW(I)?$"),
    (instregex "ISEL(8)?$"),
    RLDIMI, XSIEXPDP, FMR, CREQV, CRXOR, TRECLAIM, TSR, TABORT
  )>;
202
// ALU vector operations with a three cycle latency that occupy a whole
// superslice: the even and odd ALU units (ALUE, ALUO), both execution
// pipelines (EXECE, EXECO) and a single dispatch (DISP) on that superslice.
def : InstRW<
  [P9_ALUE_3C, P9_ALUO_3C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
  (instrs
    // Instruction families selected by opcode-name pattern.
    (instregex "M(T|F)VSCR$"),
    (instregex "VCMPNEZ(B|H|W)$"),
    (instregex "VCMPEQU(B|H|W|D)$"),
    (instregex "VCMPNE(B|H|W)$"),
    (instregex "VABSDU(B|H|W)$"),
    (instregex "VADDU(B|H|W)S$"),
    (instregex "VAVG(S|U)(B|H|W)$"),
    (instregex "VCMP(EQ|GE|GT)FP(_rec)?$"),
    (instregex "VCMPBFP(_rec)?$"),
    (instregex "VC(L|T)Z(B|H|W|D)$"),
    (instregex "VADDS(B|H|W)S$"),
    (instregex "V(MIN|MAX)FP$"),
    (instregex "V(MIN|MAX)(S|U)(B|H|W|D)$"),
    // Individually listed opcodes.
    VBPERMD, VADDCUW, VPOPCNTW, VPOPCNTD, VPRTYBD, VPRTYBW,
    VSHASIGMAD, VSHASIGMAW,
    VSUBSBS, VSUBSHS, VSUBSWS, VSUBUBS, VSUBUHS, VSUBUWS, VSUBCUW,
    VCMPGTSB, VCMPGTSB_rec, VCMPGTSD, VCMPGTSD_rec,
    VCMPGTSH, VCMPGTSH_rec, VCMPGTSW, VCMPGTSW_rec,
    VCMPGTUB, VCMPGTUB_rec, VCMPGTUD, VCMPGTUD_rec,
    VCMPGTUH, VCMPGTUH_rec, VCMPGTUW, VCMPGTUW_rec,
    VCMPNEB_rec, VCMPNEH_rec, VCMPNEW_rec,
    VCMPNEZB_rec, VCMPNEZH_rec, VCMPNEZW_rec,
    VCMPEQUB_rec, VCMPEQUD_rec, VCMPEQUH_rec, VCMPEQUW_rec,
    XVCMPEQDP, XVCMPEQDP_rec, XVCMPEQSP, XVCMPEQSP_rec,
    XVCMPGEDP, XVCMPGEDP_rec, XVCMPGESP, XVCMPGESP_rec,
    XVCMPGTDP, XVCMPGTDP_rec, XVCMPGTSP, XVCMPGTSP_rec,
    XVMAXDP, XVMAXSP, XVMINDP, XVMINSP,
    XVTDIVDP, XVTDIVSP, XVTSQRTDP, XVTSQRTSP, XVTSTDCDP, XVTSTDCSP,
    XVXSIGDP, XVXSIGSP
  )>;
287
// DP vector operations with a seven cycle latency that occupy a whole
// superslice: the even and odd DP units (DPE, DPO), both execution pipelines
// (EXECE, EXECO) and a single dispatch (DISP_1C) on that superslice.
def : InstRW<
  [P9_DPE_7C, P9_DPO_7C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
  (instrs
    VADDFP, VCTSXS, VCTSXS_0, VCTUXS, VCTUXS_0, VEXPTEFP, VLOGEFP, VMADDFP,
    VMHADDSHS, VNMSUBFP, VREFP, VRFIM, VRFIN, VRFIP, VRFIZ, VRSQRTEFP, VSUBFP,
    XVADDDP, XVADDSP,
    XVCVDPSP, XVCVDPSXDS, XVCVDPSXWS, XVCVDPUXDS, XVCVDPUXWS,
    XVCVHPSP, XVCVSPDP, XVCVSPHP,
    XVCVSPSXDS, XVCVSPSXWS, XVCVSPUXDS, XVCVSPUXWS,
    XVCVSXDDP, XVCVSXDSP, XVCVSXWDP, XVCVSXWSP,
    XVCVUXDDP, XVCVUXDSP, XVCVUXWDP, XVCVUXWSP,
    XVMADDADP, XVMADDASP, XVMADDMDP, XVMADDMSP,
    XVMSUBADP, XVMSUBASP, XVMSUBMDP, XVMSUBMSP,
    XVMULDP, XVMULSP,
    XVNMADDADP, XVNMADDASP, XVNMADDMDP, XVNMADDMSP,
    XVNMSUBADP, XVNMSUBASP, XVNMSUBMDP, XVNMSUBMSP,
    XVRDPI, XVRDPIC, XVRDPIM, XVRDPIP, XVRDPIZ,
    XVREDP, XVRESP,
    XVRSPI, XVRSPIC, XVRSPIM, XVRSPIP, XVRSPIZ,
    XVRSQRTEDP, XVRSQRTESP, XVSUBDP, XVSUBSP,
    VCFSX, VCFSX_0, VCFUX, VCFUX_0,
    VMHRADDSHS, VMLADDUHM,
    VMSUMMBM, VMSUMSHM, VMSUMSHS, VMSUMUBM, VMSUMUHM, VMSUMUDM, VMSUMUHS,
    VMULESB, VMULESH, VMULESW, VMULEUB, VMULEUH, VMULEUW,
    VMULOSB, VMULOSH, VMULOSW, VMULOUB, VMULOUH, VMULOUW, VMULUWM,
    VSUM2SWS, VSUM4SBS, VSUM4SHS, VSUM4UBS, VSUMSWS
  )>;
398
// Restricted DP operations with a five cycle latency. Each needs one DP unit,
// one EXEC pipeline and all three dispatch units of the superslice.
def : InstRW<
  [P9_DP_5C, IP_EXEC_1C, DISP_3SLOTS_1C],
  (instrs
    (instregex "MADD(HD|HDU|LD|LD8)$"),
    (instregex "MUL(HD|HW|LD|LI|LI8|LW)(U)?(O)?$")
  )>;
406
// Restricted DP operations with a seven cycle latency. Each needs one DP unit,
// one EXEC pipeline and all three dispatch units of the superslice.
def : InstRW<
  [P9_DP_7C, IP_EXEC_1C, DISP_3SLOTS_1C],
  (instrs
    FRSP,
    (instregex "FRI(N|P|Z|M)(D|S)$"),
    (instregex "FRE(S)?$"),
    (instregex "FADD(S)?$"),
    (instregex "FMSUB(S)?$"),
    (instregex "FMADD(S)?$"),
    (instregex "FSUB(S)?$"),
    (instregex "FCFID(U)?(S)?$"),
    (instregex "FCTID(U)?(Z)?$"),
    (instregex "FCTIW(U)?(Z)?$"),
    (instregex "FRSQRTE(S)?$"),
    FNMADDS, FNMADD, FNMSUBS, FNMSUB, FSELD, FSELS, FMULS, FMUL,
    XSMADDADP, XSMADDASP, XSMADDMDP, XSMADDMSP,
    XSMSUBADP, XSMSUBASP, XSMSUBMDP, XSMSUBMSP,
    XSMULDP, XSMULSP,
    XSNMADDADP, XSNMADDASP, XSNMADDMDP, XSNMADDMSP,
    XSNMSUBADP, XSNMSUBASP, XSNMSUBMDP, XSNMSUBMSP
  )>;
449
// A seven cycle Restricted DP operation paired with a three cycle ALU
// operation; the two can run in parallel. Because the DP part is Restricted,
// a full 4 dispatches are needed (3 for the DP op plus 1 for the ALU op).
def : InstRW<
  [P9_DP_7C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C, DISP_3SLOTS_1C, DISP_1C],
  (instrs
    (instregex "FSEL(D|S)_rec$")
  )>;
458
// A five cycle Restricted DP operation combined with a two cycle ALU
// operation (modeled by the 7 cycle P9_DPOpAndALUOp_7C write).
def : InstRW<
  [P9_DPOpAndALUOp_7C, IP_EXEC_1C, IP_EXEC_1C, DISP_3SLOTS_1C, DISP_1C],
  (instrs
    (instregex "MUL(H|L)(D|W)(U)?(O)?_rec$")
  )>;
465
// A seven cycle Restricted DP operation followed by a three cycle ALU
// operation. The two must run sequentially. Because the DP part is
// Restricted, a full 4 dispatches are needed.
def : InstRW<
  [P9_DPOpAndALU2Op_10C, IP_EXEC_1C, IP_EXEC_1C, DISP_3SLOTS_1C, DISP_1C],
  (instrs
    (instregex "FRI(N|P|Z|M)(D|S)_rec$"),
    (instregex "FRE(S)?_rec$"),
    (instregex "FADD(S)?_rec$"),
    (instregex "FSUB(S)?_rec$"),
    (instregex "F(N)?MSUB(S)?_rec$"),
    (instregex "F(N)?MADD(S)?_rec$"),
    (instregex "FCFID(U)?(S)?_rec$"),
    (instregex "FCTID(U)?(Z)?_rec$"),
    (instregex "FCTIW(U)?(Z)?_rec$"),
    (instregex "FMUL(S)?_rec$"),
    (instregex "FRSQRTE(S)?_rec$"),
    FRSP_rec
  )>;
485
// DP operations with a seven cycle latency. Each needs one DP unit, one EXEC
// pipeline and one dispatch unit.
def : InstRW<
  [P9_DP_7C, IP_EXEC_1C, DISP_1C],
  (instrs
    XSADDDP, XSADDSP,
    XSCVDPHP, XSCVDPSP,
    XSCVDPSXDS, XSCVDPSXDSs, XSCVDPSXWS,
    XSCVDPUXDS, XSCVDPUXDSs, XSCVDPUXWS,
    XSCVDPSXWSs, XSCVDPUXWSs,
    XSCVHPDP, XSCVSPDP,
    XSCVSXDDP, XSCVSXDSP, XSCVUXDDP, XSCVUXDSP,
    XSRDPI, XSRDPIC, XSRDPIM, XSRDPIP, XSRDPIZ,
    XSREDP, XSRESP, XSRSQRTEDP, XSRSQRTESP,
    XSSUBDP, XSSUBSP,
    XSCVDPSPN, XSRSP
  )>;
521
// PM (Permute) operations with a three cycle latency. There is only one PM
// unit per superslice, so the whole superslice is used: both execution
// pipelines (EXECO, EXECE) and one dispatch.
def : InstRW<
  [P9_PM_3C, IP_EXECO_1C, IP_EXECE_1C, DISP_1C],
  (instrs
    // Instruction families selected by opcode-name pattern.
    (instregex "LVS(L|R)$"),
    (instregex "VSPLTIS(W|H|B)$"),
    (instregex "VSPLT(W|H|B)(s)?$"),
    (instregex "V_SETALLONES(B|H)?$"),
    (instregex "VEXTRACTU(B|H|W)$"),
    (instregex "VINSERT(B|H|W|D)$"),
    // Individually listed opcodes.
    MFVSRLD, MTVSRWS, VBPERMQ, VCLZLSBB, VCTZLSBB, VEXTRACTD,
    VEXTUBLX, VEXTUBRX, VEXTUHLX, VEXTUHRX, VEXTUWLX, VEXTUWRX,
    VGBBD,
    VMRGHB, VMRGHH, VMRGHW, VMRGLB, VMRGLH, VMRGLW,
    VPERM, VPERMR, VPERMXOR,
    VPKPX, VPKSDSS, VPKSDUS, VPKSHSS, VPKSHUS, VPKSWSS, VPKSWUS,
    VPKUDUM, VPKUDUS, VPKUHUM, VPKUHUS, VPKUWUM, VPKUWUS,
    VPRTYBQ,
    VSL, VSLDOI, VSLO, VSLV, VSR, VSRO, VSRV,
    VUPKHPX, VUPKHSB, VUPKHSH, VUPKHSW,
    VUPKLPX, VUPKLSB, VUPKLSH, VUPKLSW,
    XXBRD, XXBRH, XXBRQ, XXBRW,
    XXEXTRACTUW, XXINSERTW, XXMRGHW, XXMRGLW, XXPERM, XXPERMR,
    XXSLDWI, XXSLDWIs, XXSPLTIB, XXSPLTW, XXSPLTWs, XXPERMDI, XXPERMDIs,
    VADDCUQ, VADDECUQ, VADDEUQM, VADDUQM,
    VMUL10CUQ, VMUL10ECUQ, VMUL10EUQ, VMUL10UQ,
    VSUBCUQ, VSUBECUQ, VSUBEUQM, VSUBUQM,
    XSCMPEXPQP, XSCMPOQP, XSCMPUQP, XSTSTDCQP, XSXSIGQP,
    BCDCFN_rec, BCDCFZ_rec, BCDCPSGN_rec, BCDCTN_rec, BCDCTZ_rec,
    BCDSETSGN_rec, BCDS_rec, BCDTRUNC_rec, BCDUS_rec, BCDUTRUNC_rec
  )>;
629
// DFU operations with a 12 cycle latency. There is only one DFU unit per CPU,
// so a whole superslice is used: both execution pipelines (EXECO, EXECE) and
// one dispatch.
def : InstRW<
  [P9_DFU_12C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
  (instrs
    BCDSR_rec,
    XSADDQP, XSADDQPO,
    XSCVDPQP, XSCVQPDP, XSCVQPDPO,
    XSCVQPSDZ, XSCVQPSWZ, XSCVQPUDZ, XSCVQPUWZ,
    XSCVSDQP, XSCVUDQP,
    XSRQPI, XSRQPIX, XSRQPXP,
    XSSUBQP, XSSUBQPO
  )>;
653
// DFU operation with a 23 cycle latency. There is only one DFU unit per CPU,
// so a whole superslice is used: both execution pipelines (EXECO, EXECE) and
// one dispatch.
def : InstRW<
  [P9_DFU_23C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
  (instrs BCDCTSQ_rec)>;
661
// DFU operations with a 24 cycle latency. There is only one DFU unit per CPU,
// so a whole superslice is used: both execution pipelines (EXECO, EXECE) and
// one dispatch.
def : InstRW<
  [P9_DFU_24C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
  (instrs
    XSMADDQP, XSMADDQPO, XSMSUBQP, XSMSUBQPO,
    XSMULQP, XSMULQPO,
    XSNMADDQP, XSNMADDQPO, XSNMSUBQP, XSNMSUBQPO
  )>;
678
// DFU operation with a 37 cycle latency. There is only one DFU unit per CPU,
// so a whole superslice is used: both execution pipelines (EXECO, EXECE) and
// one dispatch.
def : InstRW<
  [P9_DFU_37C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
  (instrs BCDCFSQ_rec)>;
686
// DFU operations with a 58 cycle latency. There is only one DFU unit per CPU,
// so a whole superslice is used: both execution pipelines (EXECO, EXECE) and
// one dispatch.
def : InstRW<
  [P9_DFU_58C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
  (instrs XSDIVQP, XSDIVQPO)>;
695
// DFU operations with a 76 cycle latency. There is only one DFU unit per CPU,
// so a whole superslice is used: both execution pipelines (EXECO, EXECE) and
// one dispatch (DISP_1C).
def : InstRW<
  [P9_DFU_76C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
  (instrs XSSQRTQP, XSSQRTQPO)>;
704
// 6 Cycle Load uses a single slice.
// The pattern is anchored with '$' like the other instregex entries in this
// file, so it selects exactly LXVL and LXVLL rather than every opcode whose
// name merely starts with "LXVL".
def : InstRW<[P9_LS_6C, IP_AGEN_1C, DISP_1C],
      (instrs
    (instregex "LXVL(L)?$")
)>;
710
// 5 Cycle Load uses a single slice.
// The "LVX(L)?$" pattern is anchored with '$' like the other instregex entries
// in this file, so it selects exactly LVX and LVXL rather than every opcode
// whose name merely starts with "LVX".
def : InstRW<[P9_LS_5C, IP_AGEN_1C, DISP_1C],
      (instrs
    (instregex "LVE(B|H|W)X$"),
    (instregex "LVX(L)?$"),
    (instregex "LXSI(B|H)ZX$"),
    LXSDX,
    LXVB16X,
    LXVD2X,
    LXVWSX,
    LXSIWZX,
    LXV,
    LXVX,
    LXSD,
    DFLOADf64,
    XFLOADf64,
    LIWZX
)>;
729
// Load/store-unit operations with a four cycle latency. A single slice is
// used.
def : InstRW<
  [P9_LS_4C, IP_AGEN_1C, DISP_1C],
  (instrs
    (instregex "DCB(F|T|ST)(EP)?$"),
    (instregex "DCBZ(L)?(EP)?$"),
    (instregex "DCBTST(EP)?$"),
    (instregex "CP_COPY(8)?$"),
    (instregex "ICBI(EP)?$"),
    (instregex "ICBT(LS)?$"),
    (instregex "LBARX(L)?$"),
    (instregex "LBZ(CIX|8|X|X8|XTLS|XTLS_32)?(_)?$"),
    (instregex "LD(ARX|ARXL|BRX|CIX|X|XTLS)?(_)?$"),
    (instregex "LH(A|B)RX(L)?(8)?$"),
    (instregex "LHZ(8|CIX|X|X8|XTLS|XTLS_32)?(_)?$"),
    (instregex "LWARX(L)?$"),
    (instregex "LWBRX(8)?$"),
    (instregex "LWZ(8|CIX|X|X8|XTLS|XTLS_32)?(_)?$"),
    CP_ABORT, DARN, EnforceIEIO, ISYNC, MSGSYNC, TLBSYNC, SYNC, LMW, LSWI
  )>;
757
// Restricted loads with a four cycle latency. Execution uses a single slice,
// but dispatch takes the whole superslice.
def : InstRW<
  [P9_LS_4C, IP_AGEN_1C, DISP_3SLOTS_1C],
  (instrs LFIWZX, LFDX, LFD)>;
766
// Cracked load instructions whose two load pieces can be done in parallel.
def : InstRW<
  [P9_LS_4C, P9_LS_4C, IP_AGEN_1C, IP_AGEN_1C, DISP_PAIR_1C],
  (instrs SLBIA, SLBIE, SLBMFEE, SLBMFEV, SLBMTE, TLBIEL)>;
779
// Cracked load instruction made of a Load piece and an ALU piece totaling
// 6 cycles. The Load and ALU operations can run in parallel.
def : InstRW<
  [P9_LS_4C, P9_ALU_2C, IP_EXEC_1C, IP_AGEN_1C, DISP_PAIR_1C, DISP_PAIR_1C],
  (instrs
    (instregex "L(W|H)ZU(X)?(8)?$")
  )>;
788
// Cracked TEND instruction made of a Load piece and an ALU piece totaling
// 6 cycles. The Load and ALU operations can run in parallel.
def : InstRW<
  [P9_LS_4C, P9_ALU_2C, IP_EXEC_1C, IP_AGEN_1C, DISP_1C, DISP_1C],
  (instrs TEND)>;
797
798
// Cracked store instruction: a Store followed by an ALU operation. The store
// is Restricted and therefore takes three dispatches.
def : InstRW<
  [P9_StoreAndALUOp_3C, IP_EXEC_1C, IP_EXEC_1C, IP_AGEN_1C,
   DISP_3SLOTS_1C, DISP_1C],
  (instrs
    (instregex "ST(B|H|W|D)CX$")
  )>;
807
// Cracked load instruction: two consecutive load operations, 8 cycles in
// total.
def : InstRW<
  [P9_LoadAndLoadOp_8C, IP_AGEN_1C, IP_AGEN_1C, DISP_1C, DISP_1C],
  (instrs LDMX)>;
815
// Cracked load instruction made of consecutive Load and ALU pieces totaling
// 6 cycles. The two pieces cannot overlap, so their latencies are added.
def : InstRW<
  [P9_LoadAndALUOp_6C, IP_EXEC_1C, IP_AGEN_1C, DISP_1C, DISP_1C],
  (instrs
    (instregex "LHA(X)?(8)?$"),
    (instregex "CP_PASTE(8)?_rec$"),
    (instregex "LWA(X)?(_32)?$"),
    TCHECK
  )>;
827
// Cracked, Restricted load instruction made of consecutive Load and ALU
// pieces totaling 6 cycles. The two pieces cannot overlap, so their latencies
// are added. Being both cracked and Restricted, it takes a full 6 dispatches.
def : InstRW<
  [P9_LoadAndALUOp_6C, IP_EXEC_1C, IP_AGEN_1C,
   DISP_3SLOTS_1C, DISP_3SLOTS_1C],
  (instrs LFIWAX)>;
837
// Cracked load instruction made of consecutive Load and ALU pieces totaling
// 7 cycles. The two pieces cannot overlap, so their latencies are added.
// As a cracked instruction it takes one dispatch (DISP_1C) per piece.
def : InstRW<
  [P9_LoadAndALUOp_7C, IP_AGEN_1C, IP_EXEC_1C, DISP_1C, DISP_1C],
  (instrs LXSIWAX, LIWAX)>;
847
// Cracked load instruction made of a consecutive Load (4 cycles) and ALU
// (3 cycles) piece, 7 cycles in total. The two pieces cannot overlap, so
// their latencies are added. As a Restricted instruction it takes a full
// 6 dispatches.
def : InstRW<
  [P9_LoadAndALU2Op_7C, IP_AGEN_1C, IP_EXEC_1C,
   DISP_3SLOTS_1C, DISP_3SLOTS_1C],
  (instrs LFSX, LFS)>;
859
// Cracked load instruction made of consecutive Load and ALU pieces totaling
// 8 cycles. The two pieces cannot overlap, so their latencies are added.
// As a cracked instruction it takes one dispatch (DISP_1C) per piece.
def : InstRW<
  [P9_LoadAndALU2Op_8C, IP_AGEN_1C, IP_EXEC_1C, DISP_1C, DISP_1C],
  (instrs LXSSP, LXSSPX, XFLOADf32, DFLOADf32)>;
871
// Cracked 3-way load instruction: a Load together with two ALU operations
// that depend on each other.
def : InstRW<
  [P9_LoadAndALUOp_6C, P9_ALU_2C, IP_AGEN_1C, IP_EXEC_1C, IP_EXEC_1C,
   DISP_PAIR_1C, DISP_PAIR_1C, DISP_1C],
  (instrs
    (instregex "LHAU(X)?(8)?$"),
    LWAUX
  )>;
880
// Cracked load that also needs the PM resource. The Load and the PM operation
// cannot overlap, so their latencies are added for a total of 8 cycles. The
// PM part needs the full superslice, i.e. both the EXECE and EXECO pipelines
// plus 1 dispatch; the Load part takes the remaining 1 dispatch.
def : InstRW<
  [P9_LoadAndPMOp_8C, IP_AGEN_1C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C],
  (instrs LXVH8X, LXVDSX, LXVW4X)>;
893
// Restricted store operations that execute on a single slice. Being
// Restricted, each takes all three dispatches of the superslice.
def : InstRW<
  [P9_LS_1C, IP_EXEC_1C, IP_AGEN_1C, DISP_3SLOTS_1C],
  (instrs
    (instregex "STF(S|D|IWX|SX|DX)$"),
    (instregex "STXS(D|DX|SPX|IWX|IBX|IHX|SP)(v)?$"),
    (instregex "STW(8)?$"),
    (instregex "(D|X)FSTORE(f32|f64)$"),
    (instregex "ST(W|H|D)BRX$"),
    (instregex "ST(B|H|D)(8)?$"),
    (instregex "ST(B|W|H|D)(CI)?X(TLS|TLS_32)?(8)?(_)?$"),
    STIWX, SLBIEG, STMW, STSWI, TLBIE
  )>;
911
// Vector store instructions. These need the whole superslice: one dispatch
// plus both the Even and Odd execution pipelines.
def : InstRW<
  [P9_LS_1C, IP_EXECE_1C, IP_EXECO_1C, IP_AGEN_1C, DISP_1C],
  (instrs
    (instregex "STVE(B|H|W)X$"),
    (instregex "STVX(L)?$"),
    (instregex "STXV(B16X|H8X|W4X|D2X|L|LL|X)?$")
  )>;
921
// DIV-unit operations with a 5 cycle latency. There is only one DIV unit per
// superslice, so the whole superslice is used: both execution pipelines
// (EXECO, EXECE) and two dispatches.
def : InstRW<
  [P9_DIV_5C, IP_EXECE_1C, IP_EXECO_1C, DISP_EVEN_1C],
  (instrs
    (instregex "MTCTR(8)?(loop)?$"),
    (instregex "MTLR(8)?$")
  )>;
930
// DIV-unit operations with a 12 cycle latency. There is only one DIV unit per
// superslice, so the whole superslice is used: both execution pipelines
// (EXECO, EXECE) and two dispatches.
def : InstRW<
  [P9_DIV_12C, IP_EXECE_1C, IP_EXECO_1C, DISP_EVEN_1C],
  (instrs
    (instregex "M(T|F)VRSAVE(v)?$"),
    (instregex "M(T|F)PMR$"),
    (instregex "M(T|F)TB(8)?$"),
    (instregex "MF(SPR|CTR|LR)(8)?$"),
    (instregex "M(T|F)MSR(D)?$"),
    (instregex "MTSPR(8)?$")
  )>;
943
// DIV-unit operations with a 16 cycle latency. There is only one DIV unit per
// superslice, so the whole superslice is used: both execution pipelines
// (EXECO, EXECE) and two dispatches.
def : InstRW<
  [P9_DIV_16C_8, IP_EXECO_1C, IP_EXECE_1C, DISP_EVEN_1C],
  (instrs DIVW, DIVWO, DIVWU, DIVWUO, MODSW)>;
955
// DIV-unit operations with a 24 cycle latency. There is only one DIV unit per
// superslice, so the whole superslice is used: both execution pipelines
// (EXECO, EXECE) and two dispatches.
def : InstRW<
  [P9_DIV_24C_8, IP_EXECO_1C, IP_EXECE_1C, DISP_EVEN_1C],
  (instrs
    DIVWE, DIVWEO, DIVD, DIVDO,
    DIVWEU, DIVWEUO, DIVDU, DIVDUO,
    MODSD, MODUD, MODUW
  )>;
973
// DIV-unit operations with a 40 cycle latency. There is only one DIV unit per
// superslice, so the whole superslice is used: both execution pipelines
// (EXECO, EXECE), with dispatch modeled by DISP_EVEN_1C exactly as for the
// shorter-latency DIV groups.
def : InstRW<
  [P9_DIV_40C_8, IP_EXECO_1C, IP_EXECE_1C, DISP_EVEN_1C],
  (instrs DIVDE, DIVDEO, DIVDEU, DIVDEUO)>;
984
// Cracked DIV plus ALU operation. The ALU piece needs one full slice and the
// DIV piece needs one full superslice, since there is only one DIV unit per
// superslice. Latency of DIV plus ALU is 18.
def : InstRW<
  [P9_IntDivAndALUOp_18C_8, IP_EXECE_1C, IP_EXECO_1C, IP_EXEC_1C,
   DISP_EVEN_1C, DISP_1C],
  (instrs
    (instregex "DIVW(U)?(O)?_rec$")
  )>;
993
// Cracked DIV plus ALU operation. The ALU piece needs one full slice and the
// DIV piece needs one full superslice, since there is only one DIV unit per
// superslice. Latency of DIV plus ALU is 26.
def : InstRW<
  [P9_IntDivAndALUOp_26C_8, IP_EXECE_1C, IP_EXECO_1C, IP_EXEC_1C,
   DISP_EVEN_1C, DISP_1C],
  (instrs
    DIVD_rec, DIVDO_rec, DIVDU_rec, DIVDUO_rec,
    DIVWE_rec, DIVWEO_rec, DIVWEU_rec, DIVWEUO_rec
  )>;
1009
// Cracked DIV plus ALU operation. The ALU piece needs one full slice and the
// DIV piece needs one full superslice, since there is only one DIV unit per
// superslice. Latency of DIV plus ALU is 42.
def : InstRW<
  [P9_IntDivAndALUOp_42C_8, IP_EXECE_1C, IP_EXECO_1C, IP_EXEC_1C,
   DISP_EVEN_1C, DISP_1C],
  (instrs DIVDE_rec, DIVDEO_rec, DIVDEU_rec, DIVDEUO_rec)>;
1021
1022// CR access instructions in _BrMCR, IIC_BrMCRX.
1023
// Cracked, Restricted ALU operations made of two 2 cycle ALU ops. The two ops
// can run in parallel, so their latencies are not added; otherwise this
// behaves like two instructions running together on two pipelines with
// 6 dispatches.
def : InstRW<
  [P9_ALU_2C, P9_ALU_2C, IP_EXEC_1C, IP_EXEC_1C,
   DISP_3SLOTS_1C, DISP_3SLOTS_1C],
  (instrs MTCRF, MTCRF8)>;
1035
// Cracked ALU operations made of two 2 cycle ALU ops. The two ops can run in
// parallel, so their latencies are not added; otherwise this behaves like two
// instructions running together on two pipelines with 2 dispatches.
def : InstRW<
  [P9_ALU_2C, P9_ALU_2C, IP_EXEC_1C, IP_EXEC_1C, DISP_1C, DISP_1C],
  (instrs
    (instregex "ADDC(8)?(O)?_rec$"),
    (instregex "SUBFC(8)?(O)?_rec$")
  )>;
1047
// Cracked ALU operations made of two ALU ops that can run in parallel.
// One is a three cycle ALU op, the other is a two cycle ALU op.
// One of the ALU ops is Restricted (3 dispatches) and the other is not
// (1 dispatch), for a total of 4 dispatches.
def : InstRW<
  [P9_ALU_2C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C, DISP_3SLOTS_1C, DISP_1C],
  (instrs
    (instregex "F(N)?ABS(D|S)_rec$"),
    (instregex "FCPSGN(D|S)_rec$"),
    (instregex "FNEG(D|S)_rec$"),
    FMR_rec
  )>;
1061
// Cracked ALU operation made of two 3 cycle ALU ops. The two ops can run in
// parallel, so their latencies are not added; otherwise this behaves like two
// instructions running together on two pipelines with 2 dispatches.
def : InstRW<
  [P9_ALU_3C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C, DISP_1C, DISP_1C],
  (instrs MCRFS)>;
1072
// Cracked, Restricted ALU operations made of two 3 cycle ALU ops. The two ops
// can run in parallel, so their latencies are not added; otherwise this
// behaves like two instructions running together on two pipelines with
// 6 dispatches.
def : InstRW<
  [P9_ALU_3C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C,
   DISP_3SLOTS_1C, DISP_3SLOTS_1C],
  (instrs
    (instregex "MTFSF(b|_rec)?$"),
    (instregex "MTFSFI(_rec)?$"),
    MTFSFIb
  )>;
1085
// Cracked instructions made of two ALU ops that cannot run in parallel.
// One of the ALU ops is Restricted and takes 3 dispatches.
def : InstRW<
  [P9_ALUOpAndALUOp_4C, IP_EXEC_1C, IP_EXEC_1C, DISP_3SLOTS_1C, DISP_1C],
  (instrs
    (instregex "RLD(I)?C(R|L)_rec$"),
    (instregex "RLW(IMI|INM|NM)(8)?_rec$"),
    (instregex "SLW(8)?_rec$"),
    (instregex "SRAW(I)?_rec$"),
    (instregex "SRW(8)?_rec$"),
    RLDICL_32_rec, RLDIMI_rec
  )>;
1100
// Cracked instructions made of two ALU ops that cannot run in parallel.
// Both ALU ops are Restricted and take 3 dispatches each.
def : InstRW<
  [P9_ALU2OpAndALU2Op_6C, IP_EXEC_1C, IP_EXEC_1C,
   DISP_3SLOTS_1C, DISP_3SLOTS_1C],
  (instrs
    (instregex "MFFS(L|CE|_rec)?$")
  )>;
1109
1110// Cracked ALU instruction composed of three consecutive 2 cycle ALU ops for a
1111// total of 6 cycles. All of the ALU operations are also restricted so each
1112// takes 3 dispatches for a total of 9.
1113def : InstRW<[P9_ALUOpAndALUOpAndALUOp_6C, IP_EXEC_1C, IP_EXEC_1C, IP_EXEC_1C,
1114              DISP_3SLOTS_1C, DISP_3SLOTS_1C, DISP_3SLOTS_1C],
1115      (instrs
1116    (instregex "MFCR(8)?$")
1117)>;
1118
1119// Cracked instruction made of two ALU ops that cannot be done in parallel
1120// (4 cycles total). Neither op is restricted, so each takes 1 dispatch.
1121def : InstRW<[P9_ALUOpAndALUOp_4C, IP_EXEC_1C, IP_EXEC_1C, DISP_1C, DISP_1C],
1122      (instrs
1123    (instregex "EXTSWSLI_32_64_rec$"),
1124    (instregex "SRAD(I)?_rec$"),
1125    EXTSWSLI_rec,
1126    SLD_rec,
1127    SRD_rec,
1128    RLDIC_rec
1129)>;
1130
1131// 33 Cycle DP Instruction Restricted. Takes one slice and 3 dispatches.
1132def : InstRW<[P9_DP_33C_8, IP_EXEC_1C, DISP_3SLOTS_1C],
1133      (instrs
1134    FDIV // Same DP resource as XSDIVDP, but restricted (3 dispatches).
1135)>;
1136
1137// 33 Cycle DP Instruction Restricted and Cracked with 3 Cycle ALU (36 total).
1138def : InstRW<[P9_DPOpAndALU2Op_36C_8, IP_EXEC_1C, IP_EXEC_1C,
1139              DISP_3SLOTS_1C, DISP_1C],
1140      (instrs
1141    FDIV_rec
1142)>;
1143
1144// 36 Cycle DP Instruction.
1145// Instruction can be done on a single slice with 1 dispatch.
1146def : InstRW<[P9_DP_36C_10, IP_EXEC_1C, DISP_1C],
1147      (instrs
1148    XSSQRTDP
1149)>;
1150
1151// 36 Cycle DP Instruction Restricted. Takes one slice and 3 dispatches.
1152def : InstRW<[P9_DP_36C_10, IP_EXEC_1C, DISP_3SLOTS_1C],
1153      (instrs
1154    FSQRT // Same DP resource as XSSQRTDP, but restricted (3 dispatches).
1155)>;
1156
1157// 36 Cycle DP Vector Instruction. Takes one full superslice and 1 dispatch.
1158def : InstRW<[P9_DPE_36C_10, P9_DPO_36C_10, IP_EXECE_1C, IP_EXECO_1C,
1159              DISP_1C],
1160      (instrs
1161    XVSQRTDP
1162)>;
1163
1164// 27 Cycle DP Vector Instruction. Takes one full superslice and 1 dispatch.
1165def : InstRW<[P9_DPE_27C_10, P9_DPO_27C_10, IP_EXECE_1C, IP_EXECO_1C,
1166              DISP_1C],
1167      (instrs
1168    XVSQRTSP
1169)>;
1170
1171// 36 Cycle DP Instruction Restricted and Cracked with 3 Cycle ALU (39 total).
1172def : InstRW<[P9_DPOpAndALU2Op_39C_10, IP_EXEC_1C, IP_EXEC_1C,
1173              DISP_3SLOTS_1C, DISP_1C],
1174      (instrs
1175    FSQRT_rec
1176)>;
1177
1178// 26 Cycle DP Instruction. Takes one slice and 1 dispatch.
1179def : InstRW<[P9_DP_26C_5, IP_EXEC_1C, DISP_1C],
1180      (instrs
1181    XSSQRTSP
1182)>;
1183
1184// 26 Cycle DP Instruction Restricted. Takes one slice and 3 dispatches.
1185def : InstRW<[P9_DP_26C_5, IP_EXEC_1C, DISP_3SLOTS_1C],
1186      (instrs
1187    FSQRTS // Same DP resource as XSSQRTSP, but restricted (3 dispatches).
1188)>;
1189
1190// 26 Cycle DP Instruction Restricted and Cracked with 3 Cycle ALU (29 total).
1191def : InstRW<[P9_DPOpAndALU2Op_29C_5, IP_EXEC_1C, IP_EXEC_1C,
1192              DISP_3SLOTS_1C, DISP_1C],
1193      (instrs
1194    FSQRTS_rec
1195)>;
1196
1197// 33 Cycle DP Instruction. Takes one slice and 1 dispatch.
1198def : InstRW<[P9_DP_33C_8, IP_EXEC_1C, DISP_1C],
1199      (instrs
1200    XSDIVDP // Same DP resource as FDIV, but not restricted (1 dispatch).
1201)>;
1202
1203// 22 Cycle DP Instruction Restricted. Takes one slice and 3 dispatches.
1204def : InstRW<[P9_DP_22C_5, IP_EXEC_1C, DISP_3SLOTS_1C],
1205      (instrs
1206    FDIVS // Same DP resource as XSDIVSP, but restricted (3 dispatches).
1207)>;
1208
1209// 22 Cycle DP Instruction Restricted and Cracked with 3 Cycle ALU (25 total).
1210def : InstRW<[P9_DPOpAndALU2Op_25C_5, IP_EXEC_1C, IP_EXEC_1C,
1211              DISP_3SLOTS_1C, DISP_1C],
1212      (instrs
1213    FDIVS_rec
1214)>;
1215
1216// 22 Cycle DP Instruction. Takes one slice and 1 dispatch.
1217def : InstRW<[P9_DP_22C_5, IP_EXEC_1C, DISP_1C],
1218      (instrs
1219    XSDIVSP // Same DP resource as FDIVS, but not restricted (1 dispatch).
1220)>;
1221
1222// 24 Cycle DP Vector Instruction. Takes one full superslice.
1223// Includes both the EXECE and EXECO pipelines and 1 dispatch for the given
1224// superslice.
1225def : InstRW<[P9_DPE_24C_8, P9_DPO_24C_8, IP_EXECE_1C, IP_EXECO_1C,
1226              DISP_1C],
1227      (instrs
1228    XVDIVSP
1229)>;
1230
1231// 33 Cycle DP Vector Instruction. Takes one full superslice.
1232// Includes both the EXECE and EXECO pipelines and 1 dispatch for the given
1233// superslice.
1234def : InstRW<[P9_DPE_33C_8, P9_DPO_33C_8, IP_EXECE_1C, IP_EXECO_1C,
1235              DISP_1C],
1236      (instrs
1237    XVDIVDP
1238)>;
1239
1240// Instruction cracked into three pieces. One Load and two ALU operations.
1241// The Load and one of the ALU ops cannot be run at the same time and so the
1242// latencies are added together for 7 cycles. The remaining ALU is 2 cycles.
1243// Both the load and the ALU that depends on it are restricted and so they take
1244// a total of 6 dispatches. The final dispatch comes from the second ALU op.
1245// The two EXEC pipelines are for the 2 ALUs while the AGEN is for the load.
1246def : InstRW<[P9_LoadAndALU2Op_7C, P9_ALU_2C,
1247              IP_AGEN_1C, IP_EXEC_1C, IP_EXEC_1C,
1248              DISP_3SLOTS_1C, DISP_3SLOTS_1C, DISP_1C],
1249      (instrs
1250    (instregex "LF(SU|SUX)$")
1251)>;
1252
1253// Cracked instruction made up of a Store and an ALU. The ALU does not depend on
1254// the store and so it can be run at the same time as the store. The store is
1255// also restricted and takes 3 dispatches, while the ALU takes 1.
1256def : InstRW<[P9_LS_1C, P9_ALU_2C, IP_AGEN_1C, IP_EXEC_1C, IP_EXEC_1C,
1257              DISP_3SLOTS_1C, DISP_1C],
1258      (instrs
1259    (instregex "STF(S|D)U(X)?$"),
1260    (instregex "ST(B|H|W|D)U(X)?(8)?$")
1261)>;
1262
1263// Cracked instruction made up of a Load and an ALU. The ALU does not depend on
1264// the load and so the two can run at the same time (4 cycle load, 2 cycle ALU).
1265def : InstRW<[P9_LS_4C, P9_ALU_2C, IP_AGEN_1C, IP_EXEC_1C,
1266              DISP_PAIR_1C, DISP_PAIR_1C],
1267      (instrs
1268    (instregex "LBZU(X)?(8)?$"),
1269    (instregex "LDU(X)?$")
1270)>;
1271
1272// Cracked instruction made up of a Load and an ALU. The ALU does not depend on
1273// the load and so it can be run at the same time as the load. The load is also
1274// restricted. 3 dispatches are from the restricted load while the other one
1275// is from the ALU. The AGEN pipeline is from the load and the EXEC pipeline
1276// is required for the ALU.
1277def : InstRW<[P9_LS_4C, P9_ALU_2C, IP_AGEN_1C, IP_EXEC_1C,
1278              DISP_3SLOTS_1C, DISP_1C],
1279      (instrs
1280    (instregex "LF(DU|DUX)$")
1281)>;
1282
1283// Crypto Instructions
1284
1285// 6 Cycle CY operation. There is only one CY unit per CPU, so each of these
1286// uses a whole superslice. That includes both exec pipelines (EXECO, EXECE)
1287// and one dispatch.
1288def : InstRW<[P9_CY_6C, IP_EXECO_1C, IP_EXECE_1C, DISP_1C],
1289      (instrs
1290    (instregex "VPMSUM(B|H|W|D)$"),
1291    (instregex "V(N)?CIPHER(LAST)?$"),
1292    VSBOX
1293)>;
1294
1295// Branch Instructions
1296
1297// Two Cycle Branch. Takes 1 branch dispatch (DISP_BR_1C).
1298def : InstRW<[P9_BR_2C, DISP_BR_1C],
1299      (instrs
1300  (instregex "BCCCTR(L)?(8)?$"),
1301  (instregex "BCCL(A|R|RL)?$"),
1302  (instregex "BCCTR(L)?(8)?(n)?$"),
1303  (instregex "BD(N)?Z(8|A|Am|Ap|m|p)?$"),
1304  (instregex "BD(N)?ZL(A|Am|Ap|R|R8|RL|RLm|RLp|Rm|Rp|m|p)?$"),
1305  (instregex "BL(_TLS|_NOP)?$"),
1306  (instregex "BL8(_TLS|_NOP|_NOP_TLS|_TLS_)?$"),
1307  (instregex "BLA(8|8_NOP)?$"),
1308  (instregex "BLR(8|L)?$"),
1309  (instregex "TAILB(A)?(8)?$"),
1310  (instregex "TAILBCTR(8)?$"),
1311  (instregex "gBC(A|Aat|CTR|CTRL|L|LA|LAat|LR|LRL|Lat|at)?$"),
1312  (instregex "BCLR(L)?(n)?$"),
1313  (instregex "BCTR(L)?(8)?$"),
1314  B,
1315  BA,
1316  BC,
1317  BCC,
1318  BCCA,
1319  BCL,
1320  BCLalways,
1321  BCLn,
1322  BCTRL8_LDinto_toc,
1323  BCTRL_LWZinto_toc,
1324  BCn,
1325  CTRL_DEP
1326)>;
1327
1328// Five Cycle Branch with a 2 Cycle ALU Op.
1329// Operations must be done consecutively and not in parallel (7 Cycles total).
1330def : InstRW<[P9_BROpAndALUOp_7C, IP_EXEC_1C, DISP_BR_1C, DISP_1C],
1331      (instrs
1332    ADDPCIS
1333)>;
1334
1335// Special Extracted Instructions For Atomics
1336
1337// Atomic Load: 5 LS resources, 2 EXEC + 5 AGEN pipelines, 6 dispatch entries.
1338def : InstRW<[P9_LS_1C, P9_LS_1C, P9_LS_4C, P9_LS_4C, P9_LS_4C,
1339              IP_EXEC_1C, IP_EXEC_1C, IP_AGEN_1C, IP_AGEN_1C, IP_AGEN_1C,
1340              IP_AGEN_1C, IP_AGEN_1C, DISP_1C, DISP_3SLOTS_1C,
1341              DISP_3SLOTS_1C, DISP_1C, DISP_1C, DISP_1C],
1342      (instrs
1343    (instregex "L(D|W)AT$")
1344)>;
1345
1346// Atomic Store: 3 LS resources, 1 EXEC + 3 AGEN pipelines, 3 dispatch entries.
1347def : InstRW<[P9_LS_1C, P9_LS_4C, P9_LS_4C, IP_EXEC_1C, IP_AGEN_1C, IP_AGEN_1C,
1348              IP_AGEN_1C, DISP_1C, DISP_3SLOTS_1C, DISP_1C],
1349      (instrs
1350    (instregex "ST(D|W)AT$")
1351)>;
1352
1353// Signal Processing Engine (SPE) Instructions.
1354// These are not supported on Power 9, so they get no resources (Unsupported).
1355def : InstRW<[],
1356    (instrs
1357  BRINC,
1358  EVABS,
1359  EVEQV,
1360  EVMRA,
1361  EVNAND,
1362  EVNEG,
1363  (instregex "EVADD(I)?W$"),
1364  (instregex "EVADD(SM|SS|UM|US)IAAW$"),
1365  (instregex "EVAND(C)?$"),
1366  (instregex "EVCMP(EQ|GTS|GTU|LTS|LTU)$"),
1367  (instregex "EVCNTL(S|Z)W$"),
1368  (instregex "EVDIVW(S|U)$"),
1369  (instregex "EVEXTS(B|H)$"),
1370  (instregex "EVLD(H|W|D)(X)?$"),
1371  (instregex "EVLHH(E|OS|OU)SPLAT(X)?$"),
1372  (instregex "EVLWHE(X)?$"),
1373  (instregex "EVLWHO(S|U)(X)?$"),
1374  (instregex "EVLW(H|W)SPLAT(X)?$"),
1375  (instregex "EVMERGE(HI|LO|HILO|LOHI)$"),
1376  (instregex "EVMHEG(S|U)M(F|I)A(A|N)$"),
1377  (instregex "EVMHES(M|S)(F|I)(A|AA|AAW|ANW)?$"),
1378  (instregex "EVMHEU(M|S)I(A|AA|AAW|ANW)?$"),
1379  (instregex "EVMHOG(U|S)M(F|I)A(A|N)$"),
1380  (instregex "EVMHOS(M|S)(F|I)(A|AA|AAW|ANW)?$"),
1381  (instregex "EVMHOU(M|S)I(A|AA|ANW|AAW)?$"),
1382  (instregex "EVMWHS(M|S)(F|FA|I|IA)$"),
1383  (instregex "EVMWHUMI(A)?$"),
1384  (instregex "EVMWLS(M|S)IA(A|N)W$"),
1385  (instregex "EVMWLU(M|S)I(A|AA|AAW|ANW)?$"),
1386  (instregex "EVMWSM(F|I)(A|AA|AN)?$"),
1387  (instregex "EVMWSSF(A|AA|AN)?$"),
1388  (instregex "EVMWUMI(A|AA|AN)?$"),
1389  (instregex "EV(N|X)?OR(C)?$"),
1390  (instregex "EVR(LW|LWI|NDW)$"),
1391  (instregex "EVSLW(I)?$"),
1392  (instregex "EVSPLAT(F)?I$"),
1393  (instregex "EVSRW(I)?(S|U)$"),
1394  (instregex "EVST(DD|DH|DW|WHE|WHO|WWE|WWO)(X)?$"),
1395  (instregex "EVSUBF(S|U)(M|S)IAAW$"),
1396  (instregex "EVSUB(I)?FW$")
1397)> { let Unsupported = 1; }
1398
1399// General instructions without scheduling support; marked Unsupported.
1400def : InstRW<[],
1401    (instrs
1402  (instregex "(H)?RFI(D)?$"),
1403  (instregex "DSS(ALL)?$"),
1404  (instregex "DST(ST)?(T)?(64)?$"),
1405  (instregex "ICBL(C|Q)$"),
1406  (instregex "L(W|H|B)EPX$"),
1407  (instregex "ST(W|H|B)EPX$"),
1408  (instregex "(L|ST)FDEPX$"),
1409  (instregex "M(T|F)SR(IN)?$"),
1410  (instregex "M(T|F)DCR$"),
1411  (instregex "NOP_GT_PWR(6|7)$"),
1412  (instregex "TLB(IA|IVAX|SX|SX2|SX2D|LD|LI|RE|RE2|WE|WE2)$"),
1413  (instregex "WRTEE(I)?$"),
1414  (instregex "HASH(ST|STP|CHK|CHKP)$"),
1415  ATTN,
1416  CLRBHRB,
1417  MFBHRBE,
1418  MBAR,
1419  MSYNC,
1420  SLBSYNC,
1421  SLBFEE_rec,
1422  NAP,
1423  STOP,
1424  TRAP,
1425  RFCI,
1426  RFDI,
1427  RFMCI,
1428  SC,
1429  DCBA,
1430  DCBI,
1431  DCCCI,
1432  ICCCI,
1433  ADDEX
1434)> { let Unsupported = 1; }
1435