1//===- P9InstrResources.td - P9 Instruction Resource Defs  -*- tablegen -*-==//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the resources required by P9 instructions. This is part of
10// the P9 processor model used for instruction scheduling. This file should
11// contain all the instructions that may be used on Power 9. This is not
12// just instructions that are new on Power 9 but also instructions that were
13// available on earlier architectures and are still used in Power 9.
14//
15// The makeup of the P9 CPU is modeled as follows:
16//   - Each CPU is made up of two superslices.
17//   - Each superslice is made up of two slices. Therefore, there are 4 slices
18//   for each CPU.
19//   - Up to 6 instructions can be dispatched to each CPU. Three per superslice.
20//   - Each CPU has:
21//     - One CY (Crypto) unit P9_CY_*
22//     - One DFU (Decimal Floating Point and Quad Precision) unit P9_DFU_*
23//     - Two PM (Permute) units. One on each superslice. P9_PM_*
//     - Two DIV (Fixed Point Divide) units. One on each superslice. P9_DIV_*
25//     - Four ALU (Fixed Point Arithmetic) units. One on each slice. P9_ALU_*
26//     - Four DP (Floating Point) units. One on each slice. P9_DP_*
27//       This also includes fixed point multiply add.
28//     - Four AGEN (Address Generation) units. One for each slice. P9_AGEN_*
29//     - Four Load/Store Queues. P9_LS_*
30//   - Each set of instructions will require a number of these resources.
31//===----------------------------------------------------------------------===//
32
// Vector ALU operation with a two cycle latency that occupies a whole
// superslice: both ALU halves (even ALUE and odd ALUO), both execution
// pipelines (EXECE, EXECO) and one dispatch entry (DISP_1C).
def : InstRW<
  [P9_ALUE_2C, P9_ALUO_2C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
  (instrs
    // Opcodes selected by pattern.
    (instregex "VADDU(B|H|W|D)M$"),
    (instregex "VAND(C)?$"),
    (instregex "VEXTS(B|H|W)2(D|W)(s)?$"),
    (instregex "V_SET0(B|H)?$"),
    (instregex "VS(R|L)(B|H|W|D)$"),
    (instregex "VSUBU(B|H|W|D)M$"),
    (instregex "VPOPCNT(B|H)$"),
    (instregex "VRL(B|H|W|D)$"),
    (instregex "VSRA(B|H|W|D)$"),
    (instregex "XV(N)?ABS(D|S)P$"),
    (instregex "XVCPSGN(D|S)P$"),
    (instregex "XV(I|X)EXP(D|S)P$"),
    (instregex "VRL(D|W)(MI|NM)$"),
    (instregex "VMRG(E|O)W$"),
    // Opcodes listed by name.
    MTVSRDD, VEQV, VNAND, VNEGD, VNEGW, VNOR, VOR, VORC, VSEL, VXOR,
    XVNEGDP, XVNEGSP,
    XXLAND, XXLANDC, XXLEQV, XXLEQVOnes, XXLNAND, XXLNOR, XXLOR, XXLORf,
    XXLORC, XXLXOR, XXLXORdpz, XXLXORspz, XXLXORz, XXSEL,
    XSABSQP, XSCPSGNQP, XSIEXPQP, XSNABSQP, XSNEGQP, XSXEXPQP)>;
85
// Three cycle ALU operation with Restricted dispatch. Execution needs only a
// single slice, but because the operation is Restricted it takes every
// dispatch slot of its superslice (DISP_3SLOTS_1C).
def : InstRW<
  [P9_ALU_3C, IP_EXEC_1C, DISP_3SLOTS_1C],
  (instrs
    // Opcodes selected by pattern.
    (instregex "TABORT(D|W)C(I)?$"),
    (instregex "MTFSB(0|1)$"),
    (instregex "MFFSC(D)?RN(I)?$"),
    (instregex "CMPRB(8)?$"),
    (instregex "TD(I)?$"),
    (instregex "TW(I)?$"),
    (instregex "FCMP(O|U)(S|D)$"),
    (instregex "XSTSTDC(S|D)P$"),
    // Opcodes listed by name.
    FTDIV, FTSQRT, CMPEQB)>;
103
// Three cycle ALU operation with Standard dispatch; a single slice is used.
def : InstRW<
  [P9_ALU_3C, IP_EXEC_1C, DISP_1C],
  (instrs
    // Opcodes selected by pattern.
    (instregex "XSMAX(C|J)?DP$"),
    (instregex "XSMIN(C|J)?DP$"),
    (instregex "XSCMP(EQ|EXP|GE|GT|O|U)DP$"),
    (instregex "CNT(L|T)Z(D|W)(8)?(_rec)?$"),
    (instregex "POPCNT(D|W)$"),
    (instregex "CMPB(8)?$"),
    (instregex "SETB(8)?$"),
    // Opcodes listed by name.
    XSTDIVDP, XSTSQRTDP, XSXSIGDP, XSCVSPDPN, BPERMD)>;
120
// Two cycle ALU operation with Standard dispatch; a single slice is used.
//
// Several patterns below overlap (for example the plain OR opcodes are matched
// by two of them). The instruction list is a set, so the overlap is harmless.
def : InstRW<
  [P9_ALU_2C, IP_EXEC_1C, DISP_1C],
  (instrs
    // Opcodes selected by pattern.
    (instregex "S(L|R)D$"),
    (instregex "SRAD(I)?$"),
    (instregex "EXTSWSLI_32_64$"),
    (instregex "MFV(S)?RD$"),
    (instregex "MTV(S)?RD$"),
    (instregex "MTV(S)?RW(A|Z)$"),
    (instregex "CMP(WI|LWI|W|LW)(8)?$"),
    (instregex "CMP(L)?D(I)?$"),
    (instregex "SUBF(I)?C(8)?(O)?$"),
    (instregex "ANDI(S)?(8)?(_rec)?$"),
    (instregex "ADDC(8)?(O)?$"),
    (instregex "ADDIC(8)?(_rec)?$"),
    (instregex "ADD(8|4)(O)?(_rec)?$"),
    (instregex "ADD(E|ME|ZE)(8)?(O)?(_rec)?$"),
    (instregex "SUBF(E|ME|ZE)?(8)?(O)?(_rec)?$"),
    // This NEG pattern also covers the non-record forms, so no separate
    // "NEG(8)?(O)?$" entry is needed.
    (instregex "NEG(8)?(O)?(_rec)?$"),
    (instregex "POPCNTB$"),
    (instregex "POPCNTB8$"),
    (instregex "ADD(I|IS)?(8)?$"),
    (instregex "LI(S)?(8)?$"),
    (instregex "(X)?OR(I|IS)?(8)?(_rec)?$"),
    (instregex "NAND(8)?(_rec)?$"),
    (instregex "AND(C)?(8)?(_rec)?$"),
    (instregex "NOR(8)?(_rec)?$"),
    (instregex "OR(C)?(8)?(_rec)?$"),
    (instregex "EQV(8)?(_rec)?$"),
    (instregex "EXTS(B|H|W)(8)?(_32)?(_64)?(_rec)?$"),
    (instregex "ADD(4|8)(TLS)?(_)?$"),
    (instregex "ADDI(S)?toc(HA|L)(8)?$"),
    (instregex "LA(8)?$"),
    // Opcodes listed by name.
    COPY, MCRF, MCRXRX,
    XSNABSDP, XSNABSDPs, XSXEXPDP, XSABSDP, XSNEGDP, XSCPSGNDP,
    MFVSRWZ, MFVRWZ, EXTSWSLI, SRADI_32, RLDIC,
    RFEBB, TBEGIN, TRECHKPT, NOP, WAIT)>;
175
// Two cycle ALU operation with Restricted dispatch. Execution needs only a
// single slice, but because the operation is Restricted it takes every
// dispatch slot of its superslice (DISP_3SLOTS_1C).
def : InstRW<
  [P9_ALU_2C, IP_EXEC_1C, DISP_3SLOTS_1C],
  (instrs
    // Opcodes selected by pattern.
    (instregex "RLDC(L|R)$"),
    (instregex "RLWIMI(8)?$"),
    (instregex "RLDIC(L|R)(_32)?(_64)?$"),
    (instregex "M(F|T)OCRF(8)?$"),
    (instregex "CR(6)?(UN)?SET$"),
    (instregex "CR(N)?(OR|AND)(C)?$"),
    (instregex "S(L|R)W(8)?$"),
    (instregex "RLW(INM|NM)(8)?$"),
    (instregex "F(N)?ABS(D|S)$"),
    (instregex "FNEG(D|S)$"),
    (instregex "FCPSGN(D|S)$"),
    (instregex "SRAW(I)?$"),
    (instregex "ISEL(8)?$"),
    // Opcodes listed by name.
    RLDIMI, XSIEXPDP, FMR, CREQV, CRNOT, CRXOR, TRECLAIM, TSR, TABORT)>;
204
// Vector ALU operation with a three cycle latency that occupies a whole
// superslice: both ALU halves (even ALUE and odd ALUO), both execution
// pipelines (EXECE, EXECO) and one dispatch entry (DISP_1C).
def : InstRW<
  [P9_ALUE_3C, P9_ALUO_3C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
  (instrs
    // Opcodes selected by pattern.
    (instregex "M(T|F)VSCR$"),
    (instregex "VCMPNEZ(B|H|W)$"),
    (instregex "VCMPEQU(B|H|W|D)$"),
    (instregex "VCMPNE(B|H|W)$"),
    (instregex "VABSDU(B|H|W)$"),
    (instregex "VADDU(B|H|W)S$"),
    (instregex "VAVG(S|U)(B|H|W)$"),
    (instregex "VCMP(EQ|GE|GT)FP(_rec)?$"),
    (instregex "VCMPBFP(_rec)?$"),
    (instregex "VC(L|T)Z(B|H|W|D)$"),
    (instregex "VADDS(B|H|W)S$"),
    (instregex "V(MIN|MAX)FP$"),
    (instregex "V(MIN|MAX)(S|U)(B|H|W|D)$"),
    // Opcodes listed by name.
    VBPERMD, VADDCUW, VPOPCNTW, VPOPCNTD, VPRTYBD, VPRTYBW,
    VSHASIGMAD, VSHASIGMAW,
    VSUBSBS, VSUBSHS, VSUBSWS, VSUBUBS, VSUBUHS, VSUBUWS, VSUBCUW,
    VCMPGTSB, VCMPGTSB_rec, VCMPGTSD, VCMPGTSD_rec,
    VCMPGTSH, VCMPGTSH_rec, VCMPGTSW, VCMPGTSW_rec,
    VCMPGTUB, VCMPGTUB_rec, VCMPGTUD, VCMPGTUD_rec,
    VCMPGTUH, VCMPGTUH_rec, VCMPGTUW, VCMPGTUW_rec,
    VCMPNEB_rec, VCMPNEH_rec, VCMPNEW_rec,
    VCMPNEZB_rec, VCMPNEZH_rec, VCMPNEZW_rec,
    VCMPEQUB_rec, VCMPEQUD_rec, VCMPEQUH_rec, VCMPEQUW_rec,
    XVCMPEQDP, XVCMPEQDP_rec, XVCMPEQSP, XVCMPEQSP_rec,
    XVCMPGEDP, XVCMPGEDP_rec, XVCMPGESP, XVCMPGESP_rec,
    XVCMPGTDP, XVCMPGTDP_rec, XVCMPGTSP, XVCMPGTSP_rec,
    XVMAXDP, XVMAXSP, XVMINDP, XVMINSP,
    XVTDIVDP, XVTDIVSP, XVTSQRTDP, XVTSQRTSP,
    XVTSTDCDP, XVTSTDCSP, XVXSIGDP, XVXSIGSP)>;
289
// 7 cycle DP vector operation that uses an entire superslice.
// Uses both DP units (the even DPE and odd DPO units), two pipelines (EXECE,
// EXECO) and one dispatch entry (DISP_1C) to the given superslice.
def : InstRW<
  [P9_DPE_7C, P9_DPO_7C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
  (instrs
    VADDFP, VCTSXS, VCTSXS_0, VCTUXS, VCTUXS_0, VEXPTEFP, VLOGEFP, VMADDFP,
    VMHADDSHS, VNMSUBFP, VREFP, VRFIM, VRFIN, VRFIP, VRFIZ, VRSQRTEFP, VSUBFP,
    XVADDDP, XVADDSP,
    XVCVDPSP, XVCVDPSXDS, XVCVDPSXWS, XVCVDPUXDS, XVCVDPUXWS,
    XVCVHPSP, XVCVSPDP, XVCVSPHP,
    XVCVSPSXDS, XVCVSPSXWS, XVCVSPUXDS, XVCVSPUXWS,
    XVCVSXDDP, XVCVSXDSP, XVCVSXWDP, XVCVSXWSP,
    XVCVUXDDP, XVCVUXDSP, XVCVUXWDP, XVCVUXWSP,
    XVMADDADP, XVMADDASP, XVMADDMDP, XVMADDMSP,
    XVMSUBADP, XVMSUBASP, XVMSUBMDP, XVMSUBMSP,
    XVMULDP, XVMULSP,
    XVNMADDADP, XVNMADDASP, XVNMADDMDP, XVNMADDMSP,
    XVNMSUBADP, XVNMSUBASP, XVNMSUBMDP, XVNMSUBMSP,
    XVRDPI, XVRDPIC, XVRDPIM, XVRDPIP, XVRDPIZ,
    XVREDP, XVRESP,
    XVRSPI, XVRSPIC, XVRSPIM, XVRSPIP, XVRSPIZ,
    XVRSQRTEDP, XVRSQRTESP, XVSUBDP, XVSUBSP,
    VCFSX, VCFSX_0, VCFUX, VCFUX_0,
    VMHRADDSHS, VMLADDUHM,
    VMSUMMBM, VMSUMSHM, VMSUMSHS, VMSUMUBM, VMSUMUHM, VMSUMUDM, VMSUMUHS,
    VMULESB, VMULESH, VMULESW, VMULEUB, VMULEUH, VMULEUW,
    VMULOSB, VMULOSH, VMULOSW, VMULOUB, VMULOUH, VMULOUW, VMULUWM,
    VSUM2SWS, VSUM4SBS, VSUM4SHS, VSUM4UBS, VSUMSWS)>;
400
// Restricted DP operation with a 5 cycle latency: one DP unit, one EXEC
// pipeline and every dispatch slot of the superslice (DISP_3SLOTS_1C).
def : InstRW<
  [P9_DP_5C, IP_EXEC_1C, DISP_3SLOTS_1C],
  (instrs
    (instregex "MADD(HD|HDU|LD|LD8)$"),
    (instregex "MUL(HD|HW|LD|LI|LI8|LW)(U)?(O)?$"))>;
408
// Restricted DP operation with a 7 cycle latency: one DP unit, one EXEC
// pipeline and every dispatch slot of the superslice (DISP_3SLOTS_1C).
def : InstRW<
  [P9_DP_7C, IP_EXEC_1C, DISP_3SLOTS_1C],
  (instrs
    FRSP,
    // Opcodes selected by pattern.
    (instregex "FRI(N|P|Z|M)(D|S)$"),
    (instregex "FRE(S)?$"),
    (instregex "FADD(S)?$"),
    (instregex "FMSUB(S)?$"),
    (instregex "FMADD(S)?$"),
    (instregex "FSUB(S)?$"),
    (instregex "FCFID(U)?(S)?$"),
    (instregex "FCTID(U)?(Z)?$"),
    (instregex "FCTIW(U)?(Z)?$"),
    (instregex "FRSQRTE(S)?$"),
    // Opcodes listed by name.
    FNMADDS, FNMADD, FNMSUBS, FNMSUB, FSELD, FSELS, FMULS, FMUL,
    XSMADDADP, XSMADDASP, XSMADDMDP, XSMADDMSP,
    XSMSUBADP, XSMSUBASP, XSMSUBMDP, XSMSUBMSP,
    XSMULDP, XSMULSP,
    XSNMADDADP, XSNMADDASP, XSNMADDMDP, XSNMADDMSP,
    XSNMSUBADP, XSNMSUBASP, XSNMSUBMDP, XSNMSUBMSP)>;
451
// A 7 cycle Restricted DP operation paired with a 3 cycle ALU operation. The
// two pieces may run in parallel. The Restricted DP piece takes
// DISP_3SLOTS_1C and the ALU piece DISP_1C, four dispatches in total.
def : InstRW<
  [P9_DP_7C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C, DISP_3SLOTS_1C, DISP_1C],
  (instrs
    (instregex "FSEL(D|S)_rec$"))>;
460
// A 5 cycle Restricted DP operation combined with a 2 cycle ALU operation.
def : InstRW<
  [P9_DPOpAndALUOp_7C, IP_EXEC_1C, IP_EXEC_1C, DISP_3SLOTS_1C, DISP_1C],
  (instrs
    (instregex "MUL(H|L)(D|W)(U)?(O)?_rec$"))>;
467
// A 7 cycle Restricted DP operation followed by a 3 cycle ALU operation.
// The two pieces must run sequentially. The Restricted DP piece takes
// DISP_3SLOTS_1C and the ALU piece DISP_1C, four dispatches in total.
def : InstRW<
  [P9_DPOpAndALU2Op_10C, IP_EXEC_1C, IP_EXEC_1C, DISP_3SLOTS_1C, DISP_1C],
  (instrs
    // Opcodes selected by pattern.
    (instregex "FRI(N|P|Z|M)(D|S)_rec$"),
    (instregex "FRE(S)?_rec$"),
    (instregex "FADD(S)?_rec$"),
    (instregex "FSUB(S)?_rec$"),
    (instregex "F(N)?MSUB(S)?_rec$"),
    (instregex "F(N)?MADD(S)?_rec$"),
    (instregex "FCFID(U)?(S)?_rec$"),
    (instregex "FCTID(U)?(Z)?_rec$"),
    (instregex "FCTIW(U)?(Z)?_rec$"),
    (instregex "FMUL(S)?_rec$"),
    (instregex "FRSQRTE(S)?_rec$"),
    // Opcode listed by name.
    FRSP_rec)>;
487
// DP operation with a 7 cycle latency: one DP unit, one EXEC pipeline and one
// dispatch entry (DISP_1C).
def : InstRW<
  [P9_DP_7C, IP_EXEC_1C, DISP_1C],
  (instrs
    XSADDDP, XSADDSP,
    XSCVDPHP, XSCVDPSP,
    XSCVDPSXDS, XSCVDPSXDSs, XSCVDPSXWS,
    XSCVDPUXDS, XSCVDPUXDSs, XSCVDPUXWS,
    XSCVDPSXWSs, XSCVDPUXWSs,
    XSCVHPDP, XSCVSPDP,
    XSCVSXDDP, XSCVSXDSP, XSCVUXDDP, XSCVUXDSP,
    XSRDPI, XSRDPIC, XSRDPIM, XSRDPIP, XSRDPIZ,
    XSREDP, XSRESP, XSRSQRTEDP, XSRSQRTESP,
    XSSUBDP, XSSUBSP, XSCVDPSPN, XSRSP)>;
523
// PM operation with a three cycle latency. There is only one PM unit per
// superslice, so the whole superslice is used: both exec pipelines (EXECO,
// EXECE) and one dispatch entry (DISP_1C).
def : InstRW<
  [P9_PM_3C, IP_EXECO_1C, IP_EXECE_1C, DISP_1C],
  (instrs
    // Opcodes selected by pattern.
    (instregex "LVS(L|R)$"),
    (instregex "VSPLTIS(W|H|B)$"),
    (instregex "VSPLT(W|H|B)(s)?$"),
    (instregex "V_SETALLONES(B|H)?$"),
    (instregex "VEXTRACTU(B|H|W)$"),
    (instregex "VINSERT(B|H|W|D)$"),
    // Opcodes listed by name.
    MFVSRLD, MTVSRWS, VBPERMQ, VCLZLSBB, VCTZLSBB, VEXTRACTD,
    VEXTUBLX, VEXTUBRX, VEXTUHLX, VEXTUHRX, VEXTUWLX, VEXTUWRX,
    VGBBD,
    VMRGHB, VMRGHH, VMRGHW, VMRGLB, VMRGLH, VMRGLW,
    VPERM, VPERMR, VPERMXOR,
    VPKPX, VPKSDSS, VPKSDUS, VPKSHSS, VPKSHUS, VPKSWSS, VPKSWUS,
    VPKUDUM, VPKUDUS, VPKUHUM, VPKUHUS, VPKUWUM, VPKUWUS,
    VPRTYBQ,
    VSL, VSLDOI, VSLO, VSLV, VSR, VSRO, VSRV,
    VUPKHPX, VUPKHSB, VUPKHSH, VUPKHSW,
    VUPKLPX, VUPKLSB, VUPKLSH, VUPKLSW,
    XXBRD, XXBRH, XXBRQ, XXBRW,
    XXEXTRACTUW, XXINSERTW, XXMRGHW, XXMRGLW, XXPERM, XXPERMR,
    XXSLDWI, XXSLDWIs, XXSPLTIB, XXSPLTW, XXSPLTWs, XXPERMDI, XXPERMDIs,
    VADDCUQ, VADDECUQ, VADDEUQM, VADDUQM,
    VMUL10CUQ, VMUL10ECUQ, VMUL10EUQ, VMUL10UQ,
    VSUBCUQ, VSUBECUQ, VSUBEUQM, VSUBUQM,
    XSCMPEXPQP, XSCMPOQP, XSCMPUQP, XSTSTDCQP, XSXSIGQP,
    BCDCFN_rec, BCDCFZ_rec, BCDCPSGN_rec, BCDCTN_rec, BCDCTZ_rec,
    BCDSETSGN_rec, BCDS_rec, BCDTRUNC_rec, BCDUS_rec, BCDUTRUNC_rec,
    BCDADD_rec, BCDSUB_rec)>;
633
// DFU operation with a 12 cycle latency. The CPU has a single DFU unit, so a
// whole superslice is used: both exec pipelines (EXECO, EXECE) and one
// dispatch entry (DISP_1C).
def : InstRW<
  [P9_DFU_12C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
  (instrs
    BCDSR_rec,
    XSADDQP, XSADDQPO,
    XSCVDPQP, XSCVQPDP, XSCVQPDPO,
    XSCVQPSDZ, XSCVQPSWZ, XSCVQPUDZ, XSCVQPUWZ,
    XSCVSDQP, XSCVUDQP,
    XSRQPI, XSRQPIX, XSRQPXP,
    XSSUBQP, XSSUBQPO)>;
657
// DFU operation with a 23 cycle latency. The CPU has a single DFU unit, so a
// whole superslice is used: both exec pipelines (EXECO, EXECE) and one
// dispatch entry (DISP_1C).
def : InstRW<
  [P9_DFU_23C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
  (instrs BCDCTSQ_rec)>;
665
// DFU operation with a 24 cycle latency. The CPU has a single DFU unit, so a
// whole superslice is used: both exec pipelines (EXECO, EXECE) and one
// dispatch entry (DISP_1C).
def : InstRW<
  [P9_DFU_24C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
  (instrs
    XSMADDQP, XSMADDQPO, XSMSUBQP, XSMSUBQPO,
    XSMULQP, XSMULQPO,
    XSNMADDQP, XSNMADDQPO, XSNMSUBQP, XSNMSUBQPO)>;
682
// DFU operation with a 37 cycle latency. The CPU has a single DFU unit, so a
// whole superslice is used: both exec pipelines (EXECO, EXECE) and one
// dispatch entry (DISP_1C).
def : InstRW<
  [P9_DFU_37C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
  (instrs BCDCFSQ_rec)>;
690
// DFU operation with a 58 cycle latency. The CPU has a single DFU unit, so a
// whole superslice is used: both exec pipelines (EXECO, EXECE) and one
// dispatch entry (DISP_1C).
def : InstRW<
  [P9_DFU_58C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
  (instrs XSDIVQP, XSDIVQPO)>;
699
// 76 Cycle DFU operation. Only one DFU unit per CPU so we use a whole
// superslice. That includes both exec pipelines (EXECO, EXECE) and one
// dispatch (DISP_1C), the same as the other DFU groups.
def : InstRW<
  [P9_DFU_76C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
  (instrs XSSQRTQP, XSSQRTQPO)>;
708
// Load with a 6 cycle latency; a single slice is used.
// NOTE(review): unlike most patterns in this file, the regex below has no
// trailing `$` - confirm whether matching longer opcode names is intended.
def : InstRW<
  [P9_LS_6C, IP_AGEN_1C, DISP_1C],
  (instrs
    (instregex "LXVL(L)?"))>;
714
// Load with a 5 cycle latency; a single slice is used.
// NOTE(review): the "LVX(L)?" pattern has no trailing `$`, unlike its
// neighbours - confirm whether matching longer opcode names is intended.
def : InstRW<
  [P9_LS_5C, IP_AGEN_1C, DISP_1C],
  (instrs
    // Opcodes selected by pattern.
    (instregex "LVE(B|H|W)X$"),
    (instregex "LVX(L)?"),
    (instregex "LXSI(B|H)ZX$"),
    // Opcodes listed by name.
    LXSDX, LXVB16X, LXVD2X, LXVWSX, LXSIWZX, LXV, LXVX, LXSD,
    DFLOADf64, XFLOADf64, LIWZX)>;
733
// Load with a 4 cycle latency; a single slice is used.
def : InstRW<
  [P9_LS_4C, IP_AGEN_1C, DISP_1C],
  (instrs
    // Opcodes selected by pattern.
    (instregex "DCB(F|T|ST)(EP)?$"),
    (instregex "DCBZ(L)?(EP)?$"),
    (instregex "DCBTST(EP)?$"),
    (instregex "CP_COPY(8)?$"),
    (instregex "ICBI(EP)?$"),
    (instregex "ICBT(LS)?$"),
    (instregex "LBARX(L)?$"),
    (instregex "LBZ(CIX|8|X|X8|XTLS|XTLS_32)?(_)?$"),
    (instregex "LD(ARX|ARXL|BRX|CIX|X|XTLS)?(_)?$"),
    (instregex "LH(A|B)RX(L)?(8)?$"),
    (instregex "LHZ(8|CIX|X|X8|XTLS|XTLS_32)?(_)?$"),
    (instregex "LWARX(L)?$"),
    (instregex "LWBRX(8)?$"),
    (instregex "LWZ(8|CIX|X|X8|XTLS|XTLS_32)?(_)?$"),
    // Opcodes listed by name.
    CP_ABORT, DARN, EnforceIEIO, ISYNC, MSGSYNC, TLBSYNC, SYNC, LMW, LSWI)>;
761
// 4 Cycle Restricted load uses a single slice but the dispatch for the whole
// superslice (DISP_3SLOTS_1C).
def : InstRW<
  [P9_LS_4C, IP_AGEN_1C, DISP_3SLOTS_1C],
  (instrs
    LFIWZX,
    LFDX,
    // Matches LFDXTLS and LFDXTLS_. The final 'S' is mandatory; a `?` placed
    // directly after it would make only that one letter optional.
    (instregex "LFDXTLS(_)?$"),
    LFD)>;
771
// Cracked load instructions whose two load pieces can run in parallel.
def : InstRW<
  [P9_LS_4C, P9_LS_4C, IP_AGEN_1C, IP_AGEN_1C, DISP_PAIR_1C],
  (instrs SLBIA, SLBIE, SLBMFEE, SLBMFEV, SLBMTE, TLBIEL)>;
784
// Cracked load instruction made of a Load piece and an ALU piece, 6 cycles
// in total. The Load and ALU pieces can run in parallel.
def : InstRW<
  [P9_LS_4C, P9_ALU_2C, IP_EXEC_1C, IP_AGEN_1C, DISP_PAIR_1C, DISP_PAIR_1C],
  (instrs
    (instregex "L(W|H)ZU(X)?(8)?$"))>;
793
// Cracked TEND instruction made of a Load piece and an ALU piece, 6 cycles
// in total. The Load and ALU pieces can run in parallel.
def : InstRW<
  [P9_LS_4C, P9_ALU_2C, IP_EXEC_1C, IP_AGEN_1C, DISP_1C, DISP_1C],
  (instrs TEND)>;
802
803
// Cracked store instruction: a Store piece and an ALU piece that run
// consecutively. The store is Restricted and takes DISP_3SLOTS_1C; the ALU
// piece takes DISP_1C.
def : InstRW<
  [P9_StoreAndALUOp_3C, IP_EXEC_1C, IP_EXEC_1C, IP_AGEN_1C,
   DISP_3SLOTS_1C, DISP_1C],
  (instrs
    (instregex "ST(B|H|W|D)CX$"))>;
812
// Cracked load instruction made of consecutive Load and ALU pieces, 6 cycles
// in total. The two pieces cannot overlap, so their latencies are added.
def : InstRW<
  [P9_LoadAndALUOp_6C, IP_EXEC_1C, IP_AGEN_1C, DISP_1C, DISP_1C],
  (instrs
    (instregex "LHA(X)?(TLS)?(8)?(_32)?(_)?$"),
    (instregex "CP_PASTE(8)?_rec$"),
    (instregex "LWA(X)?(TLS)?(_32)?(_)?$"),
    TCHECK)>;
824
// Cracked and Restricted load instruction made of consecutive Load and ALU
// pieces, 6 cycles in total. The two pieces cannot overlap, so their
// latencies are added. Being both cracked and restricted, each piece takes
// DISP_3SLOTS_1C, six dispatches in total.
def : InstRW<
  [P9_LoadAndALUOp_6C, IP_EXEC_1C, IP_AGEN_1C,
   DISP_3SLOTS_1C, DISP_3SLOTS_1C],
  (instrs LFIWAX)>;
834
// Cracked load instruction made of consecutive Load and ALU pieces, 7 cycles
// in total. The two pieces cannot overlap, so their latencies are added.
// Each cracked piece has its own DISP_1C dispatch entry.
def : InstRW<
  [P9_LoadAndALUOp_7C, IP_AGEN_1C, IP_EXEC_1C, DISP_1C, DISP_1C],
  (instrs LXSIWAX, LIWAX)>;
844
// Cracked Load instruction.
// Requires consecutive Load (4 cycles) and ALU (3 cycles) pieces totaling 7
// cycles. The Load and ALU operations cannot be done at the same time and so
// their latencies are added.
// Full 6 dispatches are required as this is a restricted instruction.
def : InstRW<
  [P9_LoadAndALU2Op_7C, IP_AGEN_1C, IP_EXEC_1C,
   DISP_3SLOTS_1C, DISP_3SLOTS_1C],
  (instrs
    LFSX,
    // Matches LFSXTLS and LFSXTLS_. The final 'S' is mandatory; a `?` placed
    // directly after it would make only that one letter optional.
    (instregex "LFSXTLS(_)?$"),
    LFS)>;
857
// Cracked load instruction made of consecutive Load and ALU pieces, 8 cycles
// in total. The two pieces cannot overlap, so their latencies are added.
// Each cracked piece has its own DISP_1C dispatch entry.
def : InstRW<
  [P9_LoadAndALU2Op_8C, IP_AGEN_1C, IP_EXEC_1C, DISP_1C, DISP_1C],
  (instrs LXSSP, LXSSPX, XFLOADf32, DFLOADf32)>;
869
// Three-way cracked load instruction: a Load together with two ALU
// operations that depend on each other.
def : InstRW<
  [P9_LoadAndALUOp_6C, P9_ALU_2C, IP_AGEN_1C, IP_EXEC_1C, IP_EXEC_1C,
   DISP_PAIR_1C, DISP_PAIR_1C, DISP_1C],
  (instrs
    (instregex "LHAU(X)?(8)?$"),
    LWAUX)>;
878
// Cracked load that also needs the PM resource. The Load and the PM piece
// cannot overlap, so their latencies are added for 8 cycles in total. The PM
// piece needs the full superslice, i.e. both the EXECE and EXECO pipelines
// plus one dispatch; the Load piece takes the remaining dispatch entry.
def : InstRW<
  [P9_LoadAndPMOp_8C, IP_AGEN_1C, IP_EXECE_1C, IP_EXECO_1C,
   DISP_1C, DISP_1C],
  (instrs LXVH8X, LXVDSX, LXVW4X)>;
891
// Restricted store operation on a single slice. Because it is Restricted it
// takes every dispatch slot of the superslice (DISP_3SLOTS_1C).
def : InstRW<
  [P9_LS_1C, IP_EXEC_1C, IP_AGEN_1C, DISP_3SLOTS_1C],
  (instrs
    // Opcodes selected by pattern.
    (instregex "STF(S|D|IWX|SX|DX|SXTLS|DXTLS|SXTLS_|DXTLS_)$"),
    (instregex "STXS(D|DX|SPX|IWX|IBX|IHX|SP)(v)?$"),
    (instregex "STW(8)?$"),
    (instregex "(D|X)FSTORE(f32|f64)$"),
    (instregex "ST(W|H|D)BRX$"),
    (instregex "ST(B|H|D)(8)?$"),
    (instregex "ST(B|W|H|D)(CI)?X(TLS|TLS_32)?(8)?(_)?$"),
    // Opcodes listed by name.
    STIWX, SLBIEG, STMW, STSWI, TLBIE)>;
909
// Vector store instruction. It needs the whole superslice: one dispatch
// entry (DISP_1C) plus both the even and the odd exec pipelines.
def : InstRW<
  [P9_LS_1C, IP_EXECE_1C, IP_EXECO_1C, IP_AGEN_1C, DISP_1C],
  (instrs
    (instregex "STVE(B|H|W)X$"),
    (instregex "STVX(L)?$"),
    (instregex "STXV(B16X|H8X|W4X|D2X|L|LL|X)?$"))>;
919
// DIV operation with a 5 cycle latency. There is only one DIV unit per
// superslice, so the whole superslice is used: both exec pipelines (EXECO,
// EXECE), dispatched through DISP_EVEN_1C.
def : InstRW<
  [P9_DIV_5C, IP_EXECE_1C, IP_EXECO_1C, DISP_EVEN_1C],
  (instrs
    (instregex "MTCTR(8)?(loop)?$"),
    (instregex "MTLR(8)?$"))>;
928
// DIV operation with a 12 cycle latency. There is only one DIV unit per
// superslice, so the whole superslice is used: both exec pipelines (EXECO,
// EXECE), dispatched through DISP_EVEN_1C.
def : InstRW<
  [P9_DIV_12C, IP_EXECE_1C, IP_EXECO_1C, DISP_EVEN_1C],
  (instrs
    (instregex "M(T|F)VRSAVE(v)?$"),
    (instregex "M(T|F)PMR$"),
    (instregex "M(T|F)TB(8)?$"),
    (instregex "MF(SPR|CTR|LR)(8)?$"),
    (instregex "M(T|F)MSR(D)?$"),
    (instregex "M(T|F)(U)?DSCR$"),
    (instregex "MTSPR(8)?$"))>;
942
// DIV operation with a 16 cycle latency. There is only one DIV unit per
// superslice, so the whole superslice is used: both exec pipelines (EXECO,
// EXECE), dispatched through DISP_EVEN_1C.
def : InstRW<
  [P9_DIV_16C_8, IP_EXECO_1C, IP_EXECE_1C, DISP_EVEN_1C],
  (instrs DIVW, DIVWO, DIVWU, DIVWUO, MODSW)>;
954
// DIV operation with a 24 cycle latency. There is only one DIV unit per
// superslice, so the whole superslice is used: both exec pipelines (EXECO,
// EXECE), dispatched through DISP_EVEN_1C.
def : InstRW<
  [P9_DIV_24C_8, IP_EXECO_1C, IP_EXECE_1C, DISP_EVEN_1C],
  (instrs
    DIVWE, DIVWEO, DIVD, DIVDO,
    DIVWEU, DIVWEUO, DIVDU, DIVDUO,
    MODSD, MODUD, MODUW)>;
972
// 40 Cycle DIV operation. Only one DIV unit per superslice so we use the whole
// superslice. That includes both exec pipelines (EXECO, EXECE). Dispatch goes
// through DISP_EVEN_1C, the same entry the other DIV groups use.
def : InstRW<
  [P9_DIV_40C_8, IP_EXECO_1C, IP_EXECE_1C, DISP_EVEN_1C],
  (instrs DIVDE, DIVDEO, DIVDEU, DIVDEUO)>;
983
// Cracked DIV and ALU operation. Requires one full slice for the ALU operation
// and one full superslice for the DIV operation since there is only one DIV per
// superslice.
// NOTE(review): the write name indicates a DIV plus ALU latency of 18 cycles;
// confirm against the definition of P9_IntDivAndALUOp_18C_8.
def : InstRW<
  [P9_IntDivAndALUOp_18C_8, IP_EXECE_1C, IP_EXECO_1C, IP_EXEC_1C,
   DISP_EVEN_1C, DISP_1C],
  (instrs
    (instregex "DIVW(U)?(O)?_rec$"))>;
992
// Cracked DIV plus ALU operation. The ALU piece needs one full slice and the
// DIV piece one full superslice, because each superslice has a single DIV
// unit. The combined DIV plus ALU latency is 26.
def : InstRW<
  [P9_IntDivAndALUOp_26C_8, IP_EXECE_1C, IP_EXECO_1C, IP_EXEC_1C,
   DISP_EVEN_1C, DISP_1C],
  (instrs
    DIVD_rec, DIVDO_rec, DIVDU_rec, DIVDUO_rec,
    DIVWE_rec, DIVWEO_rec, DIVWEU_rec, DIVWEUO_rec)>;
1008
// Cracked DIV plus ALU operation. The ALU piece needs one full slice and the
// DIV piece one full superslice, because each superslice has a single DIV
// unit. The combined DIV plus ALU latency is 42.
def : InstRW<
  [P9_IntDivAndALUOp_42C_8, IP_EXECE_1C, IP_EXECO_1C, IP_EXEC_1C,
   DISP_EVEN_1C, DISP_1C],
  (instrs DIVDE_rec, DIVDEO_rec, DIVDEU_rec, DIVDEUO_rec)>;
1020
1021// CR access instructions in _BrMCR, IIC_BrMCRX.
1022
// Cracked and Restricted ALU operations.
// The two 2 cycle ALU ops can run in parallel, so their latencies are not
// added. In effect this is two instructions running side by side on two
// pipelines, each taking DISP_3SLOTS_1C (six dispatches in total).
def : InstRW<
  [P9_ALU_2C, P9_ALU_2C, IP_EXEC_1C, IP_EXEC_1C,
   DISP_3SLOTS_1C, DISP_3SLOTS_1C],
  (instrs MTCRF, MTCRF8)>;
1034
// Cracked ALU operations.
// The two 2 cycle ALU ops can run in parallel, so their latencies are not
// added. In effect this is two instructions running side by side on two
// pipelines, each taking DISP_1C (two dispatches in total).
def : InstRW<
  [P9_ALU_2C, P9_ALU_2C, IP_EXEC_1C, IP_EXEC_1C, DISP_1C, DISP_1C],
  (instrs
    (instregex "ADDC(8)?(O)?_rec$"),
    (instregex "SUBFC(8)?(O)?_rec$"))>;
1046
// Cracked ALU operations.
// The two ALU ops can run in parallel: one is a three cycle ALU op, the other
// a two cycle ALU op. One of them is Restricted (DISP_3SLOTS_1C) and the
// other is not (DISP_1C).
def : InstRW<
  [P9_ALU_2C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C, DISP_3SLOTS_1C, DISP_1C],
  (instrs
    (instregex "F(N)?ABS(D|S)_rec$"),
    (instregex "FCPSGN(D|S)_rec$"),
    (instregex "FNEG(D|S)_rec$"),
    FMR_rec)>;
1060
// Cracked ALU operations.
// The two 3 cycle ALU ops can run in parallel, so their latencies are not
// added. In effect this is two instructions running side by side on two
// pipelines, each taking DISP_1C (two dispatches in total).
def : InstRW<
  [P9_ALU_3C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C, DISP_1C, DISP_1C],
  (instrs MCRFS)>;
1071
// Cracked and Restricted ALU operations.
// The two 3 cycle ALU ops can run in parallel, so their latencies are not
// added. In effect this is two instructions running side by side on two
// pipelines, each taking DISP_3SLOTS_1C (six dispatches in total).
def : InstRW<
  [P9_ALU_3C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C,
   DISP_3SLOTS_1C, DISP_3SLOTS_1C],
  (instrs
    (instregex "MTFSF(b|_rec)?$"),
    (instregex "MTFSFI(_rec)?$"),
    MTFSFIb)>;
1084
// Cracked instruction consisting of two ALU ops that cannot run in parallel.
// One of the ALU ops is Restricted and takes DISP_3SLOTS_1C; the other takes
// DISP_1C.
def : InstRW<
  [P9_ALUOpAndALUOp_4C, IP_EXEC_1C, IP_EXEC_1C, DISP_3SLOTS_1C, DISP_1C],
  (instrs
    // Opcodes selected by pattern.
    (instregex "RLD(I)?C(R|L)_rec$"),
    (instregex "RLW(IMI|INM|NM)(8)?_rec$"),
    (instregex "SLW(8)?_rec$"),
    (instregex "SRAW(I)?_rec$"),
    (instregex "SRW(8)?_rec$"),
    // Opcodes listed by name.
    RLDICL_32_rec, RLDIMI_rec)>;
1099
// Cracked instruction consisting of two ALU ops that cannot run in parallel.
// Both ALU ops are Restricted, so each takes DISP_3SLOTS_1C.
def : InstRW<
  [P9_ALU2OpAndALU2Op_6C, IP_EXEC_1C, IP_EXEC_1C,
   DISP_3SLOTS_1C, DISP_3SLOTS_1C],
  (instrs
    (instregex "MFFS(L|CE|_rec)?$"))>;
1108
// Cracked ALU instruction composed of three consecutive 2 cycle ALU ops for a
// total of 6 cycles. All of the ALU operations are also restricted so each
// takes 3 dispatches for a total of 9.
def : InstRW<
  [
    P9_ALUOpAndALUOpAndALUOp_6C,                       // three chained ALU ops
    IP_EXEC_1C, IP_EXEC_1C, IP_EXEC_1C,                // execution pipelines
    DISP_3SLOTS_1C, DISP_3SLOTS_1C, DISP_3SLOTS_1C     // dispatch (3 slots each)
  ],
  (instrs
    (instregex "MFCR(8)?$")
  )
>;
1117
1118// Cracked instruction made of two ALU ops.
1119// The two ops cannot be done in parallel.
def : InstRW<
  [
    P9_ALUOpAndALUOp_4C,       // two chained ALU ops
    IP_EXEC_1C, IP_EXEC_1C,    // execution pipelines
    DISP_1C, DISP_1C           // dispatch
  ],
  (instrs
    (instregex "EXTSWSLI_32_64_rec$"),
    (instregex "SRAD(I)?_rec$"),
    EXTSWSLI_rec,
    SLD_rec,
    SRD_rec,
    RLDIC_rec
  )
>;
1129
1130// 33 Cycle DP Instruction Restricted. Takes one slice and 3 dispatches.
def : InstRW<
  [
    P9_DP_33C_8,       // DP resource
    IP_EXEC_1C,        // execution pipeline
    DISP_3SLOTS_1C     // dispatch (3 slots)
  ],
  (instrs FDIV)
>;
1135
1136// 33 Cycle DP Instruction Restricted and Cracked with 3 Cycle ALU.
def : InstRW<
  [
    P9_DPOpAndALU2Op_36C_8,    // DP op followed by ALU op
    IP_EXEC_1C, IP_EXEC_1C,    // execution pipelines
    DISP_3SLOTS_1C, DISP_1C    // dispatch
  ],
  (instrs FDIV_rec)
>;
1142
1143// 36 Cycle DP Instruction.
1144// Instruction can be done on a single slice.
def : InstRW<
  [
    P9_DP_36C_10,    // DP resource
    IP_EXEC_1C,      // execution pipeline
    DISP_1C          // dispatch
  ],
  (instrs XSSQRTDP)
>;
1149
1150// 36 Cycle DP Instruction Restricted. Takes one slice and 3 dispatches.
def : InstRW<
  [
    P9_DP_36C_10,      // DP resource
    IP_EXEC_1C,        // execution pipeline
    DISP_3SLOTS_1C     // dispatch (3 slots)
  ],
  (instrs FSQRT)
>;
1155
1156// 36 Cycle DP Vector Instruction.
def : InstRW<
  [
    P9_DPE_36C_10, P9_DPO_36C_10,    // even and odd DP resources
    IP_EXECE_1C, IP_EXECO_1C,        // even and odd execution pipelines
    DISP_1C                          // dispatch
  ],
  (instrs XVSQRTDP)
>;
1162
1163// 27 Cycle DP Vector Instruction.
def : InstRW<
  [
    P9_DPE_27C_10, P9_DPO_27C_10,    // even and odd DP resources
    IP_EXECE_1C, IP_EXECO_1C,        // even and odd execution pipelines
    DISP_1C                          // dispatch
  ],
  (instrs XVSQRTSP)
>;
1169
1170// 36 Cycle DP Instruction Restricted and Cracked with 3 Cycle ALU.
def : InstRW<
  [
    P9_DPOpAndALU2Op_39C_10,   // DP op followed by ALU op
    IP_EXEC_1C, IP_EXEC_1C,    // execution pipelines
    DISP_3SLOTS_1C, DISP_1C    // dispatch
  ],
  (instrs FSQRT_rec)
>;
1176
1177// 26 Cycle DP Instruction.
def : InstRW<
  [
    P9_DP_26C_5,     // DP resource
    IP_EXEC_1C,      // execution pipeline
    DISP_1C          // dispatch
  ],
  (instrs XSSQRTSP)
>;
1182
1183// 26 Cycle DP Instruction Restricted. Takes one slice and 3 dispatches.
def : InstRW<
  [
    P9_DP_26C_5,       // DP resource
    IP_EXEC_1C,        // execution pipeline
    DISP_3SLOTS_1C     // dispatch (3 slots)
  ],
  (instrs FSQRTS)
>;
1188
1189// 26 Cycle DP Instruction Restricted and Cracked with 3 Cycle ALU.
def : InstRW<
  [
    P9_DPOpAndALU2Op_29C_5,    // DP op followed by ALU op
    IP_EXEC_1C, IP_EXEC_1C,    // execution pipelines
    DISP_3SLOTS_1C, DISP_1C    // dispatch
  ],
  (instrs FSQRTS_rec)
>;
1195
1196// 33 Cycle DP Instruction. Takes one slice and 1 dispatch.
def : InstRW<
  [
    P9_DP_33C_8,     // DP resource
    IP_EXEC_1C,      // execution pipeline
    DISP_1C          // dispatch
  ],
  (instrs XSDIVDP)
>;
1201
1202// 22 Cycle DP Instruction Restricted. Takes one slice and 3 dispatches.
def : InstRW<
  [
    P9_DP_22C_5,       // DP resource
    IP_EXEC_1C,        // execution pipeline
    DISP_3SLOTS_1C     // dispatch (3 slots)
  ],
  (instrs FDIVS)
>;
1207
// 22 Cycle DP Instruction Restricted and Cracked with 3 Cycle ALU.
def : InstRW<
  [
    P9_DPOpAndALU2Op_25C_5,    // DP op followed by ALU op
    IP_EXEC_1C, IP_EXEC_1C,    // execution pipelines
    DISP_3SLOTS_1C, DISP_1C    // dispatch
  ],
  (instrs FDIVS_rec)
>;
1214
1215// 22 Cycle DP Instruction. Takes one slice and 1 dispatch.
def : InstRW<
  [
    P9_DP_22C_5,     // DP resource
    IP_EXEC_1C,      // execution pipeline
    DISP_1C          // dispatch
  ],
  (instrs XSDIVSP)
>;
1220
1221// 24 Cycle DP Vector Instruction. Takes one full superslice.
1222// Includes both EXECE, EXECO pipelines and 1 dispatch for the given
1223// superslice.
def : InstRW<
  [
    P9_DPE_24C_8, P9_DPO_24C_8,    // even and odd DP resources
    IP_EXECE_1C, IP_EXECO_1C,      // even and odd execution pipelines
    DISP_1C                        // dispatch
  ],
  (instrs XVDIVSP)
>;
1229
1230// 33 Cycle DP Vector Instruction. Takes one full superslice.
1231// Includes both EXECE, EXECO pipelines and 1 dispatch for the given
1232// superslice.
def : InstRW<
  [
    P9_DPE_33C_8, P9_DPO_33C_8,    // even and odd DP resources
    IP_EXECE_1C, IP_EXECO_1C,      // even and odd execution pipelines
    DISP_1C                        // dispatch
  ],
  (instrs XVDIVDP)
>;
1238
1239// Instruction cracked into three pieces. One Load and two ALU operations.
1240// The Load and one of the ALU ops cannot be run at the same time and so the
// latencies are added together for 7 cycles. The remaining ALU is 2 cycles.
// Both the load and the ALU that depends on it are restricted and so they take
// a total of 6 dispatches. The final dispatch comes from the second ALU op.
1244// The two EXEC pipelines are for the 2 ALUs while the AGEN is for the load.
def : InstRW<
  [
    P9_LoadAndALU2Op_7C, P9_ALU_2C,             // load + dependent ALU, extra ALU
    IP_AGEN_1C, IP_EXEC_1C, IP_EXEC_1C,         // AGEN and execution pipelines
    DISP_3SLOTS_1C, DISP_3SLOTS_1C, DISP_1C     // dispatch
  ],
  (instrs
    (instregex "LF(SU|SUX)$")
  )
>;
1251
1252// Cracked instruction made up of a Store and an ALU. The ALU does not depend on
1253// the store and so it can be run at the same time as the store. The store is
1254// also restricted.
def : InstRW<
  [
    P9_LS_1C, P9_ALU_2C,                   // store and ALU op
    IP_AGEN_1C, IP_EXEC_1C, IP_EXEC_1C,    // AGEN and execution pipelines
    DISP_3SLOTS_1C, DISP_1C                // dispatch
  ],
  (instrs
    (instregex "STF(S|D)U(X)?$"),
    (instregex "ST(B|H|W|D)U(X)?(8)?$")
  )
>;
1261
1262// Cracked instruction made up of a Load and an ALU. The ALU does not depend on
1263// the load and so it can be run at the same time as the load.
def : InstRW<
  [
    P9_LS_4C, P9_ALU_2C,           // load and ALU op
    IP_AGEN_1C, IP_EXEC_1C,        // AGEN and execution pipelines
    DISP_PAIR_1C, DISP_PAIR_1C     // dispatch
  ],
  (instrs
    (instregex "LBZU(X)?(8)?$"),
    (instregex "LDU(X)?$")
  )
>;
1270
1271// Cracked instruction made up of a Load and an ALU. The ALU does not depend on
1272// the load and so it can be run at the same time as the load. The load is also
// restricted. 3 dispatches are from the restricted load while the other one
// is from the ALU. The AGEN pipeline is from the load and the EXEC pipeline
// is required for the ALU.
def : InstRW<
  [
    P9_LS_4C, P9_ALU_2C,        // load and ALU op
    IP_AGEN_1C, IP_EXEC_1C,     // AGEN and execution pipelines
    DISP_3SLOTS_1C, DISP_1C     // dispatch
  ],
  (instrs
    (instregex "LF(DU|DUX)$")
  )
>;
1281
1282// Crypto Instructions
1283
1284// 6 Cycle CY operation. Only one CY unit per CPU so we use a whole
1285// superslice. That includes both exec pipelines (EXECO, EXECE) and one
1286// dispatch.
def : InstRW<
  [
    P9_CY_6C,                    // CY resource
    IP_EXECO_1C, IP_EXECE_1C,    // odd and even execution pipelines
    DISP_1C                      // dispatch
  ],
  (instrs
    (instregex "VPMSUM(B|H|W|D)$"),
    (instregex "V(N)?CIPHER(LAST)?$"),
    VSBOX
  )
>;
1293
1294// Branch Instructions
1295
1296// Two Cycle Branch
def : InstRW<
  [P9_BR_2C, DISP_BR_1C],
  (instrs
    // Instructions selected by name pattern.
    (instregex "BCCCTR(L)?(8)?$"),
    (instregex "BCCL(A|R|RL)?$"),
    (instregex "BCCTR(L)?(8)?(n)?$"),
    (instregex "BD(N)?Z(8|A|Am|Ap|m|p)?$"),
    (instregex "BD(N)?ZL(A|Am|Ap|R|R8|RL|RLm|RLp|Rm|Rp|m|p)?$"),
    (instregex "BL(_TLS|_NOP)?(_RM)?$"),
    (instregex "BL8(_TLS|_NOP|_NOP_TLS|_TLS_)?(_RM)?$"),
    (instregex "BLA(8|8_NOP)?(_RM)?$"),
    (instregex "BLR(8|L)?$"),
    (instregex "TAILB(A)?(8)?$"),
    (instregex "TAILBCTR(8)?$"),
    (instregex "gBC(A|Aat|CTR|CTRL|L|LA|LAat|LR|LRL|Lat|at)?$"),
    (instregex "BCLR(L)?(n)?$"),
    (instregex "BCTR(L)?(8)?(_RM)?$"),
    // Instructions listed by exact name.
    B, BA, BC, BCC, BCCA,
    BCL, BCLalways, BCLn,
    BCTRL8_LDinto_toc, BCTRL_LWZinto_toc,
    BCTRL8_LDinto_toc_RM, BCTRL_LWZinto_toc_RM,
    BCn,
    CTRL_DEP
  )
>;
1328
1329// Five Cycle Branch with a 2 Cycle ALU Op
1330// Operations must be done consecutively and not in parallel.
def : InstRW<
  [
    P9_BROpAndALUOp_7C,     // branch op followed by ALU op
    IP_EXEC_1C,             // execution pipeline
    DISP_BR_1C, DISP_1C     // dispatch
  ],
  (instrs ADDPCIS)
>;
1335
1336// Special Extracted Instructions For Atomics
1337
1338// Atomic Load
def : InstRW<
  [
    // Load/store resources.
    P9_LS_1C, P9_LS_1C,
    P9_LS_4C, P9_LS_4C, P9_LS_4C,
    // Execution and AGEN pipelines.
    IP_EXEC_1C, IP_EXEC_1C,
    IP_AGEN_1C, IP_AGEN_1C, IP_AGEN_1C, IP_AGEN_1C, IP_AGEN_1C,
    // Dispatch.
    DISP_1C, DISP_3SLOTS_1C, DISP_3SLOTS_1C,
    DISP_1C, DISP_1C, DISP_1C
  ],
  (instrs
    (instregex "L(D|W)AT$")
  )
>;
1346
1347// Atomic Store
def : InstRW<
  [
    // Load/store resources.
    P9_LS_1C, P9_LS_4C, P9_LS_4C,
    // Execution and AGEN pipelines.
    IP_EXEC_1C,
    IP_AGEN_1C, IP_AGEN_1C, IP_AGEN_1C,
    // Dispatch.
    DISP_1C, DISP_3SLOTS_1C, DISP_1C
  ],
  (instrs
    (instregex "ST(D|W)AT$")
  )
>;
1353
1354// Signal Processing Engine (SPE) Instructions
1355// These instructions are not supported on Power 9
// Empty resource list: every instruction below is flagged Unsupported.
let Unsupported = 1 in
def : InstRW<
  [],
  (instrs
    // Instructions listed by exact name.
    BRINC, EVABS, EVEQV, EVMRA, EVNAND, EVNEG,
    // Instructions selected by name pattern.
    (instregex "EVADD(I)?W$"),
    (instregex "EVADD(SM|SS|UM|US)IAAW$"),
    (instregex "EVAND(C)?$"),
    (instregex "EVCMP(EQ|GTS|GTU|LTS|LTU)$"),
    (instregex "EVCNTL(S|Z)W$"),
    (instregex "EVDIVW(S|U)$"),
    (instregex "EVEXTS(B|H)$"),
    (instregex "EVLD(H|W|D)(X)?$"),
    (instregex "EVLHH(E|OS|OU)SPLAT(X)?$"),
    (instregex "EVLWHE(X)?$"),
    (instregex "EVLWHO(S|U)(X)?$"),
    (instregex "EVLW(H|W)SPLAT(X)?$"),
    (instregex "EVMERGE(HI|LO|HILO|LOHI)$"),
    (instregex "EVMHEG(S|U)M(F|I)A(A|N)$"),
    (instregex "EVMHES(M|S)(F|I)(A|AA|AAW|ANW)?$"),
    (instregex "EVMHEU(M|S)I(A|AA|AAW|ANW)?$"),
    (instregex "EVMHOG(U|S)M(F|I)A(A|N)$"),
    (instregex "EVMHOS(M|S)(F|I)(A|AA|AAW|ANW)?$"),
    (instregex "EVMHOU(M|S)I(A|AA|ANW|AAW)?$"),
    (instregex "EVMWHS(M|S)(F|FA|I|IA)$"),
    (instregex "EVMWHUMI(A)?$"),
    (instregex "EVMWLS(M|S)IA(A|N)W$"),
    (instregex "EVMWLU(M|S)I(A|AA|AAW|ANW)?$"),
    (instregex "EVMWSM(F|I)(A|AA|AN)?$"),
    (instregex "EVMWSSF(A|AA|AN)?$"),
    (instregex "EVMWUMI(A|AA|AN)?$"),
    (instregex "EV(N|X)?OR(C)?$"),
    (instregex "EVR(LW|LWI|NDW)$"),
    (instregex "EVSLW(I)?$"),
    (instregex "EVSPLAT(F)?I$"),
    (instregex "EVSRW(I)?(S|U)$"),
    (instregex "EVST(DD|DH|DW|WHE|WHO|WWE|WWO)(X)?$"),
    (instregex "EVSUBF(S|U)(M|S)IAAW$"),
    (instregex "EVSUB(I)?FW$")
  )
>;
1399
1400// General Instructions without scheduling support.
// Empty resource list: every instruction below is flagged Unsupported.
let Unsupported = 1 in
def : InstRW<
  [],
  (instrs
    // Instructions selected by name pattern.
    (instregex "(H)?RFI(D)?$"),
    (instregex "DSS(ALL)?$"),
    (instregex "DST(ST)?(T)?(64)?$"),
    (instregex "ICBL(C|Q)$"),
    (instregex "L(W|H|B)EPX$"),
    (instregex "ST(W|H|B)EPX$"),
    (instregex "(L|ST)FDEPX$"),
    (instregex "M(T|F)SR(IN)?$"),
    (instregex "M(T|F)DCR$"),
    (instregex "NOP_GT_PWR(6|7)$"),
    (instregex "TLB(IA|IVAX|SX|SX2|SX2D|LD|LI|RE|RE2|WE|WE2)$"),
    (instregex "WRTEE(I)?$"),
    (instregex "HASH(ST|STP|CHK|CHKP)(8)?$"),
    // Instructions listed by exact name.
    ATTN, CLRBHRB, MFBHRBE,
    MBAR, MSYNC,
    SLBSYNC, SLBFEE_rec,
    NAP, STOP, TRAP,
    RFCI, RFDI, RFMCI,
    SC,
    DCBA, DCBI, DCCCI, ICCCI,
    ADDEX, ADDEX8,
    CDTBCD, CDTBCD8,
    CBCDTD, CBCDTD8,
    ADDG6S, ADDG6S8
  )
>;
1440