//===- P9InstrResources.td - P9 Instruction Resource Defs  -*- tablegen -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the resources required by P9 instructions. This is part of
// the P9 processor model used for instruction scheduling. This file should
// contain all the instructions that may be used on Power 9. This is not
// just instructions that are new on Power 9 but also instructions that were
// available on earlier architectures and are still used in Power 9.
//
// The makeup of the P9 CPU is modeled as follows:
//   - Each CPU is made up of two superslices.
//   - Each superslice is made up of two slices. Therefore, there are 4 slices
//   for each CPU.
//   - Up to 6 instructions can be dispatched to each CPU. Three per superslice.
//   - Each CPU has:
//     - One CY (Crypto) unit P9_CY_*
//     - One DFU (Decimal Floating Point and Quad Precision) unit P9_DFU_*
//     - Two PM (Permute) units. One on each superslice. P9_PM_*
//     - Two DIV (Fixed Point Divide) units. One on each superslice. P9_DIV_*
//     - Four ALU (Fixed Point Arithmetic) units. One on each slice. P9_ALU_*
//     - Four DP (Floating Point) units. One on each slice. P9_DP_*
//       This also includes fixed point multiply add.
//     - Four AGEN (Address Generation) units. One for each slice. P9_AGEN_*
//     - Four Load/Store Queues. P9_LS_*
//   - Each set of instructions will require a number of these resources.
//===----------------------------------------------------------------------===//

// Two cycle ALU vector operation that uses an entire superslice.
// Uses both ALU units (the even ALUE and odd ALUO units), two pipelines
// (EXECE, EXECO) and 1 dispatch (DISP) to the given superslice.
def : InstRW<[P9_ALUE_2C, P9_ALUO_2C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
      (instrs
    (instregex "VADDU(B|H|W|D)M$"),
    (instregex "VAND(C)?$"),
    (instregex "VEXTS(B|H|W)2(D|W)(s)?$"),
    (instregex "V_SET0(B|H)?$"),
    (instregex "VS(R|L)(B|H|W|D)$"),
    (instregex "VSUBU(B|H|W|D)M$"),
    (instregex "VPOPCNT(B|H)$"),
    (instregex "VRL(B|H|W|D)$"),
    (instregex "VSRA(B|H|W|D)$"),
    (instregex "XV(N)?ABS(D|S)P$"),
    (instregex "XVCPSGN(D|S)P$"),
    (instregex "XV(I|X)EXP(D|S)P$"),
    (instregex "VRL(D|W)(MI|NM)$"),
    (instregex "VMRG(E|O)W$"),
    MTVSRDD,
    VEQV,
    VNAND,
    VNEGD,
    VNEGW,
    VNOR,
    VOR,
    VORC,
    VSEL,
    VXOR,
    XVNEGDP,
    XVNEGSP,
    XXLAND,
    XXLANDC,
    XXLEQV,
    XXLEQVOnes,
    XXLNAND,
    XXLNOR,
    XXLOR,
    XXLORf,
    XXLORC,
    XXLXOR,
    XXLXORdpz,
    XXLXORspz,
    XXLXORz,
    XXSEL,
    XSABSQP,
    XSCPSGNQP,
    XSIEXPQP,
    XSNABSQP,
    XSNEGQP,
    XSXEXPQP
)>;

// Restricted Dispatch ALU operation for 3 cycles. The operation runs on a
// single slice. However, since it is Restricted, it requires all 3 dispatches
// (DISP) for that superslice.
def : InstRW<[P9_ALU_3C, IP_EXEC_1C, DISP_3SLOTS_1C],
      (instrs
    (instregex "TABORT(D|W)C(I)?$"),
    (instregex "MTFSB(0|1)$"),
    (instregex "MFFSC(D)?RN(I)?$"),
    (instregex "CMPRB(8)?$"),
    (instregex "TD(I)?$"),
    (instregex "TW(I)?$"),
    (instregex "FCMP(O|U)(S|D)$"),
    (instregex "XSTSTDC(S|D)P$"),
    FTDIV,
    FTSQRT,
    CMPEQB
)>;

// Standard Dispatch ALU operation for 3 cycles. Only one slice used.
def : InstRW<[P9_ALU_3C, IP_EXEC_1C, DISP_1C],
      (instrs
    (instregex "XSMAX(C|J)?DP$"),
    (instregex "XSMIN(C|J)?DP$"),
    (instregex "XSCMP(EQ|EXP|GE|GT|O|U)DP$"),
    (instregex "CNT(L|T)Z(D|W)(8)?(_rec)?$"),
    (instregex "POPCNT(D|W)$"),
    (instregex "CMPB(8)?$"),
    (instregex "SETB(8)?$"),
    XSTDIVDP,
    XSTSQRTDP,
    XSXSIGDP,
    XSCVSPDPN,
    BPERMD
)>;

// Standard Dispatch ALU operation for 2 cycles. Only one slice used.
def : InstRW<[P9_ALU_2C, IP_EXEC_1C, DISP_1C],
      (instrs
    (instregex "S(L|R)D$"),
    (instregex "SRAD(I)?$"),
    (instregex "EXTSWSLI_32_64$"),
    (instregex "MFV(S)?RD$"),
    (instregex "MTV(S)?RD$"),
    (instregex "MTV(S)?RW(A|Z)$"),
    (instregex "CMP(WI|LWI|W|LW)(8)?$"),
    (instregex "CMP(L)?D(I)?$"),
    (instregex "SUBF(I)?C(8)?(O)?$"),
    (instregex "ANDI(S)?(8)?(_rec)?$"),
    (instregex "ADDC(8)?(O)?$"),
    (instregex "ADDIC(8)?(_rec)?$"),
    (instregex "ADD(8|4)(O)?(_rec)?$"),
    (instregex "ADD(E|ME|ZE)(8)?(O)?(_rec)?$"),
    (instregex "SUBF(E|ME|ZE)?(8)?(O)?(_rec)?$"),
    (instregex "NEG(8)?(O)?(_rec)?$"),
    (instregex "POPCNTB$"),
    (instregex "POPCNTB8$"),
    (instregex "ADD(I|IS)?(8)?$"),
    (instregex "LI(S)?(8)?$"),
    (instregex "(X)?OR(I|IS)?(8)?(_rec)?$"),
    (instregex "NAND(8)?(_rec)?$"),
    (instregex "AND(C)?(8)?(_rec)?$"),
    (instregex "NOR(8)?(_rec)?$"),
    (instregex "OR(C)?(8)?(_rec)?$"),
    (instregex "EQV(8)?(_rec)?$"),
    (instregex "EXTS(B|H|W)(8)?(_32)?(_64)?(_rec)?$"),
    (instregex "ADD(4|8)(TLS)?(_)?$"),
    (instregex "NEG(8)?(O)?$"),
    (instregex "ADDI(S)?toc(HA|L)(8)?$"),
    (instregex "LA(8)?$"),
    COPY,
    MCRF,
    MCRXRX,
    XSNABSDP,
    XSXEXPDP,
    XSABSDP,
    XSNEGDP,
    XSCPSGNDP,
    MFVSRWZ,
    MFVRWZ,
    EXTSWSLI,
    SRADI_32,
    RLDIC,
    RFEBB,
    TBEGIN,
    TRECHKPT,
    NOP,
    WAIT
)>;

// Restricted Dispatch ALU operation for 2 cycles. The operation runs on a
// single slice. However, since it is Restricted, it requires all 3 dispatches
// (DISP) for that superslice.
def : InstRW<[P9_ALU_2C, IP_EXEC_1C, DISP_3SLOTS_1C],
      (instrs
    (instregex "RLDC(L|R)$"),
    (instregex "RLWIMI(8)?$"),
    (instregex "RLDIC(L|R)(_32)?(_64)?$"),
    (instregex "M(F|T)OCRF(8)?$"),
    (instregex "CR(6)?(UN)?SET$"),
    (instregex "CR(N)?(OR|AND)(C)?$"),
    (instregex "S(L|R)W(8)?$"),
    (instregex "RLW(INM|NM)(8)?$"),
    (instregex "F(N)?ABS(D|S)$"),
    (instregex "FNEG(D|S)$"),
    (instregex "FCPSGN(D|S)$"),
    (instregex "SRAW(I)?$"),
    (instregex "ISEL(8)?$"),
    RLDIMI,
    XSIEXPDP,
    FMR,
    CREQV,
    CRXOR,
    TRECLAIM,
    TSR,
    TABORT
)>;

// Three cycle ALU vector operation that uses an entire superslice.
// Uses both ALU units (the even ALUE and odd ALUO units), two pipelines
// (EXECE, EXECO) and 1 dispatch (DISP) to the given superslice.
def : InstRW<[P9_ALUE_3C, P9_ALUO_3C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
      (instrs
    (instregex "M(T|F)VSCR$"),
    (instregex "VCMPNEZ(B|H|W)$"),
    (instregex "VCMPEQU(B|H|W|D)$"),
    (instregex "VCMPNE(B|H|W)$"),
    (instregex "VABSDU(B|H|W)$"),
    (instregex "VADDU(B|H|W)S$"),
    (instregex "VAVG(S|U)(B|H|W)$"),
    (instregex "VCMP(EQ|GE|GT)FP(_rec)?$"),
    (instregex "VCMPBFP(_rec)?$"),
    (instregex "VC(L|T)Z(B|H|W|D)$"),
    (instregex "VADDS(B|H|W)S$"),
    (instregex "V(MIN|MAX)FP$"),
    (instregex "V(MIN|MAX)(S|U)(B|H|W|D)$"),
    VBPERMD,
    VADDCUW,
    VPOPCNTW,
    VPOPCNTD,
    VPRTYBD,
    VPRTYBW,
    VSHASIGMAD,
    VSHASIGMAW,
    VSUBSBS,
    VSUBSHS,
    VSUBSWS,
    VSUBUBS,
    VSUBUHS,
    VSUBUWS,
    VSUBCUW,
    VCMPGTSB,
    VCMPGTSB_rec,
    VCMPGTSD,
    VCMPGTSD_rec,
    VCMPGTSH,
    VCMPGTSH_rec,
    VCMPGTSW,
    VCMPGTSW_rec,
    VCMPGTUB,
    VCMPGTUB_rec,
    VCMPGTUD,
    VCMPGTUD_rec,
    VCMPGTUH,
    VCMPGTUH_rec,
    VCMPGTUW,
    VCMPGTUW_rec,
    VCMPNEB_rec,
    VCMPNEH_rec,
    VCMPNEW_rec,
    VCMPNEZB_rec,
    VCMPNEZH_rec,
    VCMPNEZW_rec,
    VCMPEQUB_rec,
    VCMPEQUD_rec,
    VCMPEQUH_rec,
    VCMPEQUW_rec,
    XVCMPEQDP,
    XVCMPEQDP_rec,
    XVCMPEQSP,
    XVCMPEQSP_rec,
    XVCMPGEDP,
    XVCMPGEDP_rec,
    XVCMPGESP,
    XVCMPGESP_rec,
    XVCMPGTDP,
    XVCMPGTDP_rec,
    XVCMPGTSP,
    XVCMPGTSP_rec,
    XVMAXDP,
    XVMAXSP,
    XVMINDP,
    XVMINSP,
    XVTDIVDP,
    XVTDIVSP,
    XVTSQRTDP,
    XVTSQRTSP,
    XVTSTDCDP,
    XVTSTDCSP,
    XVXSIGDP,
    XVXSIGSP
)>;

// 7 cycle DP vector operation that uses an entire superslice.
// Uses both DP units (the even DPE and odd DPO units), two pipelines (EXECE,
// EXECO) and 1 dispatch (DISP) to the given superslice.
def : InstRW<[P9_DPE_7C, P9_DPO_7C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
      (instrs
    VADDFP,
    VCTSXS,
    VCTSXS_0,
    VCTUXS,
    VCTUXS_0,
    VEXPTEFP,
    VLOGEFP,
    VMADDFP,
    VMHADDSHS,
    VNMSUBFP,
    VREFP,
    VRFIM,
    VRFIN,
    VRFIP,
    VRFIZ,
    VRSQRTEFP,
    VSUBFP,
    XVADDDP,
    XVADDSP,
    XVCVDPSP,
    XVCVDPSXDS,
    XVCVDPSXWS,
    XVCVDPUXDS,
    XVCVDPUXWS,
    XVCVHPSP,
    XVCVSPDP,
    XVCVSPHP,
    XVCVSPSXDS,
    XVCVSPSXWS,
    XVCVSPUXDS,
    XVCVSPUXWS,
    XVCVSXDDP,
    XVCVSXDSP,
    XVCVSXWDP,
    XVCVSXWSP,
    XVCVUXDDP,
    XVCVUXDSP,
    XVCVUXWDP,
    XVCVUXWSP,
    XVMADDADP,
    XVMADDASP,
    XVMADDMDP,
    XVMADDMSP,
    XVMSUBADP,
    XVMSUBASP,
    XVMSUBMDP,
    XVMSUBMSP,
    XVMULDP,
    XVMULSP,
    XVNMADDADP,
    XVNMADDASP,
    XVNMADDMDP,
    XVNMADDMSP,
    XVNMSUBADP,
    XVNMSUBASP,
    XVNMSUBMDP,
    XVNMSUBMSP,
    XVRDPI,
    XVRDPIC,
    XVRDPIM,
    XVRDPIP,
    XVRDPIZ,
    XVREDP,
    XVRESP,
    XVRSPI,
    XVRSPIC,
    XVRSPIM,
    XVRSPIP,
    XVRSPIZ,
    XVRSQRTEDP,
    XVRSQRTESP,
    XVSUBDP,
    XVSUBSP,
    VCFSX,
    VCFSX_0,
    VCFUX,
    VCFUX_0,
    VMHRADDSHS,
    VMLADDUHM,
    VMSUMMBM,
    VMSUMSHM,
    VMSUMSHS,
    VMSUMUBM,
    VMSUMUHM,
    VMSUMUDM,
    VMSUMUHS,
    VMULESB,
    VMULESH,
    VMULESW,
    VMULEUB,
    VMULEUH,
    VMULEUW,
    VMULOSB,
    VMULOSH,
    VMULOSW,
    VMULOUB,
    VMULOUH,
    VMULOUW,
    VMULUWM,
    VSUM2SWS,
    VSUM4SBS,
    VSUM4SHS,
    VSUM4UBS,
    VSUMSWS
)>;

// 5 cycle Restricted DP operation. One DP unit, one EXEC pipeline and all three
// dispatch units for the superslice.
def : InstRW<[P9_DP_5C, IP_EXEC_1C, DISP_3SLOTS_1C],
      (instrs
    (instregex "MADD(HD|HDU|LD|LD8)$"),
    (instregex "MUL(HD|HW|LD|LI|LI8|LW)(U)?(O)?$")
)>;

// 7 cycle Restricted DP operation. One DP unit, one EXEC pipeline and all three
// dispatch units for the superslice.
def : InstRW<[P9_DP_7C, IP_EXEC_1C, DISP_3SLOTS_1C],
      (instrs
    FRSP,
    (instregex "FRI(N|P|Z|M)(D|S)$"),
    (instregex "FRE(S)?$"),
    (instregex "FADD(S)?$"),
    (instregex "FMSUB(S)?$"),
    (instregex "FMADD(S)?$"),
    (instregex "FSUB(S)?$"),
    (instregex "FCFID(U)?(S)?$"),
    (instregex "FCTID(U)?(Z)?$"),
    (instregex "FCTIW(U)?(Z)?$"),
    (instregex "FRSQRTE(S)?$"),
    FNMADDS,
    FNMADD,
    FNMSUBS,
    FNMSUB,
    FSELD,
    FSELS,
    FMULS,
    FMUL,
    XSMADDADP,
    XSMADDASP,
    XSMADDMDP,
    XSMADDMSP,
    XSMSUBADP,
    XSMSUBASP,
    XSMSUBMDP,
    XSMSUBMSP,
    XSMULDP,
    XSMULSP,
    XSNMADDADP,
    XSNMADDASP,
    XSNMADDMDP,
    XSNMADDMSP,
    XSNMSUBADP,
    XSNMSUBASP,
    XSNMSUBMDP,
    XSNMSUBMSP
)>;

// 7 cycle Restricted DP operation and one 3 cycle ALU operation.
// These operations can be done in parallel. The DP is restricted so we need a
// full 4 dispatches (DISP_3SLOTS_1C for the DP op plus DISP_1C for the ALU op).
def : InstRW<[P9_DP_7C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C,
              DISP_3SLOTS_1C, DISP_1C],
      (instrs
    (instregex "FSEL(D|S)_rec$")
)>;

// 5 Cycle Restricted DP operation and one 2 cycle ALU operation.
def : InstRW<[P9_DPOpAndALUOp_7C, IP_EXEC_1C, IP_EXEC_1C,
              DISP_3SLOTS_1C, DISP_1C],
      (instrs
    (instregex "MUL(H|L)(D|W)(U)?(O)?_rec$")
)>;

// 7 cycle Restricted DP operation and one 3 cycle ALU operation.
// These operations must be done sequentially. The DP is restricted so we need
// a full 4 dispatches.
def : InstRW<[P9_DPOpAndALU2Op_10C, IP_EXEC_1C, IP_EXEC_1C,
              DISP_3SLOTS_1C, DISP_1C],
      (instrs
    (instregex "FRI(N|P|Z|M)(D|S)_rec$"),
    (instregex "FRE(S)?_rec$"),
    (instregex "FADD(S)?_rec$"),
    (instregex "FSUB(S)?_rec$"),
    (instregex "F(N)?MSUB(S)?_rec$"),
    (instregex "F(N)?MADD(S)?_rec$"),
    (instregex "FCFID(U)?(S)?_rec$"),
    (instregex "FCTID(U)?(Z)?_rec$"),
    (instregex "FCTIW(U)?(Z)?_rec$"),
    (instregex "FMUL(S)?_rec$"),
    (instregex "FRSQRTE(S)?_rec$"),
    FRSP_rec
)>;

// 7 cycle DP operation. One DP unit, one EXEC pipeline and 1 dispatch unit.
def : InstRW<[P9_DP_7C, IP_EXEC_1C, DISP_1C],
      (instrs
    XSADDDP,
    XSADDSP,
    XSCVDPHP,
    XSCVDPSP,
    XSCVDPSXDS,
    XSCVDPSXDSs,
    XSCVDPSXWS,
    XSCVDPUXDS,
    XSCVDPUXDSs,
    XSCVDPUXWS,
    XSCVDPSXWSs,
    XSCVDPUXWSs,
    XSCVHPDP,
    XSCVSPDP,
    XSCVSXDDP,
    XSCVSXDSP,
    XSCVUXDDP,
    XSCVUXDSP,
    XSRDPI,
    XSRDPIC,
    XSRDPIM,
    XSRDPIP,
    XSRDPIZ,
    XSREDP,
    XSRESP,
    XSRSQRTEDP,
    XSRSQRTESP,
    XSSUBDP,
    XSSUBSP,
    XSCVDPSPN,
    XSRSP
)>;

// Three Cycle PM operation. Only one PM unit per superslice so we use the whole
// superslice. That includes both exec pipelines (EXECO, EXECE) and one
// dispatch.
def : InstRW<[P9_PM_3C, IP_EXECO_1C, IP_EXECE_1C, DISP_1C],
      (instrs
    (instregex "LVS(L|R)$"),
    (instregex "VSPLTIS(W|H|B)$"),
    (instregex "VSPLT(W|H|B)(s)?$"),
    (instregex "V_SETALLONES(B|H)?$"),
    (instregex "VEXTRACTU(B|H|W)$"),
    (instregex "VINSERT(B|H|W|D)$"),
    MFVSRLD,
    MTVSRWS,
    VBPERMQ,
    VCLZLSBB,
    VCTZLSBB,
    VEXTRACTD,
    VEXTUBLX,
    VEXTUBRX,
    VEXTUHLX,
    VEXTUHRX,
    VEXTUWLX,
    VEXTUWRX,
    VGBBD,
    VMRGHB,
    VMRGHH,
    VMRGHW,
    VMRGLB,
    VMRGLH,
    VMRGLW,
    VPERM,
    VPERMR,
    VPERMXOR,
    VPKPX,
    VPKSDSS,
    VPKSDUS,
    VPKSHSS,
    VPKSHUS,
    VPKSWSS,
    VPKSWUS,
    VPKUDUM,
    VPKUDUS,
    VPKUHUM,
    VPKUHUS,
    VPKUWUM,
    VPKUWUS,
    VPRTYBQ,
    VSL,
    VSLDOI,
    VSLO,
    VSLV,
    VSR,
    VSRO,
    VSRV,
    VUPKHPX,
    VUPKHSB,
    VUPKHSH,
    VUPKHSW,
    VUPKLPX,
    VUPKLSB,
    VUPKLSH,
    VUPKLSW,
    XXBRD,
    XXBRH,
    XXBRQ,
    XXBRW,
    XXEXTRACTUW,
    XXINSERTW,
    XXMRGHW,
    XXMRGLW,
    XXPERM,
    XXPERMR,
    XXSLDWI,
    XXSLDWIs,
    XXSPLTIB,
    XXSPLTW,
    XXSPLTWs,
    XXPERMDI,
    XXPERMDIs,
    VADDCUQ,
    VADDECUQ,
    VADDEUQM,
    VADDUQM,
    VMUL10CUQ,
    VMUL10ECUQ,
    VMUL10EUQ,
    VMUL10UQ,
    VSUBCUQ,
    VSUBECUQ,
    VSUBEUQM,
    VSUBUQM,
    XSCMPEXPQP,
    XSCMPOQP,
    XSCMPUQP,
    XSTSTDCQP,
    XSXSIGQP,
    BCDCFN_rec,
    BCDCFZ_rec,
    BCDCPSGN_rec,
    BCDCTN_rec,
    BCDCTZ_rec,
    BCDSETSGN_rec,
    BCDS_rec,
    BCDTRUNC_rec,
    BCDUS_rec,
    BCDUTRUNC_rec,
    BCDADD_rec,
    BCDSUB_rec
)>;

// 12 Cycle DFU operation. Only one DFU unit per CPU so we use a whole
// superslice. That includes both exec pipelines (EXECO, EXECE) and one
// dispatch.
def : InstRW<[P9_DFU_12C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
      (instrs
    BCDSR_rec,
    XSADDQP,
    XSADDQPO,
    XSCVDPQP,
    XSCVQPDP,
    XSCVQPDPO,
    XSCVQPSDZ,
    XSCVQPSWZ,
    XSCVQPUDZ,
    XSCVQPUWZ,
    XSCVSDQP,
    XSCVUDQP,
    XSRQPI,
    XSRQPIX,
    XSRQPXP,
    XSSUBQP,
    XSSUBQPO
)>;

// 23 Cycle DFU operation. Only one DFU unit per CPU so we use a whole
// superslice. That includes both exec pipelines (EXECO, EXECE) and one
// dispatch.
def : InstRW<[P9_DFU_23C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
      (instrs
    BCDCTSQ_rec
)>;

// 24 Cycle DFU operation. Only one DFU unit per CPU so we use a whole
// superslice. That includes both exec pipelines (EXECO, EXECE) and one
// dispatch.
def : InstRW<[P9_DFU_24C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
      (instrs
    XSMADDQP,
    XSMADDQPO,
    XSMSUBQP,
    XSMSUBQPO,
    XSMULQP,
    XSMULQPO,
    XSNMADDQP,
    XSNMADDQPO,
    XSNMSUBQP,
    XSNMSUBQPO
)>;

// 37 Cycle DFU operation. Only one DFU unit per CPU so we use a whole
// superslice. That includes both exec pipelines (EXECO, EXECE) and one
// dispatch.
def : InstRW<[P9_DFU_37C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
      (instrs
    BCDCFSQ_rec
)>;

// 58 Cycle DFU operation. Only one DFU unit per CPU so we use a whole
// superslice. That includes both exec pipelines (EXECO, EXECE) and one
// dispatch.
def : InstRW<[P9_DFU_58C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
      (instrs
    XSDIVQP,
    XSDIVQPO
)>;

// 76 Cycle DFU operation. Only one DFU unit per CPU so we use a whole
// superslice. That includes both exec pipelines (EXECO, EXECE) and one
// dispatch.
def : InstRW<[P9_DFU_76C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
      (instrs
    XSSQRTQP,
    XSSQRTQPO
)>;

// 6 Cycle Load uses a single slice.
// NOTE(review): unlike most patterns in this file, "LXVL(L)?" has no trailing
// `$` anchor, so it presumably also matches any longer opcode name with this
// prefix - confirm that this is intended.
def : InstRW<[P9_LS_6C, IP_AGEN_1C, DISP_1C],
      (instrs
    (instregex "LXVL(L)?")
)>;

// 5 Cycle Load uses a single slice.
// NOTE(review): "LVX(L)?" has no trailing `$` anchor, unlike the neighbouring
// patterns, so it presumably also matches longer opcode names with this
// prefix - confirm that this is intended.
def : InstRW<[P9_LS_5C, IP_AGEN_1C, DISP_1C],
      (instrs
    (instregex "LVE(B|H|W)X$"),
    (instregex "LVX(L)?"),
    (instregex "LXSI(B|H)ZX$"),
    LXSDX,
    LXVB16X,
    LXVD2X,
    LXVWSX,
    LXSIWZX,
    LXV,
    LXVX,
    LXSD,
    DFLOADf64,
    XFLOADf64,
    LIWZX
)>;

// 4 Cycle Load uses a single slice.
def : InstRW<[P9_LS_4C, IP_AGEN_1C, DISP_1C],
      (instrs
    (instregex "DCB(F|T|ST)(EP)?$"),
    (instregex "DCBZ(L)?(EP)?$"),
    (instregex "DCBTST(EP)?$"),
    (instregex "CP_COPY(8)?$"),
    (instregex "ICBI(EP)?$"),
    (instregex "ICBT(LS)?$"),
    (instregex "LBARX(L)?$"),
    (instregex "LBZ(CIX|8|X|X8|XTLS|XTLS_32)?(_)?$"),
    (instregex "LD(ARX|ARXL|BRX|CIX|X|XTLS)?(_)?$"),
    (instregex "LH(A|B)RX(L)?(8)?$"),
    (instregex "LHZ(8|CIX|X|X8|XTLS|XTLS_32)?(_)?$"),
    (instregex "LWARX(L)?$"),
    (instregex "LWBRX(8)?$"),
    (instregex "LWZ(8|CIX|X|X8|XTLS|XTLS_32)?(_)?$"),
    CP_ABORT,
    DARN,
    EnforceIEIO,
    ISYNC,
    MSGSYNC,
    TLBSYNC,
    SYNC,
    LMW,
    LSWI
)>;

// 4 Cycle Restricted load uses a single slice but the dispatch for the whole
// superslice.
def : InstRW<[P9_LS_4C, IP_AGEN_1C, DISP_3SLOTS_1C],
      (instrs
    LFIWZX,
    LFDX,
    LFD
)>;

// Cracked Load Instructions.
// Load instructions that can be done in parallel.
def : InstRW<[P9_LS_4C, P9_LS_4C, IP_AGEN_1C, IP_AGEN_1C,
              DISP_PAIR_1C],
      (instrs
    SLBIA,
    SLBIE,
    SLBMFEE,
    SLBMFEV,
    SLBMTE,
    TLBIEL
)>;

// Cracked Load Instruction.
// Requires Load and ALU pieces totaling 6 cycles. The Load and ALU
// operations can be run in parallel.
def : InstRW<[P9_LS_4C, P9_ALU_2C, IP_EXEC_1C, IP_AGEN_1C,
              DISP_PAIR_1C, DISP_PAIR_1C],
      (instrs
    (instregex "L(W|H)ZU(X)?(8)?$")
)>;

// Cracked TEND Instruction.
// Requires Load and ALU pieces totaling 6 cycles. The Load and ALU
// operations can be run in parallel.
def : InstRW<[P9_LS_4C, P9_ALU_2C, IP_EXEC_1C, IP_AGEN_1C,
              DISP_1C, DISP_1C],
      (instrs
    TEND
)>;


// Cracked Store Instruction
// Consecutive Store and ALU instructions. The store is restricted and requires
// three dispatches.
def : InstRW<[P9_StoreAndALUOp_3C, IP_EXEC_1C, IP_EXEC_1C, IP_AGEN_1C,
              DISP_3SLOTS_1C, DISP_1C],
      (instrs
    (instregex "ST(B|H|W|D)CX$")
)>;

// Cracked Load Instruction.
// Two consecutive load operations for a total of 8 cycles.
def : InstRW<[P9_LoadAndLoadOp_8C, IP_AGEN_1C, IP_AGEN_1C,
              DISP_1C, DISP_1C],
      (instrs
    LDMX
)>;

// Cracked Load instruction.
// Requires consecutive Load and ALU pieces totaling 6 cycles. The Load and ALU
// operations cannot be done at the same time and so their latencies are added.
def : InstRW<[P9_LoadAndALUOp_6C, IP_EXEC_1C, IP_AGEN_1C,
              DISP_1C, DISP_1C],
      (instrs
    (instregex "LHA(X)?(8)?$"),
    (instregex "CP_PASTE(8)?_rec$"),
    (instregex "LWA(X)?(_32)?$"),
    TCHECK
)>;

// Cracked Restricted Load instruction.
// Requires consecutive Load and ALU pieces totaling 6 cycles. The Load and ALU
// operations cannot be done at the same time and so their latencies are added.
// Full 6 dispatches are required as this is both cracked and restricted.
def : InstRW<[P9_LoadAndALUOp_6C, IP_EXEC_1C, IP_AGEN_1C,
              DISP_3SLOTS_1C, DISP_3SLOTS_1C],
      (instrs
    LFIWAX
)>;

// Cracked Load instruction.
// Requires consecutive Load and ALU pieces totaling 7 cycles. The Load and ALU
// operations cannot be done at the same time and so their latencies are added.
// Two dispatches (one DISP_1C per piece) are required as this is a cracked
// instruction.
def : InstRW<[P9_LoadAndALUOp_7C, IP_AGEN_1C, IP_EXEC_1C, DISP_1C, DISP_1C],
      (instrs
    LXSIWAX,
    LIWAX
)>;

// Cracked Load instruction.
// Requires consecutive Load (4 cycles) and ALU (3 cycles) pieces totaling 7
// cycles. The Load and ALU operations cannot be done at the same time and so
// their latencies are added.
// Full 6 dispatches are required as this is a restricted instruction.
def : InstRW<[P9_LoadAndALU2Op_7C, IP_AGEN_1C, IP_EXEC_1C,
              DISP_3SLOTS_1C, DISP_3SLOTS_1C],
      (instrs
    LFSX,
    LFS
)>;

// Cracked Load instruction.
// Requires consecutive Load and ALU pieces totaling 8 cycles. The Load and ALU
// operations cannot be done at the same time and so their latencies are added.
// Two dispatches (one DISP_1C per piece) are required as this is a cracked
// instruction.
def : InstRW<[P9_LoadAndALU2Op_8C, IP_AGEN_1C, IP_EXEC_1C, DISP_1C, DISP_1C],
      (instrs
    LXSSP,
    LXSSPX,
    XFLOADf32,
    DFLOADf32
)>;

// Cracked 3-Way Load Instruction
// Load with two ALU operations that depend on each other
def : InstRW<[P9_LoadAndALUOp_6C, P9_ALU_2C, IP_AGEN_1C, IP_EXEC_1C, IP_EXEC_1C,
              DISP_PAIR_1C, DISP_PAIR_1C, DISP_1C],
      (instrs
    (instregex "LHAU(X)?(8)?$"),
    LWAUX
)>;

// Cracked Load that requires the PM resource.
// Since the Load and the PM cannot be done at the same time the latencies are
// added. Requires 8 cycles. Since the PM requires the full superslice we need
// both EXECE, EXECO pipelines as well as 1 dispatch for the PM. The Load
// requires the remaining 1 dispatch.
def : InstRW<[P9_LoadAndPMOp_8C, IP_AGEN_1C, IP_EXECE_1C, IP_EXECO_1C,
              DISP_1C, DISP_1C],
      (instrs
    LXVH8X,
    LXVDSX,
    LXVW4X
)>;

// Single slice Restricted store operation. The restricted operation requires
// all three dispatches for the superslice.
def : InstRW<[P9_LS_1C, IP_EXEC_1C, IP_AGEN_1C, DISP_3SLOTS_1C],
      (instrs
    (instregex "STF(S|D|IWX|SX|DX)$"),
    (instregex "STXS(D|DX|SPX|IWX|IBX|IHX|SP)(v)?$"),
    (instregex "STW(8)?$"),
    (instregex "(D|X)FSTORE(f32|f64)$"),
    (instregex "ST(W|H|D)BRX$"),
    (instregex "ST(B|H|D)(8)?$"),
    (instregex "ST(B|W|H|D)(CI)?X(TLS|TLS_32)?(8)?(_)?$"),
    STIWX,
    SLBIEG,
    STMW,
    STSWI,
    TLBIE
)>;

// Vector Store Instruction
// Requires the whole superslice and therefore requires one dispatch
// as well as both the Even and Odd exec pipelines.
def : InstRW<[P9_LS_1C, IP_EXECE_1C, IP_EXECO_1C, IP_AGEN_1C, DISP_1C],
      (instrs
    (instregex "STVE(B|H|W)X$"),
    (instregex "STVX(L)?$"),
    (instregex "STXV(B16X|H8X|W4X|D2X|L|LL|X)?$")
)>;

// 5 Cycle DIV operation. Only one DIV unit per superslice so we use the whole
// superslice. That includes both exec pipelines (EXECO, EXECE) and two
// dispatches.
def : InstRW<[P9_DIV_5C, IP_EXECE_1C, IP_EXECO_1C, DISP_EVEN_1C],
      (instrs
    (instregex "MTCTR(8)?(loop)?$"),
    (instregex "MTLR(8)?$")
)>;

// 12 Cycle DIV operation. Only one DIV unit per superslice so we use the whole
// superslice. That includes both exec pipelines (EXECO, EXECE) and two
// dispatches.
def : InstRW<[P9_DIV_12C, IP_EXECE_1C, IP_EXECO_1C, DISP_EVEN_1C],
      (instrs
    (instregex "M(T|F)VRSAVE(v)?$"),
    (instregex "M(T|F)PMR$"),
    (instregex "M(T|F)TB(8)?$"),
    (instregex "MF(SPR|CTR|LR)(8)?$"),
    (instregex "M(T|F)MSR(D)?$"),
    (instregex "MTSPR(8)?$")
)>;

// 16 Cycle DIV operation. Only one DIV unit per superslice so we use the whole
// superslice. That includes both exec pipelines (EXECO, EXECE) and two
// dispatches.
def : InstRW<[P9_DIV_16C_8, IP_EXECO_1C, IP_EXECE_1C, DISP_EVEN_1C],
      (instrs
    DIVW,
    DIVWO,
    DIVWU,
    DIVWUO,
    MODSW
)>;

// 24 Cycle DIV operation. Only one DIV unit per superslice so we use the whole
// superslice. That includes both exec pipelines (EXECO, EXECE) and two
// dispatches.
def : InstRW<[P9_DIV_24C_8, IP_EXECO_1C, IP_EXECE_1C, DISP_EVEN_1C],
      (instrs
    DIVWE,
    DIVWEO,
    DIVD,
    DIVDO,
    DIVWEU,
    DIVWEUO,
    DIVDU,
    DIVDUO,
    MODSD,
    MODUD,
    MODUW
)>;

// 40 Cycle DIV operation. Only one DIV unit per superslice so we use the whole
// superslice. That includes both exec pipelines (EXECO, EXECE) and two
// dispatches.
def : InstRW<[P9_DIV_40C_8, IP_EXECO_1C, IP_EXECE_1C, DISP_EVEN_1C],
      (instrs
    DIVDE,
    DIVDEO,
    DIVDEU,
    DIVDEUO
)>;

// Cracked DIV and ALU operation. Requires one full slice for the ALU operation
// and one full superslice for the DIV operation since there is only one DIV per
// superslice. Latency of DIV plus ALU is 18.
def : InstRW<[P9_IntDivAndALUOp_18C_8, IP_EXECE_1C, IP_EXECO_1C, IP_EXEC_1C,
              DISP_EVEN_1C, DISP_1C],
      (instrs
    (instregex "DIVW(U)?(O)?_rec$")
)>;

// Cracked DIV and ALU operation. Requires one full slice for the ALU operation
// and one full superslice for the DIV operation since there is only one DIV per
// superslice. Latency of DIV plus ALU is 26.
def : InstRW<[P9_IntDivAndALUOp_26C_8, IP_EXECE_1C, IP_EXECO_1C, IP_EXEC_1C,
              DISP_EVEN_1C, DISP_1C],
      (instrs
    DIVD_rec,
    DIVDO_rec,
    DIVDU_rec,
    DIVDUO_rec,
    DIVWE_rec,
    DIVWEO_rec,
    DIVWEU_rec,
    DIVWEUO_rec
)>;

// Cracked DIV and ALU operation. Requires one full slice for the ALU operation
// and one full superslice for the DIV operation since there is only one DIV per
// superslice. Latency of DIV plus ALU is 42.
def : InstRW<[P9_IntDivAndALUOp_42C_8, IP_EXECE_1C, IP_EXECO_1C, IP_EXEC_1C,
              DISP_EVEN_1C, DISP_1C],
      (instrs
    DIVDE_rec,
    DIVDEO_rec,
    DIVDEU_rec,
    DIVDEUO_rec
)>;

// CR access instructions in _BrMCR, IIC_BrMCRX.

// Cracked, restricted, ALU operations.
// Here the two ALU ops can actually be done in parallel and therefore the
// latencies are not added together. Otherwise this is like having two
// instructions running together on two pipelines and 6 dispatches. ALU ops are
// 2 cycles each.
def : InstRW<[P9_ALU_2C, P9_ALU_2C, IP_EXEC_1C, IP_EXEC_1C,
              DISP_3SLOTS_1C, DISP_3SLOTS_1C],
      (instrs
    MTCRF,
    MTCRF8
)>;

// Cracked ALU operations.
// Here the two ALU ops can actually be done in parallel and therefore the
// latencies are not added together. Otherwise this is like having two
// instructions running together on two pipelines and 2 dispatches. ALU ops are
// 2 cycles each.
def : InstRW<[P9_ALU_2C, P9_ALU_2C, IP_EXEC_1C, IP_EXEC_1C,
              DISP_1C, DISP_1C],
      (instrs
    (instregex "ADDC(8)?(O)?_rec$"),
    (instregex "SUBFC(8)?(O)?_rec$")
)>;

// Cracked ALU operations.
// Two ALU ops can be done in parallel.
// One is a three cycle ALU op, the other is a two cycle ALU op.
// One of the ALU ops is restricted, the other is not, so we have a total of
// 4 dispatches (DISP_3SLOTS_1C plus DISP_1C).
def : InstRW<[P9_ALU_2C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C,
              DISP_3SLOTS_1C, DISP_1C],
      (instrs
    (instregex "F(N)?ABS(D|S)_rec$"),
    (instregex "FCPSGN(D|S)_rec$"),
    (instregex "FNEG(D|S)_rec$"),
    FMR_rec
)>;

// Cracked ALU operations.
// Here the two ALU ops can actually be done in parallel and therefore the
// latencies are not added together. Otherwise this is like having two
// instructions running together on two pipelines and 2 dispatches.
// ALU ops are 3 cycles each.
def : InstRW<[P9_ALU_3C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C,
              DISP_1C, DISP_1C],
      (instrs
    MCRFS
)>;

// Cracked Restricted ALU operations.
// Here the two ALU ops can actually be done in parallel and therefore the
// latencies are not added together. Otherwise this is like having two
// instructions running together on two pipelines and 6 dispatches.
// ALU ops are 3 cycles each.
def : InstRW<[P9_ALU_3C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C,
              DISP_3SLOTS_1C, DISP_3SLOTS_1C],
      (instrs
    (instregex "MTFSF(b|_rec)?$"),
    (instregex "MTFSFI(_rec)?$"),
    MTFSFIb
)>;

// Cracked instruction made of two ALU ops.
// The two ops cannot be done in parallel.
// One of the ALU ops is restricted and takes 3 dispatches.
def : InstRW<[P9_ALUOpAndALUOp_4C, IP_EXEC_1C, IP_EXEC_1C,
              DISP_3SLOTS_1C, DISP_1C],
      (instrs
    (instregex "RLD(I)?C(R|L)_rec$"),
    (instregex "RLW(IMI|INM|NM)(8)?_rec$"),
    (instregex "SLW(8)?_rec$"),
    (instregex "SRAW(I)?_rec$"),
    (instregex "SRW(8)?_rec$"),
    RLDICL_32_rec,
    RLDIMI_rec
)>;

// Cracked instruction made of two ALU ops.
// The two ops cannot be done in parallel.
// Both of the ALU ops are restricted and take 3 dispatches.
def : InstRW<[P9_ALU2OpAndALU2Op_6C, IP_EXEC_1C, IP_EXEC_1C,
              DISP_3SLOTS_1C, DISP_3SLOTS_1C],
      (instrs
    (instregex "MFFS(L|CE|_rec)?$")
)>;

// Cracked ALU instruction composed of three consecutive 2 cycle ALU ops for a
// total of 6 cycles. All of the ALU operations are also restricted so each
// takes 3 dispatches for a total of 9.
def : InstRW<[P9_ALUOpAndALUOpAndALUOp_6C,
              IP_EXEC_1C, IP_EXEC_1C, IP_EXEC_1C,
              DISP_3SLOTS_1C, DISP_3SLOTS_1C, DISP_3SLOTS_1C],
             (instrs (instregex "MFCR(8)?$"))>;
1120
// Cracked instruction split into two ALU ops that cannot run in parallel.
def : InstRW<[P9_ALUOpAndALUOp_4C,
              IP_EXEC_1C, IP_EXEC_1C,
              DISP_1C, DISP_1C],
             (instrs (instregex "EXTSWSLI_32_64_rec$"),
                     (instregex "SRAD(I)?_rec$"),
                     EXTSWSLI_rec,
                     SLD_rec,
                     SRD_rec,
                     RLDIC_rec)>;
1132
// Restricted 33 cycle DP instruction: one slice and 3 dispatches.
def : InstRW<[P9_DP_33C_8, IP_EXEC_1C, DISP_3SLOTS_1C], (instrs FDIV)>;
1138
// Restricted 33 cycle DP instruction cracked with a 3 cycle ALU op.
def : InstRW<[P9_DPOpAndALU2Op_36C_8,
              IP_EXEC_1C, IP_EXEC_1C,
              DISP_3SLOTS_1C, DISP_1C],
             (instrs FDIV_rec)>;
1145
// 36 cycle DP instruction that can be done on a single slice.
def : InstRW<[P9_DP_36C_10, IP_EXEC_1C, DISP_1C], (instrs XSSQRTDP)>;
1152
// Restricted 36 cycle DP instruction: one slice and 3 dispatches.
def : InstRW<[P9_DP_36C_10, IP_EXEC_1C, DISP_3SLOTS_1C], (instrs FSQRT)>;
1158
// 36 cycle DP vector instruction.
def : InstRW<[P9_DPE_36C_10, P9_DPO_36C_10,
              IP_EXECE_1C, IP_EXECO_1C,
              DISP_1C],
             (instrs XVSQRTDP)>;
1165
// 27 cycle DP vector instruction.
def : InstRW<[P9_DPE_27C_10, P9_DPO_27C_10,
              IP_EXECE_1C, IP_EXECO_1C,
              DISP_1C],
             (instrs XVSQRTSP)>;
1172
// Restricted 36 cycle DP instruction cracked with a 3 cycle ALU op.
def : InstRW<[P9_DPOpAndALU2Op_39C_10,
              IP_EXEC_1C, IP_EXEC_1C,
              DISP_3SLOTS_1C, DISP_1C],
             (instrs FSQRT_rec)>;
1179
// 26 cycle DP instruction.
def : InstRW<[P9_DP_26C_5, IP_EXEC_1C, DISP_1C], (instrs XSSQRTSP)>;
1185
// Restricted 26 cycle DP instruction: one slice and 3 dispatches.
def : InstRW<[P9_DP_26C_5, IP_EXEC_1C, DISP_3SLOTS_1C], (instrs FSQRTS)>;
1191
// Restricted 26 cycle DP instruction cracked with a 3 cycle ALU op.
def : InstRW<[P9_DPOpAndALU2Op_29C_5,
              IP_EXEC_1C, IP_EXEC_1C,
              DISP_3SLOTS_1C, DISP_1C],
             (instrs FSQRTS_rec)>;
1198
// 33 cycle DP instruction: one slice and 1 dispatch.
def : InstRW<[P9_DP_33C_8, IP_EXEC_1C, DISP_1C], (instrs XSDIVDP)>;
1204
// Restricted 22 cycle DP instruction: one slice and 3 dispatches.
def : InstRW<[P9_DP_22C_5, IP_EXEC_1C, DISP_3SLOTS_1C], (instrs FDIVS)>;
1210
// 22 Cycle DP Instruction Restricted and Cracked with 3 Cycle ALU.
def : InstRW<[P9_DPOpAndALU2Op_25C_5,
              IP_EXEC_1C, IP_EXEC_1C,
              DISP_3SLOTS_1C, DISP_1C],
             (instrs FDIVS_rec)>;
1217
// 22 cycle DP instruction: one slice and 1 dispatch.
def : InstRW<[P9_DP_22C_5, IP_EXEC_1C, DISP_1C], (instrs XSDIVSP)>;
1223
// 24 cycle DP vector instruction occupying one full superslice: both the
// EXECE and EXECO pipelines plus 1 dispatch for that superslice.
def : InstRW<[P9_DPE_24C_8, P9_DPO_24C_8,
              IP_EXECE_1C, IP_EXECO_1C,
              DISP_1C],
             (instrs XVDIVSP)>;
1232
// 33 cycle DP vector instruction occupying one full superslice: both the
// EXECE and EXECO pipelines plus 1 dispatch for that superslice.
def : InstRW<[P9_DPE_33C_8, P9_DPO_33C_8,
              IP_EXECE_1C, IP_EXECO_1C,
              DISP_1C],
             (instrs XVDIVDP)>;
1241
// Instruction cracked into three pieces. One Load and two ALU operations.
// The Load and one of the ALU ops cannot be run at the same time and so the
// latencies are added together for 7 cycles. The remaining ALU is 2 cycles.
// Both the load and the ALU that depends on it are restricted and so each of
// them takes 3 dispatches (DISP_3SLOTS_1C). The final dispatch (DISP_1C) comes
// from the second ALU op.
// The two EXEC pipelines are for the 2 ALUs while the AGEN is for the load.
def : InstRW<[P9_LoadAndALU2Op_7C, P9_ALU_2C, IP_AGEN_1C, IP_EXEC_1C,
              IP_EXEC_1C, DISP_3SLOTS_1C, DISP_3SLOTS_1C, DISP_1C],
             (instrs (instregex "LF(SU|SUX)$"))>;
1254
// Cracked instruction consisting of a Store and an ALU op. The ALU op does
// not depend on the store, so both can run at the same time. The store is
// also restricted.
def : InstRW<[P9_LS_1C, P9_ALU_2C,
              IP_AGEN_1C, IP_EXEC_1C, IP_EXEC_1C,
              DISP_3SLOTS_1C, DISP_1C],
             (instrs (instregex "STF(S|D)U(X)?$"),
                     (instregex "ST(B|H|W|D)U(X)?(8)?$"))>;
1264
// Cracked instruction consisting of a Load and an ALU op. The ALU op does not
// depend on the load, so both can run at the same time.
def : InstRW<[P9_LS_4C, P9_ALU_2C,
              IP_AGEN_1C, IP_EXEC_1C,
              DISP_PAIR_1C, DISP_PAIR_1C],
             (instrs (instregex "LBZU(X)?(8)?$"),
                     (instregex "LDU(X)?$"))>;
1273
// Cracked instruction made up of a Load and an ALU. The ALU does not depend on
// the load and so it can be run at the same time as the load. The load is also
// restricted. 3 dispatches are from the restricted load while the remaining
// one is from the ALU. The AGEN pipeline is from the load and the EXEC
// pipeline is required for the ALU.
def : InstRW<[P9_LS_4C, P9_ALU_2C,
              IP_AGEN_1C, IP_EXEC_1C,
              DISP_3SLOTS_1C, DISP_1C],
             (instrs (instregex "LF(DU|DUX)$"))>;
1284
1285// Crypto Instructions
1286
// 6 cycle CY operation. There is only one CY unit per CPU, so the operation
// occupies a whole superslice: both exec pipelines (EXECO, EXECE) and one
// dispatch.
def : InstRW<[P9_CY_6C,
              IP_EXECO_1C, IP_EXECE_1C,
              DISP_1C],
             (instrs (instregex "VPMSUM(B|H|W|D)$"),
                     (instregex "V(N)?CIPHER(LAST)?$"),
                     VSBOX)>;
1296
1297// Branch Instructions
1298
// Two cycle branch.
def : InstRW<[P9_BR_2C, DISP_BR_1C], (instrs
    (instregex "BCCCTR(L)?(8)?$"),
    (instregex "BCCL(A|R|RL)?$"),
    (instregex "BCCTR(L)?(8)?(n)?$"),
    (instregex "BD(N)?Z(8|A|Am|Ap|m|p)?$"),
    (instregex "BD(N)?ZL(A|Am|Ap|R|R8|RL|RLm|RLp|Rm|Rp|m|p)?$"),
    (instregex "BL(_TLS|_NOP)?(_RM)?$"),
    (instregex "BL8(_TLS|_NOP|_NOP_TLS|_TLS_)?(_RM)?$"),
    (instregex "BLA(8|8_NOP)?(_RM)?$"),
    (instregex "BLR(8|L)?$"),
    (instregex "TAILB(A)?(8)?$"),
    (instregex "TAILBCTR(8)?$"),
    (instregex "gBC(A|Aat|CTR|CTRL|L|LA|LAat|LR|LRL|Lat|at)?$"),
    (instregex "BCLR(L)?(n)?$"),
    (instregex "BCTR(L)?(8)?(_RM)?$"),
    // Instructions listed by exact name.
    B, BA, BC, BCC, BCCA, BCL, BCLalways, BCLn,
    BCTRL8_LDinto_toc, BCTRL_LWZinto_toc,
    BCTRL8_LDinto_toc_RM, BCTRL_LWZinto_toc_RM,
    BCn, CTRL_DEP
)>;
1331
// Five cycle branch combined with a 2 cycle ALU op.
// The two operations run consecutively, not in parallel.
def : InstRW<[P9_BROpAndALUOp_7C, IP_EXEC_1C, DISP_BR_1C, DISP_1C],
             (instrs ADDPCIS)>;
1338
1339// Special Extracted Instructions For Atomics
1340
// Atomic load (LDAT, LWAT).
def : InstRW<[P9_LS_1C, P9_LS_1C, P9_LS_4C, P9_LS_4C, P9_LS_4C,
              IP_EXEC_1C, IP_EXEC_1C,
              IP_AGEN_1C, IP_AGEN_1C, IP_AGEN_1C, IP_AGEN_1C, IP_AGEN_1C,
              DISP_1C, DISP_3SLOTS_1C, DISP_3SLOTS_1C,
              DISP_1C, DISP_1C, DISP_1C],
             (instrs (instregex "L(D|W)AT$"))>;
1349
// Atomic store (STDAT, STWAT).
def : InstRW<[P9_LS_1C, P9_LS_4C, P9_LS_4C,
              IP_EXEC_1C,
              IP_AGEN_1C, IP_AGEN_1C, IP_AGEN_1C,
              DISP_1C, DISP_3SLOTS_1C, DISP_1C],
             (instrs (instregex "ST(D|W)AT$"))>;
1356
// Signal Processing Engine (SPE) instructions.
// Power 9 does not support these, so they are given an empty resource list
// and are marked Unsupported.
def : InstRW<[], (instrs
    BRINC, EVABS, EVEQV, EVMRA, EVNAND, EVNEG,
    (instregex "EVADD(I)?W$"),
    (instregex "EVADD(SM|SS|UM|US)IAAW$"),
    (instregex "EVAND(C)?$"),
    (instregex "EVCMP(EQ|GTS|GTU|LTS|LTU)$"),
    (instregex "EVCNTL(S|Z)W$"),
    (instregex "EVDIVW(S|U)$"),
    (instregex "EVEXTS(B|H)$"),
    (instregex "EVLD(H|W|D)(X)?$"),
    (instregex "EVLHH(E|OS|OU)SPLAT(X)?$"),
    (instregex "EVLWHE(X)?$"),
    (instregex "EVLWHO(S|U)(X)?$"),
    (instregex "EVLW(H|W)SPLAT(X)?$"),
    (instregex "EVMERGE(HI|LO|HILO|LOHI)$"),
    (instregex "EVMHEG(S|U)M(F|I)A(A|N)$"),
    (instregex "EVMHES(M|S)(F|I)(A|AA|AAW|ANW)?$"),
    (instregex "EVMHEU(M|S)I(A|AA|AAW|ANW)?$"),
    (instregex "EVMHOG(U|S)M(F|I)A(A|N)$"),
    (instregex "EVMHOS(M|S)(F|I)(A|AA|AAW|ANW)?$"),
    (instregex "EVMHOU(M|S)I(A|AA|ANW|AAW)?$"),
    (instregex "EVMWHS(M|S)(F|FA|I|IA)$"),
    (instregex "EVMWHUMI(A)?$"),
    (instregex "EVMWLS(M|S)IA(A|N)W$"),
    (instregex "EVMWLU(M|S)I(A|AA|AAW|ANW)?$"),
    (instregex "EVMWSM(F|I)(A|AA|AN)?$"),
    (instregex "EVMWSSF(A|AA|AN)?$"),
    (instregex "EVMWUMI(A|AA|AN)?$"),
    (instregex "EV(N|X)?OR(C)?$"),
    (instregex "EVR(LW|LWI|NDW)$"),
    (instregex "EVSLW(I)?$"),
    (instregex "EVSPLAT(F)?I$"),
    (instregex "EVSRW(I)?(S|U)$"),
    (instregex "EVST(DD|DH|DW|WHE|WHO|WWE|WWO)(X)?$"),
    (instregex "EVSUBF(S|U)(M|S)IAAW$"),
    (instregex "EVSUB(I)?FW$")
)> {
  let Unsupported = 1;
}
1402
// General instructions that have no scheduling support.
// They are given an empty resource list and are marked Unsupported.
def : InstRW<[], (instrs
    (instregex "(H)?RFI(D)?$"),
    (instregex "DSS(ALL)?$"),
    (instregex "DST(ST)?(T)?(64)?$"),
    (instregex "ICBL(C|Q)$"),
    (instregex "L(W|H|B)EPX$"),
    (instregex "ST(W|H|B)EPX$"),
    (instregex "(L|ST)FDEPX$"),
    (instregex "M(T|F)SR(IN)?$"),
    (instregex "M(T|F)DCR$"),
    (instregex "NOP_GT_PWR(6|7)$"),
    (instregex "TLB(IA|IVAX|SX|SX2|SX2D|LD|LI|RE|RE2|WE|WE2)$"),
    (instregex "WRTEE(I)?$"),
    (instregex "HASH(ST|STP|CHK|CHKP)(8)?$"),
    // Instructions listed by exact name.
    ATTN, CLRBHRB, MFBHRBE, MBAR, MSYNC, SLBSYNC, SLBFEE_rec,
    NAP, STOP, TRAP, RFCI, RFDI, RFMCI, SC,
    DCBA, DCBI, DCCCI, ICCCI,
    ADDEX, ADDEX8
)> {
  let Unsupported = 1;
}
1440