1;; Copyright (C) 2012-2013 Free Software Foundation, Inc.
2;;
3;; This file is part of GCC.
4;;
5;; GCC is free software; you can redistribute it and/or modify
6;; it under the terms of the GNU General Public License as published by
7;; the Free Software Foundation; either version 3, or (at your option)
8;; any later version.
9;;
10;; GCC is distributed in the hope that it will be useful,
11;; but WITHOUT ANY WARRANTY; without even the implied warranty of
12;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13;; GNU General Public License for more details.
14;;
15;; You should have received a copy of the GNU General Public License
16;; along with GCC; see the file COPYING3.  If not see
17;; <http://www.gnu.org/licenses/>.
18;;
19;; AMD bdver3 Scheduling
20;;
21;; The bdver3 contains three pipelined FP units and two integer units.
22;; Fetching and decoding logic is different from previous fam15 processors.
23;; Fetching is done every two cycles rather than every cycle and
24;; two decode units are available. The decode units therefore decode
25;; four instructions in two cycles.
26;;
27;; The load/store queue unit is not attached to the schedulers but
28;; communicates with all the execution units separately instead.
29;;
;; bdver3 belongs to the fam15 processors.  We reuse the "bdver1_decode"
;; insn attribute that was introduced for the bdver1 decoding scheme.
32
;; Automata used by this description: decode (bdver3), integer execution
;; (bdver3_ieu), load/store (bdver3_load), floating point (bdver3_fp) and
;; address generation (bdver3_agu).
(define_automaton
  "bdver3,bdver3_ieu,bdver3_load,bdver3_fp,bdver3_agu")
34
;; Decoder units.  decode0/decode1 serve direct and double-decoded
;; instructions; decodev serves vectorpath instructions.
(define_cpu_unit "bdver3-decode0,bdver3-decode1,bdver3-decodev" "bdver3")

;; Vectorpath instructions are single issue, so the vector decoder may
;; never be reserved in the same cycle as either regular decoder.
(exclusion_set "bdver3-decodev" "bdver3-decode0,bdver3-decode1")

;; Vectorpath decode: the dedicated vector decoder.
(define_reservation "bdver3-vector" "bdver3-decodev")
;; Direct decode takes one cycle on either regular decoder, so the two
;; decoders handle four direct instructions in two cycles.
(define_reservation "bdver3-direct" "(bdver3-decode0|bdver3-decode1)")
;; Double decode occupies a regular decoder for two cycles.
(define_reservation "bdver3-double" "(bdver3-decode0|bdver3-decode1)*2")
51
;; Two integer execution units; "bdver3-ieu" reserves either of them.
(define_cpu_unit "bdver3-ieu0,bdver3-ieu1" "bdver3_ieu")
(define_reservation "bdver3-ieu" "(bdver3-ieu0|bdver3-ieu1)")
55
;; Two address generation units; "bdver3-agu" reserves either of them.
(define_cpu_unit "bdver3-agu0,bdver3-agu1" "bdver3_agu")
(define_reservation "bdver3-agu" "(bdver3-agu0|bdver3-agu1)")
59
;; Two load/store pipes, used by both the load and the store reservations.
(define_cpu_unit "bdver3-load0,bdver3-load1" "bdver3_load")

;; A load: an AGU in the first cycle, one load pipe in the second, and a
;; third cycle in which no unit is reserved.
(define_reservation "bdver3-load" "bdver3-agu,
				   (bdver3-load0|bdver3-load1),nothing")
;; 128bit SSE instructions issue two loads at once, so both pipes are
;; reserved together in the second cycle.
(define_reservation "bdver3-load2" "bdver3-agu,
				   (bdver3-load0+bdver3-load1),nothing")

;; A store reserves either pipe for one cycle.
(define_reservation "bdver3-store" "(bdver3-load0 | bdver3-load1)")
;; 128bit SSE instructions issue two stores at once (both pipes).
(define_reservation "bdver3-store2" "(bdver3-load0+bdver3-load1)")
71
;; Vectorpath (microcoded) instructions are single issue, so this
;; reservation claims both IEUs, both AGUs and both load pipes at once.
(define_reservation "bdver3-ivector" "bdver3-ieu0+bdver3-ieu1+
                                      bdver3-agu0+bdver3-agu1+
                                      bdver3-load0+bdver3-load1")
77
;; Three cycles during which no unit is reserved.
;; NOTE(review): presumably models the FP scheduler delay -- confirm.
(define_reservation "bdver3-fpsched" "nothing,nothing,nothing")

;; The floating point loads: the scheduler delay combined with a single
;; load (fpload) or a paired load on both pipes (fpload2).
(define_reservation "bdver3-fpload" "(bdver3-fpsched + bdver3-load)")
(define_reservation "bdver3-fpload2" "(bdver3-fpsched + bdver3-load2)")
83
;; Three FP units: two FMA pipes and one FP store pipe.
(define_cpu_unit "bdver3-ffma0,bdver3-ffma1,bdver3-fpsto" "bdver3_fp")

;; FP vectorpath: all three FP units plus both load pipes at once.
(define_reservation "bdver3-fvector" "bdver3-ffma0+bdver3-ffma1+
                                      bdver3-fpsto+bdver3-load0+
                                      bdver3-load1")

;; Named FP resources, each mapped onto the physical units above.
;; ffma: either FMA pipe.  fcvt and fmma: pipe 0 only.  fxbar: pipe 1 only.
;; fmal: FMA pipe 0 or the store pipe.  fsto and fpshuf: the store pipe.
(define_reservation "bdver3-ffma"     "(bdver3-ffma0 | bdver3-ffma1)")
(define_reservation "bdver3-fcvt"     "bdver3-ffma0")
(define_reservation "bdver3-fmma"     "bdver3-ffma0")
(define_reservation "bdver3-fxbar"    "bdver3-ffma1")
(define_reservation "bdver3-fmal"     "(bdver3-ffma0 | bdver3-fpsto)")
(define_reservation "bdver3-fsto"     "bdver3-fpsto")
(define_reservation "bdver3-fpshuf"    "bdver3-fpsto")
100
;; Jump instructions are executed in the branch unit, completely
;; transparent to us.  Calls are double-decoded and then take one AGU or
;; IEU slot followed by an idle cycle.
(define_insn_reservation "bdver3_call" 2
  (and (eq_attr "cpu" "bdver3")
       (eq_attr "type" "call,callv"))
  "bdver3-double,(bdver3-agu | bdver3-ieu),nothing")
;; PUSH: direct decode, an IEU, then a store pipe.
;; NOTE(review): the earlier comment stated "PUSH mem is double path",
;; yet every push is decoded as bdver3-direct here -- confirm.
(define_insn_reservation "bdver3_push" 1
  (and (eq_attr "cpu" "bdver3")
       (eq_attr "type" "push"))
  "bdver3-direct,bdver3-ieu,bdver3-store")
;; POP: direct decode, then all integer-side units for one cycle.
;; NOTE(review): the earlier comment stated "POP r16/mem are double
;; path", yet every pop is decoded as bdver3-direct here -- confirm.
(define_insn_reservation "bdver3_pop" 1
  (and (eq_attr "cpu" "bdver3")
       (eq_attr "type" "pop"))
  "bdver3-direct,bdver3-ivector")
;; LEAVE: no latency information so far; assumed to be the same as on
;; amdfam10.  Modelled as a vectorpath instruction.
(define_insn_reservation "bdver3_leave" 3
  (and (eq_attr "cpu" "bdver3")
       (eq_attr "type" "leave"))
  "bdver3-vector,bdver3-ivector")
;; LEA: 1 cycle latency, direct decode, one IEU.
;; NOTE(review): the earlier comment stated that LEA executes in the AGU,
;; but the reservation uses bdver3-ieu -- confirm which is intended.
(define_insn_reservation "bdver3_lea" 1
  (and (eq_attr "cpu" "bdver3")
       (eq_attr "type" "lea"))
  "bdver3-direct,bdver3-ieu")
;; MUL executes in a special multiplier unit attached to IEU1, so all
;; four entries reserve bdver3-ieu1 specifically.  Each DImode entry is
;; listed before the generic entry whose condition it also satisfies.

;; 64-bit multiply, no memory operand.
(define_insn_reservation "bdver3_imul_DI" 6
  (and (eq_attr "cpu" "bdver3")
       (and (eq_attr "type" "imul")
            (and (eq_attr "mode" "DI")
                 (eq_attr "memory" "none,unknown"))))
  "bdver3-direct,bdver3-ieu1")

;; Any other multiply, no memory operand.
(define_insn_reservation "bdver3_imul" 4
  (and (eq_attr "cpu" "bdver3")
       (and (eq_attr "type" "imul")
            (eq_attr "memory" "none,unknown")))
  "bdver3-direct,bdver3-ieu1")

;; 64-bit multiply with a memory source: load first, then multiply.
(define_insn_reservation "bdver3_imul_mem_DI" 10
  (and (eq_attr "cpu" "bdver3")
       (and (eq_attr "type" "imul")
            (and (eq_attr "mode" "DI")
                 (eq_attr "memory" "load,both"))))
  "bdver3-direct,bdver3-load,bdver3-ieu1")

;; Any other multiply with a memory source.
(define_insn_reservation "bdver3_imul_mem" 8
  (and (eq_attr "cpu" "bdver3")
       (and (eq_attr "type" "imul")
            (eq_attr "memory" "load,both")))
  "bdver3-direct,bdver3-load,bdver3-ieu1")
149
;; String instructions that touch memory: vectorpath decode, a load,
;; then all integer-side units.
(define_insn_reservation "bdver3_str" 6
  (and (eq_attr "cpu" "bdver3")
       (and (eq_attr "type" "str")
            (eq_attr "memory" "load,both,store")))
  "bdver3-vector,bdver3-load,bdver3-ivector")
155
;; Integer instructions.

;; Direct-path integer instruction without a memory operand: one cycle
;; on an IEU or an AGU.
(define_insn_reservation "bdver3_idirect" 1
  (and (eq_attr "cpu" "bdver3")
       (and (eq_attr "bdver1_decode" "direct")
            (and (eq_attr "unit" "integer,unknown")
                 (eq_attr "memory" "none,unknown"))))
  "bdver3-direct,(bdver3-ieu|bdver3-agu)")

;; Vectorpath integer instruction without a memory operand: occupies all
;; integer-side units.
(define_insn_reservation "bdver3_ivector" 2
  (and (eq_attr "cpu" "bdver3")
       (and (eq_attr "bdver1_decode" "vector")
            (and (eq_attr "unit" "integer,unknown")
                 (eq_attr "memory" "none,unknown"))))
  "bdver3-vector,bdver3-ivector")
;; Integer instructions with memory operands.  The two imov entries are
;; keyed on the insn type; the others on direct decode in the integer unit.

;; Plain integer load (move from memory): load only, no IEU.
(define_insn_reservation "bdver3_idirect_loadmov" 4
  (and (eq_attr "cpu" "bdver3")
       (and (eq_attr "type" "imov")
            (eq_attr "memory" "load")))
  "bdver3-direct,bdver3-load")

;; Load-and-operate: load, then an IEU.
(define_insn_reservation "bdver3_idirect_load" 5
  (and (eq_attr "cpu" "bdver3")
       (and (eq_attr "bdver1_decode" "direct")
            (and (eq_attr "unit" "integer,unknown")
                 (eq_attr "memory" "load"))))
  "bdver3-direct,bdver3-load,bdver3-ieu")

;; Plain integer store (move to memory): an IEU, then a store pipe.
(define_insn_reservation "bdver3_idirect_movstore" 5
  (and (eq_attr "cpu" "bdver3")
       (and (eq_attr "type" "imov")
            (eq_attr "memory" "store")))
  "bdver3-direct,bdver3-ieu,bdver3-store")

;; Read-modify-write: load, an IEU, then a store pipe for two cycles.
(define_insn_reservation "bdver3_idirect_both" 4
  (and (eq_attr "cpu" "bdver3")
       (and (eq_attr "bdver1_decode" "direct")
            (and (eq_attr "unit" "integer,unknown")
                 (eq_attr "memory" "both"))))
  "bdver3-direct,bdver3-load,
			  bdver3-ieu,bdver3-store,
			  bdver3-store")

;; Operate-and-store: an IEU and an AGU together, then a store pipe.
(define_insn_reservation "bdver3_idirect_store" 4
  (and (eq_attr "cpu" "bdver3")
       (and (eq_attr "bdver1_decode" "direct")
            (and (eq_attr "unit" "integer,unknown")
                 (eq_attr "memory" "store"))))
  "bdver3-direct,(bdver3-ieu+bdver3-agu),
			  bdver3-store")
;; BDVER3 floating point units.
;; x87 moves.  The XFmode entries are listed before the generic
;; load/store entries, and bdver3_fmov_bdver3 catches every remaining fmov.

;; XFmode load: vectorpath, paired load, then nine cycles of all FP units.
(define_insn_reservation "bdver3_fldxf" 13
  (and (eq_attr "cpu" "bdver3")
       (and (eq_attr "type" "fmov")
            (and (eq_attr "memory" "load")
                 (eq_attr "mode" "XF"))))
  "bdver3-vector,bdver3-fpload2,bdver3-fvector*9")

;; Any other x87 load.
(define_insn_reservation "bdver3_fld" 2
  (and (eq_attr "cpu" "bdver3")
       (and (eq_attr "type" "fmov")
            (eq_attr "memory" "load")))
  "bdver3-direct,bdver3-fpload,bdver3-ffma")

;; XFmode store: vectorpath, paired store alongside six cycles of all FP
;; units.
(define_insn_reservation "bdver3_fstxf" 4
  (and (eq_attr "cpu" "bdver3")
       (and (eq_attr "type" "fmov")
            (and (eq_attr "memory" "store,both")
                 (eq_attr "mode" "XF"))))
  "bdver3-vector,(bdver3-fpsched+bdver3-agu),(bdver3-store2+(bdver3-fvector*6))")

;; Any other x87 store: FP store pipe together with a store pipe.
(define_insn_reservation "bdver3_fst" 2
  (and (eq_attr "cpu" "bdver3")
       (and (eq_attr "type" "fmov")
            (eq_attr "memory" "store,both")))
  "bdver3-double,(bdver3-fpsched),(bdver3-fsto+bdver3-store)")

;; FISTP / FISTTP: same resources as an ordinary x87 store.
(define_insn_reservation "bdver3_fist" 2
  (and (eq_attr "cpu" "bdver3")
       (eq_attr "type" "fistp,fisttp"))
  "bdver3-double,(bdver3-fpsched),(bdver3-fsto+bdver3-store)")

;; Remaining fmov instructions (none of the memory cases above).
(define_insn_reservation "bdver3_fmov_bdver3" 2
  (and (eq_attr "cpu" "bdver3")
       (eq_attr "type" "fmov"))
  "bdver3-direct,bdver3-fpsched,bdver3-ffma")
;; x87 add, multiply and sign operations.  Each *_load entry precedes
;; the generic entry for the same insn type.

;; x87 add with a memory operand.
(define_insn_reservation "bdver3_fadd_load" 10
  (and (eq_attr "cpu" "bdver3")
       (and (eq_attr "type" "fop")
            (eq_attr "memory" "load")))
  "bdver3-direct,bdver3-fpload,bdver3-ffma")

;; x87 add, remaining forms.
(define_insn_reservation "bdver3_fadd" 6
  (and (eq_attr "cpu" "bdver3")
       (eq_attr "type" "fop"))
  "bdver3-direct,bdver3-fpsched,bdver3-ffma")

;; x87 multiply with a memory operand (double-decoded).
(define_insn_reservation "bdver3_fmul_load" 6
  (and (eq_attr "cpu" "bdver3")
       (and (eq_attr "type" "fmul")
            (eq_attr "memory" "load")))
  "bdver3-double,bdver3-fpload,bdver3-ffma")

;; x87 multiply, remaining forms.
(define_insn_reservation "bdver3_fmul" 6
  (and (eq_attr "cpu" "bdver3")
       (eq_attr "type" "fmul"))
  "bdver3-direct,bdver3-fpsched,bdver3-ffma")

;; x87 sign manipulation (type fsgn).
(define_insn_reservation "bdver3_fsgn" 2
  (and (eq_attr "cpu" "bdver3")
       (eq_attr "type" "fsgn"))
  "bdver3-direct,bdver3-fpsched,bdver3-ffma")
;; x87 divide with a memory operand.
(define_insn_reservation "bdver3_fdiv_load" 42
  (and (eq_attr "cpu" "bdver3")
       (and (eq_attr "type" "fdiv")
            (eq_attr "memory" "load")))
  "bdver3-direct,bdver3-fpload,bdver3-ffma")

;; x87 divide, remaining forms.
(define_insn_reservation "bdver3_fdiv" 42
  (and (eq_attr "cpu" "bdver3")
       (eq_attr "type" "fdiv"))
  "bdver3-direct,bdver3-fpsched,bdver3-ffma")

;; x87 special functions (type fpspc) with a memory operand: vectorpath,
;; occupying all FP units.
(define_insn_reservation "bdver3_fpspc_load" 143
  (and (eq_attr "cpu" "bdver3")
       (and (eq_attr "type" "fpspc")
            (eq_attr "memory" "load")))
  "bdver3-vector,bdver3-fpload,bdver3-fvector")
;; FCMOV is vectorpath and occupies all FP units.
;; Form with a memory operand.
(define_insn_reservation "bdver3_fcmov_load" 17
  (and (eq_attr "cpu" "bdver3")
       (and (eq_attr "type" "fcmov")
            (eq_attr "memory" "load")))
  "bdver3-vector,bdver3-fpload,bdver3-fvector")

;; Remaining forms.
(define_insn_reservation "bdver3_fcmov" 15
  (and (eq_attr "cpu" "bdver3")
       (eq_attr "type" "fcmov"))
  "bdver3-vector,bdver3-fpsched,bdver3-fvector")
;; x87 compares and exchange.  The double-decoded compares (the fcomi
;; entries) are listed before the generic fcmp entries.

;; Double-decoded compare with a memory operand.
(define_insn_reservation "bdver3_fcomi_load" 6
  (and (eq_attr "cpu" "bdver3")
       (and (eq_attr "type" "fcmp")
            (and (eq_attr "bdver1_decode" "double")
                 (eq_attr "memory" "load"))))
  "bdver3-double,bdver3-fpload,(bdver3-ffma | bdver3-fsto)")

;; Double-decoded compare, remaining forms.
(define_insn_reservation "bdver3_fcomi" 2
  (and (eq_attr "cpu" "bdver3")
       (and (eq_attr "bdver1_decode" "double")
            (eq_attr "type" "fcmp")))
  "bdver3-double,bdver3-fpsched,(bdver3-ffma | bdver3-fsto)")

;; Any other compare with a memory operand.
(define_insn_reservation "bdver3_fcom_load" 6
  (and (eq_attr "cpu" "bdver3")
       (and (eq_attr "type" "fcmp")
            (eq_attr "memory" "load")))
  "bdver3-direct,bdver3-fpload,bdver3-ffma")

;; Any other compare.
(define_insn_reservation "bdver3_fcom" 2
  (and (eq_attr "cpu" "bdver3")
       (eq_attr "type" "fcmp"))
  "bdver3-direct,bdver3-fpsched,bdver3-ffma")

;; FXCH.
(define_insn_reservation "bdver3_fxch" 2
  (and (eq_attr "cpu" "bdver3")
       (eq_attr "type" "fxch"))
  "bdver3-direct,bdver3-fpsched,bdver3-ffma")
300
;; SSE loads.
;; Unaligned vector loads (attribute movu = 1).  The VEX 128-bit entry
;; is listed before the non-VEX 128-bit entry that also covers its modes.

;; VEX-encoded 128-bit unaligned load.
(define_insn_reservation "bdver3_ssevector_avx128_unaligned_load" 4
  (and (eq_attr "cpu" "bdver3")
       (and (eq_attr "type" "ssemov")
            (and (eq_attr "prefix" "vex")
                 (and (eq_attr "movu" "1")
                      (and (eq_attr "mode" "V4SF,V2DF")
                           (eq_attr "memory" "load"))))))
  "bdver3-direct,bdver3-fpload")

;; 256-bit unaligned load (double-decoded).
(define_insn_reservation "bdver3_ssevector_avx256_unaligned_load" 5
  (and (eq_attr "cpu" "bdver3")
       (and (eq_attr "type" "ssemov")
            (and (eq_attr "movu" "1")
                 (and (eq_attr "mode" "V8SF,V4DF")
                      (eq_attr "memory" "load")))))
  "bdver3-double,bdver3-fpload")

;; Remaining 128-bit unaligned loads; these additionally reserve fmal.
(define_insn_reservation "bdver3_ssevector_sse128_unaligned_load" 4
  (and (eq_attr "cpu" "bdver3")
       (and (eq_attr "type" "ssemov")
            (and (eq_attr "movu" "1")
                 (and (eq_attr "mode" "V4SF,V2DF")
                      (eq_attr "memory" "load")))))
  "bdver3-direct,bdver3-fpload,bdver3-fmal")
;; Vector loads not matched by the unaligned entries.

;; VEX-encoded 128-bit load.
(define_insn_reservation "bdver3_ssevector_avx128_load" 4
  (and (eq_attr "cpu" "bdver3")
       (and (eq_attr "type" "ssemov")
            (and (eq_attr "prefix" "vex")
                 (and (eq_attr "mode" "V4SF,V2DF,TI")
                      (eq_attr "memory" "load")))))
  "bdver3-direct,bdver3-fpload,bdver3-fmal")

;; 256-bit load (double-decoded).
(define_insn_reservation "bdver3_ssevector_avx256_load" 5
  (and (eq_attr "cpu" "bdver3")
       (and (eq_attr "type" "ssemov")
            (and (eq_attr "mode" "V8SF,V4DF,OI")
                 (eq_attr "memory" "load"))))
  "bdver3-double,bdver3-fpload,bdver3-fmal")

;; Remaining 128-bit loads: the load alone, no FP unit afterwards.
(define_insn_reservation "bdver3_ssevector_sse128_load" 4
  (and (eq_attr "cpu" "bdver3")
       (and (eq_attr "type" "ssemov")
            (and (eq_attr "mode" "V4SF,V2DF,TI")
                 (eq_attr "memory" "load"))))
  "bdver3-direct,bdver3-fpload")
;; Scalar SSE loads and the MMX/SSE catch-all load.

;; DImode load (MOVQ).
(define_insn_reservation "bdver3_ssescalar_movq_load" 4
  (and (eq_attr "cpu" "bdver3")
       (and (eq_attr "type" "ssemov")
            (and (eq_attr "mode" "DI")
                 (eq_attr "memory" "load"))))
  "bdver3-direct,bdver3-fpload,bdver3-fmal")

;; VEX-encoded SFmode load (VMOVSS); listed before the generic SF/DF
;; entry below.
(define_insn_reservation "bdver3_ssescalar_vmovss_load" 4
  (and (eq_attr "cpu" "bdver3")
       (and (eq_attr "type" "ssemov")
            (and (eq_attr "prefix" "vex")
                 (and (eq_attr "mode" "SF")
                      (eq_attr "memory" "load")))))
  "bdver3-direct,bdver3-fpload")

;; Remaining SF/DF scalar loads.
(define_insn_reservation "bdver3_ssescalar_sse128_load" 4
  (and (eq_attr "cpu" "bdver3")
       (and (eq_attr "type" "ssemov")
            (and (eq_attr "mode" "SF,DF")
                 (eq_attr "memory" "load"))))
  "bdver3-direct,bdver3-fpload, bdver3-ffma")

;; Every MMX/SSE move load not matched by an earlier entry.
(define_insn_reservation "bdver3_mmxsse_load" 4
  (and (eq_attr "cpu" "bdver3")
       (and (eq_attr "type" "mmxmov,ssemov")
            (eq_attr "memory" "load")))
  "bdver3-direct,bdver3-fpload, bdver3-fmal")
367
;; SSE stores.

;; 256-bit store: double-decoded, FP store pipe plus a store pipe for
;; two cycles.
(define_insn_reservation "bdver3_sse_store_avx256" 5
  (and (eq_attr "cpu" "bdver3")
       (and (eq_attr "type" "ssemov")
            (and (eq_attr "mode" "V8SF,V4DF,OI")
                 (eq_attr "memory" "store,both"))))
  "bdver3-double,bdver3-fpsched,((bdver3-fsto+bdver3-store)*2)")

;; 128-bit store: same resources, direct decode.
(define_insn_reservation "bdver3_sse_store" 4
  (and (eq_attr "cpu" "bdver3")
       (and (eq_attr "type" "ssemov")
            (and (eq_attr "mode" "V4SF,V2DF,TI")
                 (eq_attr "memory" "store,both"))))
  "bdver3-direct,bdver3-fpsched,((bdver3-fsto+bdver3-store)*2)")

;; Every other MMX/SSE store: a single store cycle.
(define_insn_reservation "bdver3_mmxsse_store_short" 4
  (and (eq_attr "cpu" "bdver3")
       (and (eq_attr "type" "mmxmov,ssemov")
            (eq_attr "memory" "store,both")))
  "bdver3-direct,bdver3-fpsched,(bdver3-fsto+bdver3-store)")
386
;; Register moves.

;; 256-bit register move (double-decoded).
(define_insn_reservation "bdver3_ssevector_avx256" 3
  (and (eq_attr "cpu" "bdver3")
       (and (eq_attr "type" "ssemov")
            (and (eq_attr "mode" "V8SF,V4DF,OI")
                 (eq_attr "memory" "none"))))
  "bdver3-double,bdver3-fpsched,bdver3-fmal")

;; Scalar SF/DF register move (MOVSS / MOVSD).
(define_insn_reservation "bdver3_movss_movsd" 2
  (and (eq_attr "cpu" "bdver3")
       (and (eq_attr "type" "ssemov")
            (and (eq_attr "mode" "SF,DF")
                 (eq_attr "memory" "none"))))
  "bdver3-direct,bdver3-fpsched,bdver3-ffma")

;; Every other MMX/SSE register move.
(define_insn_reservation "bdver3_mmxssemov" 2
  (and (eq_attr "cpu" "bdver3")
       (and (eq_attr "type" "mmxmov,ssemov")
            (eq_attr "memory" "none")))
  "bdver3-direct,bdver3-fpsched,bdver3-fmal")
;; SSE logical operations (insn types sselog and sselog1).
;; The V8SF entries are double-decoded and reserve fmal; the remaining
;; entries are direct and reserve fxbar.  Each *_load entry precedes its
;; generic counterpart.

;; V8SF with a memory operand.
(define_insn_reservation "bdver3_sselog_load_256" 7
  (and (eq_attr "cpu" "bdver3")
       (and (eq_attr "type" "sselog,sselog1")
            (and (eq_attr "mode" "V8SF")
                 (eq_attr "memory" "load"))))
  "bdver3-double,bdver3-fpload,bdver3-fmal")

;; V8SF, remaining forms.
(define_insn_reservation "bdver3_sselog_256" 3
  (and (eq_attr "cpu" "bdver3")
       (and (eq_attr "type" "sselog,sselog1")
            (eq_attr "mode" "V8SF")))
  "bdver3-double,bdver3-fpsched,bdver3-fmal")

;; Other modes with a memory operand.
(define_insn_reservation "bdver3_sselog_load" 6
  (and (eq_attr "cpu" "bdver3")
       (and (eq_attr "type" "sselog,sselog1")
            (eq_attr "memory" "load")))
  "bdver3-direct,bdver3-fpload,bdver3-fxbar")

;; Everything else.
(define_insn_reservation "bdver3_sselog" 2
  (and (eq_attr "cpu" "bdver3")
       (eq_attr "type" "sselog,sselog1"))
  "bdver3-direct,bdver3-fpsched,bdver3-fxbar")
426
;; SSE shuffles (insn types sseshuf and sseshuf1); all use the fpshuf
;; resource.

;; V8SF shuffle with a memory operand (double-decoded).
(define_insn_reservation "bdver3_sseshuf_load_256" 7
  (and (eq_attr "cpu" "bdver3")
       (and (eq_attr "type" "sseshuf,sseshuf1")
            (and (eq_attr "mode" "V8SF")
                 (eq_attr "memory" "load"))))
  "bdver3-double,bdver3-fpload,bdver3-fpshuf")

;; Shuffle in any other mode with a memory operand.
(define_insn_reservation "bdver3_sseshuf_load" 6
  (and (eq_attr "cpu" "bdver3")
       (and (eq_attr "type" "sseshuf,sseshuf1")
            (eq_attr "memory" "load")))
  "bdver3-direct,bdver3-fpload,bdver3-fpshuf")
439
;; V8SF shuffle without a memory operand (double-decoded).  The type list
;; includes sseshuf1, as in the other shuffle reservations; otherwise a
;; register-form V8SF sseshuf1 would be treated as a direct-path
;; 128-bit shuffle by bdver3_sseshuf.
(define_insn_reservation "bdver3_sseshuf_256" 3
  (and (eq_attr "cpu" "bdver3")
       (and (eq_attr "type" "sseshuf,sseshuf1")
            (eq_attr "mode" "V8SF")))
  "bdver3-double,bdver3-fpsched,bdver3-fpshuf")
;; Every remaining shuffle: direct decode, fpshuf.
(define_insn_reservation "bdver3_sseshuf" 2
  (and (eq_attr "cpu" "bdver3")
       (eq_attr "type" "sseshuf,sseshuf1"))
  "bdver3-direct,bdver3-fpsched,bdver3-fpshuf")
449
;; SSE compares.
;; NOTE(review): the earlier comment stated "PCMP actually executes in
;; FMAL", but the ssecmp entries reserve bdver3-ffma -- confirm.

;; Compare with a memory operand.
(define_insn_reservation "bdver3_ssecmp_load" 6
  (and (eq_attr "cpu" "bdver3")
       (and (eq_attr "type" "ssecmp")
            (eq_attr "memory" "load")))
  "bdver3-direct,bdver3-fpload,bdver3-ffma")

;; Compare, remaining forms.
(define_insn_reservation "bdver3_ssecmp" 2
  (and (eq_attr "cpu" "bdver3")
       (eq_attr "type" "ssecmp"))
  "bdver3-direct,bdver3-fpsched,bdver3-ffma")

;; Type ssecomi with a memory operand: double-decoded, an FMA pipe or
;; the FP store pipe.
(define_insn_reservation "bdver3_ssecomi_load" 6
  (and (eq_attr "cpu" "bdver3")
       (and (eq_attr "type" "ssecomi")
            (eq_attr "memory" "load")))
  "bdver3-double,bdver3-fpload,(bdver3-ffma | bdver3-fsto)")

;; Type ssecomi, remaining forms.
(define_insn_reservation "bdver3_ssecomi" 2
  (and (eq_attr "cpu" "bdver3")
       (eq_attr "type" "ssecomi"))
  "bdver3-double,bdver3-fpsched,(bdver3-ffma | bdver3-fsto)")
469
;; Conversions behave very irregularly and their scheduling is critical,
;; so each conversion instruction is described separately.
472
;; 256 bit conversion: either operand 0 or operand 1 has a 256-bit
;; vector mode (V4DF, V8SF or V8SI).  Vectorpath, occupying all FP units.

;; Form with a memory source.
(define_insn_reservation "bdver3_vcvtX2Y_avx256_load" 8
  (and (eq_attr "cpu" "bdver3")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "memory" "load")
                 (ior (ior (match_operand:V4DF 0 "register_operand")
                           (ior (match_operand:V8SF 0 "register_operand")
                                (match_operand:V8SI 0 "register_operand")))
                      (ior (match_operand:V4DF 1 "nonimmediate_operand")
                           (ior (match_operand:V8SF 1 "nonimmediate_operand")
                                (match_operand:V8SI 1 "nonimmediate_operand")))))))
  "bdver3-vector,bdver3-fpload,bdver3-fvector")

;; Form without a memory operand.
(define_insn_reservation "bdver3_vcvtX2Y_avx256" 4
  (and (eq_attr "cpu" "bdver3")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "memory" "none")
                 (ior (ior (match_operand:V4DF 0 "register_operand")
                           (ior (match_operand:V8SF 0 "register_operand")
                                (match_operand:V8SI 0 "register_operand")))
                      (ior (match_operand:V4DF 1 "nonimmediate_operand")
                           (ior (match_operand:V8SF 1 "nonimmediate_operand")
                                (match_operand:V8SI 1 "nonimmediate_operand")))))))
  "bdver3-vector,bdver3-fpsched,bdver3-fvector")
;; CVTSS2SD, CVTSD2SS: scalar SF/DF conversions on the fcvt resource.

;; Memory source.
(define_insn_reservation "bdver3_ssecvt_cvtss2sd_load" 8
  (and (eq_attr "cpu" "bdver3")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "mode" "SF,DF")
                 (eq_attr "memory" "load"))))
  "bdver3-direct,bdver3-fpload,bdver3-fcvt")

;; No memory operand.
(define_insn_reservation "bdver3_ssecvt_cvtss2sd" 4
  (and (eq_attr "cpu" "bdver3")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "mode" "SF,DF")
                 (eq_attr "memory" "none"))))
  "bdver3-direct,bdver3-fpsched,bdver3-fcvt")
;; CVTSI2SD, CVTSI2SS, CVTSI2SDQ, CVTSI2SSQ: integer to scalar FP.

;; Memory source: direct decode, fcvt.
(define_insn_reservation "bdver3_sseicvt_cvtsi2sd_load" 8
  (and (eq_attr "cpu" "bdver3")
       (and (eq_attr "type" "sseicvt")
            (and (eq_attr "mode" "SF,DF")
                 (eq_attr "memory" "load"))))
  "bdver3-direct,bdver3-fpload,bdver3-fcvt")

;; No memory operand: double-decoded; the last cycle reserves either no
;; unit or fcvt.
(define_insn_reservation "bdver3_sseicvt_cvtsi2sd" 4
  (and (eq_attr "cpu" "bdver3")
       (and (eq_attr "type" "sseicvt")
            (and (eq_attr "mode" "SF,DF")
                 (eq_attr "memory" "none"))))
  "bdver3-double,bdver3-fpsched,(nothing | bdver3-fcvt)")
;; CVTPD2PS: V2DF source, V4SF destination.  Double-decoded, then fxbar
;; or fcvt.

;; Memory source.
(define_insn_reservation "bdver3_ssecvt_cvtpd2ps_load" 8
  (and (eq_attr "cpu" "bdver3")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "memory" "load")
                 (and (match_operand:V4SF 0 "register_operand")
                      (match_operand:V2DF 1 "nonimmediate_operand")))))
  "bdver3-double,bdver3-fpload,(bdver3-fxbar | bdver3-fcvt)")

;; No memory operand.
(define_insn_reservation "bdver3_ssecvt_cvtpd2ps" 4
  (and (eq_attr "cpu" "bdver3")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "memory" "none")
                 (and (match_operand:V4SF 0 "register_operand")
                      (match_operand:V2DF 1 "nonimmediate_operand")))))
  "bdver3-double,bdver3-fpsched,(bdver3-fxbar | bdver3-fcvt)")
;; CVTPI2PS, CVTDQ2PS: V2SI or V4SI source, V4SF destination.  Direct
;; decode, fcvt.

;; Memory source.
(define_insn_reservation "bdver3_ssecvt_cvtdq2ps_load" 8
  (and (eq_attr "cpu" "bdver3")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "memory" "load")
                 (and (match_operand:V4SF 0 "register_operand")
                      (ior (match_operand:V2SI 1 "nonimmediate_operand")
                           (match_operand:V4SI 1 "nonimmediate_operand"))))))
  "bdver3-direct,bdver3-fpload,bdver3-fcvt")

;; No memory operand.
(define_insn_reservation "bdver3_ssecvt_cvtdq2ps" 4
  (and (eq_attr "cpu" "bdver3")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "memory" "none")
                 (and (match_operand:V4SF 0 "register_operand")
                      (ior (match_operand:V2SI 1 "nonimmediate_operand")
                           (match_operand:V4SI 1 "nonimmediate_operand"))))))
  "bdver3-direct,bdver3-fpsched,bdver3-fcvt")
;; CVTDQ2PD: V4SI source, V2DF destination.  Double-decoded, then fxbar
;; or fcvt.

;; Memory source.
(define_insn_reservation "bdver3_ssecvt_cvtdq2pd_load" 8
  (and (eq_attr "cpu" "bdver3")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "memory" "load")
                 (and (match_operand:V2DF 0 "register_operand")
                      (match_operand:V4SI 1 "nonimmediate_operand")))))
  "bdver3-double,bdver3-fpload,(bdver3-fxbar | bdver3-fcvt)")

;; No memory operand.
(define_insn_reservation "bdver3_ssecvt_cvtdq2pd" 4
  (and (eq_attr "cpu" "bdver3")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "memory" "none")
                 (and (match_operand:V2DF 0 "register_operand")
                      (match_operand:V4SI 1 "nonimmediate_operand")))))
  "bdver3-double,bdver3-fpsched,(bdver3-fxbar | bdver3-fcvt)")
;; CVTPS2PD, CVTPI2PD: V4SF or V2SI source, V2DF destination.
;; Memory-source form: double-decoded, load, then fxbar or fcvt.
(define_insn_reservation "bdver3_ssecvt_cvtps2pd_load" 6
  (and (eq_attr "cpu" "bdver3")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "memory" "load")
                 (and (match_operand:V2DF 0 "register_operand")
                      (ior (match_operand:V2SI 1 "nonimmediate_operand")
                           (match_operand:V4SF 1 "nonimmediate_operand"))))))
  "bdver3-double,bdver3-fpload,(bdver3-fxbar | bdver3-fcvt)")
;; CVTPS2PD / CVTPI2PD without a memory operand.  The memory test must be
;; "none": with "load" this condition would be identical to that of
;; bdver3_ssecvt_cvtps2pd_load, so the register forms would match neither
;; entry and this reservation (latency 2, no load) would be shadowed.
(define_insn_reservation "bdver3_ssecvt_cvtps2pd" 2
  (and (eq_attr "cpu" "bdver3")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "memory" "none")
                 (and (match_operand:V2DF 0 "register_operand")
                      (ior (match_operand:V2SI 1 "nonimmediate_operand")
                           (match_operand:V4SF 1 "nonimmediate_operand"))))))
  "bdver3-double,bdver3-fpsched,(bdver3-fxbar | bdver3-fcvt)")
;; CVTSD2SI, CVTSD2SIQ, CVTSS2SI, CVTSS2SIQ, CVTTSD2SI, CVTTSD2SIQ,
;; CVTTSS2SI, CVTTSS2SIQ: scalar FP to integer (mode SI or DI).
;; Double-decoded, then fcvt or the FP store pipe.

;; Memory source.
(define_insn_reservation "bdver3_ssecvt_cvtsX2si_load" 8
  (and (eq_attr "cpu" "bdver3")
       (and (eq_attr "type" "sseicvt")
            (and (eq_attr "mode" "SI,DI")
                 (eq_attr "memory" "load"))))
  "bdver3-double,bdver3-fpload,(bdver3-fcvt | bdver3-fsto)")

;; No memory operand.
(define_insn_reservation "bdver3_ssecvt_cvtsX2si" 4
  (and (eq_attr "cpu" "bdver3")
       (and (eq_attr "type" "sseicvt")
            (and (eq_attr "mode" "SI,DI")
                 (eq_attr "memory" "none"))))
  "bdver3-double,bdver3-fpsched,(bdver3-fcvt | bdver3-fsto)")
;; CVTPD2PI, CVTTPD2PI: V2DF source, V2SI destination.  Double-decoded,
;; then fcvt or fxbar.

;; Memory source.
(define_insn_reservation "bdver3_ssecvt_cvtpd2pi_load" 8
  (and (eq_attr "cpu" "bdver3")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "memory" "load")
                 (and (match_operand:V2DF 1 "nonimmediate_operand")
                      (match_operand:V2SI 0 "register_operand")))))
  "bdver3-double,bdver3-fpload,(bdver3-fcvt | bdver3-fxbar)")

;; No memory operand.
(define_insn_reservation "bdver3_ssecvt_cvtpd2pi" 4
  (and (eq_attr "cpu" "bdver3")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "memory" "none")
                 (and (match_operand:V2DF 1 "nonimmediate_operand")
                      (match_operand:V2SI 0 "register_operand")))))
  "bdver3-double,bdver3-fpsched,(bdver3-fcvt | bdver3-fxbar)")
;; CVTPD2DQ, CVTTPD2DQ: V2DF source, V4SI destination.  Double-decoded,
;; then fcvt or fxbar.

;; Memory source.
(define_insn_reservation "bdver3_ssecvt_cvtpd2dq_load" 6
  (and (eq_attr "cpu" "bdver3")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "memory" "load")
                 (and (match_operand:V2DF 1 "nonimmediate_operand")
                      (match_operand:V4SI 0 "register_operand")))))
  "bdver3-double,bdver3-fpload,(bdver3-fcvt | bdver3-fxbar)")

;; No memory operand.
(define_insn_reservation "bdver3_ssecvt_cvtpd2dq" 2
  (and (eq_attr "cpu" "bdver3")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "memory" "none")
                 (and (match_operand:V2DF 1 "nonimmediate_operand")
                      (match_operand:V4SI 0 "register_operand")))))
  "bdver3-double,bdver3-fpsched,(bdver3-fcvt | bdver3-fxbar)")
;; CVTPS2PI, CVTTPS2PI, CVTPS2DQ, CVTTPS2DQ: V4SF source, V2SI or V4SI
;; destination.  Direct decode, fcvt.

;; Memory source.
(define_insn_reservation "bdver3_ssecvt_cvtps2pi_load" 8
  (and (eq_attr "cpu" "bdver3")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "memory" "load")
                 (and (match_operand:V4SF 1 "nonimmediate_operand")
                      (ior (match_operand:V2SI 0 "register_operand")
                           (match_operand:V4SI 0 "register_operand"))))))
  "bdver3-direct,bdver3-fpload,bdver3-fcvt")

;; No memory operand.
(define_insn_reservation "bdver3_ssecvt_cvtps2pi" 4
  (and (eq_attr "cpu" "bdver3")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "memory" "none")
                 (and (match_operand:V4SF 1 "nonimmediate_operand")
                      (ior (match_operand:V2SI 0 "register_operand")
                           (match_operand:V4SI 0 "register_operand"))))))
  "bdver3-direct,bdver3-fpsched,bdver3-fcvt")
646
;; SSE MUL, ADD, and MULADD: all run on either FMA pipe.  The 256-bit
;; entries (V8SF, V4DF) are double-decoded and listed first.

;; 256-bit, memory source.
(define_insn_reservation "bdver3_ssemuladd_load_256" 11
  (and (eq_attr "cpu" "bdver3")
       (and (eq_attr "type" "ssemul,sseadd,sseadd1,ssemuladd")
            (and (eq_attr "mode" "V8SF,V4DF")
                 (eq_attr "memory" "load"))))
  "bdver3-double,bdver3-fpload,bdver3-ffma")

;; 256-bit, no memory operand.
(define_insn_reservation "bdver3_ssemuladd_256" 7
  (and (eq_attr "cpu" "bdver3")
       (and (eq_attr "type" "ssemul,sseadd,sseadd1,ssemuladd")
            (and (eq_attr "mode" "V8SF,V4DF")
                 (eq_attr "memory" "none"))))
  "bdver3-double,bdver3-fpsched,bdver3-ffma")

;; Other modes, memory source.
(define_insn_reservation "bdver3_ssemuladd_load" 10
  (and (eq_attr "cpu" "bdver3")
       (and (eq_attr "type" "ssemul,sseadd,sseadd1,ssemuladd")
            (eq_attr "memory" "load")))
  "bdver3-direct,bdver3-fpload,bdver3-ffma")

;; Other modes, no memory operand.
(define_insn_reservation "bdver3_ssemuladd" 6
  (and (eq_attr "cpu" "bdver3")
       (and (eq_attr "type" "ssemul,sseadd,sseadd1,ssemuladd")
            (eq_attr "memory" "none")))
  "bdver3-direct,bdver3-fpsched,bdver3-ffma")
;; SSE integer multiply (fmma resource) and integer add (fmal resource).

;; Integer multiply, memory source.
(define_insn_reservation "bdver3_sseimul_load" 8
  (and (eq_attr "cpu" "bdver3")
       (and (eq_attr "type" "sseimul")
            (eq_attr "memory" "load")))
  "bdver3-direct,bdver3-fpload,bdver3-fmma")

;; Integer multiply, no memory operand.
(define_insn_reservation "bdver3_sseimul" 4
  (and (eq_attr "cpu" "bdver3")
       (and (eq_attr "type" "sseimul")
            (eq_attr "memory" "none")))
  "bdver3-direct,bdver3-fpsched,bdver3-fmma")

;; Integer add, memory source.
(define_insn_reservation "bdver3_sseiadd_load" 6
  (and (eq_attr "cpu" "bdver3")
       (and (eq_attr "type" "sseiadd")
            (eq_attr "memory" "load")))
  "bdver3-direct,bdver3-fpload,bdver3-fmal")

;; Integer add, no memory operand.
(define_insn_reservation "bdver3_sseiadd" 2
  (and (eq_attr "cpu" "bdver3")
       (and (eq_attr "type" "sseiadd")
            (eq_attr "memory" "none")))
  "bdver3-direct,bdver3-fpsched,bdver3-fmal")
690
;; SSE DIV: no throughput information (assume same as amdfam10).
;; Every divide holds one FMA pipe for 17 consecutive cycles.
;; 256-bit divides (double-decoded).

;; V4DF, memory source.
(define_insn_reservation "bdver3_ssediv_double_load_256" 27
  (and (eq_attr "cpu" "bdver3")
       (and (eq_attr "type" "ssediv")
            (and (eq_attr "mode" "V4DF")
                 (eq_attr "memory" "load"))))
  "bdver3-double,bdver3-fpload,(bdver3-ffma0*17 | bdver3-ffma1*17)")

;; V4DF, no memory operand.
(define_insn_reservation "bdver3_ssediv_double_256" 27
  (and (eq_attr "cpu" "bdver3")
       (and (eq_attr "type" "ssediv")
            (and (eq_attr "mode" "V4DF")
                 (eq_attr "memory" "none"))))
  "bdver3-double,bdver3-fpsched,(bdver3-ffma0*17 | bdver3-ffma1*17)")

;; V8SF, memory source.
(define_insn_reservation "bdver3_ssediv_single_load_256" 27
  (and (eq_attr "cpu" "bdver3")
       (and (eq_attr "type" "ssediv")
            (and (eq_attr "mode" "V8SF")
                 (eq_attr "memory" "load"))))
  "bdver3-double,bdver3-fpload,(bdver3-ffma0*17 | bdver3-ffma1*17)")

;; V8SF, no memory operand.
(define_insn_reservation "bdver3_ssediv_single_256" 24
  (and (eq_attr "cpu" "bdver3")
       (and (eq_attr "type" "ssediv")
            (and (eq_attr "mode" "V8SF")
                 (eq_attr "memory" "none"))))
  "bdver3-double,bdver3-fpsched,(bdver3-ffma0*17 | bdver3-ffma1*17)")
;; Scalar and 128-bit SSE divides: direct decode, then one FMA pipe held
;; for 17 consecutive cycles.

;; DF / V2DF, memory source.
(define_insn_reservation "bdver3_ssediv_double_load" 27
  (and (eq_attr "cpu" "bdver3")
       (and (eq_attr "type" "ssediv")
            (and (eq_attr "mode" "DF,V2DF")
                 (eq_attr "memory" "load"))))
  "bdver3-direct,bdver3-fpload,(bdver3-ffma0*17 | bdver3-ffma1*17)")

;; DF / V2DF, no memory operand.
(define_insn_reservation "bdver3_ssediv_double" 27
  (and (eq_attr "cpu" "bdver3")
       (and (eq_attr "type" "ssediv")
            (and (eq_attr "mode" "DF,V2DF")
                 (eq_attr "memory" "none"))))
  "bdver3-direct,bdver3-fpsched,(bdver3-ffma0*17 | bdver3-ffma1*17)")

;; SF / V4SF, memory source.
(define_insn_reservation "bdver3_ssediv_single_load" 27
  (and (eq_attr "cpu" "bdver3")
       (and (eq_attr "type" "ssediv")
            (and (eq_attr "mode" "SF,V4SF")
                 (eq_attr "memory" "load"))))
  "bdver3-direct,bdver3-fpload,(bdver3-ffma0*17 | bdver3-ffma1*17)")

;; SF / V4SF, no memory operand.
(define_insn_reservation "bdver3_ssediv_single" 24
  (and (eq_attr "cpu" "bdver3")
       (and (eq_attr "type" "ssediv")
            (and (eq_attr "mode" "SF,V4SF")
                 (eq_attr "memory" "none"))))
  "bdver3-direct,bdver3-fpsched,(bdver3-ffma0*17 | bdver3-ffma1*17)")
740
;; TImode instructions of type sseins: direct decode, fxbar.
(define_insn_reservation "bdver3_sseins" 3
  (and (eq_attr "cpu" "bdver3")
       (and (eq_attr "type" "sseins")
            (eq_attr "mode" "TI")))
  "bdver3-direct,bdver3-fpsched,bdver3-fxbar")
746
747