1;; Copyright (C) 2012-2018 Free Software Foundation, Inc.
2;;
3;; This file is part of GCC.
4;;
5;; GCC is free software; you can redistribute it and/or modify
6;; it under the terms of the GNU General Public License as published by
7;; the Free Software Foundation; either version 3, or (at your option)
8;; any later version.
9;;
10;; GCC is distributed in the hope that it will be useful,
11;; but WITHOUT ANY WARRANTY; without even the implied warranty of
12;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13;; GNU General Public License for more details.
14;;
15;; You should have received a copy of the GNU General Public License
16;; along with GCC; see the file COPYING3.  If not see
17;; <http://www.gnu.org/licenses/>.
18;;
19;; AMD bdver3 and bdver4 Scheduling
20;;
21;; The bdver3 and bdver4 contain three pipelined FP units and two integer
22;; units.  Fetching and decoding logic is different from previous fam15
23;; processors. Fetching is done every two cycles rather than every cycle
24;; and two decode units are available. The decode units therefore decode
25;; four instructions in two cycles.
26;;
27;; The load/store queue unit is not attached to the schedulers but
28;; communicates with all the execution units separately instead.
29;;
30;; bdver3 and bdver4 belong to fam15 processors. We use the same insn
31;; attribute that was used for bdver1 decoding scheme.
32
;; Five automata are declared; each CPU unit in this file is assigned to
;; exactly one of them: "bdver3" (decode), "bdver3_ieu", "bdver3_load",
;; "bdver3_fp" and "bdver3_agu".
33(define_automaton "bdver3,bdver3_ieu,bdver3_load,bdver3_fp,bdver3_agu")
34
;; Four decode slots, all belonging to the "bdver3" automaton.
35(define_cpu_unit "bdver3-decode0" "bdver3")
36(define_cpu_unit "bdver3-decode1" "bdver3")
37(define_cpu_unit "bdver3-decode2" "bdver3")
38(define_cpu_unit "bdver3-decode3" "bdver3")
39
40;; Double decoded instructions take two cycles whereas
41;; direct instructions take one cycle.
42;; Vectorpath instructions are single issue instructions.
43;; So, we engage all decode units for vector instructions.
;; ("+" reserves all four decoders in the same cycle.)
44(define_reservation "bdver3-vector" "bdver3-decode0+bdver3-decode1+bdver3-decode2+bdver3-decode3")
45
46;; Direct instructions can be issued to any of the four decoders
;; ("|" picks exactly one of the alternatives for a single cycle).
47(define_reservation "bdver3-direct" "(bdver3-decode0|bdver3-decode1|bdver3-decode2|bdver3-decode3)")
48
49;; Double instructions take two cycles to decode.
;; Each alternative holds the same decoder on two consecutive cycles
;; ("," advances to the next cycle).
50(define_reservation "bdver3-double" "(bdver3-decode0,bdver3-decode0)|
51               (bdver3-decode1,bdver3-decode1)| (bdver3-decode2,bdver3-decode2)|
52               (bdver3-decode3,bdver3-decode3)")
53
;; Two integer execution units; "bdver3-ieu" reserves either one.
54(define_cpu_unit "bdver3-ieu0" "bdver3_ieu")
55(define_cpu_unit "bdver3-ieu1" "bdver3_ieu")
56(define_reservation "bdver3-ieu" "(bdver3-ieu0|bdver3-ieu1)")
57
;; Two address generation units; "bdver3-agu" reserves either one.
58(define_cpu_unit "bdver3-agu0" "bdver3_agu")
59(define_cpu_unit "bdver3-agu1" "bdver3_agu")
60(define_reservation "bdver3-agu" "(bdver3-agu0|bdver3-agu1)")
61
;; Two load/store pipes.  Loads and stores share these two units; no
;; separate store units are defined.
62(define_cpu_unit "bdver3-load0" "bdver3_load")
63(define_cpu_unit "bdver3-load1" "bdver3_load")
;; A load spans three cycles: an AGU cycle, one cycle on either load pipe,
;; then one cycle in which no unit is reserved.
64(define_reservation "bdver3-load" "bdver3-agu,
65				   (bdver3-load0|bdver3-load1),nothing")
66;; 128bit SSE instructions issue two loads at once.
;; Same shape as "bdver3-load" but both pipes are held in the second cycle.
67(define_reservation "bdver3-load2" "bdver3-agu,
68				   (bdver3-load0+bdver3-load1),nothing")
69
;; A store holds either load/store pipe for one cycle.
70(define_reservation "bdver3-store" "(bdver3-load0 | bdver3-load1)")
71;; 128bit SSE instructions issue two stores at once.
72(define_reservation "bdver3-store2" "(bdver3-load0+bdver3-load1)")
73
74;; vectorpath (microcoded) instructions are single issue instructions.
75;; So, they occupy all the integer units.
;; Concretely: both IEUs, both AGUs and both load/store pipes in one cycle.
76(define_reservation "bdver3-ivector" "bdver3-ieu0+bdver3-ieu1+
77                                      bdver3-agu0+bdver3-agu1+
78                                      bdver3-load0+bdver3-load1")
79
;; Three consecutive cycles with no unit reserved.
;; NOTE(review): presumably models the FP scheduler delay -- confirm.
80(define_reservation "bdver3-fpsched" "nothing,nothing,nothing")
81
82;; The floating point loads.
;; "+" runs the three-cycle fpsched sequence in parallel with the
;; three-cycle load sequence, so only the load's units are actually held.
83(define_reservation "bdver3-fpload" "(bdver3-fpsched + bdver3-load)")
84(define_reservation "bdver3-fpload2" "(bdver3-fpsched + bdver3-load2)")
85
86;; Three FP units.
87(define_cpu_unit "bdver3-ffma0" "bdver3_fp")
88(define_cpu_unit "bdver3-ffma1" "bdver3_fp")
89(define_cpu_unit "bdver3-fpsto" "bdver3_fp")
90
;; Reservation for microcoded FP insns: all three FP units plus both
;; load/store pipes in the same cycle.
91(define_reservation "bdver3-fvector" "bdver3-ffma0+bdver3-ffma1+
92                                      bdver3-fpsto+bdver3-load0+
93                                      bdver3-load1")
94
;; Functional aliases mapping operation classes onto the three FP units:
;;   ffma   - either FMA pipe
;;   fcvt   - ffma0 only
;;   fmma   - ffma0 only
;;   fxbar  - ffma1 only
;;   fmal   - ffma0 or fpsto
;;   fsto   - fpsto only
;;   fpshuf - fpsto only
95(define_reservation "bdver3-ffma"     "(bdver3-ffma0 | bdver3-ffma1)")
96(define_reservation "bdver3-fcvt"     "bdver3-ffma0")
97(define_reservation "bdver3-fmma"     "bdver3-ffma0")
98(define_reservation "bdver3-fxbar"    "bdver3-ffma1")
99(define_reservation "bdver3-fmal"     "(bdver3-ffma0 | bdver3-fpsto)")
100(define_reservation "bdver3-fsto"     "bdver3-fpsto")
101(define_reservation "bdver3-fpshuf"    "bdver3-fpsto")
102
;; Each define_insn_reservation below gives: name, default latency,
;; the condition selecting insns, and the unit reservation string.
103;; Jump instructions are executed in the branch unit completely transparent to us.
;; CALL: latency 2; two decode cycles, then one AGU or one IEU, then a
;; cycle with no unit reserved.
104(define_insn_reservation "bdver3_call" 2
105			 (and (eq_attr "cpu" "bdver3,bdver4")
106			      (eq_attr "type" "call,callv"))
107			 "bdver3-double,(bdver3-agu | bdver3-ieu),nothing")
108;; PUSH mem is double path.
;; NOTE(review): the reservation below uses the direct decode path for
;; every "push" insn, which does not match the comment above -- confirm.
109(define_insn_reservation "bdver3_push" 1
110			 (and (eq_attr "cpu" "bdver3,bdver4")
111			      (eq_attr "type" "push"))
112			 "bdver3-direct,bdver3-ieu,bdver3-store")
113;; POP r16/mem are double path.
;; NOTE(review): modelled as direct decode followed by one cycle holding
;; all integer-side units (bdver3-ivector) -- confirm against the comment.
114(define_insn_reservation "bdver3_pop" 1
115                         (and (eq_attr "cpu" "bdver3,bdver4")
116                              (eq_attr "type" "pop"))
117                         "bdver3-direct,bdver3-ivector")
118;; LEAVE no latency info so far, assume same with amdfam10.
;; Vector decode (all decoders), then all integer-side units for a cycle.
119(define_insn_reservation "bdver3_leave" 3
120                         (and (eq_attr "cpu" "bdver3,bdver4")
121                              (eq_attr "type" "leave"))
122                         "bdver3-vector,bdver3-ivector")
123;; LEA executes in AGU unit with 1 cycle latency on BDVER3.
;; NOTE(review): the reservation below holds an IEU, not an AGU, which
;; disagrees with the comment above -- confirm which one is intended.
124(define_insn_reservation "bdver3_lea" 1
125			 (and (eq_attr "cpu" "bdver3,bdver4")
126			      (eq_attr "type" "lea"))
127			 "bdver3-direct,bdver3-ieu")
128;; MUL executes in special multiplier unit attached to IEU1.
;; All four variants therefore reserve bdver3-ieu1 specifically.
;; NOTE(review): each DI-mode variant is listed before the generic variant
;; whose condition also matches DI insns; this appears to rely on the
;; earlier definition taking precedence -- confirm.
;; Register operands, DImode: latency 6.
129(define_insn_reservation "bdver3_imul_DI" 6
130			 (and (eq_attr "cpu" "bdver3,bdver4")
131			      (and (eq_attr "type" "imul")
132				   (and (eq_attr "mode" "DI")
133					(eq_attr "memory" "none,unknown"))))
134			 "bdver3-direct,bdver3-ieu1")
;; Register operands, any mode: latency 4.
135(define_insn_reservation "bdver3_imul" 4
136			 (and (eq_attr "cpu" "bdver3,bdver4")
137			      (and (eq_attr "type" "imul")
138				   (eq_attr "memory" "none,unknown")))
139			 "bdver3-direct,bdver3-ieu1")
;; Memory operand, DImode: latency 10; the load sequence precedes IEU1.
140(define_insn_reservation "bdver3_imul_mem_DI" 10
141			 (and (eq_attr "cpu" "bdver3,bdver4")
142			      (and (eq_attr "type" "imul")
143				   (and (eq_attr "mode" "DI")
144					(eq_attr "memory" "load,both"))))
145			 "bdver3-direct,bdver3-load,bdver3-ieu1")
;; Memory operand, any mode: latency 8.
146(define_insn_reservation "bdver3_imul_mem" 8
147			 (and (eq_attr "cpu" "bdver3,bdver4")
148			      (and (eq_attr "type" "imul")
149				   (eq_attr "memory" "load,both")))
150			 "bdver3-direct,bdver3-load,bdver3-ieu1")
151
;; "str" type insns that touch memory: latency 6.  Vector decode, then a
;; load sequence, then one cycle holding all integer-side units.
152(define_insn_reservation "bdver3_str" 6
153			 (and (eq_attr "cpu" "bdver3,bdver4")
154			      (and (eq_attr "type" "str")
155				   (eq_attr "memory" "load,both,store")))
156			 "bdver3-vector,bdver3-load,bdver3-ivector")
157
158;; Integer instructions.
;; These are keyed on the "bdver1_decode" attribute (direct/vector), the
;; "unit" attribute (integer/unknown) and the "memory" attribute.
;; Direct-path, no memory operand: latency 1, one IEU or one AGU.
159(define_insn_reservation "bdver3_idirect" 1
160			 (and (eq_attr "cpu" "bdver3,bdver4")
161			      (and (eq_attr "bdver1_decode" "direct")
162				   (and (eq_attr "unit" "integer,unknown")
163					(eq_attr "memory" "none,unknown"))))
164			 "bdver3-direct,(bdver3-ieu|bdver3-agu)")
;; Vector-path, no memory operand: latency 2, all integer-side units.
165(define_insn_reservation "bdver3_ivector" 2
166			 (and (eq_attr "cpu" "bdver3,bdver4")
167			      (and (eq_attr "bdver1_decode" "vector")
168				   (and (eq_attr "unit" "integer,unknown")
169					(eq_attr "memory" "none,unknown"))))
170			 "bdver3-vector,bdver3-ivector")
;; Plain integer load (type imov, memory load): latency 4, no IEU needed.
;; NOTE(review): listed before bdver3_idirect_load, whose condition may
;; also match imov loads -- appears to rely on definition order; confirm.
171(define_insn_reservation "bdver3_idirect_loadmov" 4
172			 (and (eq_attr "cpu" "bdver3,bdver4")
173			      (and (eq_attr "type" "imov")
174				   (eq_attr "memory" "load")))
175			 "bdver3-direct,bdver3-load")
;; Direct-path load-op: latency 5, load sequence followed by an IEU.
176(define_insn_reservation "bdver3_idirect_load" 5
177			 (and (eq_attr "cpu" "bdver3,bdver4")
178			      (and (eq_attr "bdver1_decode" "direct")
179				   (and (eq_attr "unit" "integer,unknown")
180					(eq_attr "memory" "load"))))
181			 "bdver3-direct,bdver3-load,bdver3-ieu")
;; Plain integer store (type imov, memory store): latency 5.
182(define_insn_reservation "bdver3_idirect_movstore" 5
183			 (and (eq_attr "cpu" "bdver3,bdver4")
184			      (and (eq_attr "type" "imov")
185				   (eq_attr "memory" "store")))
186			 "bdver3-direct,bdver3-ieu,bdver3-store")
;; Direct-path read-modify-write: latency 4; load, IEU, then the store
;; pipe is held on two consecutive cycles.
187(define_insn_reservation "bdver3_idirect_both" 4
188			 (and (eq_attr "cpu" "bdver3,bdver4")
189			      (and (eq_attr "bdver1_decode" "direct")
190				   (and (eq_attr "unit" "integer,unknown")
191					(eq_attr "memory" "both"))))
192			 "bdver3-direct,bdver3-load,
193			  bdver3-ieu,bdver3-store,
194			  bdver3-store")
;; Direct-path op with store: latency 4; an IEU and an AGU are held in
;; the same cycle, followed by the store pipe.
195(define_insn_reservation "bdver3_idirect_store" 4
196			 (and (eq_attr "cpu" "bdver3,bdver4")
197			      (and (eq_attr "bdver1_decode" "direct")
198				   (and (eq_attr "unit" "integer,unknown")
199					(eq_attr "memory" "store"))))
200			 "bdver3-direct,(bdver3-ieu+bdver3-agu),
201			  bdver3-store")
202;; BDVER3 floating point units.
;; x87 moves (type fmov).  The XFmode variants are listed ahead of the
;; generic load/store variants, and the catch-all register form comes last.
;; NOTE(review): the conditions overlap, so this appears to rely on the
;; earlier definition taking precedence -- confirm.
;; XFmode load: latency 13; vector decode, dual-pipe FP load, then all FP
;; units held for 9 cycles ("*9" repeats the reservation).
203(define_insn_reservation "bdver3_fldxf" 13
204			 (and (eq_attr "cpu" "bdver3,bdver4")
205			      (and (eq_attr "type" "fmov")
206				   (and (eq_attr "memory" "load")
207					(eq_attr "mode" "XF"))))
208			 "bdver3-vector,bdver3-fpload2,bdver3-fvector*9")
;; Other x87 loads: latency 2.
209(define_insn_reservation "bdver3_fld" 2
210			 (and (eq_attr "cpu" "bdver3,bdver4")
211			      (and (eq_attr "type" "fmov")
212				   (eq_attr "memory" "load")))
213			 "bdver3-direct,bdver3-fpload,bdver3-ffma")
;; XFmode store: latency 4; vector decode, an AGU alongside fpsched, then
;; both store pipes together with all FP units held for 6 cycles.
214(define_insn_reservation "bdver3_fstxf" 4
215			 (and (eq_attr "cpu" "bdver3,bdver4")
216			      (and (eq_attr "type" "fmov")
217				   (and (eq_attr "memory" "store,both")
218					(eq_attr "mode" "XF"))))
219			 "bdver3-vector,(bdver3-fpsched+bdver3-agu),(bdver3-store2+(bdver3-fvector*6))")
;; Other x87 stores: latency 2; FP store unit and a store pipe together.
220(define_insn_reservation "bdver3_fst" 2
221			 (and (eq_attr "cpu" "bdver3,bdver4")
222			      (and (eq_attr "type" "fmov")
223				   (eq_attr "memory" "store,both")))
224			 "bdver3-double,(bdver3-fpsched),(bdver3-fsto+bdver3-store)")
;; FISTP/FISTTP: same reservation as the generic x87 store above.
225(define_insn_reservation "bdver3_fist" 2
226			 (and (eq_attr "cpu" "bdver3,bdver4")
227			      (eq_attr "type" "fistp,fisttp"))
228			 "bdver3-double,(bdver3-fpsched),(bdver3-fsto+bdver3-store)")
;; Remaining fmov insns (no "memory" test): latency 2 on either FMA pipe.
229(define_insn_reservation "bdver3_fmov_bdver3" 2
230			 (and (eq_attr "cpu" "bdver3,bdver4")
231			      (eq_attr "type" "fmov"))
232			 "bdver3-direct,bdver3-fpsched,bdver3-ffma")
;; x87 arithmetic.  Each "_load" variant precedes the variant without a
;; "memory" test; both end on either FMA pipe (bdver3-ffma).
;; FADD-class (type fop) with memory source: latency 10.
233(define_insn_reservation "bdver3_fadd_load" 10
234			 (and (eq_attr "cpu" "bdver3,bdver4")
235			      (and (eq_attr "type" "fop")
236				   (eq_attr "memory" "load")))
237			 "bdver3-direct,bdver3-fpload,bdver3-ffma")
;; FADD-class, remaining forms: latency 6.
238(define_insn_reservation "bdver3_fadd" 6
239			 (and (eq_attr "cpu" "bdver3,bdver4")
240			      (eq_attr "type" "fop"))
241			 "bdver3-direct,bdver3-fpsched,bdver3-ffma")
;; FMUL with memory source: latency 6, double-path decode.
;; NOTE(review): same latency as the register form below, whereas
;; bdver3_fadd_load adds 4 cycles over bdver3_fadd -- confirm intended.
242(define_insn_reservation "bdver3_fmul_load" 6
243			 (and (eq_attr "cpu" "bdver3,bdver4")
244			      (and (eq_attr "type" "fmul")
245				   (eq_attr "memory" "load")))
246			 "bdver3-double,bdver3-fpload,bdver3-ffma")
;; FMUL, remaining forms: latency 6.
247(define_insn_reservation "bdver3_fmul" 6
248			 (and (eq_attr "cpu" "bdver3,bdver4")
249			      (eq_attr "type" "fmul"))
250			 "bdver3-direct,bdver3-fpsched,bdver3-ffma")
;; Sign manipulation (type fsgn): latency 2.
251(define_insn_reservation "bdver3_fsgn" 2
252			 (and (eq_attr "cpu" "bdver3,bdver4")
253			      (eq_attr "type" "fsgn"))
254			 "bdver3-direct,bdver3-fpsched,bdver3-ffma")
;; x87 divide with memory source: latency 42.
;; NOTE(review): identical latency to the register form below -- confirm.
255(define_insn_reservation "bdver3_fdiv_load" 42
256			 (and (eq_attr "cpu" "bdver3,bdver4")
257			      (and (eq_attr "type" "fdiv")
258				   (eq_attr "memory" "load")))
259			 "bdver3-direct,bdver3-fpload,bdver3-ffma")
;; x87 divide, remaining forms: latency 42.  The FMA pipe is held for a
;; single cycle only, i.e. the model does not block the unit for the
;; duration of the divide.
260(define_insn_reservation "bdver3_fdiv" 42
261			 (and (eq_attr "cpu" "bdver3,bdver4")
262			      (eq_attr "type" "fdiv"))
263			 "bdver3-direct,bdver3-fpsched,bdver3-ffma")
;; "fpspc" type insns with memory source: latency 143, vector decode, all
;; FP units.  No reservation for non-load fpspc insns appears in this part
;; of the file.
264(define_insn_reservation "bdver3_fpspc_load" 143
265			 (and (eq_attr "cpu" "bdver3,bdver4")
266			      (and (eq_attr "type" "fpspc")
267				   (eq_attr "memory" "load")))
268			 "bdver3-vector,bdver3-fpload,bdver3-fvector")
;; FCMOV with memory source: latency 17.
269(define_insn_reservation "bdver3_fcmov_load" 17
270			 (and (eq_attr "cpu" "bdver3,bdver4")
271			      (and (eq_attr "type" "fcmov")
272				   (eq_attr "memory" "load")))
273			 "bdver3-vector,bdver3-fpload,bdver3-fvector")
;; FCMOV, remaining forms: latency 15.
274(define_insn_reservation "bdver3_fcmov" 15
275			 (and (eq_attr "cpu" "bdver3,bdver4")
276			      (eq_attr "type" "fcmov"))
277			 "bdver3-vector,bdver3-fpsched,bdver3-fvector")
;; x87 compares (type fcmp).  The double-decode variants are listed first;
;; the direct-decode variants further down do not test "bdver1_decode".
;; NOTE(review): the conditions overlap, so this appears to rely on the
;; earlier definition taking precedence -- confirm.
;; Double-decode compare with memory source: latency 6; executes on an FMA
;; pipe or the FP store unit.
278(define_insn_reservation "bdver3_fcomi_load" 6
279			 (and (eq_attr "cpu" "bdver3,bdver4")
280			      (and (eq_attr "type" "fcmp")
281				   (and (eq_attr "bdver1_decode" "double")
282					(eq_attr "memory" "load"))))
283			 "bdver3-double,bdver3-fpload,(bdver3-ffma | bdver3-fsto)")
;; Double-decode compare, remaining forms: latency 2.
284(define_insn_reservation "bdver3_fcomi" 2
285			 (and (eq_attr "cpu" "bdver3,bdver4")
286			      (and (eq_attr "bdver1_decode" "double")
287				   (eq_attr "type" "fcmp")))
288			 "bdver3-double,bdver3-fpsched,(bdver3-ffma | bdver3-fsto)")
;; Other compares with memory source: latency 6, direct decode.
289(define_insn_reservation "bdver3_fcom_load" 6
290			 (and (eq_attr "cpu" "bdver3,bdver4")
291			      (and (eq_attr "type" "fcmp")
292				   (eq_attr "memory" "load")))
293			 "bdver3-direct,bdver3-fpload,bdver3-ffma")
;; Other compares, remaining forms: latency 2.
294(define_insn_reservation "bdver3_fcom" 2
295			 (and (eq_attr "cpu" "bdver3,bdver4")
296			      (eq_attr "type" "fcmp"))
297			 "bdver3-direct,bdver3-fpsched,bdver3-ffma")
;; FXCH: latency 2 on either FMA pipe.
298(define_insn_reservation "bdver3_fxch" 2
299			 (and (eq_attr "cpu" "bdver3,bdver4")
300			      (eq_attr "type" "fxch"))
301			 "bdver3-direct,bdver3-fpsched,bdver3-ffma")
302
303;; SSE loads.
;; Vector loads of type ssemov, listed from most to least specific.
;; NOTE(review): the conditions overlap (e.g. the unaligned variants are
;; subsets of the aligned ones), so this appears to rely on the earlier
;; definition taking precedence -- confirm.
;; VEX-encoded unaligned 128-bit load (movu = 1): latency 4, load only.
304(define_insn_reservation "bdver3_ssevector_avx128_unaligned_load" 4
305			 (and (eq_attr "cpu" "bdver3,bdver4")
306			      (and (eq_attr "type" "ssemov")
307				   (and (eq_attr "prefix" "vex")
308					(and (eq_attr "movu" "1")
309					     (and (eq_attr "mode" "V4SF,V2DF")
310						  (eq_attr "memory" "load"))))))
311			 "bdver3-direct,bdver3-fpload")
;; Unaligned 256-bit load: latency 5, double-path decode.
312(define_insn_reservation "bdver3_ssevector_avx256_unaligned_load" 5
313			 (and (eq_attr "cpu" "bdver3,bdver4")
314			      (and (eq_attr "type" "ssemov")
315				   (and (eq_attr "movu" "1")
316				        (and (eq_attr "mode" "V8SF,V4DF")
317				             (eq_attr "memory" "load")))))
318			 "bdver3-double,bdver3-fpload")
;; Unaligned 128-bit load without the VEX test: latency 4; also holds
;; bdver3-fmal after the load.
319(define_insn_reservation "bdver3_ssevector_sse128_unaligned_load" 4
320			 (and (eq_attr "cpu" "bdver3,bdver4")
321			      (and (eq_attr "type" "ssemov")
322				   (and (eq_attr "movu" "1")
323				        (and (eq_attr "mode" "V4SF,V2DF")
324				             (eq_attr "memory" "load")))))
325			 "bdver3-direct,bdver3-fpload,bdver3-fmal")
;; VEX-encoded 128-bit load (V4SF/V2DF/TI): latency 4.
326(define_insn_reservation "bdver3_ssevector_avx128_load" 4
327			 (and (eq_attr "cpu" "bdver3,bdver4")
328			      (and (eq_attr "type" "ssemov")
329				   (and (eq_attr "prefix" "vex")
330				        (and (eq_attr "mode" "V4SF,V2DF,TI")
331				             (eq_attr "memory" "load")))))
332			 "bdver3-direct,bdver3-fpload,bdver3-fmal")
;; 256-bit load (V8SF/V4DF/OI): latency 5, double-path decode.
333(define_insn_reservation "bdver3_ssevector_avx256_load" 5
334			 (and (eq_attr "cpu" "bdver3,bdver4")
335			      (and (eq_attr "type" "ssemov")
336				   (and (eq_attr "mode" "V8SF,V4DF,OI")
337				        (eq_attr "memory" "load"))))
338			 "bdver3-double,bdver3-fpload,bdver3-fmal")
;; Remaining 128-bit loads: latency 4, load only.
339(define_insn_reservation "bdver3_ssevector_sse128_load" 4
340			 (and (eq_attr "cpu" "bdver3,bdver4")
341			      (and (eq_attr "type" "ssemov")
342				   (and (eq_attr "mode" "V4SF,V2DF,TI")
343				        (eq_attr "memory" "load"))))
344			 "bdver3-direct,bdver3-fpload")
;; Scalar and MMX loads.  All have latency 4 and direct decode; they differ
;; only in which FP unit (if any) is held after the load.
;; DImode ssemov load (MOVQ): holds bdver3-fmal after the load.
345(define_insn_reservation "bdver3_ssescalar_movq_load" 4
346			 (and (eq_attr "cpu" "bdver3,bdver4")
347			      (and (eq_attr "type" "ssemov")
348				   (and (eq_attr "mode" "DI")
349				        (eq_attr "memory" "load"))))
350			 "bdver3-direct,bdver3-fpload,bdver3-fmal")
;; VEX-encoded SFmode load: load only, no FP unit held.
351(define_insn_reservation "bdver3_ssescalar_vmovss_load" 4
352			 (and (eq_attr "cpu" "bdver3,bdver4")
353			      (and (eq_attr "type" "ssemov")
354				   (and (eq_attr "prefix" "vex")
355				        (and (eq_attr "mode" "SF")
356				             (eq_attr "memory" "load")))))
357			 "bdver3-direct,bdver3-fpload")
;; Remaining SF/DF loads: hold either FMA pipe after the load.
358(define_insn_reservation "bdver3_ssescalar_sse128_load" 4
359			 (and (eq_attr "cpu" "bdver3,bdver4")
360			      (and (eq_attr "type" "ssemov")
361				   (and (eq_attr "mode" "SF,DF")
362				        (eq_attr "memory" "load"))))
363			 "bdver3-direct,bdver3-fpload, bdver3-ffma")
;; Catch-all for any other mmxmov/ssemov load.
364(define_insn_reservation "bdver3_mmxsse_load" 4
365			 (and (eq_attr "cpu" "bdver3,bdver4")
366			      (and (eq_attr "type" "mmxmov,ssemov")
367				   (eq_attr "memory" "load")))
368			 "bdver3-direct,bdver3-fpload, bdver3-fmal")
369
370;; SSE stores.
;; 256-bit store: latency 5, double decode; the FP store unit and a store
;; pipe are held together for two cycles ("*2").
371(define_insn_reservation "bdver3_sse_store_avx256" 5
372			 (and (eq_attr "cpu" "bdver3,bdver4")
373			      (and (eq_attr "type" "ssemov")
374				   (and (eq_attr "mode" "V8SF,V4DF,OI")
375					(eq_attr "memory" "store,both"))))
376			 "bdver3-double,bdver3-fpsched,((bdver3-fsto+bdver3-store)*2)")
;; 128-bit store: latency 4, direct decode; same two-cycle store phase.
377(define_insn_reservation "bdver3_sse_store" 4
378			 (and (eq_attr "cpu" "bdver3,bdver4")
379			      (and (eq_attr "type" "ssemov")
380				   (and (eq_attr "mode" "V4SF,V2DF,TI")
381					(eq_attr "memory" "store,both"))))
382			 "bdver3-direct,bdver3-fpsched,((bdver3-fsto+bdver3-store)*2)")
;; Catch-all for remaining mmxmov/ssemov stores: one-cycle store phase.
383(define_insn_reservation "bdver3_mmxsse_store_short" 4
384			 (and (eq_attr "cpu" "bdver3,bdver4")
385			      (and (eq_attr "type" "mmxmov,ssemov")
386				   (eq_attr "memory" "store,both")))
387			 "bdver3-direct,bdver3-fpsched,(bdver3-fsto+bdver3-store)")
388
389;; Register moves.
;; 256-bit register move: latency 3, double decode, bdver3-fmal.
390(define_insn_reservation "bdver3_ssevector_avx256" 3
391			 (and (eq_attr "cpu" "bdver3,bdver4")
392			      (and (eq_attr "type" "ssemov")
393				   (and (eq_attr "mode" "V8SF,V4DF,OI")
394					(eq_attr "memory" "none"))))
395			 "bdver3-double,bdver3-fpsched,bdver3-fmal")
;; Scalar SF/DF register move: latency 2 on either FMA pipe.
396(define_insn_reservation "bdver3_movss_movsd" 2
397			 (and (eq_attr "cpu" "bdver3,bdver4")
398			      (and (eq_attr "type" "ssemov")
399				   (and (eq_attr "mode" "SF,DF")
400                                        (eq_attr "memory" "none"))))
401			 "bdver3-direct,bdver3-fpsched,bdver3-ffma")
;; Catch-all for remaining mmxmov/ssemov register moves: latency 2.
402(define_insn_reservation "bdver3_mmxssemov" 2
403			 (and (eq_attr "cpu" "bdver3,bdver4")
404			      (and (eq_attr "type" "mmxmov,ssemov")
405				   (eq_attr "memory" "none")))
406			 "bdver3-direct,bdver3-fpsched,bdver3-fmal")
407;; SSE logs.
;; Logic insns (types sselog, sselog1).  Only V8SF mode selects the
;; 256-bit variants; every other mode uses the generic ones below.
;; V8SF with memory source: latency 7, double decode, bdver3-fmal.
408(define_insn_reservation "bdver3_sselog_load_256" 7
409			 (and (eq_attr "cpu" "bdver3,bdver4")
410			      (and (eq_attr "type" "sselog,sselog1")
411				   (and (eq_attr "mode" "V8SF")
412				   (eq_attr "memory" "load"))))
413			 "bdver3-double,bdver3-fpload,bdver3-fmal")
;; V8SF, remaining forms: latency 3.
414(define_insn_reservation "bdver3_sselog_256" 3
415			 (and (eq_attr "cpu" "bdver3,bdver4")
416			      (and (eq_attr "type" "sselog,sselog1")
417                                   (eq_attr "mode" "V8SF")))
418			 "bdver3-double,bdver3-fpsched,bdver3-fmal")
;; Other modes with memory source: latency 6, direct decode, bdver3-fxbar.
419(define_insn_reservation "bdver3_sselog_load" 6
420			 (and (eq_attr "cpu" "bdver3,bdver4")
421			      (and (eq_attr "type" "sselog,sselog1")
422				   (eq_attr "memory" "load")))
423			 "bdver3-direct,bdver3-fpload,bdver3-fxbar")
;; Other modes, remaining forms: latency 2.
424(define_insn_reservation "bdver3_sselog" 2
425			 (and (eq_attr "cpu" "bdver3,bdver4")
426			      (eq_attr "type" "sselog,sselog1"))
427			 "bdver3-direct,bdver3-fpsched,bdver3-fxbar")
428
429;; SSE Shuffles
;; Shuffle insns (types sseshuf, sseshuf1) execute on bdver3-fpshuf.
;; V8SF with memory source: latency 7, double decode.
430(define_insn_reservation "bdver3_sseshuf_load_256" 7
431                         (and (eq_attr "cpu" "bdver3,bdver4")
432                              (and (eq_attr "type" "sseshuf,sseshuf1")
433                                   (and (eq_attr "mode" "V8SF")
434                                   (eq_attr "memory" "load"))))
435                         "bdver3-double,bdver3-fpload,bdver3-fpshuf")
;; Other modes with memory source: latency 6, direct decode.
436(define_insn_reservation "bdver3_sseshuf_load" 6
437                         (and (eq_attr "cpu" "bdver3,bdver4")
438                              (and (eq_attr "type" "sseshuf,sseshuf1")
439                                   (eq_attr "memory" "load")))
440                         "bdver3-direct,bdver3-fpload,bdver3-fpshuf")
441
;; 256-bit (V8SF) shuffle without a memory source: latency 3, double-path
;; decode, then the shuffle unit.  Both shuffle types (sseshuf, sseshuf1)
;; are matched, consistent with the other three shuffle reservations, so
;; that a V8SF sseshuf1 insn is not modelled as a 128-bit shuffle.
442(define_insn_reservation "bdver3_sseshuf_256" 3
443                         (and (eq_attr "cpu" "bdver3,bdver4")
444                              (and (eq_attr "type" "sseshuf,sseshuf1")
445                                   (eq_attr "mode" "V8SF")))
446                         "bdver3-double,bdver3-fpsched,bdver3-fpshuf")
;; Catch-all for sseshuf/sseshuf1 insns (no mode or memory test):
;; latency 2, direct decode, then the shuffle unit.
447(define_insn_reservation "bdver3_sseshuf" 2
448                         (and (eq_attr "cpu" "bdver3,bdver4")
449                              (eq_attr "type" "sseshuf,sseshuf1"))
450                         "bdver3-direct,bdver3-fpsched,bdver3-fpshuf")
451
452;; PCMP actually executes in FMAL.
;; NOTE(review): the ssecmp reservations below hold bdver3-ffma (either FMA
;; pipe), not bdver3-fmal, which disagrees with the comment -- confirm.
;; Compare with memory source: latency 6.
453(define_insn_reservation "bdver3_ssecmp_load" 6
454			 (and (eq_attr "cpu" "bdver3,bdver4")
455			      (and (eq_attr "type" "ssecmp")
456				   (eq_attr "memory" "load")))
457			 "bdver3-direct,bdver3-fpload,bdver3-ffma")
;; Compare, remaining forms: latency 2.
458(define_insn_reservation "bdver3_ssecmp" 2
459			 (and (eq_attr "cpu" "bdver3,bdver4")
460			      (eq_attr "type" "ssecmp"))
461			 "bdver3-direct,bdver3-fpsched,bdver3-ffma")
;; COMI-type compare with memory source: latency 6, double decode; runs
;; on an FMA pipe or the FP store unit.
462(define_insn_reservation "bdver3_ssecomi_load" 6
463			 (and (eq_attr "cpu" "bdver3,bdver4")
464			      (and (eq_attr "type" "ssecomi")
465				   (eq_attr "memory" "load")))
466			 "bdver3-double,bdver3-fpload,(bdver3-ffma | bdver3-fsto)")
;; COMI-type compare, remaining forms: latency 2.
467(define_insn_reservation "bdver3_ssecomi" 2
468			 (and (eq_attr "cpu" "bdver3,bdver4")
469			      (eq_attr "type" "ssecomi"))
470			 "bdver3-double,bdver3-fpsched,(bdver3-ffma | bdver3-fsto)")
471
472;; Conversions behave very irregularly and the scheduling is critical here.
473;; Take each instruction separately.
474
475;; 256 bit conversion.
;; Selected when operand 0 is a register of mode V4DF, V8SF or V8SI, or
;; operand 1 is a nonimmediate operand of one of those modes.  Vector
;; decode, then all FP units (bdver3-fvector).
;; Memory source: latency 8.
476(define_insn_reservation "bdver3_vcvtX2Y_avx256_load" 8
477			 (and (eq_attr "cpu" "bdver3,bdver4")
478			      (and (eq_attr "type" "ssecvt")
479				   (and (eq_attr "memory" "load")
480					(ior (ior (match_operand:V4DF 0 "register_operand")
481					          (ior (match_operand:V8SF 0 "register_operand")
482						       (match_operand:V8SI 0 "register_operand")))
483					     (ior (match_operand:V4DF 1 "nonimmediate_operand")
484						  (ior (match_operand:V8SF 1 "nonimmediate_operand")
485						       (match_operand:V8SI 1 "nonimmediate_operand")))))))
486			 "bdver3-vector,bdver3-fpload,bdver3-fvector")
;; Register source: latency 4.
487(define_insn_reservation "bdver3_vcvtX2Y_avx256" 4
488			 (and (eq_attr "cpu" "bdver3,bdver4")
489			      (and (eq_attr "type" "ssecvt")
490				   (and (eq_attr "memory" "none")
491					(ior (ior (match_operand:V4DF 0 "register_operand")
492					          (ior (match_operand:V8SF 0 "register_operand")
493						       (match_operand:V8SI 0 "register_operand")))
494					     (ior (match_operand:V4DF 1 "nonimmediate_operand")
495						  (ior (match_operand:V8SF 1 "nonimmediate_operand")
496						       (match_operand:V8SI 1 "nonimmediate_operand")))))))
497			 "bdver3-vector,bdver3-fpsched,bdver3-fvector")
498;; CVTSS2SD, CVTSD2SS.
;; Selected by type ssecvt with mode SF or DF; executes on bdver3-fcvt.
;; Memory source: latency 8.
499(define_insn_reservation "bdver3_ssecvt_cvtss2sd_load" 8
500			 (and (eq_attr "cpu" "bdver3,bdver4")
501			      (and (eq_attr "type" "ssecvt")
502				   (and (eq_attr "mode" "SF,DF")
503					(eq_attr "memory" "load"))))
504			 "bdver3-direct,bdver3-fpload,bdver3-fcvt")
;; Register source: latency 4.
505(define_insn_reservation "bdver3_ssecvt_cvtss2sd" 4
506			 (and (eq_attr "cpu" "bdver3,bdver4")
507			      (and (eq_attr "type" "ssecvt")
508				   (and (eq_attr "mode" "SF,DF")
509					(eq_attr "memory" "none"))))
510			 "bdver3-direct,bdver3-fpsched,bdver3-fcvt")
511;; CVTSI2SD, CVTSI2SS, CVTSI2SDQ, CVTSI2SSQ.
;; Selected by type sseicvt with mode SF or DF.
;; Memory source: latency 8, direct decode.
512(define_insn_reservation "bdver3_sseicvt_cvtsi2sd_load" 8
513			 (and (eq_attr "cpu" "bdver3,bdver4")
514			      (and (eq_attr "type" "sseicvt")
515				   (and (eq_attr "mode" "SF,DF")
516					(eq_attr "memory" "load"))))
517			 "bdver3-direct,bdver3-fpload,bdver3-fcvt")
;; Register source: latency 4, double decode; the final cycle holds
;; either no unit or bdver3-fcvt.
518(define_insn_reservation "bdver3_sseicvt_cvtsi2sd" 4
519			 (and (eq_attr "cpu" "bdver3,bdver4")
520			      (and (eq_attr "type" "sseicvt")
521				   (and (eq_attr "mode" "SF,DF")
522					(eq_attr "memory" "none"))))
523			 "bdver3-double,bdver3-fpsched,(nothing | bdver3-fcvt)")
;; The packed conversions below are distinguished by the machine modes of
;; operand 0 (destination register) and operand 1 (source).
524;; CVTPD2PS.
;; V2DF source, V4SF destination; double decode, fxbar or fcvt.
;; Memory source: latency 8.
525(define_insn_reservation "bdver3_ssecvt_cvtpd2ps_load" 8
526			 (and (eq_attr "cpu" "bdver3,bdver4")
527			      (and (eq_attr "type" "ssecvt")
528				   (and (eq_attr "memory" "load")
529                                        (and (match_operand:V4SF 0 "register_operand")
530					     (match_operand:V2DF 1 "nonimmediate_operand")))))
531			 "bdver3-double,bdver3-fpload,(bdver3-fxbar | bdver3-fcvt)")
;; Register source: latency 4.
532(define_insn_reservation "bdver3_ssecvt_cvtpd2ps" 4
533			 (and (eq_attr "cpu" "bdver3,bdver4")
534			      (and (eq_attr "type" "ssecvt")
535				   (and (eq_attr "memory" "none")
536                                        (and (match_operand:V4SF 0 "register_operand")
537					     (match_operand:V2DF 1 "nonimmediate_operand")))))
538			 "bdver3-double,bdver3-fpsched,(bdver3-fxbar | bdver3-fcvt)")
539;; CVTPI2PS, CVTDQ2PS.
;; V2SI or V4SI source, V4SF destination; direct decode, fcvt.
;; Memory source: latency 8.
540(define_insn_reservation "bdver3_ssecvt_cvtdq2ps_load" 8
541			 (and (eq_attr "cpu" "bdver3,bdver4")
542			      (and (eq_attr "type" "ssecvt")
543				   (and (eq_attr "memory" "load")
544                                        (and (match_operand:V4SF 0 "register_operand")
545					     (ior (match_operand:V2SI 1 "nonimmediate_operand")
546					          (match_operand:V4SI 1 "nonimmediate_operand"))))))
547			 "bdver3-direct,bdver3-fpload,bdver3-fcvt")
;; Register source: latency 4.
548(define_insn_reservation "bdver3_ssecvt_cvtdq2ps" 4
549			 (and (eq_attr "cpu" "bdver3,bdver4")
550			      (and (eq_attr "type" "ssecvt")
551				   (and (eq_attr "memory" "none")
552                                        (and (match_operand:V4SF 0 "register_operand")
553					     (ior (match_operand:V2SI 1 "nonimmediate_operand")
554					          (match_operand:V4SI 1 "nonimmediate_operand"))))))
555			 "bdver3-direct,bdver3-fpsched,bdver3-fcvt")
556;; CVTDQ2PD.
;; V4SI source, V2DF destination; double decode, fxbar or fcvt.
;; Memory source: latency 8.
557(define_insn_reservation "bdver3_ssecvt_cvtdq2pd_load" 8
558			 (and (eq_attr "cpu" "bdver3,bdver4")
559			      (and (eq_attr "type" "ssecvt")
560				   (and (eq_attr "memory" "load")
561                                        (and (match_operand:V2DF 0 "register_operand")
562					     (match_operand:V4SI 1 "nonimmediate_operand")))))
563			 "bdver3-double,bdver3-fpload,(bdver3-fxbar | bdver3-fcvt)")
;; Register source: latency 4.
564(define_insn_reservation "bdver3_ssecvt_cvtdq2pd" 4
565			 (and (eq_attr "cpu" "bdver3,bdver4")
566			      (and (eq_attr "type" "ssecvt")
567				   (and (eq_attr "memory" "none")
568                                        (and (match_operand:V2DF 0 "register_operand")
569					     (match_operand:V4SI 1 "nonimmediate_operand")))))
570			 "bdver3-double,bdver3-fpsched,(bdver3-fxbar | bdver3-fcvt)")
571;; CVTPS2PD, CVTPI2PD.
;; Memory-source form: V2SI or V4SF source, V2DF destination.  Latency 6;
;; double decode, FP load, then fxbar or fcvt.
572(define_insn_reservation "bdver3_ssecvt_cvtps2pd_load" 6
573			 (and (eq_attr "cpu" "bdver3,bdver4")
574			      (and (eq_attr "type" "ssecvt")
575				   (and (eq_attr "memory" "load")
576                                        (and (match_operand:V2DF 0 "register_operand")
577					     (ior (match_operand:V2SI 1 "nonimmediate_operand")
578					          (match_operand:V4SF 1 "nonimmediate_operand"))))))
579			 "bdver3-double,bdver3-fpload,(bdver3-fxbar | bdver3-fcvt)")
;; Register-source CVTPS2PD / CVTPI2PD: V2SI or V4SF source, V2DF
;; destination, no memory operand.  Latency 2; double decode, FP scheduler
;; delay, then fxbar or fcvt.
;; The "memory" test must be "none" here: testing "load" would duplicate
;; the condition of bdver3_ssecvt_cvtps2pd_load and leave the register
;; form without a matching reservation.
580(define_insn_reservation "bdver3_ssecvt_cvtps2pd" 2
581			 (and (eq_attr "cpu" "bdver3,bdver4")
582			      (and (eq_attr "type" "ssecvt")
583				   (and (eq_attr "memory" "none")
584                                        (and (match_operand:V2DF 0 "register_operand")
585					     (ior (match_operand:V2SI 1 "nonimmediate_operand")
586					          (match_operand:V4SF 1 "nonimmediate_operand"))))))
587			 "bdver3-double,bdver3-fpsched,(bdver3-fxbar | bdver3-fcvt)")
588;; CVTSD2SI, CVTSD2SIQ, CVTSS2SI, CVTSS2SIQ, CVTTSD2SI, CVTTSD2SIQ, CVTTSS2SI, CVTTSS2SIQ.
;; Selected by type sseicvt with mode SI or DI; double decode, then fcvt
;; or the FP store unit.
;; Memory source: latency 8.
589(define_insn_reservation "bdver3_ssecvt_cvtsX2si_load" 8
590			 (and (eq_attr "cpu" "bdver3,bdver4")
591			      (and (eq_attr "type" "sseicvt")
592				   (and (eq_attr "mode" "SI,DI")
593					(eq_attr "memory" "load"))))
594			 "bdver3-double,bdver3-fpload,(bdver3-fcvt | bdver3-fsto)")
;; Register source: latency 4.
595(define_insn_reservation "bdver3_ssecvt_cvtsX2si" 4
596			 (and (eq_attr "cpu" "bdver3,bdver4")
597			      (and (eq_attr "type" "sseicvt")
598				   (and (eq_attr "mode" "SI,DI")
599					(eq_attr "memory" "none"))))
600			 "bdver3-double,bdver3-fpsched,(bdver3-fcvt | bdver3-fsto)")
601;; CVTPD2PI, CVTTPD2PI.
;; V2DF source, V2SI destination; double decode, fcvt or fxbar.
;; Memory source: latency 8.
602(define_insn_reservation "bdver3_ssecvt_cvtpd2pi_load" 8
603			 (and (eq_attr "cpu" "bdver3,bdver4")
604			      (and (eq_attr "type" "ssecvt")
605				   (and (eq_attr "memory" "load")
606				        (and (match_operand:V2DF 1 "nonimmediate_operand")
607					     (match_operand:V2SI 0 "register_operand")))))
608			 "bdver3-double,bdver3-fpload,(bdver3-fcvt | bdver3-fxbar)")
;; Register source: latency 4.
609(define_insn_reservation "bdver3_ssecvt_cvtpd2pi" 4
610			 (and (eq_attr "cpu" "bdver3,bdver4")
611			      (and (eq_attr "type" "ssecvt")
612				   (and (eq_attr "memory" "none")
613				        (and (match_operand:V2DF 1 "nonimmediate_operand")
614					     (match_operand:V2SI 0 "register_operand")))))
615			 "bdver3-double,bdver3-fpsched,(bdver3-fcvt | bdver3-fxbar)")
616;; CVTPD2DQ, CVTTPD2DQ.
;; V2DF source, V4SI destination; double decode, fcvt or fxbar.
;; Memory source: latency 6.
617(define_insn_reservation "bdver3_ssecvt_cvtpd2dq_load" 6
618			 (and (eq_attr "cpu" "bdver3,bdver4")
619			      (and (eq_attr "type" "ssecvt")
620				   (and (eq_attr "memory" "load")
621				        (and (match_operand:V2DF 1 "nonimmediate_operand")
622					     (match_operand:V4SI 0 "register_operand")))))
623			 "bdver3-double,bdver3-fpload,(bdver3-fcvt | bdver3-fxbar)")
;; Register source: latency 2.
624(define_insn_reservation "bdver3_ssecvt_cvtpd2dq" 2
625			 (and (eq_attr "cpu" "bdver3,bdver4")
626			      (and (eq_attr "type" "ssecvt")
627				   (and (eq_attr "memory" "none")
628				        (and (match_operand:V2DF 1 "nonimmediate_operand")
629					     (match_operand:V4SI 0 "register_operand")))))
630			 "bdver3-double,bdver3-fpsched,(bdver3-fcvt | bdver3-fxbar)")
631;; CVTPS2PI, CVTTPS2PI, CVTPS2DQ, CVTTPS2DQ.
;; V4SF source, V2SI or V4SI destination; direct decode, then bdver3-fcvt.
;; The mode suffix is written as "match_operand:MODE" with no space after
;; the colon, matching every other operand test in this file.
;; Memory source: latency 8.
632(define_insn_reservation "bdver3_ssecvt_cvtps2pi_load" 8
633			 (and (eq_attr "cpu" "bdver3,bdver4")
634			      (and (eq_attr "type" "ssecvt")
635                                   (and (eq_attr "memory" "load")
636				        (and (match_operand:V4SF 1 "nonimmediate_operand")
637				             (ior (match_operand:V2SI 0 "register_operand")
638						  (match_operand:V4SI 0 "register_operand"))))))
639			 "bdver3-direct,bdver3-fpload,bdver3-fcvt")
;; Register source: latency 4.
640(define_insn_reservation "bdver3_ssecvt_cvtps2pi" 4
641			 (and (eq_attr "cpu" "bdver3,bdver4")
642			      (and (eq_attr "type" "ssecvt")
643				   (and (eq_attr "memory" "none")
644				        (and (match_operand:V4SF 1 "nonimmediate_operand")
645				             (ior (match_operand:V2SI 0 "register_operand")
646						  (match_operand:V4SI 0 "register_operand"))))))
647			 "bdver3-direct,bdver3-fpsched,bdver3-fcvt")
648
649;; SSE MUL, ADD, and MULADD.
;; FP multiply/add/FMA (types ssemul, sseadd, sseadd1, ssemuladd) execute
;; on either FMA pipe.  The 256-bit (V8SF/V4DF) variants are listed ahead
;; of the generic ones, whose conditions have no mode test.
;; NOTE(review): appears to rely on the earlier definition taking
;; precedence -- confirm.
;; 256-bit, memory source: latency 11, double decode.
650(define_insn_reservation "bdver3_ssemuladd_load_256" 11
651			 (and (eq_attr "cpu" "bdver3,bdver4")
652			      (and (eq_attr "type" "ssemul,sseadd,sseadd1,ssemuladd")
653				   (and (eq_attr "mode" "V8SF,V4DF")
654					(eq_attr "memory" "load"))))
655			 "bdver3-double,bdver3-fpload,bdver3-ffma")
;; 256-bit, register source: latency 7.
656(define_insn_reservation "bdver3_ssemuladd_256" 7
657			 (and (eq_attr "cpu" "bdver3,bdver4")
658			      (and (eq_attr "type" "ssemul,sseadd,sseadd1,ssemuladd")
659				   (and (eq_attr "mode" "V8SF,V4DF")
660					(eq_attr "memory" "none"))))
661			 "bdver3-double,bdver3-fpsched,bdver3-ffma")
;; Other modes, memory source: latency 10, direct decode.
662(define_insn_reservation "bdver3_ssemuladd_load" 10
663			 (and (eq_attr "cpu" "bdver3,bdver4")
664			      (and (eq_attr "type" "ssemul,sseadd,sseadd1,ssemuladd")
665				   (eq_attr "memory" "load")))
666			 "bdver3-direct,bdver3-fpload,bdver3-ffma")
;; Other modes, register source: latency 6.
667(define_insn_reservation "bdver3_ssemuladd" 6
668			 (and (eq_attr "cpu" "bdver3,bdver4")
669			      (and (eq_attr "type" "ssemul,sseadd,sseadd1,ssemuladd")
670				   (eq_attr "memory" "none")))
671			 "bdver3-direct,bdver3-fpsched,bdver3-ffma")
;; Integer multiply (type sseimul) runs on bdver3-fmma.
;; Memory source: latency 8.
672(define_insn_reservation "bdver3_sseimul_load" 8
673			 (and (eq_attr "cpu" "bdver3,bdver4")
674			      (and (eq_attr "type" "sseimul")
675				   (eq_attr "memory" "load")))
676			 "bdver3-direct,bdver3-fpload,bdver3-fmma")
;; Register source: latency 4.
677(define_insn_reservation "bdver3_sseimul" 4
678			 (and (eq_attr "cpu" "bdver3,bdver4")
679			      (and (eq_attr "type" "sseimul")
680				   (eq_attr "memory" "none")))
681			 "bdver3-direct,bdver3-fpsched,bdver3-fmma")
;; Integer add (type sseiadd) runs on bdver3-fmal.
;; Memory source: latency 6.
682(define_insn_reservation "bdver3_sseiadd_load" 6
683			 (and (eq_attr "cpu" "bdver3,bdver4")
684			      (and (eq_attr "type" "sseiadd")
685				   (eq_attr "memory" "load")))
686			 "bdver3-direct,bdver3-fpload,bdver3-fmal")
;; Register source: latency 2.
687(define_insn_reservation "bdver3_sseiadd" 2
688			 (and (eq_attr "cpu" "bdver3,bdver4")
689			      (and (eq_attr "type" "sseiadd")
690				   (eq_attr "memory" "none")))
691			 "bdver3-direct,bdver3-fpsched,bdver3-fmal")
692
693;; SSE DIV: no throughput information (assume same as amdfam10).
;; Every divide variant holds one FMA pipe (ffma0 or ffma1) for 17
;; consecutive cycles.  256-bit variants use double decode, the others
;; direct decode.
;; NOTE(review): the double-precision load and register forms share
;; latency 27, while single precision uses 27 (load) versus 24 (register)
;; -- confirm these values.
;; V4DF, memory source.
694(define_insn_reservation "bdver3_ssediv_double_load_256" 27
695			 (and (eq_attr "cpu" "bdver3,bdver4")
696			      (and (eq_attr "type" "ssediv")
697				   (and (eq_attr "mode" "V4DF")
698				        (eq_attr "memory" "load"))))
699			 "bdver3-double,bdver3-fpload,(bdver3-ffma0*17 | bdver3-ffma1*17)")
;; V4DF, register source.
700(define_insn_reservation "bdver3_ssediv_double_256" 27
701			 (and (eq_attr "cpu" "bdver3,bdver4")
702			      (and (eq_attr "type" "ssediv")
703				   (and (eq_attr "mode" "V4DF")
704				        (eq_attr "memory" "none"))))
705			 "bdver3-double,bdver3-fpsched,(bdver3-ffma0*17 | bdver3-ffma1*17)")
;; V8SF, memory source.
706(define_insn_reservation "bdver3_ssediv_single_load_256" 27
707			 (and (eq_attr "cpu" "bdver3,bdver4")
708			      (and (eq_attr "type" "ssediv")
709				   (and (eq_attr "mode" "V8SF")
710				        (eq_attr "memory" "load"))))
711			 "bdver3-double,bdver3-fpload,(bdver3-ffma0*17 | bdver3-ffma1*17)")
;; V8SF, register source.
712(define_insn_reservation "bdver3_ssediv_single_256" 24
713			 (and (eq_attr "cpu" "bdver3,bdver4")
714			      (and (eq_attr "type" "ssediv")
715				   (and (eq_attr "mode" "V8SF")
716				        (eq_attr "memory" "none"))))
717			 "bdver3-double,bdver3-fpsched,(bdver3-ffma0*17 | bdver3-ffma1*17)")
;; DF/V2DF, memory source.
718(define_insn_reservation "bdver3_ssediv_double_load" 27
719			 (and (eq_attr "cpu" "bdver3,bdver4")
720			      (and (eq_attr "type" "ssediv")
721				   (and (eq_attr "mode" "DF,V2DF")
722					(eq_attr "memory" "load"))))
723			 "bdver3-direct,bdver3-fpload,(bdver3-ffma0*17 | bdver3-ffma1*17)")
;; DF/V2DF, register source.
724(define_insn_reservation "bdver3_ssediv_double" 27
725			 (and (eq_attr "cpu" "bdver3,bdver4")
726			      (and (eq_attr "type" "ssediv")
727				   (and (eq_attr "mode" "DF,V2DF")
728					(eq_attr "memory" "none"))))
729			 "bdver3-direct,bdver3-fpsched,(bdver3-ffma0*17 | bdver3-ffma1*17)")
;; SF/V4SF, memory source.
730(define_insn_reservation "bdver3_ssediv_single_load" 27
731			 (and (eq_attr "cpu" "bdver3,bdver4")
732			      (and (eq_attr "type" "ssediv")
733				   (and (eq_attr "mode" "SF,V4SF")
734					(eq_attr "memory" "load"))))
735			 "bdver3-direct,bdver3-fpload,(bdver3-ffma0*17 | bdver3-ffma1*17)")
;; SF/V4SF, register source.
736(define_insn_reservation "bdver3_ssediv_single" 24
737			 (and (eq_attr "cpu" "bdver3,bdver4")
738			      (and (eq_attr "type" "ssediv")
739				   (and (eq_attr "mode" "SF,V4SF")
740					(eq_attr "memory" "none"))))
741			 "bdver3-direct,bdver3-fpsched,(bdver3-ffma0*17 | bdver3-ffma1*17)")
742
;; "sseins" type insns in TImode: latency 3, direct decode, then
;; bdver3-fxbar.  The condition has no "memory" test, so it covers all
;; memory forms.
743(define_insn_reservation "bdver3_sseins" 3
744                         (and (eq_attr "cpu" "bdver3,bdver4")
745                              (and (eq_attr "type" "sseins")
746                                   (eq_attr "mode" "TI")))
747                         "bdver3-direct,bdver3-fpsched,bdver3-fxbar")
748
749