1;; Copyright (C) 2010, Free Software Foundation, Inc.
2;;
3;; This file is part of GCC.
4;;
5;; GCC is free software; you can redistribute it and/or modify
6;; it under the terms of the GNU General Public License as published by
7;; the Free Software Foundation; either version 3, or (at your option)
8;; any later version.
9;;
10;; GCC is distributed in the hope that it will be useful,
11;; but WITHOUT ANY WARRANTY; without even the implied warranty of
12;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13;; GNU General Public License for more details.
14;;
15;; You should have received a copy of the GNU General Public License
16;; along with GCC; see the file COPYING3.  If not see
17;; <http://www.gnu.org/licenses/>.
18;;
19;; AMD bdver1 Scheduling
20;;
21;; The bdver1 contains four pipelined FP units, two integer units and
22;; two address generation units.
23;;
;; The predecode logic determines the boundaries of instructions in the
;; 64-byte cache line.  The cache-line straddling problem of K6 might
;; therefore be an issue here as well, but it is not noted in the
;; documentation.
;;
;; Three DirectPath instruction decoders and only one VectorPath decoder
;; are available.  They can decode three DirectPath instructions or one
;; VectorPath instruction per cycle.
31;;
32;; The load/store queue unit is not attached to the schedulers but
33;; communicates with all the execution units separately instead.
34
35
;; Decode class of an insn on bdver1.  The default is "direct"; patterns
;; that decode differently are expected to override the attribute.
(define_attr "bdver1_decode" "direct,vector,double"
  (const_string "direct"))

;; Separate automata for decode, integer, load/store, multiply and FP.
(define_automaton "bdver1,bdver1_int,bdver1_load,bdver1_mult,bdver1_fp")

;; Three direct decoders plus the dedicated vector decoder, all in the
;; "bdver1" automaton.
(define_cpu_unit
  "bdver1-decode0,bdver1-decode1,bdver1-decode2,bdver1-decodev"
  "bdver1")
45
;; Model the fact that a double-decoded instruction may take 2 cycles
;; to decode when decoder2 is used in one cycle and decoder0 in the next
;; (this is needed to allow a throughput of 1.5 double-decoded
;; instructions per cycle).
;;
;; In order to avoid a dependence between the reservation of the decoder
;; and other units, we model the decoder as a two-stage fully pipelined
;; unit, and only a double-decoded instruction may occupy a unit in the
;; first cycle.  With this scheme, however, two double instructions could
;; be issued in cycle 0.
;;
;; Avoid this by using a presence set requiring decoder0 to be allocated
;; too.  Vector-decoded instructions then cannot be issued when modeled
;; as consuming decoder0+decoder1+decoder2.
;; We solve that with a specialized vector decoder unit and an exclusion
;; set.
;; decode2 can be reserved only together with decode0.
(presence_set "bdver1-decode2" "bdver1-decode0")
;; The vector decoder cannot be reserved alongside any direct decoder.
(exclusion_set "bdver1-decodev"
  "bdver1-decode0,bdver1-decode1,bdver1-decode2")

;; Vector path: one idle cycle, then the vector decoder.
(define_reservation "bdver1-vector" "nothing,bdver1-decodev")
;; Direct path pinned to decoder 1.
(define_reservation "bdver1-direct1" "nothing,bdver1-decode1")
;; Direct path on any one of the three direct decoders.
(define_reservation "bdver1-direct"
  "nothing,(bdver1-decode0 | bdver1-decode1 | bdver1-decode2)")
;; A double instruction behaves like two direct instructions: either it
;; straddles two cycles (decode2, then decode0) or it takes a pair of
;; adjacent decoders in the second cycle.
(define_reservation "bdver1-double"
  "((bdver1-decode2,bdver1-decode0)
    | (nothing,(bdver1-decode0 + bdver1-decode1))
    | (nothing,(bdver1-decode1 + bdver1-decode2)))")
72
73
;; Two integer execution units; "bdver1-ieu" takes either one.
(define_cpu_unit "bdver1-ieu0,bdver1-ieu1" "bdver1_int")
(define_reservation "bdver1-ieu" "(bdver1-ieu0 | bdver1-ieu1)")

;; Two address generation units; "bdver1-agu" takes either one.
(define_cpu_unit "bdver1-agu0,bdver1-agu1" "bdver1_int")
(define_reservation "bdver1-agu" "(bdver1-agu0 | bdver1-agu1)")

;; Integer multiplier, kept in its own automaton.
(define_cpu_unit "bdver1-mult" "bdver1_mult")

;; Two load/store ports.
(define_cpu_unit "bdver1-load0,bdver1-load1" "bdver1_load")

;; A load: AGU cycle, one load port, then one idle cycle.
(define_reservation "bdver1-load"
  "bdver1-agu,(bdver1-load0 | bdver1-load1),nothing")
;; 128bit SSE instructions issue two loads at once.
(define_reservation "bdver1-load2"
  "bdver1-agu,(bdver1-load0 + bdver1-load1),nothing")

;; A store occupies one of the load/store ports.
(define_reservation "bdver1-store" "(bdver1-load0 | bdver1-load1)")
;; 128bit SSE instructions issue two stores at once.
(define_reservation "bdver1-store2" "(bdver1-load0 + bdver1-load1)")
95
;; The FP operations start to execute at stage 12 in the pipeline, while
;; integer operations start to execute at stage 9 for athlon and 11 for K8.
;; Compensate for the difference for athlon because it results in
;; significantly smaller automata.
;; NOTE: the above information was just copied from athlon.md, and was not
;; actually verified for bdver1.
;; Three idle cycles standing in for the FP scheduler stages.
(define_reservation "bdver1-fpsched" "nothing,nothing,nothing")
;; The floating point loads: the scheduler delay overlapped with a
;; single or a paired load.
(define_reservation "bdver1-fpload" "(bdver1-fpsched + bdver1-load)")
(define_reservation "bdver1-fpload2" "(bdver1-fpsched + bdver1-load2)")

;; Four FP units.
(define_cpu_unit
  "bdver1-ffma0,bdver1-ffma1,bdver1-fmal0,bdver1-fmal1"
  "bdver1_fp")

;; Named views of the FP pipes.  fcvt and fmma are both tied to ffma0,
;; fxbar to ffma1 and fsto to fmal1.
(define_reservation "bdver1-ffma" "(bdver1-ffma0 | bdver1-ffma1)")
(define_reservation "bdver1-fcvt" "bdver1-ffma0")
(define_reservation "bdver1-fmma" "bdver1-ffma0")
(define_reservation "bdver1-fxbar" "bdver1-ffma1")
(define_reservation "bdver1-fmal" "(bdver1-fmal0 | bdver1-fmal1)")
(define_reservation "bdver1-fsto" "bdver1-fmal1")

;; Vector operations usually consume many of the pipes; model them as
;; taking all four at once.
(define_reservation "bdver1-fvector"
  "(bdver1-ffma0 + bdver1-ffma1
    + bdver1-fmal0 + bdver1-fmal1)")
123
124;; Jump instructions are executed in the branch unit completely transparent to us.
;; CALL: double decode, then an AGU followed by an IEU; latency 0.
(define_insn_reservation "bdver1_call" 0
  (and (eq_attr "cpu" "bdver1,bdver2")
       (eq_attr "type" "call,callv"))
  "bdver1-double,bdver1-agu,bdver1-ieu")

;; PUSH: AGU followed by a store port.
;; NOTE(review): the earlier comment said "PUSH mem is double path", yet
;; the reservation uses the direct decoder -- confirm which is intended.
(define_insn_reservation "bdver1_push" 1
  (and (eq_attr "cpu" "bdver1,bdver2")
       (eq_attr "type" "push"))
  "bdver1-direct,bdver1-agu,bdver1-store")

;; POP: an IEU in parallel with a load.
;; NOTE(review): the earlier comment said "POP r16/mem are double path",
;; yet the reservation uses the direct decoder -- confirm.
(define_insn_reservation "bdver1_pop" 1
  (and (eq_attr "cpu" "bdver1,bdver2")
       (eq_attr "type" "pop"))
  "bdver1-direct,(bdver1-ieu+bdver1-load)")

;; LEAVE: no latency info so far, assume the same as amdfam10.
(define_insn_reservation "bdver1_leave" 3
  (and (eq_attr "cpu" "bdver1,bdver2")
       (eq_attr "type" "leave"))
  "bdver1-vector,(bdver1-ieu+bdver1-load)")

;; LEA executes in the AGU with 1 cycle latency on BDVER1.
(define_insn_reservation "bdver1_lea" 1
  (and (eq_attr "cpu" "bdver1,bdver2")
       (eq_attr "type" "lea"))
  "bdver1-direct,bdver1-agu,nothing")
149
150;; MUL executes in special multiplier unit attached to IEU1.
;; Register-only DImode multiply.  Listed ahead of the generic
;; register-only entry, whose condition it overlaps.
(define_insn_reservation "bdver1_imul_DI" 6
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "imul")
            (and (eq_attr "mode" "DI")
                 (eq_attr "memory" "none,unknown"))))
  "bdver1-direct1,bdver1-ieu1,bdver1-mult,nothing,bdver1-ieu1")

;; Register-only multiply in any other mode.
(define_insn_reservation "bdver1_imul" 4
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "imul")
            (eq_attr "memory" "none,unknown")))
  "bdver1-direct1,bdver1-ieu1,bdver1-mult,bdver1-ieu1")

;; DImode multiply with a memory operand.
(define_insn_reservation "bdver1_imul_mem_DI" 10
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "imul")
            (and (eq_attr "mode" "DI")
                 (eq_attr "memory" "load,both"))))
  "bdver1-direct1,bdver1-load,bdver1-ieu,bdver1-mult,nothing,bdver1-ieu")

;; Multiply with a memory operand in any other mode.
(define_insn_reservation "bdver1_imul_mem" 8
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "imul")
            (eq_attr "memory" "load,both")))
  "bdver1-direct1,bdver1-load,bdver1-ieu,bdver1-mult,bdver1-ieu")
173
;; IDIV cannot execute in parallel with other instructions.  Treating it
;; as a short-latency vector instruction is a good approximation that keeps
;; the scheduler from trying too hard to hide its latency by overlapping it
;; with other instructions.
;; ??? Experiments show that the IDIV can overlap with roughly 6 cycles
;; of the other code.
;; Register-only IDIV: holds ieu0 for six cycles while, after the FP
;; scheduler delay, all FP pipes are taken as well.
(define_insn_reservation "bdver1_idiv" 6
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "idiv")
            (eq_attr "memory" "none,unknown")))
  "bdver1-vector,(bdver1-ieu0*6+(bdver1-fpsched,bdver1-fvector))")

;; IDIV with a memory operand: the load precedes the six ieu0 cycles.
(define_insn_reservation "bdver1_idiv_mem" 10
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "idiv")
            (eq_attr "memory" "load,both")))
  "bdver1-vector,((bdver1-load,bdver1-ieu0*6)+(bdver1-fpsched,bdver1-fvector))")

;; The parallelism of string instructions is not documented.  Model them
;; the same way as IDIV to create smaller automata.  This probably does
;; not matter much.  The same heuristic is used for bdver1 as for
;; amdfam10 and K8 with IDIV.
(define_insn_reservation "bdver1_str" 6
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "str")
            (eq_attr "memory" "load,both,store")))
  "bdver1-vector,bdver1-load,bdver1-ieu0*6")
200
201;; Integer instructions.
;; Register-only integer ops, direct decode.
(define_insn_reservation "bdver1_idirect" 1
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "bdver1_decode" "direct")
            (and (eq_attr "unit" "integer,unknown")
                 (eq_attr "memory" "none,unknown"))))
  "bdver1-direct,bdver1-ieu")

;; Register-only integer ops, vector decode: two IEU cycles.
(define_insn_reservation "bdver1_ivector" 2
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "bdver1_decode" "vector")
            (and (eq_attr "unit" "integer,unknown")
                 (eq_attr "memory" "none,unknown"))))
  "bdver1-vector,bdver1-ieu,bdver1-ieu")

;; Plain integer move from memory: load only, no IEU cycle.
(define_insn_reservation "bdver1_idirect_loadmov" 4
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "imov")
            (eq_attr "memory" "load")))
  "bdver1-direct,bdver1-load")

;; Integer op with a memory source, direct decode.
(define_insn_reservation "bdver1_idirect_load" 5
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "bdver1_decode" "direct")
            (and (eq_attr "unit" "integer,unknown")
                 (eq_attr "memory" "load"))))
  "bdver1-direct,bdver1-load,bdver1-ieu")

;; Integer op with a memory source, vector decode.
(define_insn_reservation "bdver1_ivector_load" 6
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "bdver1_decode" "vector")
            (and (eq_attr "unit" "integer,unknown")
                 (eq_attr "memory" "load"))))
  "bdver1-vector,bdver1-load,bdver1-ieu,bdver1-ieu")

;; Plain integer move to memory: AGU, then a store port.
(define_insn_reservation "bdver1_idirect_movstore" 4
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "imov")
            (eq_attr "memory" "store")))
  "bdver1-direct,bdver1-agu,bdver1-store")

;; Read-modify-write, direct decode: load, IEU, then two store cycles.
(define_insn_reservation "bdver1_idirect_both" 4
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "bdver1_decode" "direct")
            (and (eq_attr "unit" "integer,unknown")
                 (eq_attr "memory" "both"))))
  "bdver1-direct,bdver1-load,bdver1-ieu,bdver1-store,bdver1-store")

;; Read-modify-write, vector decode: load, two IEU cycles, one store.
(define_insn_reservation "bdver1_ivector_both" 5
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "bdver1_decode" "vector")
            (and (eq_attr "unit" "integer,unknown")
                 (eq_attr "memory" "both"))))
  "bdver1-vector,bdver1-load,bdver1-ieu,bdver1-ieu,bdver1-store")

;; Integer op storing to memory, direct decode.
(define_insn_reservation "bdver1_idirect_store" 4
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "bdver1_decode" "direct")
            (and (eq_attr "unit" "integer,unknown")
                 (eq_attr "memory" "store"))))
  "bdver1-direct,(bdver1-ieu+bdver1-agu),bdver1-store")

;; Integer op storing to memory, vector decode.
(define_insn_reservation "bdver1_ivector_store" 5
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "bdver1_decode" "vector")
            (and (eq_attr "unit" "integer,unknown")
                 (eq_attr "memory" "store"))))
  "bdver1-vector,(bdver1-ieu+bdver1-agu),bdver1-ieu,bdver1-store")
267
268;; BDVER1 floating point units.
;; x87 moves.  The XFmode variants precede the generic load/store
;; entries, and those precede the catch-all register move.
(define_insn_reservation "bdver1_fldxf" 13
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "fmov")
            (and (eq_attr "memory" "load")
                 (eq_attr "mode" "XF"))))
  "bdver1-vector,bdver1-fpload2,bdver1-fvector*9")

(define_insn_reservation "bdver1_fld" 5
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "fmov")
            (eq_attr "memory" "load")))
  "bdver1-direct,bdver1-fpload,bdver1-ffma")

(define_insn_reservation "bdver1_fstxf" 8
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "fmov")
            (and (eq_attr "memory" "store,both")
                 (eq_attr "mode" "XF"))))
  "bdver1-vector,(bdver1-fpsched+bdver1-agu),(bdver1-store2+(bdver1-fvector*6))")

(define_insn_reservation "bdver1_fst" 2
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "fmov")
            (eq_attr "memory" "store,both")))
  "bdver1-double,(bdver1-fpsched+bdver1-agu),(bdver1-fsto+bdver1-store)")

;; FISTP/FISTTP use the same resources as an ordinary x87 store.
(define_insn_reservation "bdver1_fist" 2
  (and (eq_attr "cpu" "bdver1,bdver2")
       (eq_attr "type" "fistp,fisttp"))
  "bdver1-double,(bdver1-fpsched+bdver1-agu),(bdver1-fsto+bdver1-store)")

;; Any remaining fmov (register to register).
(define_insn_reservation "bdver1_fmov_bdver1" 2
  (and (eq_attr "cpu" "bdver1,bdver2")
       (eq_attr "type" "fmov"))
  "bdver1-direct,bdver1-fpsched,bdver1-ffma")

;; x87 arithmetic: each load form precedes its generic form.
(define_insn_reservation "bdver1_fadd_load" 10
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "fop")
            (eq_attr "memory" "load")))
  "bdver1-direct,bdver1-fpload,bdver1-ffma")

(define_insn_reservation "bdver1_fadd" 6
  (and (eq_attr "cpu" "bdver1,bdver2")
       (eq_attr "type" "fop"))
  "bdver1-direct,bdver1-fpsched,bdver1-ffma")

(define_insn_reservation "bdver1_fmul_load" 10
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "fmul")
            (eq_attr "memory" "load")))
  "bdver1-double,bdver1-fpload,bdver1-ffma")

(define_insn_reservation "bdver1_fmul" 6
  (and (eq_attr "cpu" "bdver1,bdver2")
       (eq_attr "type" "fmul"))
  "bdver1-direct,bdver1-fpsched,bdver1-ffma")

(define_insn_reservation "bdver1_fsgn" 2
  (and (eq_attr "cpu" "bdver1,bdver2")
       (eq_attr "type" "fsgn"))
  "bdver1-direct,bdver1-fpsched,bdver1-ffma")

(define_insn_reservation "bdver1_fdiv_load" 46
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "fdiv")
            (eq_attr "memory" "load")))
  "bdver1-direct,bdver1-fpload,bdver1-ffma")

(define_insn_reservation "bdver1_fdiv" 42
  (and (eq_attr "cpu" "bdver1,bdver2")
       (eq_attr "type" "fdiv"))
  "bdver1-direct,bdver1-fpsched,bdver1-ffma")

;; fpspc-type insns with a memory source: vector decoded, all FP pipes
;; taken.
(define_insn_reservation "bdver1_fpspc_load" 103
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "fpspc")
            (eq_attr "memory" "load")))
  "bdver1-vector,bdver1-fpload,bdver1-fvector")
;; fpspc-type insns without a memory source.  This is the generic
;; fallback for type "fpspc": the load form is matched by the earlier
;; bdver1_fpspc_load entry, so no memory test is made here and the plain
;; FP scheduler delay (bdver1-fpsched) replaces the FP load.  The
;; condition used to duplicate bdver1_fpspc_load exactly, which made this
;; entry unreachable and left register-only fpspc insns without any
;; reservation.
(define_insn_reservation "bdver1_fpspc" 100
  (and (eq_attr "cpu" "bdver1,bdver2")
       (eq_attr "type" "fpspc"))
  "bdver1-vector,bdver1-fpsched,bdver1-fvector")
;; FCMOV: vector decoded, takes every FP pipe.
(define_insn_reservation "bdver1_fcmov_load" 17
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "fcmov")
            (eq_attr "memory" "load")))
  "bdver1-vector,bdver1-fpload,bdver1-fvector")

(define_insn_reservation "bdver1_fcmov" 15
  (and (eq_attr "cpu" "bdver1,bdver2")
       (eq_attr "type" "fcmov"))
  "bdver1-vector,bdver1-fpsched,bdver1-fvector")

;; Double-decoded x87 compares; listed ahead of the generic fcmp entries.
(define_insn_reservation "bdver1_fcomi_load" 6
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "fcmp")
            (and (eq_attr "bdver1_decode" "double")
                 (eq_attr "memory" "load"))))
  "bdver1-double,bdver1-fpload,(bdver1-ffma | bdver1-fsto)")

(define_insn_reservation "bdver1_fcomi" 2
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "bdver1_decode" "double")
            (eq_attr "type" "fcmp")))
  "bdver1-double,bdver1-fpsched,(bdver1-ffma | bdver1-fsto)")

;; Remaining x87 compares.
(define_insn_reservation "bdver1_fcom_load" 6
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "fcmp")
            (eq_attr "memory" "load")))
  "bdver1-direct,bdver1-fpload,bdver1-ffma")

(define_insn_reservation "bdver1_fcom" 2
  (and (eq_attr "cpu" "bdver1,bdver2")
       (eq_attr "type" "fcmp"))
  "bdver1-direct,bdver1-fpsched,bdver1-ffma")

;; FXCH.
(define_insn_reservation "bdver1_fxch" 2
  (and (eq_attr "cpu" "bdver1,bdver2")
       (eq_attr "type" "fxch"))
  "bdver1-direct,bdver1-fpsched,bdver1-ffma")
373
374;; SSE loads.
;; Unaligned vector loads (movu = 1); the VEX-prefixed 128-bit form is
;; listed first.
(define_insn_reservation "bdver1_ssevector_avx128_unaligned_load" 4
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssemov")
            (and (eq_attr "prefix" "vex")
                 (and (eq_attr "movu" "1")
                      (and (eq_attr "mode" "V4SF,V2DF")
                           (eq_attr "memory" "load"))))))
  "bdver1-direct,bdver1-fpload")

(define_insn_reservation "bdver1_ssevector_avx256_unaligned_load" 5
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssemov")
            (and (eq_attr "movu" "1")
                 (and (eq_attr "mode" "V8SF,V4DF")
                      (eq_attr "memory" "load")))))
  "bdver1-double,bdver1-fpload")

(define_insn_reservation "bdver1_ssevector_sse128_unaligned_load" 4
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssemov")
            (and (eq_attr "movu" "1")
                 (and (eq_attr "mode" "V4SF,V2DF")
                      (eq_attr "memory" "load")))))
  "bdver1-direct,bdver1-fpload,bdver1-fmal")

;; Remaining vector loads, by prefix and mode.
(define_insn_reservation "bdver1_ssevector_avx128_load" 4
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssemov")
            (and (eq_attr "prefix" "vex")
                 (and (eq_attr "mode" "V4SF,V2DF,TI")
                      (eq_attr "memory" "load")))))
  "bdver1-direct,bdver1-fpload,bdver1-fmal")

(define_insn_reservation "bdver1_ssevector_avx256_load" 5
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssemov")
            (and (eq_attr "mode" "V8SF,V4DF,OI")
                 (eq_attr "memory" "load"))))
  "bdver1-double,bdver1-fpload,bdver1-fmal")

(define_insn_reservation "bdver1_ssevector_sse128_load" 4
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssemov")
            (and (eq_attr "mode" "V4SF,V2DF,TI")
                 (eq_attr "memory" "load"))))
  "bdver1-direct,bdver1-fpload")

;; Scalar loads.
(define_insn_reservation "bdver1_ssescalar_movq_load" 4
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssemov")
            (and (eq_attr "mode" "DI")
                 (eq_attr "memory" "load"))))
  "bdver1-direct,bdver1-fpload,bdver1-fmal")

(define_insn_reservation "bdver1_ssescalar_vmovss_load" 4
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssemov")
            (and (eq_attr "prefix" "vex")
                 (and (eq_attr "mode" "SF")
                      (eq_attr "memory" "load")))))
  "bdver1-direct,bdver1-fpload")

(define_insn_reservation "bdver1_ssescalar_sse128_load" 4
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssemov")
            (and (eq_attr "mode" "SF,DF")
                 (eq_attr "memory" "load"))))
  "bdver1-direct,bdver1-fpload, bdver1-ffma")

;; Catch-all for every other MMX/SSE move from memory.
(define_insn_reservation "bdver1_mmxsse_load" 4
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "mmxmov,ssemov")
            (eq_attr "memory" "load")))
  "bdver1-direct,bdver1-fpload, bdver1-fmal")
440
441;; SSE stores.
;; 256-bit store: double decoded, the store stage is held two cycles.
(define_insn_reservation "bdver1_sse_store_avx256" 5
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssemov")
            (and (eq_attr "mode" "V8SF,V4DF,OI")
                 (eq_attr "memory" "store,both"))))
  "bdver1-double,(bdver1-fpsched+bdver1-agu),((bdver1-fsto+bdver1-store)*2)")

;; 128-bit store: direct decoded, the store stage is held two cycles.
(define_insn_reservation "bdver1_sse_store" 4
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssemov")
            (and (eq_attr "mode" "V4SF,V2DF,TI")
                 (eq_attr "memory" "store,both"))))
  "bdver1-direct,(bdver1-fpsched+bdver1-agu),((bdver1-fsto+bdver1-store)*2)")

;; Every other MMX/SSE store: a single store-stage cycle.
(define_insn_reservation "bdver1_mmxsse_store_short" 4
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "mmxmov,ssemov")
            (eq_attr "memory" "store,both")))
  "bdver1-direct,(bdver1-fpsched+bdver1-agu),(bdver1-fsto+bdver1-store)")
459
460;; Register moves.
;; 256-bit register move: double decoded, runs on an FMAL pipe.
(define_insn_reservation "bdver1_ssevector_avx256" 3
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssemov")
            (and (eq_attr "mode" "V8SF,V4DF,OI")
                 (eq_attr "memory" "none"))))
  "bdver1-double,bdver1-fpsched,bdver1-fmal")

;; Scalar SF/DF register move: runs on an FFMA pipe.
(define_insn_reservation "bdver1_movss_movsd" 2
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssemov")
            (and (eq_attr "mode" "SF,DF")
                 (eq_attr "memory" "none"))))
  "bdver1-direct,bdver1-fpsched,bdver1-ffma")

;; Every other MMX/SSE register move.
(define_insn_reservation "bdver1_mmxssemov" 2
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "mmxmov,ssemov")
            (eq_attr "memory" "none")))
  "bdver1-direct,bdver1-fpsched,bdver1-fmal")
478;; SSE logs.
;; V8SF logic ops are double decoded and run on FMAL; they are listed
;; ahead of the generic sselog entries.
(define_insn_reservation "bdver1_sselog_load_256" 7
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "sselog,sselog1")
            (and (eq_attr "mode" "V8SF")
                 (eq_attr "memory" "load"))))
  "bdver1-double,bdver1-fpload,bdver1-fmal")

(define_insn_reservation "bdver1_sselog_256" 3
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "sselog,sselog1")
            (eq_attr "mode" "V8SF")))
  "bdver1-double,bdver1-fpsched,bdver1-fmal")

;; All other logic ops run on the crossbar (fxbar).
(define_insn_reservation "bdver1_sselog_load" 6
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "sselog,sselog1")
            (eq_attr "memory" "load")))
  "bdver1-direct,bdver1-fpload,bdver1-fxbar")

(define_insn_reservation "bdver1_sselog" 2
  (and (eq_attr "cpu" "bdver1,bdver2")
       (eq_attr "type" "sselog,sselog1"))
  "bdver1-direct,bdver1-fpsched,bdver1-fxbar")
499
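;; PCMP actually executes in FMAL.
;; NOTE(review): the ssecmp reservations that follow reserve bdver1-ffma,
;; not bdver1-fmal -- confirm which unit is intended.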
;; SSE compares: direct decoded, reserve an FFMA pipe.
(define_insn_reservation "bdver1_ssecmp_load" 6
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssecmp")
            (eq_attr "memory" "load")))
  "bdver1-direct,bdver1-fpload,bdver1-ffma")

(define_insn_reservation "bdver1_ssecmp" 2
  (and (eq_attr "cpu" "bdver1,bdver2")
       (eq_attr "type" "ssecmp"))
  "bdver1-direct,bdver1-fpsched,bdver1-ffma")

;; COMI-type compares: double decoded, FFMA or the store pipe.
(define_insn_reservation "bdver1_ssecomi_load" 6
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssecomi")
            (eq_attr "memory" "load")))
  "bdver1-double,bdver1-fpload,(bdver1-ffma | bdver1-fsto)")

(define_insn_reservation "bdver1_ssecomi" 2
  (and (eq_attr "cpu" "bdver1,bdver2")
       (eq_attr "type" "ssecomi"))
  "bdver1-double,bdver1-fpsched,(bdver1-ffma | bdver1-fsto)")
519
;; Conversions behave very irregularly and the scheduling is critical here.
;; Take each instruction separately.
522
523;; 256 bit conversion.
;; Conversions where operand 0 or operand 1 has a 256-bit mode
;; (V4DF, V8SF or V8SI): vector decoded, all FP pipes taken.
(define_insn_reservation "bdver1_vcvtX2Y_avx256_load" 8
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "memory" "load")
                 (ior (ior (match_operand:V4DF 0 "register_operand")
                           (ior (match_operand:V8SF 0 "register_operand")
                                (match_operand:V8SI 0 "register_operand")))
                      (ior (match_operand:V4DF 1 "nonimmediate_operand")
                           (ior (match_operand:V8SF 1 "nonimmediate_operand")
                                (match_operand:V8SI 1 "nonimmediate_operand")))))))
  "bdver1-vector,bdver1-fpload,bdver1-fvector")

(define_insn_reservation "bdver1_vcvtX2Y_avx256" 4
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "memory" "none")
                 (ior (ior (match_operand:V4DF 0 "register_operand")
                           (ior (match_operand:V8SF 0 "register_operand")
                                (match_operand:V8SI 0 "register_operand")))
                      (ior (match_operand:V4DF 1 "nonimmediate_operand")
                           (ior (match_operand:V8SF 1 "nonimmediate_operand")
                                (match_operand:V8SI 1 "nonimmediate_operand")))))))
  "bdver1-vector,bdver1-fpsched,bdver1-fvector")
546;; CVTSS2SD, CVTSD2SS.
;; CVTSS2SD, CVTSD2SS: direct decoded, executed on the fcvt pipe.
(define_insn_reservation "bdver1_ssecvt_cvtss2sd_load" 8
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "mode" "SF,DF")
                 (eq_attr "memory" "load"))))
  "bdver1-direct,bdver1-fpload,bdver1-fcvt")

(define_insn_reservation "bdver1_ssecvt_cvtss2sd" 4
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "mode" "SF,DF")
                 (eq_attr "memory" "none"))))
  "bdver1-direct,bdver1-fpsched,bdver1-fcvt")

;; CVTSI2SD, CVTSI2SS, CVTSI2SDQ, CVTSI2SSQ.  The memory form is direct
;; decoded; the register form is double decoded.
(define_insn_reservation "bdver1_sseicvt_cvtsi2sd_load" 8
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "sseicvt")
            (and (eq_attr "mode" "SF,DF")
                 (eq_attr "memory" "load"))))
  "bdver1-direct,bdver1-fpload,bdver1-fcvt")

(define_insn_reservation "bdver1_sseicvt_cvtsi2sd" 4
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "sseicvt")
            (and (eq_attr "mode" "SF,DF")
                 (eq_attr "memory" "none"))))
  "bdver1-double,bdver1-fpsched,(nothing | bdver1-fcvt)")
572;; CVTPD2PS.
;; CVTPD2PS: V2DF source, V4SF destination.
(define_insn_reservation "bdver1_ssecvt_cvtpd2ps_load" 8
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "memory" "load")
                 (and (match_operand:V4SF 0 "register_operand")
                      (match_operand:V2DF 1 "nonimmediate_operand")))))
  "bdver1-double,bdver1-fpload,(bdver1-fxbar | bdver1-fcvt)")

(define_insn_reservation "bdver1_ssecvt_cvtpd2ps" 4
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "memory" "none")
                 (and (match_operand:V4SF 0 "register_operand")
                      (match_operand:V2DF 1 "nonimmediate_operand")))))
  "bdver1-double,bdver1-fpsched,(bdver1-fxbar | bdver1-fcvt)")

;; CVTPI2PS, CVTDQ2PS: V2SI or V4SI source, V4SF destination.
(define_insn_reservation "bdver1_ssecvt_cvtdq2ps_load" 8
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "memory" "load")
                 (and (match_operand:V4SF 0 "register_operand")
                      (ior (match_operand:V2SI 1 "nonimmediate_operand")
                           (match_operand:V4SI 1 "nonimmediate_operand"))))))
  "bdver1-direct,bdver1-fpload,bdver1-fcvt")

(define_insn_reservation "bdver1_ssecvt_cvtdq2ps" 4
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "memory" "none")
                 (and (match_operand:V4SF 0 "register_operand")
                      (ior (match_operand:V2SI 1 "nonimmediate_operand")
                           (match_operand:V4SI 1 "nonimmediate_operand"))))))
  "bdver1-direct,bdver1-fpsched,bdver1-fcvt")

;; CVTDQ2PD: V4SI source, V2DF destination.
(define_insn_reservation "bdver1_ssecvt_cvtdq2pd_load" 8
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "memory" "load")
                 (and (match_operand:V2DF 0 "register_operand")
                      (match_operand:V4SI 1 "nonimmediate_operand")))))
  "bdver1-double,bdver1-fpload,(bdver1-fxbar | bdver1-fcvt)")

(define_insn_reservation "bdver1_ssecvt_cvtdq2pd" 4
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "memory" "none")
                 (and (match_operand:V2DF 0 "register_operand")
                      (match_operand:V4SI 1 "nonimmediate_operand")))))
  "bdver1-double,bdver1-fpsched,(bdver1-fxbar | bdver1-fcvt)")

;; CVTPS2PD, CVTPI2PD with a memory source: V2SI or V4SF source,
;; V2DF destination.
(define_insn_reservation "bdver1_ssecvt_cvtps2pd_load" 6
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "memory" "load")
                 (and (match_operand:V2DF 0 "register_operand")
                      (ior (match_operand:V2SI 1 "nonimmediate_operand")
                           (match_operand:V4SF 1 "nonimmediate_operand"))))))
  "bdver1-double,bdver1-fpload,(bdver1-fxbar | bdver1-fcvt)")
;; CVTPS2PD, CVTPI2PD, register-source form: V2SI or V4SF source, V2DF
;; destination, no memory operand.  The memory test is "none" so that
;; this entry pairs with bdver1_ssecvt_cvtps2pd_load the way every other
;; conversion pairs its register form with its load form.  It used to
;; test "load", duplicating the _load condition, which made this entry
;; unreachable and left the register form without a reservation.
(define_insn_reservation "bdver1_ssecvt_cvtps2pd" 2
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "memory" "none")
                 (and (match_operand:V2DF 0 "register_operand")
                      (ior (match_operand:V2SI 1 "nonimmediate_operand")
                           (match_operand:V4SF 1 "nonimmediate_operand"))))))
  "bdver1-double,bdver1-fpsched,(bdver1-fxbar | bdver1-fcvt)")
636;; CVTSD2SI, CVTSD2SIQ, CVTSS2SI, CVTSS2SIQ, CVTTSD2SI, CVTTSD2SIQ, CVTTSS2SI, CVTTSS2SIQ.
637(define_insn_reservation "bdver1_ssecvt_cvtsX2si_load" 8
638			 (and (eq_attr "cpu" "bdver1,bdver2")
639			      (and (eq_attr "type" "sseicvt")
640				   (and (eq_attr "mode" "SI,DI")
641					(eq_attr "memory" "load"))))
642			 "bdver1-double,bdver1-fpload,(bdver1-fcvt | bdver1-fsto)")
643(define_insn_reservation "bdver1_ssecvt_cvtsX2si" 4
644			 (and (eq_attr "cpu" "bdver1,bdver2")
645			      (and (eq_attr "type" "sseicvt")
646				   (and (eq_attr "mode" "SI,DI")
647					(eq_attr "memory" "none"))))
648			 "bdver1-double,bdver1-fpsched,(bdver1-fcvt | bdver1-fsto)")
649;; CVTPD2PI, CVTTPD2PI.
650(define_insn_reservation "bdver1_ssecvt_cvtpd2pi_load" 8
651			 (and (eq_attr "cpu" "bdver1,bdver2")
652			      (and (eq_attr "type" "ssecvt")
653				   (and (eq_attr "memory" "load")
654				        (and (match_operand:V2DF 1 "nonimmediate_operand")
655					     (match_operand:V2SI 0 "register_operand")))))
656			 "bdver1-double,bdver1-fpload,(bdver1-fcvt | bdver1-fxbar)")
657(define_insn_reservation "bdver1_ssecvt_cvtpd2pi" 4
658			 (and (eq_attr "cpu" "bdver1,bdver2")
659			      (and (eq_attr "type" "ssecvt")
660				   (and (eq_attr "memory" "none")
661				        (and (match_operand:V2DF 1 "nonimmediate_operand")
662					     (match_operand:V2SI 0 "register_operand")))))
663			 "bdver1-double,bdver1-fpsched,(bdver1-fcvt | bdver1-fxbar)")
664;; CVTPD2DQ, CVTTPD2DQ.
665(define_insn_reservation "bdver1_ssecvt_cvtpd2dq_load" 6
666			 (and (eq_attr "cpu" "bdver1,bdver2")
667			      (and (eq_attr "type" "ssecvt")
668				   (and (eq_attr "memory" "load")
669				        (and (match_operand:V2DF 1 "nonimmediate_operand")
670					     (match_operand:V4SI 0 "register_operand")))))
671			 "bdver1-double,bdver1-fpload,(bdver1-fcvt | bdver1-fxbar)")
672(define_insn_reservation "bdver1_ssecvt_cvtpd2dq" 2
673			 (and (eq_attr "cpu" "bdver1,bdver2")
674			      (and (eq_attr "type" "ssecvt")
675				   (and (eq_attr "memory" "none")
676				        (and (match_operand:V2DF 1 "nonimmediate_operand")
677					     (match_operand:V4SI 0 "register_operand")))))
678			 "bdver1-double,bdver1-fpsched,(bdver1-fcvt | bdver1-fxbar)")
679;; CVTPS2PI, CVTTPS2PI, CVTPS2DQ, CVTTPS2DQ.
;; Load form: "ssecvt" insns with a V4SF operand 1 and a V2SI or V4SI
;; register operand 0 whose "memory" attribute is "load".  Default latency
;; is 8 cycles.  The insn goes through bdver1-direct and bdver1-fpload,
;; then takes bdver1-fcvt.
(define_insn_reservation "bdver1_ssecvt_cvtps2pi_load" 8
			 (and (eq_attr "cpu" "bdver1,bdver2")
			      (and (eq_attr "type" "ssecvt")
				   (and (eq_attr "memory" "load")
				        (and (match_operand:V4SF 1 "nonimmediate_operand")
					     (ior (match_operand:V2SI 0 "register_operand")
						  (match_operand:V4SI 0 "register_operand"))))))
			 "bdver1-direct,bdver1-fpload,bdver1-fcvt")
;; Register form: "ssecvt" insns with a V4SF operand 1 and a V2SI or V4SI
;; register operand 0 whose "memory" attribute is "none".  Default latency
;; is 4 cycles.  The insn goes through bdver1-direct and bdver1-fpsched,
;; then takes bdver1-fcvt.
(define_insn_reservation "bdver1_ssecvt_cvtps2pi" 4
			 (and (eq_attr "cpu" "bdver1,bdver2")
			      (and (eq_attr "type" "ssecvt")
				   (and (eq_attr "memory" "none")
				        (and (match_operand:V4SF 1 "nonimmediate_operand")
					     (ior (match_operand:V2SI 0 "register_operand")
						  (match_operand:V4SI 0 "register_operand"))))))
			 "bdver1-direct,bdver1-fpsched,bdver1-fcvt")
696
697;; SSE MUL, ADD, and MULADD.
;; Load form, V8SF/V4DF modes: "ssemul", "sseadd" and "ssemuladd" insns
;; whose "memory" attribute is "load".  Default latency is 11 cycles.  The
;; insn goes through bdver1-double and bdver1-fpload, then takes
;; bdver1-ffma.
(define_insn_reservation "bdver1_ssemuladd_load_256" 11
  (and (and (eq_attr "cpu" "bdver1,bdver2")
            (eq_attr "type" "ssemul,sseadd,ssemuladd"))
       (and (eq_attr "mode" "V8SF,V4DF")
            (eq_attr "memory" "load")))
  "bdver1-double,bdver1-fpload,bdver1-ffma")
;; Register form, V8SF/V4DF modes: "ssemul", "sseadd" and "ssemuladd" insns
;; whose "memory" attribute is "none".  Default latency is 7 cycles.  The
;; insn goes through bdver1-double and bdver1-fpsched, then takes
;; bdver1-ffma.
(define_insn_reservation "bdver1_ssemuladd_256" 7
  (and (and (eq_attr "cpu" "bdver1,bdver2")
            (eq_attr "type" "ssemul,sseadd,ssemuladd"))
       (and (eq_attr "mode" "V8SF,V4DF")
            (eq_attr "memory" "none")))
  "bdver1-double,bdver1-fpsched,bdver1-ffma")
;; Load form for every mode other than V8SF/V4DF: "ssemul", "sseadd" and
;; "ssemuladd" insns whose "memory" attribute is "load".  Default latency
;; is 10 cycles.  The V8SF/V4DF modes are excluded explicitly because they
;; are claimed by bdver1_ssemuladd_load_256; without the exclusion both
;; conditions would be true for those insns and the choice of reservation
;; would depend on definition order.
(define_insn_reservation "bdver1_ssemuladd_load" 10
			 (and (eq_attr "cpu" "bdver1,bdver2")
			      (and (eq_attr "type" "ssemul,sseadd,ssemuladd")
				   (and (eq_attr "mode" "!V8SF,V4DF")
					(eq_attr "memory" "load"))))
			 "bdver1-direct,bdver1-fpload,bdver1-ffma")
;; Register form for every mode other than V8SF/V4DF: "ssemul", "sseadd"
;; and "ssemuladd" insns whose "memory" attribute is "none".  Default
;; latency is 6 cycles.  The V8SF/V4DF modes are excluded explicitly
;; because they are claimed by bdver1_ssemuladd_256; without the exclusion
;; both conditions would be true for those insns and the choice of
;; reservation would depend on definition order.
(define_insn_reservation "bdver1_ssemuladd" 6
			 (and (eq_attr "cpu" "bdver1,bdver2")
			      (and (eq_attr "type" "ssemul,sseadd,ssemuladd")
				   (and (eq_attr "mode" "!V8SF,V4DF")
					(eq_attr "memory" "none"))))
			 "bdver1-direct,bdver1-fpsched,bdver1-ffma")
;; Load form: "sseimul" insns whose "memory" attribute is "load".  Default
;; latency is 8 cycles.  The insn goes through bdver1-direct and
;; bdver1-fpload, then takes bdver1-fmma.
(define_insn_reservation "bdver1_sseimul_load" 8
  (and (and (eq_attr "cpu" "bdver1,bdver2")
            (eq_attr "type" "sseimul"))
       (eq_attr "memory" "load"))
  "bdver1-direct,bdver1-fpload,bdver1-fmma")
;; Register form: "sseimul" insns whose "memory" attribute is "none".
;; Default latency is 4 cycles.  The insn goes through bdver1-direct and
;; bdver1-fpsched, then takes bdver1-fmma.
(define_insn_reservation "bdver1_sseimul" 4
  (and (and (eq_attr "cpu" "bdver1,bdver2")
            (eq_attr "type" "sseimul"))
       (eq_attr "memory" "none"))
  "bdver1-direct,bdver1-fpsched,bdver1-fmma")
;; Load form: "sseiadd" insns whose "memory" attribute is "load".  Default
;; latency is 6 cycles.  The insn goes through bdver1-direct and
;; bdver1-fpload, then takes bdver1-fmal.
(define_insn_reservation "bdver1_sseiadd_load" 6
  (and (and (eq_attr "cpu" "bdver1,bdver2")
            (eq_attr "type" "sseiadd"))
       (eq_attr "memory" "load"))
  "bdver1-direct,bdver1-fpload,bdver1-fmal")
;; Register form: "sseiadd" insns whose "memory" attribute is "none".
;; Default latency is 2 cycles.  The insn goes through bdver1-direct and
;; bdver1-fpsched, then takes bdver1-fmal.
(define_insn_reservation "bdver1_sseiadd" 2
  (and (and (eq_attr "cpu" "bdver1,bdver2")
            (eq_attr "type" "sseiadd"))
       (eq_attr "memory" "none"))
  "bdver1-direct,bdver1-fpsched,bdver1-fmal")
740
741;; SSE DIV: no throughput information (assume same as amdfam10).
;; V4DF divide, load form ("memory" attribute "load").  Default latency is
;; 31 cycles.  The insn goes through bdver1-double and bdver1-fpload, then
;; holds either bdver1-ffma0 or bdver1-ffma1 for 17 consecutive cycles.
(define_insn_reservation "bdver1_ssediv_double_load_256" 31
  (and (and (eq_attr "cpu" "bdver1,bdver2")
            (eq_attr "type" "ssediv"))
       (and (eq_attr "mode" "V4DF")
            (eq_attr "memory" "load")))
  "bdver1-double,bdver1-fpload,(bdver1-ffma0*17 | bdver1-ffma1*17)")
;; V4DF divide, register form ("memory" attribute "none").  Default latency
;; is 27 cycles.  The insn goes through bdver1-double and bdver1-fpsched,
;; then holds either bdver1-ffma0 or bdver1-ffma1 for 17 consecutive cycles.
(define_insn_reservation "bdver1_ssediv_double_256" 27
  (and (and (eq_attr "cpu" "bdver1,bdver2")
            (eq_attr "type" "ssediv"))
       (and (eq_attr "mode" "V4DF")
            (eq_attr "memory" "none")))
  "bdver1-double,bdver1-fpsched,(bdver1-ffma0*17 | bdver1-ffma1*17)")
;; V8SF divide, load form ("memory" attribute "load").  Default latency is
;; 28 cycles.  The insn goes through bdver1-double and bdver1-fpload, then
;; holds either bdver1-ffma0 or bdver1-ffma1 for 17 consecutive cycles.
(define_insn_reservation "bdver1_ssediv_single_load_256" 28
  (and (and (eq_attr "cpu" "bdver1,bdver2")
            (eq_attr "type" "ssediv"))
       (and (eq_attr "mode" "V8SF")
            (eq_attr "memory" "load")))
  "bdver1-double,bdver1-fpload,(bdver1-ffma0*17 | bdver1-ffma1*17)")
;; V8SF divide, register form ("memory" attribute "none").  Default latency
;; is 24 cycles.  The insn goes through bdver1-double and bdver1-fpsched,
;; then holds either bdver1-ffma0 or bdver1-ffma1 for 17 consecutive cycles.
(define_insn_reservation "bdver1_ssediv_single_256" 24
  (and (and (eq_attr "cpu" "bdver1,bdver2")
            (eq_attr "type" "ssediv"))
       (and (eq_attr "mode" "V8SF")
            (eq_attr "memory" "none")))
  "bdver1-double,bdver1-fpsched,(bdver1-ffma0*17 | bdver1-ffma1*17)")
;; DF/V2DF divide, load form ("memory" attribute "load").  Default latency
;; is 31 cycles.  The insn goes through bdver1-direct and bdver1-fpload,
;; then holds either bdver1-ffma0 or bdver1-ffma1 for 17 consecutive cycles.
(define_insn_reservation "bdver1_ssediv_double_load" 31
  (and (and (eq_attr "cpu" "bdver1,bdver2")
            (eq_attr "type" "ssediv"))
       (and (eq_attr "mode" "DF,V2DF")
            (eq_attr "memory" "load")))
  "bdver1-direct,bdver1-fpload,(bdver1-ffma0*17 | bdver1-ffma1*17)")
;; DF/V2DF divide, register form ("memory" attribute "none").  Default
;; latency is 27 cycles.  The insn goes through bdver1-direct and
;; bdver1-fpsched, then holds either bdver1-ffma0 or bdver1-ffma1 for
;; 17 consecutive cycles.
(define_insn_reservation "bdver1_ssediv_double" 27
  (and (and (eq_attr "cpu" "bdver1,bdver2")
            (eq_attr "type" "ssediv"))
       (and (eq_attr "mode" "DF,V2DF")
            (eq_attr "memory" "none")))
  "bdver1-direct,bdver1-fpsched,(bdver1-ffma0*17 | bdver1-ffma1*17)")
;; SF/V4SF divide, load form ("memory" attribute "load").  Default latency
;; is 28 cycles.  The insn goes through bdver1-direct and bdver1-fpload,
;; then holds either bdver1-ffma0 or bdver1-ffma1 for 17 consecutive cycles.
(define_insn_reservation "bdver1_ssediv_single_load" 28
  (and (and (eq_attr "cpu" "bdver1,bdver2")
            (eq_attr "type" "ssediv"))
       (and (eq_attr "mode" "SF,V4SF")
            (eq_attr "memory" "load")))
  "bdver1-direct,bdver1-fpload,(bdver1-ffma0*17 | bdver1-ffma1*17)")
;; SF/V4SF divide, register form ("memory" attribute "none").  Default
;; latency is 24 cycles.  The insn goes through bdver1-direct and
;; bdver1-fpsched, then holds either bdver1-ffma0 or bdver1-ffma1 for
;; 17 consecutive cycles.
(define_insn_reservation "bdver1_ssediv_single" 24
  (and (and (eq_attr "cpu" "bdver1,bdver2")
            (eq_attr "type" "ssediv"))
       (and (eq_attr "mode" "SF,V4SF")
            (eq_attr "memory" "none")))
  "bdver1-direct,bdver1-fpsched,(bdver1-ffma0*17 | bdver1-ffma1*17)")
790
;; "sseins" insns in TI mode.  The condition does not test the "memory"
;; attribute, so this applies to every memory form.  Default latency is
;; 3 cycles.  The insn goes through bdver1-direct and bdver1-fpsched, then
;; takes bdver1-fxbar.
(define_insn_reservation "bdver1_sseins" 3
  (and (and (eq_attr "cpu" "bdver1,bdver2")
            (eq_attr "type" "sseins"))
       (eq_attr "mode" "TI"))
  "bdver1-direct,bdver1-fpsched,bdver1-fxbar")
796
797