1;; Copyright (C) 2010-2018 Free Software Foundation, Inc.
2;;
3;; This file is part of GCC.
4;;
5;; GCC is free software; you can redistribute it and/or modify
6;; it under the terms of the GNU General Public License as published by
7;; the Free Software Foundation; either version 3, or (at your option)
8;; any later version.
9;;
10;; GCC is distributed in the hope that it will be useful,
11;; but WITHOUT ANY WARRANTY; without even the implied warranty of
12;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13;; GNU General Public License for more details.
14;;
15;; You should have received a copy of the GNU General Public License
16;; along with GCC; see the file COPYING3.  If not see
17;; <http://www.gnu.org/licenses/>.
18;;
19;; AMD bdver1 Scheduling
20;;
21;; The bdver1 contains four pipelined FP units, two integer units and
22;; two address generation units.
23;;
;; The predecode logic determines the boundaries of instructions in the 64
;; byte cache line.  So the cache line straddling problem of K6 might be an
;; issue here as well, but it is not noted in the documentation.
27;;
;; Three DirectPath instruction decoders and only one VectorPath decoder
;; are available.  They can decode three DirectPath instructions or one
;; VectorPath instruction per cycle.
31;;
32;; The load/store queue unit is not attached to the schedulers but
33;; communicates with all the execution units separately instead.
34
35
;; Decode class of an insn on bdver1: "direct", "vector" or "double".
;; The default value is "direct".
(define_attr "bdver1_decode" "direct,vector,double" (const_string "direct"))
38
;; Automata used by this description.  The decoder units live in "bdver1";
;; the remaining automata hold the IEU, load, FP and AGU units.
(define_automaton "bdver1,bdver1_ieu,bdver1_load,bdver1_fp,bdver1_agu")

;; Decoder units: three numbered decoders plus the "decodev" unit that the
;; bdver1-vector reservation uses.
(define_cpu_unit "bdver1-decode0,bdver1-decode1,bdver1-decode2,bdver1-decodev"
		 "bdver1")
45
;; A double-decoded instruction may take 2 cycles to decode, namely when it
;; uses decoder2 in one cycle and decoder0 in the next (this is needed to
;; allow a throughput of 1.5 double-decoded instructions per cycle).
;;
;; To avoid a dependence between the reservation of the decoder and of the
;; other units, the decoder is modeled as a two-stage, fully pipelined unit
;; in which only double-decoded instructions may occupy a unit in the first
;; cycle.  With this scheme alone, however, two double instructions could be
;; issued in cycle 0.
;;
;; Avoid that with a presence set requiring decoder0 to be allocated too.
;; Vector-decoded instructions then cannot be issued when modeled as
;; consuming decoder0+decoder1+decoder2, so they get a specialized vector
;; decoder unit together with an exclusion set.
(presence_set "bdver1-decode2" "bdver1-decode0")
(exclusion_set "bdver1-decodev"
	       "bdver1-decode0,bdver1-decode1,bdver1-decode2")
62
;; Decoder reservations.  Each of the first three leaves the first cycle
;; empty ("nothing") and takes its decoder unit in the second cycle.
;; Vector-decoded insns use the dedicated vector decoder.
(define_reservation "bdver1-vector" "nothing,bdver1-decodev")
;; Direct-decoded insns that must use decoder1 specifically.
(define_reservation "bdver1-direct1" "nothing,bdver1-decode1")
;; Direct-decoded insns that may use any one of the three decoders.
(define_reservation "bdver1-direct" "nothing,
				     (bdver1-decode0 | bdver1-decode1
				     | bdver1-decode2)")
;; Double instructions behave like two direct instructions: either decoder2
;; followed by decoder0 in the next cycle, or an empty first cycle followed
;; by two adjacent decoders reserved together.
(define_reservation "bdver1-double" "((bdver1-decode2,bdver1-decode0)
				     | (nothing,(bdver1-decode0 + bdver1-decode1))
				     | (nothing,(bdver1-decode1 + bdver1-decode2)))")
72
73
;; Two integer execution units; "bdver1-ieu" reserves either of them.
(define_cpu_unit "bdver1-ieu0,bdver1-ieu1" "bdver1_ieu")
(define_reservation "bdver1-ieu"
		    "(bdver1-ieu0 | bdver1-ieu1)")

;; Two address generation units; "bdver1-agu" reserves either of them.
(define_cpu_unit "bdver1-agu0,bdver1-agu1" "bdver1_agu")
(define_reservation "bdver1-agu"
		    "(bdver1-agu0 | bdver1-agu1)")
81
;; Two load units in the "bdver1_load" automaton.  The store reservations
;; below use these same two units.
(define_cpu_unit "bdver1-load0" "bdver1_load")
(define_cpu_unit "bdver1-load1" "bdver1_load")
;; A load: an AGU cycle, then either load unit, then one empty cycle.
(define_reservation "bdver1-load" "bdver1-agu,
				   (bdver1-load0 | bdver1-load1),nothing")
;; 128bit SSE instructions issue two loads at once, so both load units are
;; reserved in the same cycle.
(define_reservation "bdver1-load2" "bdver1-agu,
				   (bdver1-load0 + bdver1-load1),nothing")

;; A store occupies either load unit for one cycle; no AGU cycle is part of
;; this reservation, so users add bdver1-agu themselves where needed.
(define_reservation "bdver1-store" "(bdver1-load0 | bdver1-load1)")
;; 128bit SSE instructions issue two stores at once.
(define_reservation "bdver1-store2" "(bdver1-load0 + bdver1-load1)")
93
;; VectorPath (microcoded) instructions are single-issue instructions.
;; So they occupy all the integer units: both IEUs, both AGUs and both
;; load units are reserved in the same cycle.
(define_reservation "bdver1-ivector" "bdver1-ieu0+bdver1-ieu1+
                                      bdver1-agu0+bdver1-agu1+
                                      bdver1-load0+bdver1-load1")
99
;; FP operations start to execute at stage 12 in the pipeline, whereas
;; integer operations start to execute at stage 9 for athlon and at stage 11
;; for K8.  The difference is compensated for athlon because doing so gives
;; significantly smaller automata.
;; NOTE: the above information was just copied from athlon.md, and was not
;; actually verified for bdver1.
;; The reservation itself is three empty cycles.
(define_reservation "bdver1-fpsched"
		    "nothing,nothing,nothing")

;; The floating point loads: the FP scheduling delay in parallel with a
;; single load (fpload) or with a paired load (fpload2).
(define_reservation "bdver1-fpload"
		    "(bdver1-fpsched + bdver1-load)")
(define_reservation "bdver1-fpload2"
		    "(bdver1-fpsched + bdver1-load2)")
110
;; Four FP units, all in the "bdver1_fp" automaton.
(define_cpu_unit "bdver1-ffma0,bdver1-ffma1,bdver1-fmal0,bdver1-fmal1"
		 "bdver1_fp")

;; Named reservations on top of the FP units.  "ffma" and "fmal" pick either
;; unit of their pair; the others are bound to one specific unit.
(define_reservation "bdver1-ffma" "(bdver1-ffma0 | bdver1-ffma1)")
(define_reservation "bdver1-fcvt" "bdver1-ffma0")
(define_reservation "bdver1-fmma" "bdver1-ffma0")
(define_reservation "bdver1-fxbar" "bdver1-ffma1")
(define_reservation "bdver1-fmal" "(bdver1-fmal0 | bdver1-fmal1)")
(define_reservation "bdver1-fsto" "bdver1-fmal1")
123
;; Vector operations usually consume many of the pipes; this reservation
;; takes all four FP units in the same cycle.
(define_reservation "bdver1-fvector"  "(bdver1-ffma0 + bdver1-ffma1
					+ bdver1-fmal0 + bdver1-fmal1)")
127
;; Jump instructions are executed in the branch unit completely transparent
;; to us.  Calls are given a default latency of 0.
(define_insn_reservation "bdver1_call" 0
  (and (eq_attr "cpu" "bdver1,bdver2")
       (eq_attr "type" "call,callv"))
  "bdver1-double,bdver1-agu")

;; PUSH mem is double path.
(define_insn_reservation "bdver1_push" 1
  (and (eq_attr "cpu" "bdver1,bdver2")
       (eq_attr "type" "push"))
  "bdver1-direct,bdver1-agu,bdver1-store")

;; POP r16/mem are double path.
(define_insn_reservation "bdver1_pop" 1
  (and (eq_attr "cpu" "bdver1,bdver2")
       (eq_attr "type" "pop"))
  "bdver1-direct,bdver1-ivector")

;; LEAVE: no latency info so far, assume the same as amdfam10.
(define_insn_reservation "bdver1_leave" 3
  (and (eq_attr "cpu" "bdver1,bdver2")
       (eq_attr "type" "leave"))
  "bdver1-vector,bdver1-ivector")

;; LEA executes in the AGU unit with 1 cycle latency on BDVER1.
(define_insn_reservation "bdver1_lea" 1
  (and (eq_attr "cpu" "bdver1,bdver2")
       (eq_attr "type" "lea"))
  "bdver1-direct,bdver1-agu")
153
;; MUL executes in a special multiplier unit attached to IEU1, so every
;; multiply is decoded through decoder1 and reserves ieu1.  The DImode
;; entries come before the generic ones so that they are tried first.
(define_insn_reservation "bdver1_imul_DI" 6
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "imul")
            (and (eq_attr "mode" "DI")
                 (eq_attr "memory" "none,unknown"))))
  "bdver1-direct1,bdver1-ieu1")

(define_insn_reservation "bdver1_imul" 4
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "imul")
            (eq_attr "memory" "none,unknown")))
  "bdver1-direct1,bdver1-ieu1")

;; Multiplies with a memory operand go through a load first.
(define_insn_reservation "bdver1_imul_mem_DI" 10
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "imul")
            (and (eq_attr "mode" "DI")
                 (eq_attr "memory" "load,both"))))
  "bdver1-direct1,bdver1-load,bdver1-ieu1")

(define_insn_reservation "bdver1_imul_mem" 8
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "imul")
            (eq_attr "memory" "load,both")))
  "bdver1-direct1,bdver1-load,bdver1-ieu1")
177
;; IDIV cannot execute in parallel with other instructions.  Treating it as
;; a short-latency vector instruction is a good approximation that keeps the
;; scheduler from trying too hard to hide its latency by overlapping it with
;; other instructions.
;; ??? Experiments show that the IDIV can overlap with roughly 6 cycles
;; of the other code.
(define_insn_reservation "bdver1_idiv" 6
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "idiv")
            (eq_attr "memory" "none,unknown")))
  "bdver1-vector,(bdver1-ieu0*6+(bdver1-fpsched,bdver1-fvector))")

;; Same model for the memory-operand form, with a load ahead of the IEU use.
(define_insn_reservation "bdver1_idiv_mem" 10
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "idiv")
            (eq_attr "memory" "load,both")))
  "bdver1-vector,((bdver1-load,bdver1-ieu0*6)+(bdver1-fpsched,bdver1-fvector))")
195
;; The parallelism of string instructions is not documented.  They are
;; modeled the same way as IDIV to create smaller automata; this probably
;; does not matter much.  The heuristics used for bdver1 are the same as
;; those amdfam10 and K8 use with IDIV.
(define_insn_reservation "bdver1_str" 6
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "str")
            (eq_attr "memory" "load,both,store")))
  "bdver1-vector,bdver1-load,bdver1-ieu0*6")
204
;; Integer instructions, selected by decode class ("bdver1_decode"), by the
;; "unit" attribute and by the kind of memory access.

;; Register-only forms.
(define_insn_reservation "bdver1_idirect" 1
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "bdver1_decode" "direct")
            (and (eq_attr "unit" "integer,unknown")
                 (eq_attr "memory" "none,unknown"))))
  "bdver1-direct,bdver1-ieu")

(define_insn_reservation "bdver1_ivector" 2
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "bdver1_decode" "vector")
            (and (eq_attr "unit" "integer,unknown")
                 (eq_attr "memory" "none,unknown"))))
  "bdver1-vector,bdver1-ieu,bdver1-ieu")

;; Loads.  A plain move from memory needs no IEU cycle and is listed ahead
;; of the generic load forms.
(define_insn_reservation "bdver1_idirect_loadmov" 4
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "imov")
            (eq_attr "memory" "load")))
  "bdver1-direct,bdver1-load")

(define_insn_reservation "bdver1_idirect_load" 5
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "bdver1_decode" "direct")
            (and (eq_attr "unit" "integer,unknown")
                 (eq_attr "memory" "load"))))
  "bdver1-direct,bdver1-load,bdver1-ieu")

(define_insn_reservation "bdver1_ivector_load" 6
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "bdver1_decode" "vector")
            (and (eq_attr "unit" "integer,unknown")
                 (eq_attr "memory" "load"))))
  "bdver1-vector,bdver1-load,bdver1-ieu,bdver1-ieu")

;; A plain move to memory: AGU followed by the store.
(define_insn_reservation "bdver1_idirect_movstore" 4
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "imov")
            (eq_attr "memory" "store")))
  "bdver1-direct,bdver1-agu,bdver1-store")
;; Integer insns that both load and store (memory "both"), direct-decoded:
;; load, one IEU cycle, then two store cycles.
(define_insn_reservation "bdver1_idirect_both" 4
			 (and (eq_attr "cpu" "bdver1,bdver2")
			      (and (eq_attr "bdver1_decode" "direct")
				   (and (eq_attr "unit" "integer,unknown")
					(eq_attr "memory" "both"))))
			 "bdver1-direct,bdver1-load,
			  bdver1-ieu,bdver1-store,
			  bdver1-store")
;; Vector-decoded load+store form: load, two IEU cycles, one store cycle.
(define_insn_reservation "bdver1_ivector_both" 5
			 (and (eq_attr "cpu" "bdver1,bdver2")
			      (and (eq_attr "bdver1_decode" "vector")
				   (and (eq_attr "unit" "integer,unknown")
					(eq_attr "memory" "both"))))
			 "bdver1-vector,bdver1-load,
			  bdver1-ieu,
			  bdver1-ieu,
			  bdver1-store")
;; Direct-decoded store-only form: IEU and AGU reserved in the same cycle,
;; followed by the store.
(define_insn_reservation "bdver1_idirect_store" 4
			 (and (eq_attr "cpu" "bdver1,bdver2")
			      (and (eq_attr "bdver1_decode" "direct")
				   (and (eq_attr "unit" "integer,unknown")
					(eq_attr "memory" "store"))))
			 "bdver1-direct,(bdver1-ieu+bdver1-agu),
			  bdver1-store")
;; Vector-decoded store-only form: as above with an extra IEU cycle before
;; the store.
(define_insn_reservation "bdver1_ivector_store" 5
			 (and (eq_attr "cpu" "bdver1,bdver2")
			      (and (eq_attr "bdver1_decode" "vector")
				   (and (eq_attr "unit" "integer,unknown")
					(eq_attr "memory" "store"))))
			 "bdver1-vector,(bdver1-ieu+bdver1-agu),bdver1-ieu,
			  bdver1-store")
271
;; BDVER1 floating point units.
;; More specific entries (XFmode, memory forms) are listed before the
;; general entry for the same insn type.

;; x87 moves: loads.
(define_insn_reservation "bdver1_fldxf" 13
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "fmov")
            (and (eq_attr "memory" "load")
                 (eq_attr "mode" "XF"))))
  "bdver1-vector,bdver1-fpload2,bdver1-fvector*9")

(define_insn_reservation "bdver1_fld" 5
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "fmov")
            (eq_attr "memory" "load")))
  "bdver1-direct,bdver1-fpload,bdver1-ffma")

;; x87 moves: stores.
(define_insn_reservation "bdver1_fstxf" 8
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "fmov")
            (and (eq_attr "memory" "store,both")
                 (eq_attr "mode" "XF"))))
  "bdver1-vector,(bdver1-fpsched+bdver1-agu),(bdver1-store2+(bdver1-fvector*6))")

(define_insn_reservation "bdver1_fst" 2
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "fmov")
            (eq_attr "memory" "store,both")))
  "bdver1-double,(bdver1-fpsched+bdver1-agu),(bdver1-fsto+bdver1-store)")

(define_insn_reservation "bdver1_fist" 2
  (and (eq_attr "cpu" "bdver1,bdver2")
       (eq_attr "type" "fistp,fisttp"))
  "bdver1-double,(bdver1-fpsched+bdver1-agu),(bdver1-fsto+bdver1-store)")

;; Any remaining x87 move.
(define_insn_reservation "bdver1_fmov_bdver1" 2
  (and (eq_attr "cpu" "bdver1,bdver2")
       (eq_attr "type" "fmov"))
  "bdver1-direct,bdver1-fpsched,bdver1-ffma")

;; x87 arithmetic (type "fop").
(define_insn_reservation "bdver1_fadd_load" 10
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "fop")
            (eq_attr "memory" "load")))
  "bdver1-direct,bdver1-fpload,bdver1-ffma")

(define_insn_reservation "bdver1_fadd" 6
  (and (eq_attr "cpu" "bdver1,bdver2")
       (eq_attr "type" "fop"))
  "bdver1-direct,bdver1-fpsched,bdver1-ffma")

;; x87 multiply.
(define_insn_reservation "bdver1_fmul_load" 10
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "fmul")
            (eq_attr "memory" "load")))
  "bdver1-double,bdver1-fpload,bdver1-ffma")

(define_insn_reservation "bdver1_fmul" 6
  (and (eq_attr "cpu" "bdver1,bdver2")
       (eq_attr "type" "fmul"))
  "bdver1-direct,bdver1-fpsched,bdver1-ffma")

;; x87 sign operations (type "fsgn").
(define_insn_reservation "bdver1_fsgn" 2
  (and (eq_attr "cpu" "bdver1,bdver2")
       (eq_attr "type" "fsgn"))
  "bdver1-direct,bdver1-fpsched,bdver1-ffma")

;; x87 divide.
(define_insn_reservation "bdver1_fdiv_load" 46
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "fdiv")
            (eq_attr "memory" "load")))
  "bdver1-direct,bdver1-fpload,bdver1-ffma")

(define_insn_reservation "bdver1_fdiv" 42
  (and (eq_attr "cpu" "bdver1,bdver2")
       (eq_attr "type" "fdiv"))
  "bdver1-direct,bdver1-fpsched,bdver1-ffma")

;; x87 special functions (type "fpspc") with a memory load.
(define_insn_reservation "bdver1_fpspc_load" 103
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "fpspc")
            (eq_attr "memory" "load")))
  "bdver1-vector,bdver1-fpload,bdver1-fvector")
;; x87 special functions (type "fpspc") that were not claimed by the
;; preceding bdver1_fpspc_load reservation, i.e. the forms without a memory
;; load.  This entry previously repeated the "memory" "load" test of
;; bdver1_fpspc_load, which made it unreachable and left register-only fpspc
;; insns without any reservation.  It now follows the same pattern as the
;; bdver1_fcmov_load/bdver1_fcmov pair: no memory test, and bdver1-fpsched
;; in place of bdver1-fpload.
(define_insn_reservation "bdver1_fpspc" 100
			 (and (eq_attr "cpu" "bdver1,bdver2")
			      (eq_attr "type" "fpspc"))
			 "bdver1-vector,bdver1-fpsched,bdver1-fvector")
;; x87 conditional moves.
(define_insn_reservation "bdver1_fcmov_load" 17
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "fcmov")
            (eq_attr "memory" "load")))
  "bdver1-vector,bdver1-fpload,bdver1-fvector")

(define_insn_reservation "bdver1_fcmov" 15
  (and (eq_attr "cpu" "bdver1,bdver2")
       (eq_attr "type" "fcmov"))
  "bdver1-vector,bdver1-fpsched,bdver1-fvector")

;; x87 compares whose decode class is "double"; these are listed before the
;; generic compares below so that they are tried first.
(define_insn_reservation "bdver1_fcomi_load" 6
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "fcmp")
            (and (eq_attr "bdver1_decode" "double")
                 (eq_attr "memory" "load"))))
  "bdver1-double,bdver1-fpload,(bdver1-ffma | bdver1-fsto)")

(define_insn_reservation "bdver1_fcomi" 2
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "bdver1_decode" "double")
            (eq_attr "type" "fcmp")))
  "bdver1-double,bdver1-fpsched,(bdver1-ffma | bdver1-fsto)")

;; Remaining x87 compares.
(define_insn_reservation "bdver1_fcom_load" 6
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "fcmp")
            (eq_attr "memory" "load")))
  "bdver1-direct,bdver1-fpload,bdver1-ffma")

(define_insn_reservation "bdver1_fcom" 2
  (and (eq_attr "cpu" "bdver1,bdver2")
       (eq_attr "type" "fcmp"))
  "bdver1-direct,bdver1-fpsched,bdver1-ffma")

;; x87 register exchange.
(define_insn_reservation "bdver1_fxch" 2
  (and (eq_attr "cpu" "bdver1,bdver2")
       (eq_attr "type" "fxch"))
  "bdver1-direct,bdver1-fpsched,bdver1-ffma")
377
;; SSE loads.
;; The entries run from most to least specific; the final bdver1_mmxsse_load
;; catches every mmxmov/ssemov load not matched earlier.

;; Unaligned vector loads ("movu" set).
(define_insn_reservation "bdver1_ssevector_avx128_unaligned_load" 4
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssemov")
            (and (eq_attr "prefix" "vex")
                 (and (eq_attr "movu" "1")
                      (and (eq_attr "mode" "V4SF,V2DF")
                           (eq_attr "memory" "load"))))))
  "bdver1-direct,bdver1-fpload")

(define_insn_reservation "bdver1_ssevector_avx256_unaligned_load" 5
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssemov")
            (and (eq_attr "movu" "1")
                 (and (eq_attr "mode" "V8SF,V4DF")
                      (eq_attr "memory" "load")))))
  "bdver1-double,bdver1-fpload")

(define_insn_reservation "bdver1_ssevector_sse128_unaligned_load" 4
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssemov")
            (and (eq_attr "movu" "1")
                 (and (eq_attr "mode" "V4SF,V2DF")
                      (eq_attr "memory" "load")))))
  "bdver1-direct,bdver1-fpload,bdver1-fmal")

;; Remaining vector loads.
(define_insn_reservation "bdver1_ssevector_avx128_load" 4
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssemov")
            (and (eq_attr "prefix" "vex")
                 (and (eq_attr "mode" "V4SF,V2DF,TI")
                      (eq_attr "memory" "load")))))
  "bdver1-direct,bdver1-fpload,bdver1-fmal")

(define_insn_reservation "bdver1_ssevector_avx256_load" 5
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssemov")
            (and (eq_attr "mode" "V8SF,V4DF,OI")
                 (eq_attr "memory" "load"))))
  "bdver1-double,bdver1-fpload,bdver1-fmal")

(define_insn_reservation "bdver1_ssevector_sse128_load" 4
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssemov")
            (and (eq_attr "mode" "V4SF,V2DF,TI")
                 (eq_attr "memory" "load"))))
  "bdver1-direct,bdver1-fpload")

;; Scalar loads.
(define_insn_reservation "bdver1_ssescalar_movq_load" 4
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssemov")
            (and (eq_attr "mode" "DI")
                 (eq_attr "memory" "load"))))
  "bdver1-direct,bdver1-fpload,bdver1-fmal")

(define_insn_reservation "bdver1_ssescalar_vmovss_load" 4
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssemov")
            (and (eq_attr "prefix" "vex")
                 (and (eq_attr "mode" "SF")
                      (eq_attr "memory" "load")))))
  "bdver1-direct,bdver1-fpload")

(define_insn_reservation "bdver1_ssescalar_sse128_load" 4
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssemov")
            (and (eq_attr "mode" "SF,DF")
                 (eq_attr "memory" "load"))))
  "bdver1-direct,bdver1-fpload, bdver1-ffma")

;; Fallback for every other MMX/SSE move that loads from memory.
(define_insn_reservation "bdver1_mmxsse_load" 4
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "mmxmov,ssemov")
            (eq_attr "memory" "load")))
  "bdver1-direct,bdver1-fpload, bdver1-fmal")
444
;; SSE stores.
;; The two vector-mode entries repeat the (fsto + store) step twice; the
;; fallback entry for all other MMX/SSE stores performs it once.
(define_insn_reservation "bdver1_sse_store_avx256" 5
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssemov")
            (and (eq_attr "mode" "V8SF,V4DF,OI")
                 (eq_attr "memory" "store,both"))))
  "bdver1-double,(bdver1-fpsched+bdver1-agu),((bdver1-fsto+bdver1-store)*2)")

(define_insn_reservation "bdver1_sse_store" 4
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssemov")
            (and (eq_attr "mode" "V4SF,V2DF,TI")
                 (eq_attr "memory" "store,both"))))
  "bdver1-direct,(bdver1-fpsched+bdver1-agu),((bdver1-fsto+bdver1-store)*2)")

(define_insn_reservation "bdver1_mmxsse_store_short" 4
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "mmxmov,ssemov")
            (eq_attr "memory" "store,both")))
  "bdver1-direct,(bdver1-fpsched+bdver1-agu),(bdver1-fsto+bdver1-store)")
463
;; Register moves (memory "none").  The 256-bit and scalar SF/DF entries are
;; tried before the generic MMX/SSE register move.
(define_insn_reservation "bdver1_ssevector_avx256" 3
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssemov")
            (and (eq_attr "mode" "V8SF,V4DF,OI")
                 (eq_attr "memory" "none"))))
  "bdver1-double,bdver1-fpsched,bdver1-fmal")

(define_insn_reservation "bdver1_movss_movsd" 2
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssemov")
            (and (eq_attr "mode" "SF,DF")
                 (eq_attr "memory" "none"))))
  "bdver1-direct,bdver1-fpsched,bdver1-ffma")

(define_insn_reservation "bdver1_mmxssemov" 2
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "mmxmov,ssemov")
            (eq_attr "memory" "none")))
  "bdver1-direct,bdver1-fpsched,bdver1-fmal")
;; SSE logs (insn types sselog, sselog1, sseshuf and sseshuf1).
;; V8SFmode forms are double-decoded and use an FMAL unit; all other forms
;; are direct-decoded and use the FXBAR reservation.
(define_insn_reservation "bdver1_sselog_load_256" 7
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "sselog,sselog1,sseshuf,sseshuf1")
            (and (eq_attr "mode" "V8SF")
                 (eq_attr "memory" "load"))))
  "bdver1-double,bdver1-fpload,bdver1-fmal")

(define_insn_reservation "bdver1_sselog_256" 3
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "sselog,sselog1,sseshuf,sseshuf1")
            (eq_attr "mode" "V8SF")))
  "bdver1-double,bdver1-fpsched,bdver1-fmal")

(define_insn_reservation "bdver1_sselog_load" 6
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "sselog,sselog1,sseshuf,sseshuf1")
            (eq_attr "memory" "load")))
  "bdver1-direct,bdver1-fpload,bdver1-fxbar")

(define_insn_reservation "bdver1_sselog" 2
  (and (eq_attr "cpu" "bdver1,bdver2")
       (eq_attr "type" "sselog,sselog1,sseshuf,sseshuf1"))
  "bdver1-direct,bdver1-fpsched,bdver1-fxbar")
503
;; PCMP actually executes in FMAL.
;; NOTE(review): the ssecmp reservations below nevertheless use bdver1-ffma;
;; confirm which unit is intended.
(define_insn_reservation "bdver1_ssecmp_load" 6
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssecmp")
            (eq_attr "memory" "load")))
  "bdver1-direct,bdver1-fpload,bdver1-ffma")

(define_insn_reservation "bdver1_ssecmp" 2
  (and (eq_attr "cpu" "bdver1,bdver2")
       (eq_attr "type" "ssecmp"))
  "bdver1-direct,bdver1-fpsched,bdver1-ffma")

;; Insns of type "ssecomi" are double-decoded and may use either an FFMA
;; unit or the FSTO reservation.
(define_insn_reservation "bdver1_ssecomi_load" 6
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssecomi")
            (eq_attr "memory" "load")))
  "bdver1-double,bdver1-fpload,(bdver1-ffma | bdver1-fsto)")

(define_insn_reservation "bdver1_ssecomi" 2
  (and (eq_attr "cpu" "bdver1,bdver2")
       (eq_attr "type" "ssecomi"))
  "bdver1-double,bdver1-fpsched,(bdver1-ffma | bdver1-fsto)")
523
;; Conversions behave very irregularly and the scheduling is critical here.
;; Take each instruction separately.

;; 256 bit conversion: any ssecvt insn whose operand 0 or operand 1 has one
;; of the modes V4DF, V8SF or V8SI.
(define_insn_reservation "bdver1_vcvtX2Y_avx256_load" 8
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "memory" "load")
                 (ior (ior (match_operand:V4DF 0 "register_operand")
                           (ior (match_operand:V8SF 0 "register_operand")
                                (match_operand:V8SI 0 "register_operand")))
                      (ior (match_operand:V4DF 1 "nonimmediate_operand")
                           (ior (match_operand:V8SF 1 "nonimmediate_operand")
                                (match_operand:V8SI 1 "nonimmediate_operand")))))))
  "bdver1-vector,bdver1-fpload,bdver1-fvector")

(define_insn_reservation "bdver1_vcvtX2Y_avx256" 4
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "memory" "none")
                 (ior (ior (match_operand:V4DF 0 "register_operand")
                           (ior (match_operand:V8SF 0 "register_operand")
                                (match_operand:V8SI 0 "register_operand")))
                      (ior (match_operand:V4DF 1 "nonimmediate_operand")
                           (ior (match_operand:V8SF 1 "nonimmediate_operand")
                                (match_operand:V8SI 1 "nonimmediate_operand")))))))
  "bdver1-vector,bdver1-fpsched,bdver1-fvector")
;; CVTSS2SD, CVTSD2SS.
(define_insn_reservation "bdver1_ssecvt_cvtss2sd_load" 8
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "mode" "SF,DF")
                 (eq_attr "memory" "load"))))
  "bdver1-direct,bdver1-fpload,bdver1-fcvt")

(define_insn_reservation "bdver1_ssecvt_cvtss2sd" 4
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "mode" "SF,DF")
                 (eq_attr "memory" "none"))))
  "bdver1-direct,bdver1-fpsched,bdver1-fcvt")

;; CVTSI2SD, CVTSI2SS, CVTSI2SDQ, CVTSI2SSQ.
(define_insn_reservation "bdver1_sseicvt_cvtsi2sd_load" 8
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "sseicvt")
            (and (eq_attr "mode" "SF,DF")
                 (eq_attr "memory" "load"))))
  "bdver1-direct,bdver1-fpload,bdver1-fcvt")

;; NOTE(review): the last step is "(nothing | bdver1-fcvt)", i.e. the FCVT
;; unit is optional here, unlike in the load form above; confirm intent.
(define_insn_reservation "bdver1_sseicvt_cvtsi2sd" 4
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "sseicvt")
            (and (eq_attr "mode" "SF,DF")
                 (eq_attr "memory" "none"))))
  "bdver1-double,bdver1-fpsched,(nothing | bdver1-fcvt)")
;; The packed conversions below are told apart by the modes of operand 0
;; (destination register) and operand 1 (source).

;; CVTPD2PS.
(define_insn_reservation "bdver1_ssecvt_cvtpd2ps_load" 8
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "memory" "load")
                 (and (match_operand:V4SF 0 "register_operand")
                      (match_operand:V2DF 1 "nonimmediate_operand")))))
  "bdver1-double,bdver1-fpload,(bdver1-fxbar | bdver1-fcvt)")

(define_insn_reservation "bdver1_ssecvt_cvtpd2ps" 4
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "memory" "none")
                 (and (match_operand:V4SF 0 "register_operand")
                      (match_operand:V2DF 1 "nonimmediate_operand")))))
  "bdver1-double,bdver1-fpsched,(bdver1-fxbar | bdver1-fcvt)")

;; CVTPI2PS, CVTDQ2PS.
(define_insn_reservation "bdver1_ssecvt_cvtdq2ps_load" 8
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "memory" "load")
                 (and (match_operand:V4SF 0 "register_operand")
                      (ior (match_operand:V2SI 1 "nonimmediate_operand")
                           (match_operand:V4SI 1 "nonimmediate_operand"))))))
  "bdver1-direct,bdver1-fpload,bdver1-fcvt")

(define_insn_reservation "bdver1_ssecvt_cvtdq2ps" 4
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "memory" "none")
                 (and (match_operand:V4SF 0 "register_operand")
                      (ior (match_operand:V2SI 1 "nonimmediate_operand")
                           (match_operand:V4SI 1 "nonimmediate_operand"))))))
  "bdver1-direct,bdver1-fpsched,bdver1-fcvt")

;; CVTDQ2PD.
(define_insn_reservation "bdver1_ssecvt_cvtdq2pd_load" 8
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "memory" "load")
                 (and (match_operand:V2DF 0 "register_operand")
                      (match_operand:V4SI 1 "nonimmediate_operand")))))
  "bdver1-double,bdver1-fpload,(bdver1-fxbar | bdver1-fcvt)")

(define_insn_reservation "bdver1_ssecvt_cvtdq2pd" 4
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "memory" "none")
                 (and (match_operand:V2DF 0 "register_operand")
                      (match_operand:V4SI 1 "nonimmediate_operand")))))
  "bdver1-double,bdver1-fpsched,(bdver1-fxbar | bdver1-fcvt)")

;; CVTPS2PD, CVTPI2PD.
(define_insn_reservation "bdver1_ssecvt_cvtps2pd_load" 6
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "memory" "load")
                 (and (match_operand:V2DF 0 "register_operand")
                      (ior (match_operand:V2SI 1 "nonimmediate_operand")
                           (match_operand:V4SF 1 "nonimmediate_operand"))))))
  "bdver1-double,bdver1-fpload,(bdver1-fxbar | bdver1-fcvt)")
;; CVTPS2PD, CVTPI2PD without a memory operand.
;; This entry previously tested "memory" "load", exactly like
;; bdver1_ssecvt_cvtps2pd_load before it, so it could never match and the
;; register forms had no dedicated reservation.  The reservation already
;; uses bdver1-fpsched (no load), so the condition is corrected to
;; "memory" "none", matching every other load/no-load conversion pair here.
(define_insn_reservation "bdver1_ssecvt_cvtps2pd" 2
			 (and (eq_attr "cpu" "bdver1,bdver2")
			      (and (eq_attr "type" "ssecvt")
				   (and (eq_attr "memory" "none")
					(and (match_operand:V2DF 0 "register_operand")
					     (ior (match_operand:V2SI 1 "nonimmediate_operand")
						  (match_operand:V4SF 1 "nonimmediate_operand"))))))
			 "bdver1-double,bdver1-fpsched,(bdver1-fxbar | bdver1-fcvt)")
640;; CVTSD2SI, CVTSD2SIQ, CVTSS2SI, CVTSS2SIQ, CVTTSD2SI, CVTTSD2SIQ, CVTTSS2SI, CVTTSS2SIQ.
641(define_insn_reservation "bdver1_ssecvt_cvtsX2si_load" 8
642			 (and (eq_attr "cpu" "bdver1,bdver2")
643			      (and (eq_attr "type" "sseicvt")
644				   (and (eq_attr "mode" "SI,DI")
645					(eq_attr "memory" "load"))))
646			 "bdver1-double,bdver1-fpload,(bdver1-fcvt | bdver1-fsto)")
647(define_insn_reservation "bdver1_ssecvt_cvtsX2si" 4
648			 (and (eq_attr "cpu" "bdver1,bdver2")
649			      (and (eq_attr "type" "sseicvt")
650				   (and (eq_attr "mode" "SI,DI")
651					(eq_attr "memory" "none"))))
652			 "bdver1-double,bdver1-fpsched,(bdver1-fcvt | bdver1-fsto)")
653;; CVTPD2PI, CVTTPD2PI.
654(define_insn_reservation "bdver1_ssecvt_cvtpd2pi_load" 8
655			 (and (eq_attr "cpu" "bdver1,bdver2")
656			      (and (eq_attr "type" "ssecvt")
657				   (and (eq_attr "memory" "load")
658				        (and (match_operand:V2DF 1 "nonimmediate_operand")
659					     (match_operand:V2SI 0 "register_operand")))))
660			 "bdver1-double,bdver1-fpload,(bdver1-fcvt | bdver1-fxbar)")
661(define_insn_reservation "bdver1_ssecvt_cvtpd2pi" 4
662			 (and (eq_attr "cpu" "bdver1,bdver2")
663			      (and (eq_attr "type" "ssecvt")
664				   (and (eq_attr "memory" "none")
665				        (and (match_operand:V2DF 1 "nonimmediate_operand")
666					     (match_operand:V2SI 0 "register_operand")))))
667			 "bdver1-double,bdver1-fpsched,(bdver1-fcvt | bdver1-fxbar)")
668;; CVTPD2DQ, CVTTPD2DQ.
669(define_insn_reservation "bdver1_ssecvt_cvtpd2dq_load" 6
670			 (and (eq_attr "cpu" "bdver1,bdver2")
671			      (and (eq_attr "type" "ssecvt")
672				   (and (eq_attr "memory" "load")
673				        (and (match_operand:V2DF 1 "nonimmediate_operand")
674					     (match_operand:V4SI 0 "register_operand")))))
675			 "bdver1-double,bdver1-fpload,(bdver1-fcvt | bdver1-fxbar)")
;; "ssecvt" insns with "memory" attribute "none" whose operand 1 is a
;; V2DF nonimmediate_operand and whose operand 0 is a V4SI
;; register_operand, on bdver1/bdver2.  Default latency: 2.
;; Reserves bdver1-double, then bdver1-fpsched, then bdver1-fcvt or
;; bdver1-fxbar.
(define_insn_reservation "bdver1_ssecvt_cvtpd2dq" 2
  (and (and (eq_attr "cpu" "bdver1,bdver2")
            (eq_attr "type" "ssecvt"))
       (and (eq_attr "memory" "none")
            (and (match_operand:V2DF 1 "nonimmediate_operand")
                 (match_operand:V4SI 0 "register_operand"))))
  "bdver1-double,bdver1-fpsched,(bdver1-fcvt | bdver1-fxbar)")
683;; CVTPS2PI, CVTTPS2PI, CVTPS2DQ, CVTTPS2DQ.
;; "ssecvt" insns with "memory" attribute "load" whose operand 1 is a
;; V4SF nonimmediate_operand and whose operand 0 is a register_operand
;; of mode V2SI or V4SI, on bdver1/bdver2.  Default latency: 8.
;; Reserves bdver1-direct, then bdver1-fpload, then bdver1-fcvt.
(define_insn_reservation "bdver1_ssecvt_cvtps2pi_load" 8
  (and (and (eq_attr "cpu" "bdver1,bdver2")
            (eq_attr "type" "ssecvt"))
       (and (eq_attr "memory" "load")
            (and (match_operand:V4SF 1 "nonimmediate_operand")
                 (ior (match_operand:V2SI 0 "register_operand")
                      (match_operand:V4SI 0 "register_operand")))))
  "bdver1-direct,bdver1-fpload,bdver1-fcvt")
;; "ssecvt" insns with "memory" attribute "none" whose operand 1 is a
;; V4SF nonimmediate_operand and whose operand 0 is a register_operand
;; of mode V2SI or V4SI, on bdver1/bdver2.  Default latency: 4.
;; Reserves bdver1-direct, then bdver1-fpsched, then bdver1-fcvt.
(define_insn_reservation "bdver1_ssecvt_cvtps2pi" 4
  (and (and (eq_attr "cpu" "bdver1,bdver2")
            (eq_attr "type" "ssecvt"))
       (and (eq_attr "memory" "none")
            (and (match_operand:V4SF 1 "nonimmediate_operand")
                 (ior (match_operand:V2SI 0 "register_operand")
                      (match_operand:V4SI 0 "register_operand")))))
  "bdver1-direct,bdver1-fpsched,bdver1-fcvt")
700
701;; SSE MUL, ADD, and MULADD.
;; "ssemul", "sseadd", "sseadd1" and "ssemuladd" insns in V8SF or V4DF
;; mode with "memory" attribute "load", on bdver1/bdver2.
;; Default latency: 11.
;; Reserves bdver1-double, then bdver1-fpload, then bdver1-ffma.
(define_insn_reservation "bdver1_ssemuladd_load_256" 11
  (and (and (eq_attr "cpu" "bdver1,bdver2")
            (eq_attr "type" "ssemul,sseadd,sseadd1,ssemuladd"))
       (and (eq_attr "mode" "V8SF,V4DF")
            (eq_attr "memory" "load")))
  "bdver1-double,bdver1-fpload,bdver1-ffma")
;; "ssemul", "sseadd", "sseadd1" and "ssemuladd" insns in V8SF or V4DF
;; mode with "memory" attribute "none", on bdver1/bdver2.
;; Default latency: 7.
;; Reserves bdver1-double, then bdver1-fpsched, then bdver1-ffma.
(define_insn_reservation "bdver1_ssemuladd_256" 7
  (and (and (eq_attr "cpu" "bdver1,bdver2")
            (eq_attr "type" "ssemul,sseadd,sseadd1,ssemuladd"))
       (and (eq_attr "mode" "V8SF,V4DF")
            (eq_attr "memory" "none")))
  "bdver1-double,bdver1-fpsched,bdver1-ffma")
;; "ssemul", "sseadd", "sseadd1" and "ssemuladd" insns with "memory"
;; attribute "load", on bdver1/bdver2.  Default latency: 10.
;; The condition carries no mode test.
;; NOTE(review): presumably the V8SF/V4DF case is meant to be taken by
;; bdver1_ssemuladd_load_256 -- confirm how overlapping conditions resolve.
;; Reserves bdver1-direct, then bdver1-fpload, then bdver1-ffma.
(define_insn_reservation "bdver1_ssemuladd_load" 10
  (and (and (eq_attr "cpu" "bdver1,bdver2")
            (eq_attr "type" "ssemul,sseadd,sseadd1,ssemuladd"))
       (eq_attr "memory" "load"))
  "bdver1-direct,bdver1-fpload,bdver1-ffma")
;; "ssemul", "sseadd", "sseadd1" and "ssemuladd" insns with "memory"
;; attribute "none", on bdver1/bdver2.  Default latency: 6.
;; The condition carries no mode test.
;; NOTE(review): presumably the V8SF/V4DF case is meant to be taken by
;; bdver1_ssemuladd_256 -- confirm how overlapping conditions resolve.
;; Reserves bdver1-direct, then bdver1-fpsched, then bdver1-ffma.
(define_insn_reservation "bdver1_ssemuladd" 6
  (and (and (eq_attr "cpu" "bdver1,bdver2")
            (eq_attr "type" "ssemul,sseadd,sseadd1,ssemuladd"))
       (eq_attr "memory" "none"))
  "bdver1-direct,bdver1-fpsched,bdver1-ffma")
;; "sseimul" insns with "memory" attribute "load", on bdver1/bdver2.
;; Default latency: 8.
;; Reserves bdver1-direct, then bdver1-fpload, then bdver1-fmma.
(define_insn_reservation "bdver1_sseimul_load" 8
  (and (and (eq_attr "cpu" "bdver1,bdver2")
            (eq_attr "type" "sseimul"))
       (eq_attr "memory" "load"))
  "bdver1-direct,bdver1-fpload,bdver1-fmma")
;; "sseimul" insns with "memory" attribute "none", on bdver1/bdver2.
;; Default latency: 4.
;; Reserves bdver1-direct, then bdver1-fpsched, then bdver1-fmma.
(define_insn_reservation "bdver1_sseimul" 4
  (and (and (eq_attr "cpu" "bdver1,bdver2")
            (eq_attr "type" "sseimul"))
       (eq_attr "memory" "none"))
  "bdver1-direct,bdver1-fpsched,bdver1-fmma")
;; "sseiadd" insns with "memory" attribute "load", on bdver1/bdver2.
;; Default latency: 6.
;; Reserves bdver1-direct, then bdver1-fpload, then bdver1-fmal.
(define_insn_reservation "bdver1_sseiadd_load" 6
  (and (and (eq_attr "cpu" "bdver1,bdver2")
            (eq_attr "type" "sseiadd"))
       (eq_attr "memory" "load"))
  "bdver1-direct,bdver1-fpload,bdver1-fmal")
;; "sseiadd" insns with "memory" attribute "none", on bdver1/bdver2.
;; Default latency: 2.
;; Reserves bdver1-direct, then bdver1-fpsched, then bdver1-fmal.
(define_insn_reservation "bdver1_sseiadd" 2
  (and (and (eq_attr "cpu" "bdver1,bdver2")
            (eq_attr "type" "sseiadd"))
       (eq_attr "memory" "none"))
  "bdver1-direct,bdver1-fpsched,bdver1-fmal")
744
745;; SSE DIV: no throughput information (assume same as amdfam10).
;; "ssediv" insns in V4DF mode with "memory" attribute "load", on
;; bdver1/bdver2.  Default latency: 31.
;; Reserves bdver1-double, then bdver1-fpload, then one of bdver1-ffma0
;; or bdver1-ffma1 for 17 consecutive cycles.
(define_insn_reservation "bdver1_ssediv_double_load_256" 31
  (and (and (eq_attr "cpu" "bdver1,bdver2")
            (eq_attr "type" "ssediv"))
       (and (eq_attr "mode" "V4DF")
            (eq_attr "memory" "load")))
  "bdver1-double,bdver1-fpload,(bdver1-ffma0*17 | bdver1-ffma1*17)")
;; "ssediv" insns in V4DF mode with "memory" attribute "none", on
;; bdver1/bdver2.  Default latency: 27.
;; Reserves bdver1-double, then bdver1-fpsched, then one of bdver1-ffma0
;; or bdver1-ffma1 for 17 consecutive cycles.
(define_insn_reservation "bdver1_ssediv_double_256" 27
  (and (and (eq_attr "cpu" "bdver1,bdver2")
            (eq_attr "type" "ssediv"))
       (and (eq_attr "mode" "V4DF")
            (eq_attr "memory" "none")))
  "bdver1-double,bdver1-fpsched,(bdver1-ffma0*17 | bdver1-ffma1*17)")
;; "ssediv" insns in V8SF mode with "memory" attribute "load", on
;; bdver1/bdver2.  Default latency: 28.
;; Reserves bdver1-double, then bdver1-fpload, then one of bdver1-ffma0
;; or bdver1-ffma1 for 17 consecutive cycles.
(define_insn_reservation "bdver1_ssediv_single_load_256" 28
  (and (and (eq_attr "cpu" "bdver1,bdver2")
            (eq_attr "type" "ssediv"))
       (and (eq_attr "mode" "V8SF")
            (eq_attr "memory" "load")))
  "bdver1-double,bdver1-fpload,(bdver1-ffma0*17 | bdver1-ffma1*17)")
;; "ssediv" insns in V8SF mode with "memory" attribute "none", on
;; bdver1/bdver2.  Default latency: 24.
;; Reserves bdver1-double, then bdver1-fpsched, then one of bdver1-ffma0
;; or bdver1-ffma1 for 17 consecutive cycles.
(define_insn_reservation "bdver1_ssediv_single_256" 24
  (and (and (eq_attr "cpu" "bdver1,bdver2")
            (eq_attr "type" "ssediv"))
       (and (eq_attr "mode" "V8SF")
            (eq_attr "memory" "none")))
  "bdver1-double,bdver1-fpsched,(bdver1-ffma0*17 | bdver1-ffma1*17)")
;; "ssediv" insns in DF or V2DF mode with "memory" attribute "load", on
;; bdver1/bdver2.  Default latency: 31.
;; Reserves bdver1-direct, then bdver1-fpload, then one of bdver1-ffma0
;; or bdver1-ffma1 for 17 consecutive cycles.
(define_insn_reservation "bdver1_ssediv_double_load" 31
  (and (and (eq_attr "cpu" "bdver1,bdver2")
            (eq_attr "type" "ssediv"))
       (and (eq_attr "mode" "DF,V2DF")
            (eq_attr "memory" "load")))
  "bdver1-direct,bdver1-fpload,(bdver1-ffma0*17 | bdver1-ffma1*17)")
;; "ssediv" insns in DF or V2DF mode with "memory" attribute "none", on
;; bdver1/bdver2.  Default latency: 27.
;; Reserves bdver1-direct, then bdver1-fpsched, then one of bdver1-ffma0
;; or bdver1-ffma1 for 17 consecutive cycles.
(define_insn_reservation "bdver1_ssediv_double" 27
  (and (and (eq_attr "cpu" "bdver1,bdver2")
            (eq_attr "type" "ssediv"))
       (and (eq_attr "mode" "DF,V2DF")
            (eq_attr "memory" "none")))
  "bdver1-direct,bdver1-fpsched,(bdver1-ffma0*17 | bdver1-ffma1*17)")
;; "ssediv" insns in SF or V4SF mode with "memory" attribute "load", on
;; bdver1/bdver2.  Default latency: 28.
;; Reserves bdver1-direct, then bdver1-fpload, then one of bdver1-ffma0
;; or bdver1-ffma1 for 17 consecutive cycles.
(define_insn_reservation "bdver1_ssediv_single_load" 28
  (and (and (eq_attr "cpu" "bdver1,bdver2")
            (eq_attr "type" "ssediv"))
       (and (eq_attr "mode" "SF,V4SF")
            (eq_attr "memory" "load")))
  "bdver1-direct,bdver1-fpload,(bdver1-ffma0*17 | bdver1-ffma1*17)")
;; "ssediv" insns in SF or V4SF mode with "memory" attribute "none", on
;; bdver1/bdver2.  Default latency: 24.
;; Reserves bdver1-direct, then bdver1-fpsched, then one of bdver1-ffma0
;; or bdver1-ffma1 for 17 consecutive cycles.
(define_insn_reservation "bdver1_ssediv_single" 24
  (and (and (eq_attr "cpu" "bdver1,bdver2")
            (eq_attr "type" "ssediv"))
       (and (eq_attr "mode" "SF,V4SF")
            (eq_attr "memory" "none")))
  "bdver1-direct,bdver1-fpsched,(bdver1-ffma0*17 | bdver1-ffma1*17)")
794
;; "sseins" insns in TI mode on bdver1/bdver2.  Default latency: 3.
;; The condition does not test the "memory" attribute.
;; Reserves bdver1-direct, then bdver1-fpsched, then bdver1-fxbar.
(define_insn_reservation "bdver1_sseins" 3
  (and (and (eq_attr "cpu" "bdver1,bdver2")
            (eq_attr "type" "sseins"))
       (eq_attr "mode" "TI"))
  "bdver1-direct,bdver1-fpsched,bdver1-fxbar")
800
801