1*38fd1498Szrj;; Copyright (C) 2010-2018 Free Software Foundation, Inc.
2*38fd1498Szrj;;
3*38fd1498Szrj;; This file is part of GCC.
4*38fd1498Szrj;;
5*38fd1498Szrj;; GCC is free software; you can redistribute it and/or modify
6*38fd1498Szrj;; it under the terms of the GNU General Public License as published by
7*38fd1498Szrj;; the Free Software Foundation; either version 3, or (at your option)
8*38fd1498Szrj;; any later version.
9*38fd1498Szrj;;
10*38fd1498Szrj;; GCC is distributed in the hope that it will be useful,
11*38fd1498Szrj;; but WITHOUT ANY WARRANTY; without even the implied warranty of
12*38fd1498Szrj;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13*38fd1498Szrj;; GNU General Public License for more details.
14*38fd1498Szrj;;
15*38fd1498Szrj;; You should have received a copy of the GNU General Public License
16*38fd1498Szrj;; along with GCC; see the file COPYING3.  If not see
17*38fd1498Szrj;; <http://www.gnu.org/licenses/>.
18*38fd1498Szrj;;
19*38fd1498Szrj;; AMD bdver1 Scheduling
20*38fd1498Szrj;;
21*38fd1498Szrj;; The bdver1 contains four pipelined FP units, two integer units and
22*38fd1498Szrj;; two address generation units.
23*38fd1498Szrj;;
24*38fd1498Szrj;; The predecode logic determines the boundaries of instructions in the
25*38fd1498Szrj;; 64-byte cache line.  So the cache line straddling problem of K6 might be
26*38fd1498Szrj;; an issue here as well, but it is not noted in the documentation.
27*38fd1498Szrj;;
28*38fd1498Szrj;; Three DirectPath instruction decoders and only one VectorPath decoder
29*38fd1498Szrj;; are available.  They can decode three DirectPath instructions or one
30*38fd1498Szrj;; VectorPath instruction per cycle.
31*38fd1498Szrj;;
32*38fd1498Szrj;; The load/store queue unit is not attached to the schedulers but
33*38fd1498Szrj;; communicates with all the execution units separately instead.
34*38fd1498Szrj
35*38fd1498Szrj
;; Decode class of an insn on bdver1: "direct", "vector" or "double".
;; The default is "direct".  Insn reservations in this file (for example
;; bdver1_idirect, bdver1_ivector and bdver1_fcomi) select on this
;; attribute through eq_attr.
36*38fd1498Szrj(define_attr "bdver1_decode" "direct,vector,double"
37*38fd1498Szrj  (const_string "direct"))
38*38fd1498Szrj
;; Five automata are declared here; every define_cpu_unit visible in this
;; file names one of them as its second operand.
39*38fd1498Szrj(define_automaton "bdver1,bdver1_ieu,bdver1_load,bdver1_fp,bdver1_agu")
40*38fd1498Szrj
;; Decoder units, all in the "bdver1" automaton: three numbered decode
;; slots plus bdver1-decodev, which is the unit taken by the
;; bdver1-vector reservation.
41*38fd1498Szrj(define_cpu_unit "bdver1-decode0" "bdver1")
42*38fd1498Szrj(define_cpu_unit "bdver1-decode1" "bdver1")
43*38fd1498Szrj(define_cpu_unit "bdver1-decode2" "bdver1")
44*38fd1498Szrj(define_cpu_unit "bdver1-decodev" "bdver1")
45*38fd1498Szrj
46*38fd1498Szrj;; Model the fact that a double decoded instruction may take 2 cycles
47*38fd1498Szrj;; to decode when decoder2 and then decoder0 in the next cycle
48*38fd1498Szrj;; are used (this is needed to allow throughput of 1.5 double decoded
49*38fd1498Szrj;; instructions per cycle).
50*38fd1498Szrj;;
51*38fd1498Szrj;; In order to avoid dependence between reservation of decoder
52*38fd1498Szrj;; and other units, we model decoder as two stage fully pipelined unit
53*38fd1498Szrj;; and only double decoded instruction may occupy unit in the first cycle.
54*38fd1498Szrj;; With this scheme however two double instructions can be issued cycle0.
55*38fd1498Szrj;;
56*38fd1498Szrj;; Avoid this by using presence set requiring decoder0 to be allocated
57*38fd1498Szrj;; too.  Vector decoded instructions then can't be issued when modeled
58*38fd1498Szrj;; as consuming decoder0+decoder1+decoder2.
59*38fd1498Szrj;; We solve that by specialized vector decoder unit and exclusion set.
;; presence_set: bdver1-decode2 may only be reserved in a cycle in which
;; bdver1-decode0 is reserved as well.
;; exclusion_set: bdver1-decodev cannot be reserved in the same cycle as
;; any of the three numbered decoders.
60*38fd1498Szrj(presence_set "bdver1-decode2" "bdver1-decode0")
61*38fd1498Szrj(exclusion_set "bdver1-decodev" "bdver1-decode0,bdver1-decode1,bdver1-decode2")
62*38fd1498Szrj
;; Decode-stage reservations.  Each one begins with a "nothing" cycle
;; (no unit reserved) and takes a decoder in the following cycle:
;;   bdver1-vector  - the dedicated vector decoder bdver1-decodev;
;;   bdver1-direct1 - specifically bdver1-decode1;
;;   bdver1-direct  - any one of bdver1-decode0/1/2.
63*38fd1498Szrj(define_reservation "bdver1-vector" "nothing,bdver1-decodev")
64*38fd1498Szrj(define_reservation "bdver1-direct1" "nothing,bdver1-decode1")
65*38fd1498Szrj(define_reservation "bdver1-direct" "nothing,
66*38fd1498Szrj				     (bdver1-decode0 | bdver1-decode1
67*38fd1498Szrj				     | bdver1-decode2)")
68*38fd1498Szrj;; Double instructions behave like two direct instructions.
;; Three alternatives for a double-decoded insn:
;;   - bdver1-decode2 in the first cycle, then bdver1-decode0 in the next;
;;   - an idle first cycle, then decode0 and decode1 together;
;;   - an idle first cycle, then decode1 and decode2 together.
69*38fd1498Szrj(define_reservation "bdver1-double" "((bdver1-decode2,bdver1-decode0)
70*38fd1498Szrj				     | (nothing,(bdver1-decode0 + bdver1-decode1))
71*38fd1498Szrj				     | (nothing,(bdver1-decode1 + bdver1-decode2)))")
72*38fd1498Szrj
73*38fd1498Szrj
;; Two integer units in the bdver1_ieu automaton; bdver1-ieu reserves
;; whichever one of them is free.
74*38fd1498Szrj(define_cpu_unit "bdver1-ieu0" "bdver1_ieu")
75*38fd1498Szrj(define_cpu_unit "bdver1-ieu1" "bdver1_ieu")
76*38fd1498Szrj(define_reservation "bdver1-ieu" "(bdver1-ieu0 | bdver1-ieu1)")
77*38fd1498Szrj
;; Two address generation units in the bdver1_agu automaton; bdver1-agu
;; reserves either one.
78*38fd1498Szrj(define_cpu_unit "bdver1-agu0" "bdver1_agu")
79*38fd1498Szrj(define_cpu_unit "bdver1-agu1" "bdver1_agu")
80*38fd1498Szrj(define_reservation "bdver1-agu" "(bdver1-agu0 | bdver1-agu1)")
81*38fd1498Szrj
;; Two load units in the bdver1_load automaton.  No separate store unit
;; is declared in the visible part of this file; the store reservations
;; below reuse bdver1-load0/bdver1-load1.
82*38fd1498Szrj(define_cpu_unit "bdver1-load0" "bdver1_load")
83*38fd1498Szrj(define_cpu_unit "bdver1-load1" "bdver1_load")
;; A load takes three cycles in this model: an AGU, then one load unit,
;; then a cycle in which no unit is reserved.
84*38fd1498Szrj(define_reservation "bdver1-load" "bdver1-agu,
85*38fd1498Szrj				   (bdver1-load0 | bdver1-load1),nothing")
86*38fd1498Szrj;; 128bit SSE instructions issue two loads at once.
;; Same shape as bdver1-load, but both load units are taken together in
;; the second cycle.
87*38fd1498Szrj(define_reservation "bdver1-load2" "bdver1-agu,
88*38fd1498Szrj				   (bdver1-load0 + bdver1-load1),nothing")
89*38fd1498Szrj
;; A store occupies one load unit for a single cycle; address generation
;; is not part of this reservation and is added by the users where needed.
90*38fd1498Szrj(define_reservation "bdver1-store" "(bdver1-load0 | bdver1-load1)")
91*38fd1498Szrj;; 128bit SSE instructions issue two stores at once.
92*38fd1498Szrj(define_reservation "bdver1-store2" "(bdver1-load0 + bdver1-load1)")
93*38fd1498Szrj
94*38fd1498Szrj;; vectorpath (microcoded) instructions are single issue instructions.
95*38fd1498Szrj;; So, they occupy all the integer units.
;; One cycle in which both integer units, both AGUs and both load units
;; are reserved simultaneously.
96*38fd1498Szrj(define_reservation "bdver1-ivector" "bdver1-ieu0+bdver1-ieu1+
97*38fd1498Szrj                                      bdver1-agu0+bdver1-agu1+
98*38fd1498Szrj                                      bdver1-load0+bdver1-load1")
99*38fd1498Szrj
100*38fd1498Szrj;; The FP operations start to execute at stage 12 in the pipeline, while
101*38fd1498Szrj;; integer operations start to execute at stage 9 for athlon and 11 for K8
102*38fd1498Szrj;; Compensate the difference for athlon because it results in significantly
103*38fd1498Szrj;; smaller automata.
104*38fd1498Szrj;; NOTE: the above information was just copied from athlon.md, and was not
105*38fd1498Szrj;; actually verified for bdver1.
;; bdver1-fpsched is a pure three-cycle delay: it reserves no unit at all.
106*38fd1498Szrj(define_reservation "bdver1-fpsched" "nothing,nothing,nothing")
107*38fd1498Szrj;; The floating point loads.
;; Both run bdver1-fpsched in parallel ("+") with a load reservation, so
;; the three-cycle delay overlaps the three cycles of bdver1-load or
;; bdver1-load2.
108*38fd1498Szrj(define_reservation "bdver1-fpload" "(bdver1-fpsched + bdver1-load)")
109*38fd1498Szrj(define_reservation "bdver1-fpload2" "(bdver1-fpsched + bdver1-load2)")
110*38fd1498Szrj
111*38fd1498Szrj;; Four FP units.
;; The four FP pipes, all in the bdver1_fp automaton.
112*38fd1498Szrj(define_cpu_unit "bdver1-ffma0" "bdver1_fp")
113*38fd1498Szrj(define_cpu_unit "bdver1-ffma1" "bdver1_fp")
114*38fd1498Szrj(define_cpu_unit "bdver1-fmal0" "bdver1_fp")
115*38fd1498Szrj(define_cpu_unit "bdver1-fmal1" "bdver1_fp")
116*38fd1498Szrj
;; Named views of those pipes:
;;   bdver1-ffma  - either ffma0 or ffma1
;;   bdver1-fcvt  - ffma0 only
;;   bdver1-fmma  - ffma0 only
;;   bdver1-fxbar - ffma1 only
;;   bdver1-fmal  - either fmal0 or fmal1
;;   bdver1-fsto  - fmal1 only
117*38fd1498Szrj(define_reservation "bdver1-ffma"     "(bdver1-ffma0 | bdver1-ffma1)")
118*38fd1498Szrj(define_reservation "bdver1-fcvt"     "bdver1-ffma0")
119*38fd1498Szrj(define_reservation "bdver1-fmma"     "bdver1-ffma0")
120*38fd1498Szrj(define_reservation "bdver1-fxbar"    "bdver1-ffma1")
121*38fd1498Szrj(define_reservation "bdver1-fmal"     "(bdver1-fmal0 | bdver1-fmal1)")
122*38fd1498Szrj(define_reservation "bdver1-fsto"     "bdver1-fmal1")
123*38fd1498Szrj
124*38fd1498Szrj;; Vector operations usually consume many of the pipes.
;; This reservation takes all four FP pipes in the same cycle.
125*38fd1498Szrj(define_reservation "bdver1-fvector"  "(bdver1-ffma0 + bdver1-ffma1
126*38fd1498Szrj					+ bdver1-fmal0 + bdver1-fmal1)")
127*38fd1498Szrj
128*38fd1498Szrj;; Jump instructions are executed in the branch unit completely transparent to us.
;; Every insn reservation in this file is restricted to the "cpu"
;; attribute values bdver1 and bdver2.  The number after the name is the
;; default latency; the final string is the unit reservation.
;; Calls: double-path decode followed by one AGU cycle, latency 0.
129*38fd1498Szrj(define_insn_reservation "bdver1_call" 0
130*38fd1498Szrj			 (and (eq_attr "cpu" "bdver1,bdver2")
131*38fd1498Szrj			      (eq_attr "type" "call,callv"))
132*38fd1498Szrj			 "bdver1-double,bdver1-agu")
133*38fd1498Szrj;; PUSH mem is double path.
;; The condition only tests type "push", so every push is modelled with
;; direct-path decode, an AGU cycle and a store cycle.
134*38fd1498Szrj(define_insn_reservation "bdver1_push" 1
135*38fd1498Szrj			 (and (eq_attr "cpu" "bdver1,bdver2")
136*38fd1498Szrj			      (eq_attr "type" "push"))
137*38fd1498Szrj			 "bdver1-direct,bdver1-agu,bdver1-store")
138*38fd1498Szrj;; POP r16/mem are double path.
;; Likewise keyed on type "pop" alone: direct-path decode, then one cycle
;; holding every integer-side unit (bdver1-ivector).
139*38fd1498Szrj(define_insn_reservation "bdver1_pop" 1
140*38fd1498Szrj			 (and (eq_attr "cpu" "bdver1,bdver2")
141*38fd1498Szrj			      (eq_attr "type" "pop"))
142*38fd1498Szrj			 "bdver1-direct,bdver1-ivector")
143*38fd1498Szrj;; LEAVE no latency info so far, assume same with amdfam10.
144*38fd1498Szrj(define_insn_reservation "bdver1_leave" 3
145*38fd1498Szrj			 (and (eq_attr "cpu" "bdver1,bdver2")
146*38fd1498Szrj			      (eq_attr "type" "leave"))
147*38fd1498Szrj			 "bdver1-vector,bdver1-ivector")
148*38fd1498Szrj;; LEA executes in AGU unit with 1 cycle latency on BDVER1.
149*38fd1498Szrj(define_insn_reservation "bdver1_lea" 1
150*38fd1498Szrj			 (and (eq_attr "cpu" "bdver1,bdver2")
151*38fd1498Szrj			      (eq_attr "type" "lea"))
152*38fd1498Szrj			 "bdver1-direct,bdver1-agu")
153*38fd1498Szrj
154*38fd1498Szrj;; MUL executes in special multiplier unit attached to IEU1.
;; Integer multiplies.  All four entries decode through bdver1-direct1
;; (decoder 1 only) and execute on bdver1-ieu1; the memory forms insert a
;; bdver1-load in between.  Latencies: 6 (DI) / 4 (other modes) without a
;; load, 10 (DI) / 8 (other modes) with one.
;; Each DI-mode entry is listed ahead of the mode-agnostic entry with the
;; same "memory" test; the latter is evidently meant as the fallback for
;; the remaining modes, which presumes the earlier entry takes precedence
;; -- keep this order.
155*38fd1498Szrj(define_insn_reservation "bdver1_imul_DI" 6
156*38fd1498Szrj			 (and (eq_attr "cpu" "bdver1,bdver2")
157*38fd1498Szrj			      (and (eq_attr "type" "imul")
158*38fd1498Szrj				   (and (eq_attr "mode" "DI")
159*38fd1498Szrj					(eq_attr "memory" "none,unknown"))))
160*38fd1498Szrj			 "bdver1-direct1,bdver1-ieu1")
161*38fd1498Szrj(define_insn_reservation "bdver1_imul" 4
162*38fd1498Szrj			 (and (eq_attr "cpu" "bdver1,bdver2")
163*38fd1498Szrj			      (and (eq_attr "type" "imul")
164*38fd1498Szrj				   (eq_attr "memory" "none,unknown")))
165*38fd1498Szrj			 "bdver1-direct1,bdver1-ieu1")
166*38fd1498Szrj(define_insn_reservation "bdver1_imul_mem_DI" 10
167*38fd1498Szrj			 (and (eq_attr "cpu" "bdver1,bdver2")
168*38fd1498Szrj			      (and (eq_attr "type" "imul")
169*38fd1498Szrj				   (and (eq_attr "mode" "DI")
170*38fd1498Szrj					(eq_attr "memory" "load,both"))))
171*38fd1498Szrj                         "bdver1-direct1,bdver1-load,bdver1-ieu1")
172*38fd1498Szrj(define_insn_reservation "bdver1_imul_mem" 8
173*38fd1498Szrj			 (and (eq_attr "cpu" "bdver1,bdver2")
174*38fd1498Szrj			      (and (eq_attr "type" "imul")
175*38fd1498Szrj				   (eq_attr "memory" "load,both")))
176*38fd1498Szrj			 "bdver1-direct1,bdver1-load,bdver1-ieu1")
177*38fd1498Szrj
178*38fd1498Szrj;; IDIV cannot execute in parallel with other instructions.  Dealing with it
179*38fd1498Szrj;; as with a short latency vector instruction is a good approximation that
180*38fd1498Szrj;; keeps the scheduler from trying too hard to hide its latency by overlap
181*38fd1498Szrj;; with other instructions.
182*38fd1498Szrj;; ??? Experiments show that the IDIV can overlap with roughly 6 cycles
183*38fd1498Szrj;; of the other code.
;; Register form: vector decode, then bdver1-ieu0 is held for 6 cycles.
;; In parallel with that, bdver1-fpsched (three unit-free cycles) is
;; followed by one cycle of bdver1-fvector, which takes all four FP pipes.
184*38fd1498Szrj(define_insn_reservation "bdver1_idiv" 6
185*38fd1498Szrj			 (and (eq_attr "cpu" "bdver1,bdver2")
186*38fd1498Szrj			      (and (eq_attr "type" "idiv")
187*38fd1498Szrj				   (eq_attr "memory" "none,unknown")))
188*38fd1498Szrj			 "bdver1-vector,(bdver1-ieu0*6+(bdver1-fpsched,bdver1-fvector))")
189*38fd1498Szrj
;; Memory form: same as above, except that bdver1-load runs before the
;; six bdver1-ieu0 cycles; latency 10 instead of 6.
190*38fd1498Szrj(define_insn_reservation "bdver1_idiv_mem" 10
191*38fd1498Szrj			 (and (eq_attr "cpu" "bdver1,bdver2")
192*38fd1498Szrj			      (and (eq_attr "type" "idiv")
193*38fd1498Szrj				   (eq_attr "memory" "load,both")))
194*38fd1498Szrj			 "bdver1-vector,((bdver1-load,bdver1-ieu0*6)+(bdver1-fpsched,bdver1-fvector))")
195*38fd1498Szrj
196*38fd1498Szrj;; The parallelism of string instructions is not documented.  Model it same way
197*38fd1498Szrj;; as IDIV to create smaller automata.  This probably does not matter much.
198*38fd1498Szrj;; Using the same heuristics for bdver1 as amdfam10 and K8 with IDIV.
;; Applies only to "str" insns whose "memory" attribute is load, both or
;; store.  Vector decode, then bdver1-load, then bdver1-ieu0 held for
;; 6 cycles; latency 6.
199*38fd1498Szrj(define_insn_reservation "bdver1_str" 6
200*38fd1498Szrj			 (and (eq_attr "cpu" "bdver1,bdver2")
201*38fd1498Szrj			      (and (eq_attr "type" "str")
202*38fd1498Szrj				   (eq_attr "memory" "load,both,store")))
203*38fd1498Szrj			 "bdver1-vector,bdver1-load,bdver1-ieu0*6")
204*38fd1498Szrj
205*38fd1498Szrj;; Integer instructions.
;; Generic integer insns are selected by decode class (bdver1_decode),
;; unit "integer" or "unknown", and the "memory" attribute.
;; Register-only, direct path: one decoder and one integer unit, latency 1.
206*38fd1498Szrj(define_insn_reservation "bdver1_idirect" 1
207*38fd1498Szrj			 (and (eq_attr "cpu" "bdver1,bdver2")
208*38fd1498Szrj			      (and (eq_attr "bdver1_decode" "direct")
209*38fd1498Szrj				   (and (eq_attr "unit" "integer,unknown")
210*38fd1498Szrj					(eq_attr "memory" "none,unknown"))))
211*38fd1498Szrj			 "bdver1-direct,bdver1-ieu")
;; Register-only, vector path: an integer unit in two consecutive cycles
;; (each cycle may pick either unit), latency 2.
212*38fd1498Szrj(define_insn_reservation "bdver1_ivector" 2
213*38fd1498Szrj			 (and (eq_attr "cpu" "bdver1,bdver2")
214*38fd1498Szrj			      (and (eq_attr "bdver1_decode" "vector")
215*38fd1498Szrj				   (and (eq_attr "unit" "integer,unknown")
216*38fd1498Szrj					(eq_attr "memory" "none,unknown"))))
217*38fd1498Szrj			 "bdver1-vector,bdver1-ieu,bdver1-ieu")
;; Plain integer move from memory: no integer-unit stage, latency 4.
;; This entry is keyed on type "imov" only and sits ahead of the generic
;; bdver1_idirect_load (latency 5); presumably the order matters for imov
;; loads that satisfy both conditions.
218*38fd1498Szrj(define_insn_reservation "bdver1_idirect_loadmov" 4
219*38fd1498Szrj			 (and (eq_attr "cpu" "bdver1,bdver2")
220*38fd1498Szrj			      (and (eq_attr "type" "imov")
221*38fd1498Szrj				   (eq_attr "memory" "load")))
222*38fd1498Szrj			 "bdver1-direct,bdver1-load")
;; Load-and-operate, direct path: load followed by one integer unit.
223*38fd1498Szrj(define_insn_reservation "bdver1_idirect_load" 5
224*38fd1498Szrj			 (and (eq_attr "cpu" "bdver1,bdver2")
225*38fd1498Szrj			      (and (eq_attr "bdver1_decode" "direct")
226*38fd1498Szrj				   (and (eq_attr "unit" "integer,unknown")
227*38fd1498Szrj					(eq_attr "memory" "load"))))
228*38fd1498Szrj			 "bdver1-direct,bdver1-load,bdver1-ieu")
;; Load-and-operate, vector path: load followed by two integer-unit cycles.
229*38fd1498Szrj(define_insn_reservation "bdver1_ivector_load" 6
230*38fd1498Szrj			 (and (eq_attr "cpu" "bdver1,bdver2")
231*38fd1498Szrj			      (and (eq_attr "bdver1_decode" "vector")
232*38fd1498Szrj				   (and (eq_attr "unit" "integer,unknown")
233*38fd1498Szrj					(eq_attr "memory" "load"))))
234*38fd1498Szrj			 "bdver1-vector,bdver1-load,bdver1-ieu,bdver1-ieu")
;; Plain integer move to memory: AGU cycle, then a store cycle; no
;; integer-unit stage.  Keyed on type "imov" and listed before the
;; generic store entries below.
235*38fd1498Szrj(define_insn_reservation "bdver1_idirect_movstore" 4
236*38fd1498Szrj			 (and (eq_attr "cpu" "bdver1,bdver2")
237*38fd1498Szrj			      (and (eq_attr "type" "imov")
238*38fd1498Szrj				   (eq_attr "memory" "store")))
239*38fd1498Szrj			 "bdver1-direct,bdver1-agu,bdver1-store")
;; Read-modify-write, direct path: load, one integer unit, then a store
;; slot in each of two consecutive cycles.
240*38fd1498Szrj(define_insn_reservation "bdver1_idirect_both" 4
241*38fd1498Szrj			 (and (eq_attr "cpu" "bdver1,bdver2")
242*38fd1498Szrj			      (and (eq_attr "bdver1_decode" "direct")
243*38fd1498Szrj				   (and (eq_attr "unit" "integer,unknown")
244*38fd1498Szrj					(eq_attr "memory" "both"))))
245*38fd1498Szrj			 "bdver1-direct,bdver1-load,
246*38fd1498Szrj			  bdver1-ieu,bdver1-store,
247*38fd1498Szrj			  bdver1-store")
;; Read-modify-write, vector path: load, two integer-unit cycles, then a
;; single store cycle.
248*38fd1498Szrj(define_insn_reservation "bdver1_ivector_both" 5
249*38fd1498Szrj			 (and (eq_attr "cpu" "bdver1,bdver2")
250*38fd1498Szrj			      (and (eq_attr "bdver1_decode" "vector")
251*38fd1498Szrj				   (and (eq_attr "unit" "integer,unknown")
252*38fd1498Szrj					(eq_attr "memory" "both"))))
253*38fd1498Szrj			 "bdver1-vector,bdver1-load,
254*38fd1498Szrj			  bdver1-ieu,
255*38fd1498Szrj			  bdver1-ieu,
256*38fd1498Szrj			  bdver1-store")
;; Operate-and-store, direct path: an integer unit and an AGU in the same
;; cycle, then a store cycle.
257*38fd1498Szrj(define_insn_reservation "bdver1_idirect_store" 4
258*38fd1498Szrj			 (and (eq_attr "cpu" "bdver1,bdver2")
259*38fd1498Szrj			      (and (eq_attr "bdver1_decode" "direct")
260*38fd1498Szrj				   (and (eq_attr "unit" "integer,unknown")
261*38fd1498Szrj					(eq_attr "memory" "store"))))
262*38fd1498Szrj			 "bdver1-direct,(bdver1-ieu+bdver1-agu),
263*38fd1498Szrj			  bdver1-store")
;; Operate-and-store, vector path: as above, with one extra integer-unit
;; cycle before the store.
264*38fd1498Szrj(define_insn_reservation "bdver1_ivector_store" 5
265*38fd1498Szrj			 (and (eq_attr "cpu" "bdver1,bdver2")
266*38fd1498Szrj			      (and (eq_attr "bdver1_decode" "vector")
267*38fd1498Szrj				   (and (eq_attr "unit" "integer,unknown")
268*38fd1498Szrj					(eq_attr "memory" "store"))))
269*38fd1498Szrj			 "bdver1-vector,(bdver1-ieu+bdver1-agu),bdver1-ieu,
270*38fd1498Szrj			  bdver1-store")
271*38fd1498Szrj
272*38fd1498Szrj;; BDVER1 floating point units.
;; x87 moves (type "fmov").  The entries go from most to least specific:
;; XF-mode load, any load, XF-mode store, any store, and finally
;; bdver1_fmov_bdver1 with no "memory" test at all.  This ordering
;; presumes that an earlier matching entry takes precedence.
;; XF load: vector decode, a double-width load (bdver1-fpload2), then all
;; four FP pipes for 9 cycles.
273*38fd1498Szrj(define_insn_reservation "bdver1_fldxf" 13
274*38fd1498Szrj			 (and (eq_attr "cpu" "bdver1,bdver2")
275*38fd1498Szrj			      (and (eq_attr "type" "fmov")
276*38fd1498Szrj				   (and (eq_attr "memory" "load")
277*38fd1498Szrj					(eq_attr "mode" "XF"))))
278*38fd1498Szrj			 "bdver1-vector,bdver1-fpload2,bdver1-fvector*9")
279*38fd1498Szrj(define_insn_reservation "bdver1_fld" 5
280*38fd1498Szrj			 (and (eq_attr "cpu" "bdver1,bdver2")
281*38fd1498Szrj			      (and (eq_attr "type" "fmov")
282*38fd1498Szrj				   (eq_attr "memory" "load")))
283*38fd1498Szrj			 "bdver1-direct,bdver1-fpload,bdver1-ffma")
;; XF store: the AGU is taken during the bdver1-fpsched delay, then both
;; store slots are taken while all four FP pipes are held for 6 cycles.
284*38fd1498Szrj(define_insn_reservation "bdver1_fstxf" 8
285*38fd1498Szrj			 (and (eq_attr "cpu" "bdver1,bdver2")
286*38fd1498Szrj			      (and (eq_attr "type" "fmov")
287*38fd1498Szrj				   (and (eq_attr "memory" "store,both")
288*38fd1498Szrj					(eq_attr "mode" "XF"))))
289*38fd1498Szrj			 "bdver1-vector,(bdver1-fpsched+bdver1-agu),(bdver1-store2+(bdver1-fvector*6))")
;; Other x87 stores and fistp/fisttp share one reservation: double-path
;; decode, AGU during the bdver1-fpsched delay, then bdver1-fsto together
;; with one store slot.
290*38fd1498Szrj(define_insn_reservation "bdver1_fst" 2
291*38fd1498Szrj			 (and (eq_attr "cpu" "bdver1,bdver2")
292*38fd1498Szrj			      (and (eq_attr "type" "fmov")
293*38fd1498Szrj				   (eq_attr "memory" "store,both")))
294*38fd1498Szrj			 "bdver1-double,(bdver1-fpsched+bdver1-agu),(bdver1-fsto+bdver1-store)")
295*38fd1498Szrj(define_insn_reservation "bdver1_fist" 2
296*38fd1498Szrj			 (and (eq_attr "cpu" "bdver1,bdver2")
297*38fd1498Szrj			      (eq_attr "type" "fistp,fisttp"))
298*38fd1498Szrj			 "bdver1-double,(bdver1-fpsched+bdver1-agu),(bdver1-fsto+bdver1-store)")
;; Fallback for the remaining fmov insns: no memory pipeline stage.
299*38fd1498Szrj(define_insn_reservation "bdver1_fmov_bdver1" 2
300*38fd1498Szrj			 (and (eq_attr "cpu" "bdver1,bdver2")
301*38fd1498Szrj			      (eq_attr "type" "fmov"))
302*38fd1498Szrj			 "bdver1-direct,bdver1-fpsched,bdver1-ffma")
;; x87 arithmetic.  Each operation has a "_load" entry (memory = load,
;; using bdver1-fpload) placed before an entry without a "memory" test
;; (using bdver1-fpsched).  All of them finish on bdver1-ffma, except
;; bdver1_fpspc_load, which finishes on bdver1-fvector.
;; Latencies: fop 10/6, fmul 10/6, fsgn 2, fdiv 46/42.
303*38fd1498Szrj(define_insn_reservation "bdver1_fadd_load" 10
304*38fd1498Szrj			 (and (eq_attr "cpu" "bdver1,bdver2")
305*38fd1498Szrj			      (and (eq_attr "type" "fop")
306*38fd1498Szrj				   (eq_attr "memory" "load")))
307*38fd1498Szrj			 "bdver1-direct,bdver1-fpload,bdver1-ffma")
308*38fd1498Szrj(define_insn_reservation "bdver1_fadd" 6
309*38fd1498Szrj			 (and (eq_attr "cpu" "bdver1,bdver2")
310*38fd1498Szrj			      (eq_attr "type" "fop"))
311*38fd1498Szrj			 "bdver1-direct,bdver1-fpsched,bdver1-ffma")
;; NOTE(review): this is the only "_load" entry in this group that decodes
;; as bdver1-double while its non-load partner uses bdver1-direct --
;; confirm against the processor documentation.
312*38fd1498Szrj(define_insn_reservation "bdver1_fmul_load" 10
313*38fd1498Szrj			 (and (eq_attr "cpu" "bdver1,bdver2")
314*38fd1498Szrj			      (and (eq_attr "type" "fmul")
315*38fd1498Szrj				   (eq_attr "memory" "load")))
316*38fd1498Szrj			 "bdver1-double,bdver1-fpload,bdver1-ffma")
317*38fd1498Szrj(define_insn_reservation "bdver1_fmul" 6
318*38fd1498Szrj			 (and (eq_attr "cpu" "bdver1,bdver2")
319*38fd1498Szrj			      (eq_attr "type" "fmul"))
320*38fd1498Szrj			 "bdver1-direct,bdver1-fpsched,bdver1-ffma")
321*38fd1498Szrj(define_insn_reservation "bdver1_fsgn" 2
322*38fd1498Szrj			 (and (eq_attr "cpu" "bdver1,bdver2")
323*38fd1498Szrj			      (eq_attr "type" "fsgn"))
324*38fd1498Szrj			 "bdver1-direct,bdver1-fpsched,bdver1-ffma")
325*38fd1498Szrj(define_insn_reservation "bdver1_fdiv_load" 46
326*38fd1498Szrj			 (and (eq_attr "cpu" "bdver1,bdver2")
327*38fd1498Szrj			      (and (eq_attr "type" "fdiv")
328*38fd1498Szrj				   (eq_attr "memory" "load")))
329*38fd1498Szrj			 "bdver1-direct,bdver1-fpload,bdver1-ffma")
330*38fd1498Szrj(define_insn_reservation "bdver1_fdiv" 42
331*38fd1498Szrj			 (and (eq_attr "cpu" "bdver1,bdver2")
332*38fd1498Szrj			      (eq_attr "type" "fdiv"))
333*38fd1498Szrj			 "bdver1-direct,bdver1-fpsched,bdver1-ffma")
;; Type "fpspc" with a memory load: vector decode, FP load, then all four
;; FP pipes for one cycle; latency 103.
334*38fd1498Szrj(define_insn_reservation "bdver1_fpspc_load" 103
335*38fd1498Szrj			 (and (eq_attr "cpu" "bdver1,bdver2")
336*38fd1498Szrj			      (and (eq_attr "type" "fpspc")
337*38fd1498Szrj				   (eq_attr "memory" "load")))
338*38fd1498Szrj			 "bdver1-vector,bdver1-fpload,bdver1-fvector")
;; Type "fpspc" insns that do not load from memory; latency 100.
;; The load form is described by bdver1_fpspc_load, which must stay ahead
;; of this entry.  This entry deliberately has no "memory" test so that
;; it covers every remaining fpspc insn, and it waits in bdver1-fpsched
;; instead of going through the load pipe -- the same pairing as
;; bdver1_fcmov_load / bdver1_fcmov.  (Previously the condition repeated
;; the "memory" = "load" test of bdver1_fpspc_load, so it covered no
;; additional insns and non-load fpspc insns matched neither entry.)
339*38fd1498Szrj(define_insn_reservation "bdver1_fpspc" 100
340*38fd1498Szrj			 (and (eq_attr "cpu" "bdver1,bdver2")
341*38fd1498Szrj			      (eq_attr "type" "fpspc"))
343*38fd1498Szrj			 "bdver1-vector,bdver1-fpsched,bdver1-fvector")
;; fcmov: vector decode and all four FP pipes; the load form goes through
;; bdver1-fpload, the other through bdver1-fpsched.
344*38fd1498Szrj(define_insn_reservation "bdver1_fcmov_load" 17
345*38fd1498Szrj			 (and (eq_attr "cpu" "bdver1,bdver2")
346*38fd1498Szrj			      (and (eq_attr "type" "fcmov")
347*38fd1498Szrj				   (eq_attr "memory" "load")))
348*38fd1498Szrj			 "bdver1-vector,bdver1-fpload,bdver1-fvector")
349*38fd1498Szrj(define_insn_reservation "bdver1_fcmov" 15
350*38fd1498Szrj			 (and (eq_attr "cpu" "bdver1,bdver2")
351*38fd1498Szrj			      (eq_attr "type" "fcmov"))
352*38fd1498Szrj			 "bdver1-vector,bdver1-fpsched,bdver1-fvector")
;; x87 compares (type "fcmp").  The two "fcomi" entries apply only when
;; bdver1_decode is "double"; they finish on either FMA pipe or on
;; bdver1-fsto.  The "fcom" entries further down handle the remaining
;; fcmp insns, direct path and bdver1-ffma only.
353*38fd1498Szrj(define_insn_reservation "bdver1_fcomi_load" 6
354*38fd1498Szrj			 (and (eq_attr "cpu" "bdver1,bdver2")
355*38fd1498Szrj			      (and (eq_attr "type" "fcmp")
356*38fd1498Szrj				   (and (eq_attr "bdver1_decode" "double")
357*38fd1498Szrj					(eq_attr "memory" "load"))))
358*38fd1498Szrj			 "bdver1-double,bdver1-fpload,(bdver1-ffma | bdver1-fsto)")
359*38fd1498Szrj(define_insn_reservation "bdver1_fcomi" 2
360*38fd1498Szrj			 (and (eq_attr "cpu" "bdver1,bdver2")
361*38fd1498Szrj			      (and (eq_attr "bdver1_decode" "double")
362*38fd1498Szrj				   (eq_attr "type" "fcmp")))
363*38fd1498Szrj			 "bdver1-double,bdver1-fpsched,(bdver1-ffma | bdver1-fsto)")
364*38fd1498Szrj(define_insn_reservation "bdver1_fcom_load" 6
365*38fd1498Szrj			 (and (eq_attr "cpu" "bdver1,bdver2")
366*38fd1498Szrj			      (and (eq_attr "type" "fcmp")
367*38fd1498Szrj				   (eq_attr "memory" "load")))
368*38fd1498Szrj			 "bdver1-direct,bdver1-fpload,bdver1-ffma")
369*38fd1498Szrj(define_insn_reservation "bdver1_fcom" 2
370*38fd1498Szrj			 (and (eq_attr "cpu" "bdver1,bdver2")
371*38fd1498Szrj			      (eq_attr "type" "fcmp"))
372*38fd1498Szrj			 "bdver1-direct,bdver1-fpsched,bdver1-ffma")
;; fxch: direct decode, the fpsched delay, then one FMA pipe; latency 2.
373*38fd1498Szrj(define_insn_reservation "bdver1_fxch" 2
374*38fd1498Szrj			 (and (eq_attr "cpu" "bdver1,bdver2")
375*38fd1498Szrj			      (eq_attr "type" "fxch"))
376*38fd1498Szrj			 "bdver1-direct,bdver1-fpsched,bdver1-ffma")
377*38fd1498Szrj
378*38fd1498Szrj;; SSE loads.
;; Unaligned vector loads: type "ssemov", attribute "movu" = 1, memory
;; load.  They are told apart by prefix and mode:
;;   avx128 - prefix "vex" and mode V4SF/V2DF: FP load only;
;;   avx256 - mode V8SF/V4DF (no prefix test): double-path decode, FP load;
;;   sse128 - remaining V4SF/V2DF cases: FP load followed by bdver1-fmal.
;; The avx128 entry has to stay ahead of the sse128 one, whose condition
;; it also satisfies (presumes the earlier entry takes precedence).
379*38fd1498Szrj(define_insn_reservation "bdver1_ssevector_avx128_unaligned_load" 4
380*38fd1498Szrj			 (and (eq_attr "cpu" "bdver1,bdver2")
381*38fd1498Szrj			      (and (eq_attr "type" "ssemov")
382*38fd1498Szrj				   (and (eq_attr "prefix" "vex")
383*38fd1498Szrj					(and (eq_attr "movu" "1")
384*38fd1498Szrj					     (and (eq_attr "mode" "V4SF,V2DF")
385*38fd1498Szrj						  (eq_attr "memory" "load"))))))
386*38fd1498Szrj			 "bdver1-direct,bdver1-fpload")
387*38fd1498Szrj(define_insn_reservation "bdver1_ssevector_avx256_unaligned_load" 5
388*38fd1498Szrj			 (and (eq_attr "cpu" "bdver1,bdver2")
389*38fd1498Szrj			      (and (eq_attr "type" "ssemov")
390*38fd1498Szrj				   (and (eq_attr "movu" "1")
391*38fd1498Szrj				        (and (eq_attr "mode" "V8SF,V4DF")
392*38fd1498Szrj				             (eq_attr "memory" "load")))))
393*38fd1498Szrj			 "bdver1-double,bdver1-fpload")
394*38fd1498Szrj(define_insn_reservation "bdver1_ssevector_sse128_unaligned_load" 4
395*38fd1498Szrj			 (and (eq_attr "cpu" "bdver1,bdver2")
396*38fd1498Szrj			      (and (eq_attr "type" "ssemov")
397*38fd1498Szrj				   (and (eq_attr "movu" "1")
398*38fd1498Szrj				        (and (eq_attr "mode" "V4SF,V2DF")
399*38fd1498Szrj				             (eq_attr "memory" "load")))))
400*38fd1498Szrj			 "bdver1-direct,bdver1-fpload,bdver1-fmal")
;; Vector loads without a "movu" test (the unaligned entries are listed
;; ahead of these):
;;   avx128 - prefix "vex", mode V4SF/V2DF/TI: FP load then bdver1-fmal;
;;   avx256 - mode V8SF/V4DF/OI: double-path decode, FP load, bdver1-fmal;
;;   sse128 - remaining V4SF/V2DF/TI loads: FP load only.
401*38fd1498Szrj(define_insn_reservation "bdver1_ssevector_avx128_load" 4
402*38fd1498Szrj			 (and (eq_attr "cpu" "bdver1,bdver2")
403*38fd1498Szrj			      (and (eq_attr "type" "ssemov")
404*38fd1498Szrj				   (and (eq_attr "prefix" "vex")
405*38fd1498Szrj				        (and (eq_attr "mode" "V4SF,V2DF,TI")
406*38fd1498Szrj				             (eq_attr "memory" "load")))))
407*38fd1498Szrj			 "bdver1-direct,bdver1-fpload,bdver1-fmal")
408*38fd1498Szrj(define_insn_reservation "bdver1_ssevector_avx256_load" 5
409*38fd1498Szrj			 (and (eq_attr "cpu" "bdver1,bdver2")
410*38fd1498Szrj			      (and (eq_attr "type" "ssemov")
411*38fd1498Szrj				   (and (eq_attr "mode" "V8SF,V4DF,OI")
412*38fd1498Szrj				        (eq_attr "memory" "load"))))
413*38fd1498Szrj			 "bdver1-double,bdver1-fpload,bdver1-fmal")
414*38fd1498Szrj(define_insn_reservation "bdver1_ssevector_sse128_load" 4
415*38fd1498Szrj			 (and (eq_attr "cpu" "bdver1,bdver2")
416*38fd1498Szrj			      (and (eq_attr "type" "ssemov")
417*38fd1498Szrj				   (and (eq_attr "mode" "V4SF,V2DF,TI")
418*38fd1498Szrj				        (eq_attr "memory" "load"))))
419*38fd1498Szrj			 "bdver1-direct,bdver1-fpload")
;; Scalar and MMX loads, all direct path with latency 4:
;;   movq_load    - ssemov in mode DI: FP load then bdver1-fmal;
;;   vmovss_load  - prefix "vex", mode SF: FP load only;
;;   sse128_load  - remaining SF/DF loads: FP load then bdver1-ffma;
;;   mmxsse_load  - any other mmxmov/ssemov load: FP load then bdver1-fmal.
;; bdver1_mmxsse_load has no mode test and so serves as the catch-all for
;; loads not claimed by an earlier entry.
420*38fd1498Szrj(define_insn_reservation "bdver1_ssescalar_movq_load" 4
421*38fd1498Szrj			 (and (eq_attr "cpu" "bdver1,bdver2")
422*38fd1498Szrj			      (and (eq_attr "type" "ssemov")
423*38fd1498Szrj				   (and (eq_attr "mode" "DI")
424*38fd1498Szrj				        (eq_attr "memory" "load"))))
425*38fd1498Szrj			 "bdver1-direct,bdver1-fpload,bdver1-fmal")
426*38fd1498Szrj(define_insn_reservation "bdver1_ssescalar_vmovss_load" 4
427*38fd1498Szrj			 (and (eq_attr "cpu" "bdver1,bdver2")
428*38fd1498Szrj			      (and (eq_attr "type" "ssemov")
429*38fd1498Szrj				   (and (eq_attr "prefix" "vex")
430*38fd1498Szrj				        (and (eq_attr "mode" "SF")
431*38fd1498Szrj				             (eq_attr "memory" "load")))))
432*38fd1498Szrj			 "bdver1-direct,bdver1-fpload")
433*38fd1498Szrj(define_insn_reservation "bdver1_ssescalar_sse128_load" 4
434*38fd1498Szrj			 (and (eq_attr "cpu" "bdver1,bdver2")
435*38fd1498Szrj			      (and (eq_attr "type" "ssemov")
436*38fd1498Szrj				   (and (eq_attr "mode" "SF,DF")
437*38fd1498Szrj				        (eq_attr "memory" "load"))))
438*38fd1498Szrj			 "bdver1-direct,bdver1-fpload, bdver1-ffma")
439*38fd1498Szrj(define_insn_reservation "bdver1_mmxsse_load" 4
440*38fd1498Szrj			 (and (eq_attr "cpu" "bdver1,bdver2")
441*38fd1498Szrj			      (and (eq_attr "type" "mmxmov,ssemov")
442*38fd1498Szrj				   (eq_attr "memory" "load")))
443*38fd1498Szrj			 "bdver1-direct,bdver1-fpload, bdver1-fmal")
444*38fd1498Szrj
445*38fd1498Szrj;; SSE stores.
;; All three store entries take an AGU during the bdver1-fpsched delay
;; and then bdver1-fsto together with one store slot.  The two vector
;; entries hold that pair for two consecutive cycles ("*2"); the 256-bit
;; one additionally decodes as double path.  bdver1_mmxsse_store_short
;; has no mode test and handles the remaining mmxmov/ssemov stores with
;; a single fsto+store cycle.
446*38fd1498Szrj(define_insn_reservation "bdver1_sse_store_avx256" 5
447*38fd1498Szrj			 (and (eq_attr "cpu" "bdver1,bdver2")
448*38fd1498Szrj			      (and (eq_attr "type" "ssemov")
449*38fd1498Szrj				   (and (eq_attr "mode" "V8SF,V4DF,OI")
450*38fd1498Szrj					(eq_attr "memory" "store,both"))))
451*38fd1498Szrj			 "bdver1-double,(bdver1-fpsched+bdver1-agu),((bdver1-fsto+bdver1-store)*2)")
452*38fd1498Szrj(define_insn_reservation "bdver1_sse_store" 4
453*38fd1498Szrj			 (and (eq_attr "cpu" "bdver1,bdver2")
454*38fd1498Szrj			      (and (eq_attr "type" "ssemov")
455*38fd1498Szrj				   (and (eq_attr "mode" "V4SF,V2DF,TI")
456*38fd1498Szrj					(eq_attr "memory" "store,both"))))
457*38fd1498Szrj			 "bdver1-direct,(bdver1-fpsched+bdver1-agu),((bdver1-fsto+bdver1-store)*2)")
458*38fd1498Szrj(define_insn_reservation "bdver1_mmxsse_store_short" 4
459*38fd1498Szrj			 (and (eq_attr "cpu" "bdver1,bdver2")
460*38fd1498Szrj			      (and (eq_attr "type" "mmxmov,ssemov")
461*38fd1498Szrj				   (eq_attr "memory" "store,both")))
462*38fd1498Szrj			 "bdver1-direct,(bdver1-fpsched+bdver1-agu),(bdver1-fsto+bdver1-store)")
463*38fd1498Szrj
464*38fd1498Szrj;; Register moves.
;; Moves with "memory" = "none":
;;   256-bit modes (V8SF/V4DF/OI) - double-path decode, bdver1-fmal, latency 3;
;;   SF/DF                        - direct decode, bdver1-ffma, latency 2;
;;   any other mmxmov/ssemov      - direct decode, bdver1-fmal, latency 2.
465*38fd1498Szrj(define_insn_reservation "bdver1_ssevector_avx256" 3
466*38fd1498Szrj			 (and (eq_attr "cpu" "bdver1,bdver2")
467*38fd1498Szrj			      (and (eq_attr "type" "ssemov")
468*38fd1498Szrj				   (and (eq_attr "mode" "V8SF,V4DF,OI")
469*38fd1498Szrj					(eq_attr "memory" "none"))))
470*38fd1498Szrj			 "bdver1-double,bdver1-fpsched,bdver1-fmal")
471*38fd1498Szrj(define_insn_reservation "bdver1_movss_movsd" 2
472*38fd1498Szrj			 (and (eq_attr "cpu" "bdver1,bdver2")
473*38fd1498Szrj			      (and (eq_attr "type" "ssemov")
474*38fd1498Szrj				   (and (eq_attr "mode" "SF,DF")
475*38fd1498Szrj                                        (eq_attr "memory" "none"))))
476*38fd1498Szrj			 "bdver1-direct,bdver1-fpsched,bdver1-ffma")
477*38fd1498Szrj(define_insn_reservation "bdver1_mmxssemov" 2
478*38fd1498Szrj			 (and (eq_attr "cpu" "bdver1,bdver2")
479*38fd1498Szrj			      (and (eq_attr "type" "mmxmov,ssemov")
480*38fd1498Szrj				   (eq_attr "memory" "none")))
481*38fd1498Szrj			 "bdver1-direct,bdver1-fpsched,bdver1-fmal")
482*38fd1498Szrj;; SSE logical and shuffle operations.
;; Types sselog, sselog1, sseshuf and sseshuf1.
;; Mode V8SF: double-path decode and bdver1-fmal (latency 7 with a load,
;; 3 otherwise).  Only V8SF is tested here; other 256-bit modes fall
;; through to the generic entries below.
483*38fd1498Szrj(define_insn_reservation "bdver1_sselog_load_256" 7
484*38fd1498Szrj			 (and (eq_attr "cpu" "bdver1,bdver2")
485*38fd1498Szrj			      (and (eq_attr "type" "sselog,sselog1,sseshuf,sseshuf1")
486*38fd1498Szrj				   (and (eq_attr "mode" "V8SF")
487*38fd1498Szrj				   (eq_attr "memory" "load"))))
488*38fd1498Szrj			 "bdver1-double,bdver1-fpload,bdver1-fmal")
489*38fd1498Szrj(define_insn_reservation "bdver1_sselog_256" 3
490*38fd1498Szrj			 (and (eq_attr "cpu" "bdver1,bdver2")
491*38fd1498Szrj			      (and (eq_attr "type" "sselog,sselog1,sseshuf,sseshuf1")
492*38fd1498Szrj                                   (eq_attr "mode" "V8SF")))
493*38fd1498Szrj			 "bdver1-double,bdver1-fpsched,bdver1-fmal")
;; Generic entries: direct decode and bdver1-fxbar (i.e. bdver1-ffma1);
;; latency 6 with a load, 2 otherwise.
494*38fd1498Szrj(define_insn_reservation "bdver1_sselog_load" 6
495*38fd1498Szrj			 (and (eq_attr "cpu" "bdver1,bdver2")
496*38fd1498Szrj			      (and (eq_attr "type" "sselog,sselog1,sseshuf,sseshuf1")
497*38fd1498Szrj				   (eq_attr "memory" "load")))
498*38fd1498Szrj			 "bdver1-direct,bdver1-fpload,bdver1-fxbar")
499*38fd1498Szrj(define_insn_reservation "bdver1_sselog" 2
500*38fd1498Szrj			 (and (eq_attr "cpu" "bdver1,bdver2")
501*38fd1498Szrj			      (eq_attr "type" "sselog,sselog1,sseshuf,sseshuf1"))
502*38fd1498Szrj			 "bdver1-direct,bdver1-fpsched,bdver1-fxbar")
503*38fd1498Szrj
504*38fd1498Szrj;; PCMP actually executes in FMAL.
;; Type "ssecmp": direct decode, finishing on bdver1-ffma; latency 6
;; with a load, 2 otherwise.
;; NOTE(review): these two entries reserve bdver1-ffma although the
;; comment above them says PCMP executes in FMAL -- confirm which unit is
;; intended.
505*38fd1498Szrj(define_insn_reservation "bdver1_ssecmp_load" 6
506*38fd1498Szrj			 (and (eq_attr "cpu" "bdver1,bdver2")
507*38fd1498Szrj			      (and (eq_attr "type" "ssecmp")
508*38fd1498Szrj				   (eq_attr "memory" "load")))
509*38fd1498Szrj			 "bdver1-direct,bdver1-fpload,bdver1-ffma")
510*38fd1498Szrj(define_insn_reservation "bdver1_ssecmp" 2
511*38fd1498Szrj			 (and (eq_attr "cpu" "bdver1,bdver2")
512*38fd1498Szrj			      (eq_attr "type" "ssecmp"))
513*38fd1498Szrj			 "bdver1-direct,bdver1-fpsched,bdver1-ffma")
;; Type "ssecomi": double-path decode, finishing on either FMA pipe or
;; on bdver1-fsto -- the same tail as bdver1_fcomi.
514*38fd1498Szrj(define_insn_reservation "bdver1_ssecomi_load" 6
515*38fd1498Szrj			 (and (eq_attr "cpu" "bdver1,bdver2")
516*38fd1498Szrj			      (and (eq_attr "type" "ssecomi")
517*38fd1498Szrj				   (eq_attr "memory" "load")))
518*38fd1498Szrj			 "bdver1-double,bdver1-fpload,(bdver1-ffma | bdver1-fsto)")
519*38fd1498Szrj(define_insn_reservation "bdver1_ssecomi" 2
520*38fd1498Szrj			 (and (eq_attr "cpu" "bdver1,bdver2")
521*38fd1498Szrj			      (eq_attr "type" "ssecomi"))
522*38fd1498Szrj			 "bdver1-double,bdver1-fpsched,(bdver1-ffma | bdver1-fsto)")
523*38fd1498Szrj
524*38fd1498Szrj;; Conversions behave very irregularly and the scheduling is critical here.
525*38fd1498Szrj;; Take each instruction separately.
526*38fd1498Szrj
527*38fd1498Szrj;; 256 bit conversion.
;; Type "ssecvt" insns where operand 0 is a register of mode V4DF, V8SF
;; or V8SI, or operand 1 is a nonimmediate operand of one of those modes.
;; Both entries use the vector decoder and finish by taking all four FP
;; pipes (bdver1-fvector).  The first is the load form (latency 8, via
;; bdver1-fpload); the second requires "memory" = "none" (latency 4, via
;; bdver1-fpsched).
528*38fd1498Szrj(define_insn_reservation "bdver1_vcvtX2Y_avx256_load" 8
529*38fd1498Szrj			 (and (eq_attr "cpu" "bdver1,bdver2")
530*38fd1498Szrj			      (and (eq_attr "type" "ssecvt")
531*38fd1498Szrj				   (and (eq_attr "memory" "load")
532*38fd1498Szrj					(ior (ior (match_operand:V4DF 0 "register_operand")
533*38fd1498Szrj					          (ior (match_operand:V8SF 0 "register_operand")
534*38fd1498Szrj						       (match_operand:V8SI 0 "register_operand")))
535*38fd1498Szrj					     (ior (match_operand:V4DF 1 "nonimmediate_operand")
536*38fd1498Szrj						  (ior (match_operand:V8SF 1 "nonimmediate_operand")
537*38fd1498Szrj						       (match_operand:V8SI 1 "nonimmediate_operand")))))))
538*38fd1498Szrj			 "bdver1-vector,bdver1-fpload,bdver1-fvector")
539*38fd1498Szrj(define_insn_reservation "bdver1_vcvtX2Y_avx256" 4
540*38fd1498Szrj			 (and (eq_attr "cpu" "bdver1,bdver2")
541*38fd1498Szrj			      (and (eq_attr "type" "ssecvt")
542*38fd1498Szrj				   (and (eq_attr "memory" "none")
543*38fd1498Szrj					(ior (ior (match_operand:V4DF 0 "register_operand")
544*38fd1498Szrj					          (ior (match_operand:V8SF 0 "register_operand")
545*38fd1498Szrj						       (match_operand:V8SI 0 "register_operand")))
546*38fd1498Szrj					     (ior (match_operand:V4DF 1 "nonimmediate_operand")
547*38fd1498Szrj						  (ior (match_operand:V8SF 1 "nonimmediate_operand")
548*38fd1498Szrj						       (match_operand:V8SI 1 "nonimmediate_operand")))))))
549*38fd1498Szrj			 "bdver1-vector,bdver1-fpsched,bdver1-fvector")
550*38fd1498Szrj;; CVTSS2SD, CVTSD2SS.
;; Load form: "ssecvt" insns in SF or DF mode, default latency 8.
;; Reserves bdver1-direct, then bdver1-fpload, then bdver1-fcvt.
(define_insn_reservation "bdver1_ssecvt_cvtss2sd_load" 8
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "mode" "SF,DF")
                 (eq_attr "memory" "load"))))
  "bdver1-direct,bdver1-fpload,bdver1-fcvt")
;; Memory-free form: "ssecvt" insns in SF or DF mode, default latency 4.
;; Reserves bdver1-direct, then bdver1-fpsched, then bdver1-fcvt.
(define_insn_reservation "bdver1_ssecvt_cvtss2sd" 4
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "mode" "SF,DF")
                 (eq_attr "memory" "none"))))
  "bdver1-direct,bdver1-fpsched,bdver1-fcvt")
563*38fd1498Szrj;; CVTSI2SD, CVTSI2SS, CVTSI2SDQ, CVTSI2SSQ.
;; Load form: "sseicvt" insns in SF or DF mode, default latency 8.
;; Reserves bdver1-direct, then bdver1-fpload, then bdver1-fcvt.
(define_insn_reservation "bdver1_sseicvt_cvtsi2sd_load" 8
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "sseicvt")
            (and (eq_attr "mode" "SF,DF")
                 (eq_attr "memory" "load"))))
  "bdver1-direct,bdver1-fpload,bdver1-fcvt")
;; Memory-free form: "sseicvt" insns in SF or DF mode, default latency 4.
;; Reserves bdver1-double, then bdver1-fpsched; the final step is either
;; "nothing" (no unit reserved) or bdver1-fcvt.
(define_insn_reservation "bdver1_sseicvt_cvtsi2sd" 4
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "sseicvt")
            (and (eq_attr "mode" "SF,DF")
                 (eq_attr "memory" "none"))))
  "bdver1-double,bdver1-fpsched,(nothing | bdver1-fcvt)")
576*38fd1498Szrj;; CVTPD2PS.
;; Load form: "ssecvt" insn with a V4SF register as operand 0 and a V2DF
;; nonimmediate operand 1, default latency 8.  Reserves bdver1-double, then
;; bdver1-fpload, then either bdver1-fxbar or bdver1-fcvt.
(define_insn_reservation "bdver1_ssecvt_cvtpd2ps_load" 8
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "memory" "load")
                 (and (match_operand:V4SF 0 "register_operand")
                      (match_operand:V2DF 1 "nonimmediate_operand")))))
  "bdver1-double,bdver1-fpload,(bdver1-fxbar | bdver1-fcvt)")
;; Memory-free form: "ssecvt" insn with a V4SF register as operand 0 and a
;; V2DF nonimmediate operand 1, default latency 4.  Reserves bdver1-double,
;; then bdver1-fpsched, then either bdver1-fxbar or bdver1-fcvt.
(define_insn_reservation "bdver1_ssecvt_cvtpd2ps" 4
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "memory" "none")
                 (and (match_operand:V4SF 0 "register_operand")
                      (match_operand:V2DF 1 "nonimmediate_operand")))))
  "bdver1-double,bdver1-fpsched,(bdver1-fxbar | bdver1-fcvt)")
591*38fd1498Szrj;; CVTPI2PS, CVTDQ2PS.
;; Load form: "ssecvt" insn with a V4SF register as operand 0 and a V2SI or
;; V4SI nonimmediate operand 1, default latency 8.  Reserves bdver1-direct,
;; then bdver1-fpload, then bdver1-fcvt.
(define_insn_reservation "bdver1_ssecvt_cvtdq2ps_load" 8
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "memory" "load")
                 (and (match_operand:V4SF 0 "register_operand")
                      (ior (match_operand:V2SI 1 "nonimmediate_operand")
                           (match_operand:V4SI 1 "nonimmediate_operand"))))))
  "bdver1-direct,bdver1-fpload,bdver1-fcvt")
;; Memory-free form: "ssecvt" insn with a V4SF register as operand 0 and a
;; V2SI or V4SI nonimmediate operand 1, default latency 4.  Reserves
;; bdver1-direct, then bdver1-fpsched, then bdver1-fcvt.
(define_insn_reservation "bdver1_ssecvt_cvtdq2ps" 4
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "memory" "none")
                 (and (match_operand:V4SF 0 "register_operand")
                      (ior (match_operand:V2SI 1 "nonimmediate_operand")
                           (match_operand:V4SI 1 "nonimmediate_operand"))))))
  "bdver1-direct,bdver1-fpsched,bdver1-fcvt")
608*38fd1498Szrj;; CVTDQ2PD.
;; Load form: "ssecvt" insn with a V2DF register as operand 0 and a V4SI
;; nonimmediate operand 1, default latency 8.  Reserves bdver1-double, then
;; bdver1-fpload, then either bdver1-fxbar or bdver1-fcvt.
(define_insn_reservation "bdver1_ssecvt_cvtdq2pd_load" 8
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "memory" "load")
                 (and (match_operand:V2DF 0 "register_operand")
                      (match_operand:V4SI 1 "nonimmediate_operand")))))
  "bdver1-double,bdver1-fpload,(bdver1-fxbar | bdver1-fcvt)")
;; Memory-free form: "ssecvt" insn with a V2DF register as operand 0 and a
;; V4SI nonimmediate operand 1, default latency 4.  Reserves bdver1-double,
;; then bdver1-fpsched, then either bdver1-fxbar or bdver1-fcvt.
(define_insn_reservation "bdver1_ssecvt_cvtdq2pd" 4
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "memory" "none")
                 (and (match_operand:V2DF 0 "register_operand")
                      (match_operand:V4SI 1 "nonimmediate_operand")))))
  "bdver1-double,bdver1-fpsched,(bdver1-fxbar | bdver1-fcvt)")
623*38fd1498Szrj;; CVTPS2PD, CVTPI2PD.
;; Load form: "ssecvt" insn with a V2DF register as operand 0 and a V2SI or
;; V4SF nonimmediate operand 1, default latency 6.  Reserves bdver1-double,
;; then bdver1-fpload, then either bdver1-fxbar or bdver1-fcvt.
(define_insn_reservation "bdver1_ssecvt_cvtps2pd_load" 6
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "memory" "load")
                 (and (match_operand:V2DF 0 "register_operand")
                      (ior (match_operand:V2SI 1 "nonimmediate_operand")
                           (match_operand:V4SF 1 "nonimmediate_operand"))))))
  "bdver1-double,bdver1-fpload,(bdver1-fxbar | bdver1-fcvt)")
;; Memory-free form: "ssecvt" insn with a V2DF register as operand 0 and a
;; V2SI or V4SF nonimmediate operand 1, default latency 2.  Reserves
;; bdver1-double, then bdver1-fpsched, then either bdver1-fxbar or
;; bdver1-fcvt.
;;
;; The "memory" test must be "none".  It previously read "load", which made
;; this condition identical to bdver1_ssecvt_cvtps2pd_load, so neither
;; reservation of the pair ever matched the register-only conversion.
(define_insn_reservation "bdver1_ssecvt_cvtps2pd" 2
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "memory" "none")
                 (and (match_operand:V2DF 0 "register_operand")
                      (ior (match_operand:V2SI 1 "nonimmediate_operand")
                           (match_operand:V4SF 1 "nonimmediate_operand"))))))
  "bdver1-double,bdver1-fpsched,(bdver1-fxbar | bdver1-fcvt)")
640*38fd1498Szrj;; CVTSD2SI, CVTSD2SIQ, CVTSS2SI, CVTSS2SIQ, CVTTSD2SI, CVTTSD2SIQ, CVTTSS2SI, CVTTSS2SIQ.
;; Load form: "sseicvt" insns in SI or DI mode, default latency 8.
;; Reserves bdver1-double, then bdver1-fpload, then either bdver1-fcvt or
;; bdver1-fsto.
(define_insn_reservation "bdver1_ssecvt_cvtsX2si_load" 8
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "sseicvt")
            (and (eq_attr "mode" "SI,DI")
                 (eq_attr "memory" "load"))))
  "bdver1-double,bdver1-fpload,(bdver1-fcvt | bdver1-fsto)")
;; Memory-free form: "sseicvt" insns in SI or DI mode, default latency 4.
;; Reserves bdver1-double, then bdver1-fpsched, then either bdver1-fcvt or
;; bdver1-fsto.
(define_insn_reservation "bdver1_ssecvt_cvtsX2si" 4
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "sseicvt")
            (and (eq_attr "mode" "SI,DI")
                 (eq_attr "memory" "none"))))
  "bdver1-double,bdver1-fpsched,(bdver1-fcvt | bdver1-fsto)")
653*38fd1498Szrj;; CVTPD2PI, CVTTPD2PI.
;; Load form: "ssecvt" insn with a V2DF nonimmediate operand 1 and a V2SI
;; register as operand 0, default latency 8.  Reserves bdver1-double, then
;; bdver1-fpload, then either bdver1-fcvt or bdver1-fxbar.
(define_insn_reservation "bdver1_ssecvt_cvtpd2pi_load" 8
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "memory" "load")
                 (and (match_operand:V2DF 1 "nonimmediate_operand")
                      (match_operand:V2SI 0 "register_operand")))))
  "bdver1-double,bdver1-fpload,(bdver1-fcvt | bdver1-fxbar)")
;; Memory-free form: "ssecvt" insn with a V2DF nonimmediate operand 1 and a
;; V2SI register as operand 0, default latency 4.  Reserves bdver1-double,
;; then bdver1-fpsched, then either bdver1-fcvt or bdver1-fxbar.
(define_insn_reservation "bdver1_ssecvt_cvtpd2pi" 4
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "memory" "none")
                 (and (match_operand:V2DF 1 "nonimmediate_operand")
                      (match_operand:V2SI 0 "register_operand")))))
  "bdver1-double,bdver1-fpsched,(bdver1-fcvt | bdver1-fxbar)")
668*38fd1498Szrj;; CVTPD2DQ, CVTTPD2DQ.
;; Load form: "ssecvt" insn with a V2DF nonimmediate operand 1 and a V4SI
;; register as operand 0, default latency 6.  Reserves bdver1-double, then
;; bdver1-fpload, then either bdver1-fcvt or bdver1-fxbar.
(define_insn_reservation "bdver1_ssecvt_cvtpd2dq_load" 6
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "memory" "load")
                 (and (match_operand:V2DF 1 "nonimmediate_operand")
                      (match_operand:V4SI 0 "register_operand")))))
  "bdver1-double,bdver1-fpload,(bdver1-fcvt | bdver1-fxbar)")
;; Memory-free form: "ssecvt" insn with a V2DF nonimmediate operand 1 and a
;; V4SI register as operand 0, default latency 2.  Reserves bdver1-double,
;; then bdver1-fpsched, then either bdver1-fcvt or bdver1-fxbar.
(define_insn_reservation "bdver1_ssecvt_cvtpd2dq" 2
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "memory" "none")
                 (and (match_operand:V2DF 1 "nonimmediate_operand")
                      (match_operand:V4SI 0 "register_operand")))))
  "bdver1-double,bdver1-fpsched,(bdver1-fcvt | bdver1-fxbar)")
683*38fd1498Szrj;; CVTPS2PI, CVTTPS2PI, CVTPS2DQ, CVTTPS2DQ.
;; Load form: "ssecvt" insn with a V4SF nonimmediate operand 1 and a V2SI or
;; V4SI register as operand 0, default latency 8.  Reserves bdver1-direct,
;; then bdver1-fpload, then bdver1-fcvt.
(define_insn_reservation "bdver1_ssecvt_cvtps2pi_load" 8
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "memory" "load")
                 (and (match_operand:V4SF 1 "nonimmediate_operand")
                      (ior (match_operand:V2SI 0 "register_operand")
                           (match_operand:V4SI 0 "register_operand"))))))
  "bdver1-direct,bdver1-fpload,bdver1-fcvt")
;; Memory-free form: "ssecvt" insn with a V4SF nonimmediate operand 1 and a
;; V2SI or V4SI register as operand 0, default latency 4.  Reserves
;; bdver1-direct, then bdver1-fpsched, then bdver1-fcvt.
(define_insn_reservation "bdver1_ssecvt_cvtps2pi" 4
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "memory" "none")
                 (and (match_operand:V4SF 1 "nonimmediate_operand")
                      (ior (match_operand:V2SI 0 "register_operand")
                           (match_operand:V4SI 0 "register_operand"))))))
  "bdver1-direct,bdver1-fpsched,bdver1-fcvt")
700*38fd1498Szrj
701*38fd1498Szrj;; SSE MUL, ADD, and MULADD.
;; Load form of "ssemul", "sseadd", "sseadd1" and "ssemuladd" insns in V8SF
;; or V4DF mode: default latency 11.  Reserves bdver1-double, then
;; bdver1-fpload, then bdver1-ffma.
(define_insn_reservation "bdver1_ssemuladd_load_256" 11
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssemul,sseadd,sseadd1,ssemuladd")
            (and (eq_attr "mode" "V8SF,V4DF")
                 (eq_attr "memory" "load"))))
  "bdver1-double,bdver1-fpload,bdver1-ffma")
;; Memory-free form of "ssemul", "sseadd", "sseadd1" and "ssemuladd" insns in
;; V8SF or V4DF mode: default latency 7.  Reserves bdver1-double, then
;; bdver1-fpsched, then bdver1-ffma.
(define_insn_reservation "bdver1_ssemuladd_256" 7
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssemul,sseadd,sseadd1,ssemuladd")
            (and (eq_attr "mode" "V8SF,V4DF")
                 (eq_attr "memory" "none"))))
  "bdver1-double,bdver1-fpsched,bdver1-ffma")
;; Load form of "ssemul", "sseadd", "sseadd1" and "ssemuladd" insns with no
;; mode test: default latency 10.  Reserves bdver1-direct, then
;; bdver1-fpload, then bdver1-ffma.
(define_insn_reservation "bdver1_ssemuladd_load" 10
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssemul,sseadd,sseadd1,ssemuladd")
            (eq_attr "memory" "load")))
  "bdver1-direct,bdver1-fpload,bdver1-ffma")
;; Memory-free form of "ssemul", "sseadd", "sseadd1" and "ssemuladd" insns
;; with no mode test: default latency 6.  Reserves bdver1-direct, then
;; bdver1-fpsched, then bdver1-ffma.
(define_insn_reservation "bdver1_ssemuladd" 6
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssemul,sseadd,sseadd1,ssemuladd")
            (eq_attr "memory" "none")))
  "bdver1-direct,bdver1-fpsched,bdver1-ffma")
;; Load form of "sseimul" insns: default latency 8.  Reserves bdver1-direct,
;; then bdver1-fpload, then bdver1-fmma.
(define_insn_reservation "bdver1_sseimul_load" 8
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "sseimul")
            (eq_attr "memory" "load")))
  "bdver1-direct,bdver1-fpload,bdver1-fmma")
;; Memory-free form of "sseimul" insns: default latency 4.  Reserves
;; bdver1-direct, then bdver1-fpsched, then bdver1-fmma.
(define_insn_reservation "bdver1_sseimul" 4
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "sseimul")
            (eq_attr "memory" "none")))
  "bdver1-direct,bdver1-fpsched,bdver1-fmma")
;; Load form of "sseiadd" insns: default latency 6.  Reserves bdver1-direct,
;; then bdver1-fpload, then bdver1-fmal.
(define_insn_reservation "bdver1_sseiadd_load" 6
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "sseiadd")
            (eq_attr "memory" "load")))
  "bdver1-direct,bdver1-fpload,bdver1-fmal")
;; Memory-free form of "sseiadd" insns: default latency 2.  Reserves
;; bdver1-direct, then bdver1-fpsched, then bdver1-fmal.
(define_insn_reservation "bdver1_sseiadd" 2
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "sseiadd")
            (eq_attr "memory" "none")))
  "bdver1-direct,bdver1-fpsched,bdver1-fmal")
744*38fd1498Szrj
745*38fd1498Szrj;; SSE DIV: no throughput information (assume same as amdfam10).
;; Load form of "ssediv" insns in V4DF mode: default latency 31.  Reserves
;; bdver1-double, then bdver1-fpload, then holds either bdver1-ffma0 or
;; bdver1-ffma1 for 17 consecutive cycles.
(define_insn_reservation "bdver1_ssediv_double_load_256" 31
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssediv")
            (and (eq_attr "mode" "V4DF")
                 (eq_attr "memory" "load"))))
  "bdver1-double,bdver1-fpload,(bdver1-ffma0*17 | bdver1-ffma1*17)")
;; Memory-free form of "ssediv" insns in V4DF mode: default latency 27.
;; Reserves bdver1-double, then bdver1-fpsched, then holds either
;; bdver1-ffma0 or bdver1-ffma1 for 17 consecutive cycles.
(define_insn_reservation "bdver1_ssediv_double_256" 27
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssediv")
            (and (eq_attr "mode" "V4DF")
                 (eq_attr "memory" "none"))))
  "bdver1-double,bdver1-fpsched,(bdver1-ffma0*17 | bdver1-ffma1*17)")
;; Load form of "ssediv" insns in V8SF mode: default latency 28.  Reserves
;; bdver1-double, then bdver1-fpload, then holds either bdver1-ffma0 or
;; bdver1-ffma1 for 17 consecutive cycles.
(define_insn_reservation "bdver1_ssediv_single_load_256" 28
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssediv")
            (and (eq_attr "mode" "V8SF")
                 (eq_attr "memory" "load"))))
  "bdver1-double,bdver1-fpload,(bdver1-ffma0*17 | bdver1-ffma1*17)")
;; Memory-free form of "ssediv" insns in V8SF mode: default latency 24.
;; Reserves bdver1-double, then bdver1-fpsched, then holds either
;; bdver1-ffma0 or bdver1-ffma1 for 17 consecutive cycles.
(define_insn_reservation "bdver1_ssediv_single_256" 24
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssediv")
            (and (eq_attr "mode" "V8SF")
                 (eq_attr "memory" "none"))))
  "bdver1-double,bdver1-fpsched,(bdver1-ffma0*17 | bdver1-ffma1*17)")
;; Load form of "ssediv" insns in DF or V2DF mode: default latency 31.
;; Reserves bdver1-direct, then bdver1-fpload, then holds either
;; bdver1-ffma0 or bdver1-ffma1 for 17 consecutive cycles.
(define_insn_reservation "bdver1_ssediv_double_load" 31
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssediv")
            (and (eq_attr "mode" "DF,V2DF")
                 (eq_attr "memory" "load"))))
  "bdver1-direct,bdver1-fpload,(bdver1-ffma0*17 | bdver1-ffma1*17)")
;; Memory-free form of "ssediv" insns in DF or V2DF mode: default latency 27.
;; Reserves bdver1-direct, then bdver1-fpsched, then holds either
;; bdver1-ffma0 or bdver1-ffma1 for 17 consecutive cycles.
(define_insn_reservation "bdver1_ssediv_double" 27
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssediv")
            (and (eq_attr "mode" "DF,V2DF")
                 (eq_attr "memory" "none"))))
  "bdver1-direct,bdver1-fpsched,(bdver1-ffma0*17 | bdver1-ffma1*17)")
;; Load form of "ssediv" insns in SF or V4SF mode: default latency 28.
;; Reserves bdver1-direct, then bdver1-fpload, then holds either
;; bdver1-ffma0 or bdver1-ffma1 for 17 consecutive cycles.
(define_insn_reservation "bdver1_ssediv_single_load" 28
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssediv")
            (and (eq_attr "mode" "SF,V4SF")
                 (eq_attr "memory" "load"))))
  "bdver1-direct,bdver1-fpload,(bdver1-ffma0*17 | bdver1-ffma1*17)")
;; Memory-free form of "ssediv" insns in SF or V4SF mode: default latency 24.
;; Reserves bdver1-direct, then bdver1-fpsched, then holds either
;; bdver1-ffma0 or bdver1-ffma1 for 17 consecutive cycles.
(define_insn_reservation "bdver1_ssediv_single" 24
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssediv")
            (and (eq_attr "mode" "SF,V4SF")
                 (eq_attr "memory" "none"))))
  "bdver1-direct,bdver1-fpsched,(bdver1-ffma0*17 | bdver1-ffma1*17)")
794*38fd1498Szrj
;; "sseins" insns in TI mode (no test on the "memory" attribute): default
;; latency 3.  Reserves bdver1-direct, then bdver1-fpsched, then
;; bdver1-fxbar.
(define_insn_reservation "bdver1_sseins" 3
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "sseins")
            (eq_attr "mode" "TI")))
  "bdver1-direct,bdver1-fpsched,bdver1-fxbar")
800*38fd1498Szrj
801