1;; Copyright (C) 2002, 2003, 2004, 2005, 2006,
2;; 2007 Free Software Foundation, Inc.
3;;
4;; This file is part of GCC.
5;;
6;; GCC is free software; you can redistribute it and/or modify
7;; it under the terms of the GNU General Public License as published by
8;; the Free Software Foundation; either version 3, or (at your option)
9;; any later version.
10;;
11;; GCC is distributed in the hope that it will be useful,
12;; but WITHOUT ANY WARRANTY; without even the implied warranty of
13;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14;; GNU General Public License for more details.
15;;
16;; You should have received a copy of the GNU General Public License
17;; along with GCC; see the file COPYING3.  If not see
18;; <http://www.gnu.org/licenses/>.
19;;
20;; AMD Athlon Scheduling
21;;
22;; The Athlon does contain three pipelined FP units, three integer units and
23;; three address generation units.
24;;
;; The predecode logic determines the boundaries of instructions in the 64
;; byte cache line.  So the cache line straddling problem of K6 might be an
;; issue here as well, but it is not noted in the documentation.
28;;
;; Three DirectPath instruction decoders and only one VectorPath decoder
;; are available.  They can decode three DirectPath instructions or one
;; VectorPath instruction per cycle.
;; Decoded macro instructions are then passed to the 72 entry instruction
;; control unit, which passes them to the specialized integer (18 entry)
;; and fp (36 entry) schedulers.
35;;
36;; The load/store queue unit is not attached to the schedulers but
37;; communicates with all the execution units separately instead.
38
;; Decode path of an insn for the Athlon/K8 reservations.  The value is
;; "vector" for the insn types listed in the first test, for a push whose
;; operand 1 is a memory operand, and for XFmode fmov insns whose "memory"
;; attribute is load or store.  Everything else defaults to "direct".
(define_attr "athlon_decode" "direct,vector,double"
  (cond [(eq_attr "type"
                  "call,imul,idiv,other,multi,fcmov,fpspc,str,pop,leave")
           (const_string "vector")
         (and (eq_attr "type" "push")
              (match_operand 1 "memory_operand" ""))
           (const_string "vector")
         (and (eq_attr "type" "fmov")
              (and (eq_attr "memory" "load,store")
                   (eq_attr "mode" "XF")))
           (const_string "vector")]
        (const_string "direct")))

;; Decode path of an insn for the AMDFAM10 reservations.  The default
;; value given here is "direct".
(define_attr "amdfam10_decode" "direct,vector,double"
  (const_string "direct"))
53;;
54;;           decode0 decode1 decode2
55;;                 \    |   /
56;;    instruction control unit (72 entry scheduler)
57;;                |                        |
58;;      integer scheduler (18)         stack map
59;;     /  |    |    |    |   \        stack rename
60;;  ieu0 agu0 ieu1 agu1 ieu2 agu2      scheduler
61;;    |  agu0  |   agu1      agu2    register file
62;;    |      \ |    |       /         |     |     |
63;;     \      /\    |     /         fadd  fmul  fstore
64;;       \  /    \  |   /           fadd  fmul  fstore
65;;       imul  load/store (2x)      fadd  fmul  fstore
66
;; Four automata are used: "athlon" (decoders and ieu0), "athlon_load",
;; "athlon_mult" and "athlon_fp".
(define_automaton "athlon,athlon_load,athlon_mult,athlon_fp")

;; Three direct-path decoders and one dedicated vector-path decoder unit.
(define_cpu_unit "athlon-decode0,athlon-decode1,athlon-decode2,athlon-decodev"
                 "athlon")

;; A double decoded instruction may take 2 cycles to decode: it can use
;; decoder2 in one cycle and decoder0 in the next.  Modeling this is needed
;; to allow a throughput of 1.5 double decoded instructions per cycle.
;;
;; To avoid a dependence between the reservation of the decoder and the
;; other units, the decoder is modeled as a two stage fully pipelined unit
;; in which only a double decoded instruction may occupy a unit in the
;; first cycle.  With this scheme alone, however, two double instructions
;; could be issued in cycle 0.
;;
;; Avoid this by using a presence set requiring decoder0 to be allocated
;; too.  Vector decoded instructions then can't be issued when modeled as
;; consuming decoder0+decoder1+decoder2, so they get a specialized vector
;; decoder unit and an exclusion set instead.
(presence_set "athlon-decode2" "athlon-decode0")
(exclusion_set "athlon-decodev"
               "athlon-decode0,athlon-decode1,athlon-decode2")

;; Decode reservations.  Except for the first alternative of
;; "athlon-double", each one starts with an idle ("nothing") cycle.
(define_reservation "athlon-vector"
  "nothing,athlon-decodev")
(define_reservation "athlon-direct0"
  "nothing,athlon-decode0")
(define_reservation "athlon-direct"
  "nothing,
				     (athlon-decode0 | athlon-decode1
				     | athlon-decode2)")
;; A double instruction behaves like two direct instructions.
(define_reservation "athlon-double"
  "((athlon-decode2, athlon-decode0)
				     | (nothing,(athlon-decode0 + athlon-decode1))
				     | (nothing,(athlon-decode1 + athlon-decode2)))")
97
;; Modeling the agu and ieu units results in extremely large automatons,
;; and in our approximation the units are hardly ever filled.  Only the ieu
;; unit can be, as the issue rate is 3 and the agu unit is always used
;; first in the insn reservations.  Skip the models.

;(define_cpu_unit "athlon-ieu0" "athlon_ieu")
;(define_cpu_unit "athlon-ieu1" "athlon_ieu")
;(define_cpu_unit "athlon-ieu2" "athlon_ieu")
;(define_reservation "athlon-ieu" "(athlon-ieu0 | athlon-ieu1 | athlon-ieu2)")
;; The generic ieu reservation therefore occupies no unit at all.
(define_reservation "athlon-ieu"
  "nothing")
;; ieu0 is still a real unit; the imul, idiv and string reservations name
;; it explicitly.
(define_cpu_unit "athlon-ieu0"
                 "athlon")
;(define_cpu_unit "athlon-agu0" "athlon_agu")
;(define_cpu_unit "athlon-agu1" "athlon_agu")
;(define_cpu_unit "athlon-agu2" "athlon_agu")
;(define_reservation "athlon-agu" "(athlon-agu0 | athlon-agu1 | athlon-agu2)")
;; Likewise the agu reservation occupies no unit.
(define_reservation "athlon-agu"
  "nothing")
114
;; The integer multiplier lives in its own automaton.
(define_cpu_unit "athlon-mult"
                 "athlon_mult")

;; Two load/store ports, shared by loads and stores.
(define_cpu_unit "athlon-load0,athlon-load1"
                 "athlon_load")

;; A load goes through the agu, then one of the two ports, then one idle
;; cycle.
(define_reservation "athlon-load"
  "athlon-agu,
				   (athlon-load0 | athlon-load1),nothing")
;; 128bit SSE instructions issue two loads at once
(define_reservation "athlon-load2"
  "athlon-agu,
				   (athlon-load0 + athlon-load1),nothing")

;; A store takes one of the two ports for a single cycle.
(define_reservation "athlon-store"
  "(athlon-load0 | athlon-load1)")
;; 128bit SSE instructions issue two stores at once
(define_reservation "athlon-store2"
  "(athlon-load0 + athlon-load1)")
128
129
;; The FP operations start to execute at stage 12 in the pipeline, while
;; integer operations start to execute at stage 9 for Athlon and 11 for K8.
;; Compensate for the Athlon difference (three idle cycles) because it
;; results in significantly smaller automata.
(define_reservation "athlon-fpsched"
  "nothing,nothing,nothing")

;; The floating point loads.  The "k8" variants currently expand to the
;; same reservations as the plain ones.
(define_reservation "athlon-fpload"
  "(athlon-fpsched + athlon-load)")
(define_reservation "athlon-fpload2"
  "(athlon-fpsched + athlon-load2)")
(define_reservation "athlon-fploadk8"
  "(athlon-fpsched + athlon-load)")
(define_reservation "athlon-fpload2k8"
  "(athlon-fpsched + athlon-load2)")
140
141
;; The three fp units are fully pipelined with latency of 3
(define_cpu_unit "athlon-fadd,athlon-fmul,athlon-fstore"
                 "athlon_fp")

;; Any one of the three FP pipes.
(define_reservation "athlon-fany"
  "(athlon-fstore | athlon-fmul | athlon-fadd)")
;; Either the adder or the multiplier pipe.
(define_reservation "athlon-faddmul"
  "(athlon-fadd | athlon-fmul)")

;; Vector operations usually consume many of the pipes; model them as
;; taking all three at once.
(define_reservation "athlon-fvector"
  "(athlon-fadd + athlon-fmul + athlon-fstore)")
151
152
;; Jump instructions are executed in the branch unit completely
;; transparently to us, hence the latency of 0.
(define_insn_reservation "athlon_branch" 0
  (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
       (eq_attr "type" "ibr"))
  "athlon-direct,athlon-ieu")

;; Calls use the vector decoder on Athlon/K8/generic64 ...
(define_insn_reservation "athlon_call" 0
  (and (eq_attr "cpu" "athlon,k8,generic64")
       (eq_attr "type" "call,callv"))
  "athlon-vector,athlon-ieu")

;; ... and the double decode reservation on AMDFAM10.
(define_insn_reservation "athlon_call_amdfam10" 0
  (and (eq_attr "cpu" "amdfam10")
       (eq_attr "type" "call,callv"))
  "athlon-double,athlon-ieu")
166
;; Latency of push operation is 3 cycles, but ESP value is available
;; earlier
(define_insn_reservation "athlon_push" 2
  (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
       (eq_attr "type" "push"))
  "athlon-direct,athlon-agu,athlon-store")

;; Pop on the original Athlon: vector path, latency 4.  This reservation
;; must match only "athlon": if it also listed k8/generic64 its condition
;; would overlap with athlon_pop_k8, and the K8-specific reservation would
;; never be selected.
(define_insn_reservation "athlon_pop" 4
  (and (eq_attr "cpu" "athlon")
       (eq_attr "type" "pop"))
  "athlon-vector,athlon-load,athlon-ieu")

;; Pop on K8/generic64: double decoded, latency 3.
(define_insn_reservation "athlon_pop_k8" 3
  (and (eq_attr "cpu" "k8,generic64")
       (eq_attr "type" "pop"))
  "athlon-double,(athlon-ieu+athlon-load)")

;; Pop on AMDFAM10: direct path, latency 3.
(define_insn_reservation "athlon_pop_amdfam10" 3
  (and (eq_attr "cpu" "amdfam10")
       (eq_attr "type" "pop"))
  "athlon-direct,(athlon-ieu+athlon-load)")

;; Leave: vector path on Athlon, double decoded on K8 and later.
(define_insn_reservation "athlon_leave" 3
  (and (eq_attr "cpu" "athlon")
       (eq_attr "type" "leave"))
  "athlon-vector,(athlon-ieu+athlon-load)")
(define_insn_reservation "athlon_leave_k8" 3
  (and (eq_attr "cpu" "k8,generic64,amdfam10")
       (eq_attr "type" "leave"))
  "athlon-double,(athlon-ieu+athlon-load)")
193
;; Lea executes in AGU unit with 2 cycles latency.
(define_insn_reservation "athlon_lea" 2
  (and (eq_attr "cpu" "athlon,k8,generic64")
       (eq_attr "type" "lea"))
  "athlon-direct,athlon-agu,nothing")

;; Lea executes in AGU unit with 1 cycle latency on AMDFAM10; the unit
;; reservation is the same as above.
(define_insn_reservation "athlon_lea_amdfam10" 1
  (and (eq_attr "cpu" "amdfam10")
       (eq_attr "type" "lea"))
  "athlon-direct,athlon-agu,nothing")
204
;; Mul executes in special multiplier unit attached to IEU0.  This is the
;; Athlon register-only form: vector decoded, latency 5, with ieu0 taken
;; both before the multiplier and again at the end.
(define_insn_reservation "athlon_imul" 5
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "imul")
            (eq_attr "memory" "none,unknown")))
  "athlon-vector,athlon-ieu0,athlon-mult,nothing,nothing,athlon-ieu0")
;; Register-only multiplies on K8, generic64 and AMDFAM10.
;;
;; The mode-specific reservations are listed before the generic
;; athlon_imul_k8 one, because the generic condition also covers the
;; DImode and (for amdfam10) HImode multiplies; a mode-specific reservation
;; placed after it would be shadowed by the generic one.
;; ??? Widening multiply is vector or double.
(define_insn_reservation "athlon_imul_k8_DI" 4
  (and (eq_attr "cpu" "k8,generic64,amdfam10")
       (and (eq_attr "type" "imul")
            (and (eq_attr "mode" "DI")
                 (eq_attr "memory" "none,unknown"))))
  "athlon-direct0,athlon-ieu0,athlon-mult,nothing,athlon-ieu0")

;; HImode multiply on AMDFAM10: vector decoded, latency 4.
(define_insn_reservation "athlon_imul_amdfam10_HI" 4
  (and (eq_attr "cpu" "amdfam10")
       (and (eq_attr "type" "imul")
            (and (eq_attr "mode" "HI")
                 (eq_attr "memory" "none,unknown"))))
  "athlon-vector,athlon-ieu0,athlon-mult,nothing,athlon-ieu0")

;; Every remaining register-only multiply: direct path through decoder 0,
;; latency 3.
(define_insn_reservation "athlon_imul_k8" 3
  (and (eq_attr "cpu" "k8,generic64,amdfam10")
       (and (eq_attr "type" "imul")
            (eq_attr "memory" "none,unknown")))
  "athlon-direct0,athlon-ieu0,athlon-mult,athlon-ieu0")
;; Multiplies with a memory source ("load" or "both").  All of them are
;; vector decoded and add a load in front of the multiplier.
(define_insn_reservation "athlon_imul_mem" 8
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "imul")
            (eq_attr "memory" "load,both")))
  "athlon-vector,athlon-load,athlon-ieu,athlon-mult,nothing,nothing,athlon-ieu")

;; DImode variant for K8 and later, latency 7.
(define_insn_reservation "athlon_imul_mem_k8_DI" 7
  (and (eq_attr "cpu" "k8,generic64,amdfam10")
       (and (eq_attr "type" "imul")
            (and (eq_attr "mode" "DI")
                 (eq_attr "memory" "load,both"))))
  "athlon-vector,athlon-load,athlon-ieu,athlon-mult,nothing,athlon-ieu")

;; Remaining modes for K8 and later, latency 6.
(define_insn_reservation "athlon_imul_mem_k8" 6
  (and (eq_attr "cpu" "k8,generic64,amdfam10")
       (and (eq_attr "type" "imul")
            (eq_attr "memory" "load,both")))
  "athlon-vector,athlon-load,athlon-ieu,athlon-mult,athlon-ieu")
245
;; Idiv cannot execute in parallel with other instructions.  Treating it
;; as a short latency vector instruction is a good approximation that keeps
;; the scheduler from trying too hard to hide its latency by overlapping it
;; with other instructions.
;; ??? Experiments show that the idiv can overlap with roughly 6 cycles
;; of the other code
;; Using the same heuristics for amdfam10 as K8 with idiv

(define_insn_reservation "athlon_idiv" 6
  (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
       (and (eq_attr "type" "idiv")
            (eq_attr "memory" "none,unknown")))
  "athlon-vector,(athlon-ieu0*6+(athlon-fpsched,athlon-fvector))")

;; Same with a memory operand: a load precedes the six ieu0 cycles.
(define_insn_reservation "athlon_idiv_mem" 9
  (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
       (and (eq_attr "type" "idiv")
            (eq_attr "memory" "load,both")))
  "athlon-vector,((athlon-load,athlon-ieu0*6)+(athlon-fpsched,athlon-fvector))")

;; The parallelism of string instructions is not documented.  Model it same way
;; as idiv to create smaller automata.  This probably does not matter much.
;; Using the same heuristics for amdfam10 as K8 with idiv
(define_insn_reservation "athlon_str" 6
  (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
       (and (eq_attr "type" "str")
            (eq_attr "memory" "load,both,store")))
  "athlon-vector,athlon-load,athlon-ieu0*6")
272
;; Generic integer insns without a memory operand.  The Athlon/K8 entries
;; test "athlon_decode", the AMDFAM10 entries test "amdfam10_decode".

;; Direct path: latency 1.
(define_insn_reservation "athlon_idirect" 1
  (and (eq_attr "cpu" "athlon,k8,generic64")
       (and (eq_attr "athlon_decode" "direct")
            (and (eq_attr "unit" "integer,unknown")
                 (eq_attr "memory" "none,unknown"))))
  "athlon-direct,athlon-ieu")
(define_insn_reservation "athlon_idirect_amdfam10" 1
  (and (eq_attr "cpu" "amdfam10")
       (and (eq_attr "amdfam10_decode" "direct")
            (and (eq_attr "unit" "integer,unknown")
                 (eq_attr "memory" "none,unknown"))))
  "athlon-direct,athlon-ieu")

;; Vector path: latency 2.
(define_insn_reservation "athlon_ivector" 2
  (and (eq_attr "cpu" "athlon,k8,generic64")
       (and (eq_attr "athlon_decode" "vector")
            (and (eq_attr "unit" "integer,unknown")
                 (eq_attr "memory" "none,unknown"))))
  "athlon-vector,athlon-ieu,athlon-ieu")
(define_insn_reservation "athlon_ivector_amdfam10" 2
  (and (eq_attr "cpu" "amdfam10")
       (and (eq_attr "amdfam10_decode" "vector")
            (and (eq_attr "unit" "integer,unknown")
                 (eq_attr "memory" "none,unknown"))))
  "athlon-vector,athlon-ieu,athlon-ieu")
297
;; Integer insns with a memory source.

;; A plain integer move from memory: latency 3, no ieu stage.
(define_insn_reservation "athlon_idirect_loadmov" 3
  (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
       (and (eq_attr "type" "imov")
            (eq_attr "memory" "load")))
  "athlon-direct,athlon-load")

;; Direct path load-op: latency 4.
(define_insn_reservation "athlon_idirect_load" 4
  (and (eq_attr "cpu" "athlon,k8,generic64")
       (and (eq_attr "athlon_decode" "direct")
            (and (eq_attr "unit" "integer,unknown")
                 (eq_attr "memory" "load"))))
  "athlon-direct,athlon-load,athlon-ieu")
(define_insn_reservation "athlon_idirect_load_amdfam10" 4
  (and (eq_attr "cpu" "amdfam10")
       (and (eq_attr "amdfam10_decode" "direct")
            (and (eq_attr "unit" "integer,unknown")
                 (eq_attr "memory" "load"))))
  "athlon-direct,athlon-load,athlon-ieu")

;; Vector path load-op: latency 6.
(define_insn_reservation "athlon_ivector_load" 6
  (and (eq_attr "cpu" "athlon,k8,generic64")
       (and (eq_attr "athlon_decode" "vector")
            (and (eq_attr "unit" "integer,unknown")
                 (eq_attr "memory" "load"))))
  "athlon-vector,athlon-load,athlon-ieu,athlon-ieu")
(define_insn_reservation "athlon_ivector_load_amdfam10" 6
  (and (eq_attr "cpu" "amdfam10")
       (and (eq_attr "amdfam10_decode" "vector")
            (and (eq_attr "unit" "integer,unknown")
                 (eq_attr "memory" "load"))))
  "athlon-vector,athlon-load,athlon-ieu,athlon-ieu")
328
;; A plain integer move to memory: direct path, latency 1, agu followed by
;; a store port.
(define_insn_reservation "athlon_idirect_movstore" 1
  (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
       (and (eq_attr "type" "imov")
            (eq_attr "memory" "store")))
  "athlon-direct,athlon-agu,athlon-store")
334
;; Integer insns that both load from and store to memory.

;; Direct path: latency 4.
(define_insn_reservation "athlon_idirect_both" 4
  (and (eq_attr "cpu" "athlon,k8,generic64")
       (and (eq_attr "athlon_decode" "direct")
            (and (eq_attr "unit" "integer,unknown")
                 (eq_attr "memory" "both"))))
  "athlon-direct,athlon-load,
			  athlon-ieu,athlon-store,
			  athlon-store")
(define_insn_reservation "athlon_idirect_both_amdfam10" 4
  (and (eq_attr "cpu" "amdfam10")
       (and (eq_attr "amdfam10_decode" "direct")
            (and (eq_attr "unit" "integer,unknown")
                 (eq_attr "memory" "both"))))
  "athlon-direct,athlon-load,
			  athlon-ieu,athlon-store,
			  athlon-store")

;; Vector path: latency 6.
(define_insn_reservation "athlon_ivector_both" 6
  (and (eq_attr "cpu" "athlon,k8,generic64")
       (and (eq_attr "athlon_decode" "vector")
            (and (eq_attr "unit" "integer,unknown")
                 (eq_attr "memory" "both"))))
  "athlon-vector,athlon-load,
			  athlon-ieu,
			  athlon-ieu,
			  athlon-store")
(define_insn_reservation "athlon_ivector_both_amdfam10" 6
  (and (eq_attr "cpu" "amdfam10")
       (and (eq_attr "amdfam10_decode" "vector")
            (and (eq_attr "unit" "integer,unknown")
                 (eq_attr "memory" "both"))))
  "athlon-vector,athlon-load,
			  athlon-ieu,
			  athlon-ieu,
			  athlon-store")
370
;; Integer insns with a memory destination only.

;; Direct path: latency 1.
(define_insn_reservation "athlon_idirect_store" 1
  (and (eq_attr "cpu" "athlon,k8,generic64")
       (and (eq_attr "athlon_decode" "direct")
            (and (eq_attr "unit" "integer,unknown")
                 (eq_attr "memory" "store"))))
  "athlon-direct,(athlon-ieu+athlon-agu),
			  athlon-store")
(define_insn_reservation "athlon_idirect_store_amdfam10" 1
  (and (eq_attr "cpu" "amdfam10")
       (and (eq_attr "amdfam10_decode" "direct")
            (and (eq_attr "unit" "integer,unknown")
                 (eq_attr "memory" "store"))))
  "athlon-direct,(athlon-ieu+athlon-agu),
			  athlon-store")

;; Vector path: latency 2.
(define_insn_reservation "athlon_ivector_store" 2
  (and (eq_attr "cpu" "athlon,k8,generic64")
       (and (eq_attr "athlon_decode" "vector")
            (and (eq_attr "unit" "integer,unknown")
                 (eq_attr "memory" "store"))))
  "athlon-vector,(athlon-ieu+athlon-agu),athlon-ieu,
			  athlon-store")
(define_insn_reservation "athlon_ivector_store_amdfam10" 2
  (and (eq_attr "cpu" "amdfam10")
       (and (eq_attr "amdfam10_decode" "vector")
            (and (eq_attr "unit" "integer,unknown")
                 (eq_attr "memory" "store"))))
  "athlon-vector,(athlon-ieu+athlon-agu),athlon-ieu,
			  athlon-store")
400
;; Athlon floating point unit

;; XFmode loads are vector decoded and hold all FP pipes for 9 cycles.
(define_insn_reservation "athlon_fldxf" 12
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "fmov")
            (and (eq_attr "memory" "load")
                 (eq_attr "mode" "XF"))))
  "athlon-vector,athlon-fpload2,athlon-fvector*9")
(define_insn_reservation "athlon_fldxf_k8" 13
  (and (eq_attr "cpu" "k8,generic64,amdfam10")
       (and (eq_attr "type" "fmov")
            (and (eq_attr "memory" "load")
                 (eq_attr "mode" "XF"))))
  "athlon-vector,athlon-fpload2k8,athlon-fvector*9")

;; Assume superforwarding to take place so effective latency of fany op is 0.
(define_insn_reservation "athlon_fld" 0
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "fmov")
            (eq_attr "memory" "load")))
  "athlon-direct,athlon-fpload,athlon-fany")
;; On K8 and later the load is given latency 2 and uses the fstore pipe.
(define_insn_reservation "athlon_fld_k8" 2
  (and (eq_attr "cpu" "k8,generic64,amdfam10")
       (and (eq_attr "type" "fmov")
            (eq_attr "memory" "load")))
  "athlon-direct,athlon-fploadk8,athlon-fstore")
;; x87 stores.  XFmode stores are vector decoded and take both store ports
;; together with all FP pipes.
(define_insn_reservation "athlon_fstxf" 10
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "fmov")
            (and (eq_attr "memory" "store,both")
                 (eq_attr "mode" "XF"))))
  "athlon-vector,(athlon-fpsched+athlon-agu),(athlon-store2+(athlon-fvector*7))")
(define_insn_reservation "athlon_fstxf_k8" 8
  (and (eq_attr "cpu" "k8,generic64,amdfam10")
       (and (eq_attr "type" "fmov")
            (and (eq_attr "memory" "store,both")
                 (eq_attr "mode" "XF"))))
  "athlon-vector,(athlon-fpsched+athlon-agu),(athlon-store2+(athlon-fvector*6))")

;; Remaining fmov stores: direct path, fstore pipe plus one store port.
(define_insn_reservation "athlon_fst" 4
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "fmov")
            (eq_attr "memory" "store,both")))
  "athlon-direct,(athlon-fpsched+athlon-agu),(athlon-fstore+athlon-store)")
(define_insn_reservation "athlon_fst_k8" 2
  (and (eq_attr "cpu" "k8,generic64,amdfam10")
       (and (eq_attr "type" "fmov")
            (eq_attr "memory" "store,both")))
  "athlon-direct,(athlon-fpsched+athlon-agu),(athlon-fstore+athlon-store)")

;; fistp/fisttp use the same units as an ordinary FP store.
(define_insn_reservation "athlon_fist" 4
  (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
       (eq_attr "type" "fistp,fisttp"))
  "athlon-direct,(athlon-fpsched+athlon-agu),(athlon-fstore+athlon-store)")

;; Any fmov not matched above.
(define_insn_reservation "athlon_fmov" 2
  (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
       (eq_attr "type" "fmov"))
  "athlon-direct,athlon-fpsched,athlon-faddmul")
;; FP add class ("fop"): fadd pipe.  Forms with a memory source come
;; first, then the catch-all for every CPU.
(define_insn_reservation "athlon_fadd_load" 4
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "fop")
            (eq_attr "memory" "load")))
  "athlon-direct,athlon-fpload,athlon-fadd")
(define_insn_reservation "athlon_fadd_load_k8" 6
  (and (eq_attr "cpu" "k8,generic64,amdfam10")
       (and (eq_attr "type" "fop")
            (eq_attr "memory" "load")))
  "athlon-direct,athlon-fploadk8,athlon-fadd")
(define_insn_reservation "athlon_fadd" 4
  (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
       (eq_attr "type" "fop"))
  "athlon-direct,athlon-fpsched,athlon-fadd")

;; FP multiply: fmul pipe, same latencies as the add class.
(define_insn_reservation "athlon_fmul_load" 4
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "fmul")
            (eq_attr "memory" "load")))
  "athlon-direct,athlon-fpload,athlon-fmul")
(define_insn_reservation "athlon_fmul_load_k8" 6
  (and (eq_attr "cpu" "k8,generic64,amdfam10")
       (and (eq_attr "type" "fmul")
            (eq_attr "memory" "load")))
  "athlon-direct,athlon-fploadk8,athlon-fmul")
(define_insn_reservation "athlon_fmul" 4
  (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
       (eq_attr "type" "fmul"))
  "athlon-direct,athlon-fpsched,athlon-fmul")

;; Sign operations ("fsgn") go to the fmul pipe with latency 2.
(define_insn_reservation "athlon_fsgn" 2
  (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
       (eq_attr "type" "fsgn"))
  "athlon-direct,athlon-fpsched,athlon-fmul")
;; FP divide: issued to the fmul pipe.  Only the latency differs between
;; Athlon (24) and K8 and later (13 with a load, 11 without).
(define_insn_reservation "athlon_fdiv_load" 24
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "fdiv")
            (eq_attr "memory" "load")))
  "athlon-direct,athlon-fpload,athlon-fmul")
(define_insn_reservation "athlon_fdiv_load_k8" 13
  (and (eq_attr "cpu" "k8,generic64,amdfam10")
       (and (eq_attr "type" "fdiv")
            (eq_attr "memory" "load")))
  "athlon-direct,athlon-fploadk8,athlon-fmul")
(define_insn_reservation "athlon_fdiv" 24
  (and (eq_attr "cpu" "athlon")
       (eq_attr "type" "fdiv"))
  "athlon-direct,athlon-fpsched,athlon-fmul")
(define_insn_reservation "athlon_fdiv_k8" 11
  (and (eq_attr "cpu" "k8,generic64,amdfam10")
       (eq_attr "type" "fdiv"))
  "athlon-direct,athlon-fpsched,athlon-fmul")
;; "fpspc" insns: vector decoded, all FP pipes, very long latency.
(define_insn_reservation "athlon_fpspc_load" 103
  (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
       (and (eq_attr "type" "fpspc")
            (eq_attr "memory" "load")))
  "athlon-vector,athlon-fpload,athlon-fvector")
(define_insn_reservation "athlon_fpspc" 100
  (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
       (eq_attr "type" "fpspc"))
  "athlon-vector,athlon-fpsched,athlon-fvector")

;; fcmov on Athlon: latency 7 with or without a load.
(define_insn_reservation "athlon_fcmov_load" 7
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "fcmov")
            (eq_attr "memory" "load")))
  "athlon-vector,athlon-fpload,athlon-fvector")
(define_insn_reservation "athlon_fcmov" 7
  (and (eq_attr "cpu" "athlon")
       (eq_attr "type" "fcmov"))
  "athlon-vector,athlon-fpsched,athlon-fvector")

;; fcmov on K8 and later: latency 17 with a load, 15 without.
(define_insn_reservation "athlon_fcmov_load_k8" 17
  (and (eq_attr "cpu" "k8,generic64,amdfam10")
       (and (eq_attr "type" "fcmov")
            (eq_attr "memory" "load")))
  "athlon-vector,athlon-fploadk8,athlon-fvector")
(define_insn_reservation "athlon_fcmov_k8" 15
  (and (eq_attr "cpu" "k8,generic64,amdfam10")
       (eq_attr "type" "fcmov"))
  "athlon-vector,athlon-fpsched,athlon-fvector")
;; fcomi is vector decoded but uses only one pipe.
(define_insn_reservation "athlon_fcomi_load" 3
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "fcmp")
            (and (eq_attr "athlon_decode" "vector")
                 (eq_attr "memory" "load"))))
  "athlon-vector,athlon-fpload,athlon-fadd")
(define_insn_reservation "athlon_fcomi_load_k8" 5
  (and (eq_attr "cpu" "k8,generic64,amdfam10")
       (and (eq_attr "type" "fcmp")
            (and (eq_attr "athlon_decode" "vector")
                 (eq_attr "memory" "load"))))
  "athlon-vector,athlon-fploadk8,athlon-fadd")
(define_insn_reservation "athlon_fcomi" 3
  (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
       (and (eq_attr "athlon_decode" "vector")
            (eq_attr "type" "fcmp")))
  "athlon-vector,athlon-fpsched,athlon-fadd")

;; The remaining (direct path) FP compares.
(define_insn_reservation "athlon_fcom_load" 2
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "fcmp")
            (eq_attr "memory" "load")))
  "athlon-direct,athlon-fpload,athlon-fadd")
(define_insn_reservation "athlon_fcom_load_k8" 4
  (and (eq_attr "cpu" "k8,generic64,amdfam10")
       (and (eq_attr "type" "fcmp")
            (eq_attr "memory" "load")))
  "athlon-direct,athlon-fploadk8,athlon-fadd")
(define_insn_reservation "athlon_fcom" 2
  (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
       (eq_attr "type" "fcmp"))
  "athlon-direct,athlon-fpsched,athlon-fadd")

;; Never seen by the scheduler because we still don't do post reg-stack
;; scheduling.
;(define_insn_reservation "athlon_fxch" 2
;			 (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
;			      (eq_attr "type" "fxch"))
;			 "athlon-direct,athlon-fpsched,athlon-fany")
571
;; Athlon handles MMX operations in the FPU unit with shorter latencies

;; ssemov whose operand 1 is a DFmode memory operand.
(define_insn_reservation "athlon_movlpd_load" 0
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "ssemov")
            (match_operand:DF 1 "memory_operand" "")))
  "athlon-direct,athlon-fpload,athlon-fany")
(define_insn_reservation "athlon_movlpd_load_k8" 2
  (and (eq_attr "cpu" "k8")
       (and (eq_attr "type" "ssemov")
            (match_operand:DF 1 "memory_operand" "")))
  "athlon-direct,athlon-fploadk8,athlon-fstore")
(define_insn_reservation "athlon_movsd_load_generic64" 2
  (and (eq_attr "cpu" "generic64")
       (and (eq_attr "type" "ssemov")
            (match_operand:DF 1 "memory_operand" "")))
  "athlon-double,athlon-fploadk8,(athlon-fstore+athlon-fmul)")

;; 128-bit (V4SF/V2DF/TI) ssemov loads.
(define_insn_reservation "athlon_movaps_load_k8" 2
  (and (eq_attr "cpu" "k8,generic64")
       (and (eq_attr "type" "ssemov")
            (and (eq_attr "mode" "V4SF,V2DF,TI")
                 (eq_attr "memory" "load"))))
  "athlon-double,athlon-fpload2k8,athlon-fstore,athlon-fstore")
(define_insn_reservation "athlon_movaps_load" 0
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "ssemov")
            (and (eq_attr "mode" "V4SF,V2DF,TI")
                 (eq_attr "memory" "load"))))
  "athlon-vector,athlon-fpload2,(athlon-fany+athlon-fany)")

;; SFmode/DImode ssemov loads.
(define_insn_reservation "athlon_movss_load" 1
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "ssemov")
            (and (eq_attr "mode" "SF,DI")
                 (eq_attr "memory" "load"))))
  "athlon-vector,athlon-fpload,(athlon-fany*2)")
(define_insn_reservation "athlon_movss_load_k8" 1
  (and (eq_attr "cpu" "k8,generic64")
       (and (eq_attr "type" "ssemov")
            (and (eq_attr "mode" "SF,DI")
                 (eq_attr "memory" "load"))))
  "athlon-double,athlon-fploadk8,(athlon-fstore+athlon-fany)")

;; Every other mmxmov/ssemov load.
(define_insn_reservation "athlon_mmxsseld" 0
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "mmxmov,ssemov")
            (eq_attr "memory" "load")))
  "athlon-direct,athlon-fpload,athlon-fany")
(define_insn_reservation "athlon_mmxsseld_k8" 2
  (and (eq_attr "cpu" "k8,generic64")
       (and (eq_attr "type" "mmxmov,ssemov")
            (eq_attr "memory" "load")))
  "athlon-direct,athlon-fploadk8,athlon-fstore")
;; On AMDFAM10 all double, single and integer packed and scalar SSEx data
;; loads generated are direct path, have a latency of 2 and do not use any
;; FP execution units.  There are no separate entries for movlpx/movhpx
;; loads, which are direct path, latency of 4 and use the FADD/FMUL FP
;; execution units, as they will not be generated.
(define_insn_reservation "athlon_sseld_amdfam10" 2
  (and (eq_attr "cpu" "amdfam10")
       (and (eq_attr "type" "ssemov")
            (eq_attr "memory" "load")))
  "athlon-direct,athlon-fploadk8")

;; On AMDFAM10 the MMX data loads generated are direct path, have a
;; latency of 4 and can use any FP execution unit.
(define_insn_reservation "athlon_mmxld_amdfam10" 4
  (and (eq_attr "cpu" "amdfam10")
       (and (eq_attr "type" "mmxmov")
            (eq_attr "memory" "load")))
  "athlon-direct,athlon-fploadk8, athlon-fany")
;; 128-bit (V4SF/V2DF/TI) mmxmov/ssemov stores.
;;
;; The vector decoded reservation is for the original Athlon and the
;; double decoded one for K8/generic64, mirroring athlon_movaps versus
;; athlon_movaps_k8.  The two conditions must not name the same CPUs: with
;; identical conditions only one of the two reservations could ever be
;; selected, and Athlon would fall through to athlon_mmxssest_short.
(define_insn_reservation "athlon_mmxssest" 3
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "mmxmov,ssemov")
            (and (eq_attr "mode" "V4SF,V2DF,TI")
                 (eq_attr "memory" "store,both"))))
  "athlon-vector,(athlon-fpsched+athlon-agu),((athlon-fstore+athlon-store2)*2)")
(define_insn_reservation "athlon_mmxssest_k8" 3
  (and (eq_attr "cpu" "k8,generic64")
       (and (eq_attr "type" "mmxmov,ssemov")
            (and (eq_attr "mode" "V4SF,V2DF,TI")
                 (eq_attr "memory" "store,both"))))
  "athlon-double,(athlon-fpsched+athlon-agu),((athlon-fstore+athlon-store2)*2)")

;; Every other mmxmov/ssemov store: direct path, latency 2.
(define_insn_reservation "athlon_mmxssest_short" 2
  (and (eq_attr "cpu" "athlon,k8,generic64")
       (and (eq_attr "type" "mmxmov,ssemov")
            (eq_attr "memory" "store,both")))
  "athlon-direct,(athlon-fpsched+athlon-agu),(athlon-fstore+athlon-store)")
;; On AMDFAM10 all double, single and integer packed SSEx data stores
;; generated are double path, have a latency of 2 and use the FSTORE FP
;; execution unit.  There are no separate entries for movupx/movdqu, which
;; are vector path, latency of 3 and use the FSTORE*2 FP execution unit,
;; as they will not be generated.
(define_insn_reservation "athlon_ssest_amdfam10" 2
  (and (eq_attr "cpu" "amdfam10")
       (and (eq_attr "type" "ssemov")
            (and (eq_attr "mode" "V4SF,V2DF,TI")
                 (eq_attr "memory" "store,both"))))
  "athlon-double,(athlon-fpsched+athlon-agu),((athlon-fstore+athlon-store)*2)")

;; On AMDFAM10 all double, single and integer scalar SSEx and MMX data
;; stores generated are direct path, have a latency of 2 and use the
;; FSTORE FP execution unit.
(define_insn_reservation "athlon_mmxssest_short_amdfam10" 2
  (and (eq_attr "cpu" "amdfam10")
       (and (eq_attr "type" "mmxmov,ssemov")
            (eq_attr "memory" "store,both")))
  "athlon-direct,(athlon-fpsched+athlon-agu),(athlon-fstore+athlon-store)")
;; 128-bit (V4SF/V2DF/TI) ssemov not matched by a load/store reservation.
;; K8/generic64 decode it as a double and may use the two fadd/fmul slots
;; either in the same cycle or in consecutive cycles; Athlon decodes it as
;; a vector insn and needs both slots at once.
(define_insn_reservation "athlon_movaps_k8" 2
  (and (eq_attr "cpu" "k8,generic64")
       (and (eq_attr "type" "ssemov")
            (eq_attr "mode" "V4SF,V2DF,TI")))
  "athlon-double,athlon-fpsched,((athlon-faddmul+athlon-faddmul) | (athlon-faddmul, athlon-faddmul))")
(define_insn_reservation "athlon_movaps" 2
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "ssemov")
            (eq_attr "mode" "V4SF,V2DF,TI")))
  "athlon-vector,athlon-fpsched,(athlon-faddmul+athlon-faddmul)")

;; Any remaining mmxmov/ssemov: direct path, one fadd/fmul slot.
(define_insn_reservation "athlon_mmxssemov" 2
  (and (eq_attr "cpu" "athlon,k8,generic64")
       (eq_attr "type" "mmxmov,ssemov"))
  "athlon-direct,athlon-fpsched,athlon-faddmul")
;; MMX multiply with a memory load on athlon, k8 and generic64.
(define_insn_reservation "athlon_mmxmul_load" 4
  (and (eq_attr "cpu" "athlon,k8,generic64")
       (and (eq_attr "type" "mmxmul")
            (eq_attr "memory" "load")))
  "athlon-direct,athlon-fpload,athlon-fmul")

;; MMX multiply with no test of the memory attribute.
(define_insn_reservation "athlon_mmxmul" 3
  (and (eq_attr "cpu" "athlon,k8,generic64")
       (eq_attr "type" "mmxmul"))
  "athlon-direct,athlon-fpsched,athlon-fmul")

;; Any insn whose "unit" attribute is "mmx" and which loads from
;; memory.
(define_insn_reservation "athlon_mmx_load" 3
  (and (eq_attr "cpu" "athlon,k8,generic64")
       (and (eq_attr "unit" "mmx")
            (eq_attr "memory" "load")))
  "athlon-direct,athlon-fpload,athlon-faddmul")

;; Any insn whose "unit" attribute is "mmx", with no test of the
;; memory attribute.
(define_insn_reservation "athlon_mmx" 2
  (and (eq_attr "cpu" "athlon,k8,generic64")
       (eq_attr "unit" "mmx"))
  "athlon-direct,athlon-fpsched,athlon-faddmul")
;; SSE operations are handled by the i387 unit as well.  For scalar
;; operations the latency is the same as for the i387 operations.

;; SSE logical operations (types sselog and sselog1) with a memory
;; load, one reservation per CPU group.
(define_insn_reservation "athlon_sselog_load" 3
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "sselog,sselog1")
            (eq_attr "memory" "load")))
  "athlon-vector,athlon-fpload2,(athlon-fmul*2)")

(define_insn_reservation "athlon_sselog_load_k8" 5
  (and (eq_attr "cpu" "k8,generic64")
       (and (eq_attr "type" "sselog,sselog1")
            (eq_attr "memory" "load")))
  "athlon-double,athlon-fpload2k8,(athlon-fmul*2)")

(define_insn_reservation "athlon_sselog_load_amdfam10" 4
  (and (eq_attr "cpu" "amdfam10")
       (and (eq_attr "type" "sselog,sselog1")
            (eq_attr "memory" "load")))
  "athlon-direct,athlon-fploadk8,(athlon-fadd|athlon-fmul)")

;; SSE logical operations with no test of the memory attribute.
(define_insn_reservation "athlon_sselog" 3
  (and (eq_attr "cpu" "athlon")
       (eq_attr "type" "sselog,sselog1"))
  "athlon-vector,athlon-fpsched,athlon-fmul*2")

(define_insn_reservation "athlon_sselog_k8" 3
  (and (eq_attr "cpu" "k8,generic64")
       (eq_attr "type" "sselog,sselog1"))
  "athlon-double,athlon-fpsched,athlon-fmul")

(define_insn_reservation "athlon_sselog_amdfam10" 2
  (and (eq_attr "cpu" "amdfam10")
       (eq_attr "type" "sselog,sselog1"))
  "athlon-direct,athlon-fpsched,(athlon-fadd|athlon-fmul)")
738
;; ??? pcmp executes in addmul; it is probably not worthwhile to bother
;; about that.

;; SSE compares restricted by mode.  Note that the athlon load form
;; lists modes SF,DF,DI while the other two also list TI.
(define_insn_reservation "athlon_ssecmp_load" 2
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "ssecmp")
            (and (eq_attr "mode" "SF,DF,DI")
                 (eq_attr "memory" "load"))))
  "athlon-direct,athlon-fpload,athlon-fadd")

(define_insn_reservation "athlon_ssecmp_load_k8" 4
  (and (eq_attr "cpu" "k8,generic64,amdfam10")
       (and (eq_attr "type" "ssecmp")
            (and (eq_attr "mode" "SF,DF,DI,TI")
                 (eq_attr "memory" "load"))))
  "athlon-direct,athlon-fploadk8,athlon-fadd")

(define_insn_reservation "athlon_ssecmp" 2
  (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
       (and (eq_attr "type" "ssecmp")
            (eq_attr "mode" "SF,DF,DI,TI")))
  "athlon-direct,athlon-fpsched,athlon-fadd")

;; SSE compares with a memory load and no test of the mode.
(define_insn_reservation "athlon_ssecmpvector_load" 3
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "ssecmp")
            (eq_attr "memory" "load")))
  "athlon-vector,athlon-fpload2,(athlon-fadd*2)")

(define_insn_reservation "athlon_ssecmpvector_load_k8" 5
  (and (eq_attr "cpu" "k8,generic64")
       (and (eq_attr "type" "ssecmp")
            (eq_attr "memory" "load")))
  "athlon-double,athlon-fpload2k8,(athlon-fadd*2)")

(define_insn_reservation "athlon_ssecmpvector_load_amdfam10" 4
  (and (eq_attr "cpu" "amdfam10")
       (and (eq_attr "type" "ssecmp")
            (eq_attr "memory" "load")))
  "athlon-direct,athlon-fploadk8,athlon-fadd")

;; SSE compares with no test of mode or memory.
(define_insn_reservation "athlon_ssecmpvector" 3
  (and (eq_attr "cpu" "athlon")
       (eq_attr "type" "ssecmp"))
  "athlon-vector,athlon-fpsched,(athlon-fadd*2)")

(define_insn_reservation "athlon_ssecmpvector_k8" 3
  (and (eq_attr "cpu" "k8,generic64")
       (eq_attr "type" "ssecmp"))
  "athlon-double,athlon-fpsched,(athlon-fadd*2)")

(define_insn_reservation "athlon_ssecmpvector_amdfam10" 2
  (and (eq_attr "cpu" "amdfam10")
       (eq_attr "type" "ssecmp"))
  "athlon-direct,athlon-fpsched,athlon-fadd")
;; SSE comi-type compares (type ssecomi) with a memory load, one
;; reservation per CPU group.
(define_insn_reservation "athlon_ssecomi_load" 4
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "ssecomi")
            (eq_attr "memory" "load")))
  "athlon-vector,athlon-fpload,athlon-fadd")

(define_insn_reservation "athlon_ssecomi_load_k8" 6
  (and (eq_attr "cpu" "k8,generic64")
       (and (eq_attr "type" "ssecomi")
            (eq_attr "memory" "load")))
  "athlon-vector,athlon-fploadk8,athlon-fadd")

(define_insn_reservation "athlon_ssecomi_load_amdfam10" 5
  (and (eq_attr "cpu" "amdfam10")
       (and (eq_attr "type" "ssecomi")
            (eq_attr "memory" "load")))
  "athlon-direct,athlon-fploadk8,athlon-fadd")

;; The same insn type with no test of the memory attribute.
(define_insn_reservation "athlon_ssecomi" 4
  (and (eq_attr "cpu" "athlon,k8,generic64")
       (eq_attr "type" "ssecomi"))
  "athlon-vector,athlon-fpsched,athlon-fadd")

;; NOTE(review): a comment here used to say that athlon_ssecomi has a
;; bug in its "type" attribute which was fixed for amdfam10.  Both
;; reservations test "ssecomi", so that remark looks stale -- confirm.
(define_insn_reservation "athlon_ssecomi_amdfam10" 3
  (and (eq_attr "cpu" "amdfam10")
       (eq_attr "type" "ssecomi"))
  "athlon-direct,athlon-fpsched,athlon-fadd")
;; SSE adds restricted to modes SF, DF and DI.
(define_insn_reservation "athlon_sseadd_load" 4
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "sseadd")
            (and (eq_attr "mode" "SF,DF,DI")
                 (eq_attr "memory" "load"))))
  "athlon-direct,athlon-fpload,athlon-fadd")

(define_insn_reservation "athlon_sseadd_load_k8" 6
  (and (eq_attr "cpu" "k8,generic64,amdfam10")
       (and (eq_attr "type" "sseadd")
            (and (eq_attr "mode" "SF,DF,DI")
                 (eq_attr "memory" "load"))))
  "athlon-direct,athlon-fploadk8,athlon-fadd")

(define_insn_reservation "athlon_sseadd" 4
  (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
       (and (eq_attr "type" "sseadd")
            (eq_attr "mode" "SF,DF,DI")))
  "athlon-direct,athlon-fpsched,athlon-fadd")

;; SSE adds with a memory load and no test of the mode.
(define_insn_reservation "athlon_sseaddvector_load" 5
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "sseadd")
            (eq_attr "memory" "load")))
  "athlon-vector,athlon-fpload2,(athlon-fadd*2)")

(define_insn_reservation "athlon_sseaddvector_load_k8" 7
  (and (eq_attr "cpu" "k8,generic64")
       (and (eq_attr "type" "sseadd")
            (eq_attr "memory" "load")))
  "athlon-double,athlon-fpload2k8,(athlon-fadd*2)")

(define_insn_reservation "athlon_sseaddvector_load_amdfam10" 6
  (and (eq_attr "cpu" "amdfam10")
       (and (eq_attr "type" "sseadd")
            (eq_attr "memory" "load")))
  "athlon-direct,athlon-fploadk8,athlon-fadd")

;; SSE adds with no test of mode or memory.
(define_insn_reservation "athlon_sseaddvector" 5
  (and (eq_attr "cpu" "athlon")
       (eq_attr "type" "sseadd"))
  "athlon-vector,athlon-fpsched,(athlon-fadd*2)")

(define_insn_reservation "athlon_sseaddvector_k8" 5
  (and (eq_attr "cpu" "k8,generic64")
       (eq_attr "type" "sseadd"))
  "athlon-double,athlon-fpsched,(athlon-fadd*2)")

(define_insn_reservation "athlon_sseaddvector_amdfam10" 4
  (and (eq_attr "cpu" "amdfam10")
       (eq_attr "type" "sseadd"))
  "athlon-direct,athlon-fpsched,athlon-fadd")
852
;; Conversions behave very irregularly and the scheduling is critical here.
;; Take each instruction separately.  Assume that the mode is always set to
;; the destination mode and that athlon_decode is set to the K8 versions.
856
;; cvtss2sd: type ssecvt with destination mode DF.
;; Load form, selected on athlon/k8/generic64 by athlon_decode "direct".
(define_insn_reservation "athlon_ssecvt_cvtss2sd_load_k8" 4
  (and (eq_attr "cpu" "k8,athlon,generic64")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "athlon_decode" "direct")
                 (and (eq_attr "mode" "DF")
                      (eq_attr "memory" "load")))))
  "athlon-direct,athlon-fploadk8,athlon-fstore")

;; Load form on amdfam10, selected by amdfam10_decode "double".
(define_insn_reservation "athlon_ssecvt_cvtss2sd_load_amdfam10" 7
  (and (eq_attr "cpu" "amdfam10")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "amdfam10_decode" "double")
                 (and (eq_attr "mode" "DF")
                      (eq_attr "memory" "load")))))
  "athlon-double,athlon-fploadk8,(athlon-faddmul+athlon-fstore)")

;; Forms with no test of the memory attribute.
(define_insn_reservation "athlon_ssecvt_cvtss2sd" 2
  (and (eq_attr "cpu" "athlon,k8,generic64")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "athlon_decode" "direct")
                 (eq_attr "mode" "DF"))))
  "athlon-direct,athlon-fpsched,athlon-fstore")

(define_insn_reservation "athlon_ssecvt_cvtss2sd_amdfam10" 7
  (and (eq_attr "cpu" "amdfam10")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "amdfam10_decode" "vector")
                 (eq_attr "mode" "DF"))))
  "athlon-vector,athlon-fpsched,athlon-faddmul,(athlon-fstore*2)")
;; cvtps2pd.  Modelled the same way as the other double decoded FP
;; conversions.  All four reservations test modes V2DF, V4SF and TI.
(define_insn_reservation "athlon_ssecvt_cvtps2pd_load_k8" 5
  (and (eq_attr "cpu" "k8,athlon,generic64")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "athlon_decode" "double")
                 (and (eq_attr "mode" "V2DF,V4SF,TI")
                      (eq_attr "memory" "load")))))
  "athlon-double,athlon-fpload2k8,(athlon-fstore*2)")

(define_insn_reservation "athlon_ssecvt_cvtps2pd_load_amdfam10" 4
  (and (eq_attr "cpu" "amdfam10")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "amdfam10_decode" "direct")
                 (and (eq_attr "mode" "V2DF,V4SF,TI")
                      (eq_attr "memory" "load")))))
  "athlon-direct,athlon-fploadk8,athlon-fstore")

;; Forms with no test of the memory attribute.
(define_insn_reservation "athlon_ssecvt_cvtps2pd_k8" 3
  (and (eq_attr "cpu" "k8,athlon,generic64")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "athlon_decode" "double")
                 (eq_attr "mode" "V2DF,V4SF,TI"))))
  "athlon-double,athlon-fpsched,athlon-fstore,athlon-fstore")

(define_insn_reservation "athlon_ssecvt_cvtps2pd_amdfam10" 2
  (and (eq_attr "cpu" "amdfam10")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "amdfam10_decode" "direct")
                 (eq_attr "mode" "V2DF,V4SF,TI"))))
  "athlon-direct,athlon-fpsched,athlon-fstore")
;; cvtsi2sd mem,reg is directpath (cvtsi2sd reg,reg is doublepath).
;; cvtsi2sd has throughput 1 and is executed in the store unit with a
;; latency of 6.
;; generic64 is included in the cpu list so that it is scheduled like
;; k8 here, as it is in the other conversion reservations that cover
;; both athlon and k8; without it generic64 had no reservation for
;; this direct-path load form.
(define_insn_reservation "athlon_sseicvt_cvtsi2sd_load" 6
  (and (eq_attr "cpu" "athlon,k8,generic64")
       (and (eq_attr "type" "sseicvt")
            (and (eq_attr "athlon_decode" "direct")
                 (and (eq_attr "mode" "SF,DF")
                      (eq_attr "memory" "load")))))
  "athlon-direct,athlon-fploadk8,athlon-fstore")
;; Integer-to-FP conversion (type sseicvt, modes SF and DF) with a
;; memory load on amdfam10, selected by amdfam10_decode "double".
(define_insn_reservation "athlon_sseicvt_cvtsi2sd_load_amdfam10" 9
  (and (eq_attr "cpu" "amdfam10")
       (and (eq_attr "type" "sseicvt")
            (and (eq_attr "amdfam10_decode" "double")
                 (and (eq_attr "mode" "SF,DF")
                      (eq_attr "memory" "load")))))
  "athlon-double,athlon-fploadk8,(athlon-faddmul+athlon-fstore)")
;; cvtsi2ss mem,reg is doublepath.  On athlon the double-decoded form
;; is issued through athlon-vector.
(define_insn_reservation "athlon_sseicvt_cvtsi2ss_load" 9
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "sseicvt")
            (and (eq_attr "athlon_decode" "double")
                 (and (eq_attr "mode" "SF,DF")
                      (eq_attr "memory" "load")))))
  "athlon-vector,athlon-fpload,(athlon-fstore*2)")

(define_insn_reservation "athlon_sseicvt_cvtsi2ss_load_k8" 9
  (and (eq_attr "cpu" "k8,generic64")
       (and (eq_attr "type" "sseicvt")
            (and (eq_attr "athlon_decode" "double")
                 (and (eq_attr "mode" "SF,DF")
                      (eq_attr "memory" "load")))))
  "athlon-double,athlon-fploadk8,(athlon-fstore*2)")

;; NOTE(review): this condition is identical to the one used by
;; athlon_sseicvt_cvtsi2sd_load_amdfam10, and so are the latency and
;; the reservation -- confirm that the duplicate is intended.
(define_insn_reservation "athlon_sseicvt_cvtsi2ss_load_amdfam10" 9
  (and (eq_attr "cpu" "amdfam10")
       (and (eq_attr "type" "sseicvt")
            (and (eq_attr "amdfam10_decode" "double")
                 (and (eq_attr "mode" "SF,DF")
                      (eq_attr "memory" "load")))))
  "athlon-double,athlon-fploadk8,(athlon-faddmul+athlon-fstore)")
;; cvtsi2sd reg,reg is double decoded (vector on Athlon).
;; The reservation goes through athlon-fploadk8 although the memory
;; attribute is "none".
(define_insn_reservation "athlon_sseicvt_cvtsi2sd_k8" 11
  (and (eq_attr "cpu" "k8,athlon,generic64")
       (and (eq_attr "type" "sseicvt")
            (and (eq_attr "athlon_decode" "double")
                 (and (eq_attr "mode" "SF,DF")
                      (eq_attr "memory" "none")))))
  "athlon-double,athlon-fploadk8,athlon-fstore")

;; Register form on amdfam10, selected by amdfam10_decode "vector".
(define_insn_reservation "athlon_sseicvt_cvtsi2sd_amdfam10" 14
  (and (eq_attr "cpu" "amdfam10")
       (and (eq_attr "type" "sseicvt")
            (and (eq_attr "amdfam10_decode" "vector")
                 (and (eq_attr "mode" "SF,DF")
                      (eq_attr "memory" "none")))))
  "athlon-vector,athlon-fploadk8,(athlon-faddmul+athlon-fstore)")
;; cvtsi2ss reg,reg is doublepath.
;; NOTE(review): the condition below tests athlon_decode "vector" and
;; reserves athlon-vector, which does not match "doublepath" -- confirm
;; which of the two is intended.
(define_insn_reservation "athlon_sseicvt_cvtsi2ss" 14
  (and (eq_attr "cpu" "athlon,k8,generic64")
       (and (eq_attr "type" "sseicvt")
            (and (eq_attr "athlon_decode" "vector")
                 (and (eq_attr "mode" "SF,DF")
                      (eq_attr "memory" "none")))))
  "athlon-vector,athlon-fploadk8,(athlon-fvector*2)")

;; NOTE(review): this condition is identical to the one used by
;; athlon_sseicvt_cvtsi2sd_amdfam10, and so are the latency and the
;; reservation -- confirm that the duplicate is intended.
(define_insn_reservation "athlon_sseicvt_cvtsi2ss_amdfam10" 14
  (and (eq_attr "cpu" "amdfam10")
       (and (eq_attr "type" "sseicvt")
            (and (eq_attr "amdfam10_decode" "vector")
                 (and (eq_attr "mode" "SF,DF")
                      (eq_attr "memory" "none")))))
  "athlon-vector,athlon-fploadk8,(athlon-faddmul+athlon-fstore)")
;; cvtsd2ss mem,reg is doublepath, throughput unknown, latency 9.
(define_insn_reservation "athlon_ssecvt_cvtsd2ss_load_k8" 9
  (and (eq_attr "cpu" "k8,athlon,generic64")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "athlon_decode" "double")
                 (and (eq_attr "mode" "SF")
                      (eq_attr "memory" "load")))))
  "athlon-double,athlon-fploadk8,(athlon-fstore*3)")

(define_insn_reservation "athlon_ssecvt_cvtsd2ss_load_amdfam10" 9
  (and (eq_attr "cpu" "amdfam10")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "amdfam10_decode" "double")
                 (and (eq_attr "mode" "SF")
                      (eq_attr "memory" "load")))))
  "athlon-double,athlon-fploadk8,(athlon-faddmul+athlon-fstore)")

;; cvtsd2ss reg,reg is vectorpath, throughput unknown, latency 12.
(define_insn_reservation "athlon_ssecvt_cvtsd2ss" 12
  (and (eq_attr "cpu" "athlon,k8,generic64")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "athlon_decode" "vector")
                 (and (eq_attr "mode" "SF")
                      (eq_attr "memory" "none")))))
  "athlon-vector,athlon-fpsched,(athlon-fvector*3)")

(define_insn_reservation "athlon_ssecvt_cvtsd2ss_amdfam10" 8
  (and (eq_attr "cpu" "amdfam10")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "amdfam10_decode" "vector")
                 (and (eq_attr "mode" "SF")
                      (eq_attr "memory" "none")))))
  "athlon-vector,athlon-fpsched,athlon-faddmul,(athlon-fstore*2)")
;; cvtpd2ps with a memory load; modes V4SF, V2DF and TI.
;; NOTE(review): the condition tests athlon_decode "vector" while the
;; reservation starts with athlon-double -- confirm that this is
;; intended.
(define_insn_reservation "athlon_ssecvt_cvtpd2ps_load_k8" 8
  (and (eq_attr "cpu" "athlon,k8,generic64")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "athlon_decode" "vector")
                 (and (eq_attr "mode" "V4SF,V2DF,TI")
                      (eq_attr "memory" "load")))))
  "athlon-double,athlon-fpload2k8,(athlon-fstore*3)")

(define_insn_reservation "athlon_ssecvt_cvtpd2ps_load_amdfam10" 9
  (and (eq_attr "cpu" "amdfam10")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "amdfam10_decode" "double")
                 (and (eq_attr "mode" "V4SF,V2DF,TI")
                      (eq_attr "memory" "load")))))
  "athlon-double,athlon-fploadk8,(athlon-faddmul+athlon-fstore)")

;; cvtpd2ps mem,reg is vectorpath, throughput unknown, latency 10.
;; ??? Why is it faster than cvtsd2ss?
;; NOTE(review): the reservation below tests memory "none" and uses a
;; latency of 8, so the remark above does not describe it directly.
(define_insn_reservation "athlon_ssecvt_cvtpd2ps" 8
  (and (eq_attr "cpu" "athlon,k8,generic64")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "athlon_decode" "vector")
                 (and (eq_attr "mode" "V4SF,V2DF,TI")
                      (eq_attr "memory" "none")))))
  "athlon-vector,athlon-fpsched,athlon-fvector*2")

(define_insn_reservation "athlon_ssecvt_cvtpd2ps_amdfam10" 7
  (and (eq_attr "cpu" "amdfam10")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "amdfam10_decode" "double")
                 (and (eq_attr "mode" "V4SF,V2DF,TI")
                      (eq_attr "memory" "none")))))
  "athlon-double,athlon-fpsched,(athlon-faddmul+athlon-fstore)")
;; cvtsd2si mem,reg is doublepath, throughput 1, latency 9.
;; NOTE(review): the athlon/k8/generic64 load form below tests
;; athlon_decode "vector" rather than "double" -- confirm.
(define_insn_reservation "athlon_secvt_cvtsX2si_load" 9
  (and (eq_attr "cpu" "athlon,k8,generic64")
       (and (eq_attr "type" "sseicvt")
            (and (eq_attr "athlon_decode" "vector")
                 (and (eq_attr "mode" "SI,DI")
                      (eq_attr "memory" "load")))))
  "athlon-vector,athlon-fploadk8,athlon-fvector")

(define_insn_reservation "athlon_secvt_cvtsX2si_load_amdfam10" 10
  (and (eq_attr "cpu" "amdfam10")
       (and (eq_attr "type" "sseicvt")
            (and (eq_attr "amdfam10_decode" "double")
                 (and (eq_attr "mode" "SI,DI")
                      (eq_attr "memory" "load")))))
  "athlon-double,athlon-fploadk8,(athlon-fadd+athlon-fstore)")

;; cvtsd2si reg,reg is doublepath, throughput 1, latency 9.
;; On athlon the double-decoded form is issued through athlon-vector.
(define_insn_reservation "athlon_ssecvt_cvtsX2si" 9
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "sseicvt")
            (and (eq_attr "athlon_decode" "double")
                 (and (eq_attr "mode" "SI,DI")
                      (eq_attr "memory" "none")))))
  "athlon-vector,athlon-fpsched,athlon-fvector")

(define_insn_reservation "athlon_ssecvt_cvtsX2si_k8" 9
  (and (eq_attr "cpu" "k8,generic64")
       (and (eq_attr "type" "sseicvt")
            (and (eq_attr "athlon_decode" "double")
                 (and (eq_attr "mode" "SI,DI")
                      (eq_attr "memory" "none")))))
  "athlon-double,athlon-fpsched,athlon-fstore")

(define_insn_reservation "athlon_ssecvt_cvtsX2si_amdfam10" 8
  (and (eq_attr "cpu" "amdfam10")
       (and (eq_attr "type" "sseicvt")
            (and (eq_attr "amdfam10_decode" "double")
                 (and (eq_attr "mode" "SI,DI")
                      (eq_attr "memory" "none")))))
  "athlon-double,athlon-fpsched,(athlon-fadd+athlon-fstore)")
;; cvtpd2dq reg,mem is doublepath, throughput 1, latency 9 on amdfam10.
(define_insn_reservation "athlon_sseicvt_cvtpd2dq_load_amdfam10" 9
  (and (eq_attr "cpu" "amdfam10")
       (and (eq_attr "type" "sseicvt")
            (and (eq_attr "amdfam10_decode" "double")
                 (and (eq_attr "mode" "TI")
                      (eq_attr "memory" "load")))))
  "athlon-double,athlon-fploadk8,(athlon-faddmul+athlon-fstore)")

;; cvtpd2dq reg,reg is doublepath, throughput 1, latency 7 on amdfam10.
;; This is the form whose memory attribute is "none".
(define_insn_reservation "athlon_sseicvt_cvtpd2dq_amdfam10" 7
  (and (eq_attr "cpu" "amdfam10")
       (and (eq_attr "type" "sseicvt")
            (and (eq_attr "amdfam10_decode" "double")
                 (and (eq_attr "mode" "TI")
                      (eq_attr "memory" "none")))))
  "athlon-double,athlon-fpsched,(athlon-faddmul+athlon-fstore)")
1092
1093
;; SSE multiplies restricted to modes SF and DF.
(define_insn_reservation "athlon_ssemul_load" 4
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "ssemul")
            (and (eq_attr "mode" "SF,DF")
                 (eq_attr "memory" "load"))))
  "athlon-direct,athlon-fpload,athlon-fmul")

(define_insn_reservation "athlon_ssemul_load_k8" 6
  (and (eq_attr "cpu" "k8,generic64,amdfam10")
       (and (eq_attr "type" "ssemul")
            (and (eq_attr "mode" "SF,DF")
                 (eq_attr "memory" "load"))))
  "athlon-direct,athlon-fploadk8,athlon-fmul")

(define_insn_reservation "athlon_ssemul" 4
  (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
       (and (eq_attr "type" "ssemul")
            (eq_attr "mode" "SF,DF")))
  "athlon-direct,athlon-fpsched,athlon-fmul")

;; SSE multiplies with a memory load and no test of the mode.
(define_insn_reservation "athlon_ssemulvector_load" 5
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "ssemul")
            (eq_attr "memory" "load")))
  "athlon-vector,athlon-fpload2,(athlon-fmul*2)")

(define_insn_reservation "athlon_ssemulvector_load_k8" 7
  (and (eq_attr "cpu" "k8,generic64")
       (and (eq_attr "type" "ssemul")
            (eq_attr "memory" "load")))
  "athlon-double,athlon-fpload2k8,(athlon-fmul*2)")

(define_insn_reservation "athlon_ssemulvector_load_amdfam10" 6
  (and (eq_attr "cpu" "amdfam10")
       (and (eq_attr "type" "ssemul")
            (eq_attr "memory" "load")))
  "athlon-direct,athlon-fploadk8,athlon-fmul")

;; SSE multiplies with no test of mode or memory.
(define_insn_reservation "athlon_ssemulvector" 5
  (and (eq_attr "cpu" "athlon")
       (eq_attr "type" "ssemul"))
  "athlon-vector,athlon-fpsched,(athlon-fmul*2)")

(define_insn_reservation "athlon_ssemulvector_k8" 5
  (and (eq_attr "cpu" "k8,generic64")
       (eq_attr "type" "ssemul"))
  "athlon-double,athlon-fpsched,(athlon-fmul*2)")

(define_insn_reservation "athlon_ssemulvector_amdfam10" 4
  (and (eq_attr "cpu" "amdfam10")
       (eq_attr "type" "ssemul"))
  "athlon-direct,athlon-fpsched,athlon-fmul")
;; These are divsd timings; divss is faster.
;; SSE divides restricted to modes SF and DF.
(define_insn_reservation "athlon_ssediv_load" 20
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "ssediv")
            (and (eq_attr "mode" "SF,DF")
                 (eq_attr "memory" "load"))))
  "athlon-direct,athlon-fpload,athlon-fmul*17")

(define_insn_reservation "athlon_ssediv_load_k8" 22
  (and (eq_attr "cpu" "k8,generic64,amdfam10")
       (and (eq_attr "type" "ssediv")
            (and (eq_attr "mode" "SF,DF")
                 (eq_attr "memory" "load"))))
  "athlon-direct,athlon-fploadk8,athlon-fmul*17")

(define_insn_reservation "athlon_ssediv" 20
  (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
       (and (eq_attr "type" "ssediv")
            (eq_attr "mode" "SF,DF")))
  "athlon-direct,athlon-fpsched,athlon-fmul*17")

;; SSE divides with a memory load and no test of the mode.
(define_insn_reservation "athlon_ssedivvector_load" 39
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "ssediv")
            (eq_attr "memory" "load")))
  "athlon-vector,athlon-fpload2,athlon-fmul*34")

(define_insn_reservation "athlon_ssedivvector_load_k8" 35
  (and (eq_attr "cpu" "k8,generic64")
       (and (eq_attr "type" "ssediv")
            (eq_attr "memory" "load")))
  "athlon-double,athlon-fpload2k8,athlon-fmul*34")

(define_insn_reservation "athlon_ssedivvector_load_amdfam10" 22
  (and (eq_attr "cpu" "amdfam10")
       (and (eq_attr "type" "ssediv")
            (eq_attr "memory" "load")))
  "athlon-direct,athlon-fploadk8,athlon-fmul*17")

;; SSE divides with no test of mode or memory.
;; NOTE(review): unlike athlon_ssediv, these three reservations do not
;; include athlon-fpsched -- confirm that the omission is intended.
(define_insn_reservation "athlon_ssedivvector" 39
  (and (eq_attr "cpu" "athlon")
       (eq_attr "type" "ssediv"))
  "athlon-vector,athlon-fmul*34")

(define_insn_reservation "athlon_ssedivvector_k8" 39
  (and (eq_attr "cpu" "k8,generic64")
       (eq_attr "type" "ssediv"))
  "athlon-double,athlon-fmul*34")

(define_insn_reservation "athlon_ssedivvector_amdfam10" 20
  (and (eq_attr "cpu" "amdfam10")
       (eq_attr "type" "ssediv"))
  "athlon-direct,athlon-fmul*17")
;; Insns of type sseins in mode TI on amdfam10.  Latency 5; issued
;; through athlon-vector.
(define_insn_reservation "athlon_sseins_amdfam10" 5
  (and (eq_attr "cpu" "amdfam10")
       (and (eq_attr "type" "sseins")
            (eq_attr "mode" "TI")))
  "athlon-vector,athlon-fpsched,athlon-faddmul")
1188