1;; Copyright (C) 2002-2018 Free Software Foundation, Inc.
2;;
3;; This file is part of GCC.
4;;
5;; GCC is free software; you can redistribute it and/or modify
6;; it under the terms of the GNU General Public License as published by
7;; the Free Software Foundation; either version 3, or (at your option)
8;; any later version.
9;;
10;; GCC is distributed in the hope that it will be useful,
11;; but WITHOUT ANY WARRANTY; without even the implied warranty of
12;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13;; GNU General Public License for more details.
14;;
15;; You should have received a copy of the GNU General Public License
16;; along with GCC; see the file COPYING3.  If not see
17;; <http://www.gnu.org/licenses/>.
18;;
19;; AMD Athlon Scheduling
20;;
21;; The Athlon contains three pipelined FP units, three integer units and
22;; three address generation units.
23;;
24;; The predecode logic determines the boundaries of instructions in the 64
25;; byte cache line.  So the cache line straddling problem of the K6 might be
26;; an issue here as well, but it is not noted in the documentation.
27;;
28;; Three DirectPath instruction decoders and only one VectorPath decoder
29;; are available.  They can decode three DirectPath instructions or one
30;; VectorPath instruction per cycle.
31;; Decoded macro instructions are then passed to the 72 entry instruction
32;; control unit, which passes them on to the specialized integer (18 entry)
33;; and fp (36 entry) schedulers.
34;;
35;; The load/store queue unit is not attached to the schedulers but
36;; communicates with all the execution units separately instead.
37
;; Decode path classification tested by the reservations below.
;; "vector" is chosen for the listed complex insn types, for pushes whose
;; operand 1 is a memory operand, and for XFmode fmov loads/stores;
;; everything else falls through to "direct".  No branch of this cond
;; yields "double".
38(define_attr "athlon_decode" "direct,vector,double"
39  (cond [(eq_attr "type" "call,imul,idiv,other,multi,fcmov,fpspc,str,pop,leave")
40	   (const_string "vector")
41         (and (eq_attr "type" "push")
42              (match_operand 1 "memory_operand"))
43	   (const_string "vector")
44         (and (eq_attr "type" "fmov")
45	      (and (eq_attr "memory" "load,store")
46		   (eq_attr "mode" "XF")))
47	   (const_string "vector")]
48	(const_string "direct")))
49
;; Decode path classification tested by the amdfam10 integer reservations.
;; The default given here is "direct" for every insn.
;; NOTE(review): presumably individual insn patterns override this
;; default elsewhere -- confirm.
50(define_attr "amdfam10_decode" "direct,vector,double"
51  (const_string "direct"))
52;;
53;;           decode0 decode1 decode2
54;;                 \    |   /
55;;    instruction control unit (72 entry scheduler)
56;;                |                        |
57;;      integer scheduler (18)         stack map
58;;     /  |    |    |    |   \        stack rename
59;;  ieu0 agu0 ieu1 agu1 ieu2 agu2      scheduler
60;;    |  agu0  |   agu1      agu2    register file
61;;    |      \ |    |       /         |     |     |
62;;     \      /\    |     /         fadd  fmul  fstore
63;;       \  /    \  |   /           fadd  fmul  fstore
64;;       imul  load/store (2x)      fadd  fmul  fstore
65
;; The units below are spread over four automata: "athlon" (decoders and
;; ieu0), "athlon_load", "athlon_mult" and "athlon_fp".
66(define_automaton "athlon,athlon_load,athlon_mult,athlon_fp")
;; Three direct-path decoders plus one unit standing for the vector decoder.
67(define_cpu_unit "athlon-decode0" "athlon")
68(define_cpu_unit "athlon-decode1" "athlon")
69(define_cpu_unit "athlon-decode2" "athlon")
70(define_cpu_unit "athlon-decodev" "athlon")
71;; Model the fact that a double decoded instruction may take 2 cycles
72;; to decode when decoder2 and then decoder0 in the next cycle
73;; are used (this is needed to allow throughput of 1.5 double decoded
74;; instructions per cycle).
75;;
76;; In order to avoid dependence between reservation of decoder
77;; and other units, we model the decoder as a two stage fully pipelined unit
78;; and only a double decoded instruction may occupy a unit in the first cycle.
79;; With this scheme however two double instructions can be issued in cycle0.
80;;
81;; Avoid this by using a presence set requiring decoder0 to be allocated
82;; too.  Vector decoded instructions then can't be issued when
83;; modeled as consuming decoder0+decoder1+decoder2.
84;; We solve that by a specialized vector decoder unit and an exclusion set.
85(presence_set "athlon-decode2" "athlon-decode0")
86(exclusion_set "athlon-decodev" "athlon-decode0,athlon-decode1,athlon-decode2")
;; Each decode reservation starts with an idle ("nothing") first cycle,
;; except the first alternative of athlon-double, which takes decode2 in
;; the first cycle and decode0 in the second.
87(define_reservation "athlon-vector" "nothing,athlon-decodev")
88(define_reservation "athlon-direct0" "nothing,athlon-decode0")
89(define_reservation "athlon-direct" "nothing,
90				     (athlon-decode0 | athlon-decode1
91				     | athlon-decode2)")
92;; Double instructions behave like two direct instructions.
93(define_reservation "athlon-double" "((athlon-decode2, athlon-decode0)
94				     | (nothing,(athlon-decode0 + athlon-decode1))
95				     | (nothing,(athlon-decode1 + athlon-decode2)))")
96
97;; Agu and ieu units result in extremely large automata and
98;; in our approximation they are hardly ever filled.  Only the ieu
99;; units can be, as the issue rate is 3 and the agu unit is always used
100;; first in the insn reservations.  Skip the models.
101
102;(define_cpu_unit "athlon-ieu0" "athlon_ieu")
103;(define_cpu_unit "athlon-ieu1" "athlon_ieu")
104;(define_cpu_unit "athlon-ieu2" "athlon_ieu")
105;(define_reservation "athlon-ieu" "(athlon-ieu0 | athlon-ieu1 | athlon-ieu2)")
;; "athlon-ieu" is a placeholder that reserves nothing; only ieu0 is a real
;; unit, and it is named directly by the imul, idiv and str reservations.
106(define_reservation "athlon-ieu" "nothing")
107(define_cpu_unit "athlon-ieu0" "athlon")
108;(define_cpu_unit "athlon-agu0" "athlon_agu")
109;(define_cpu_unit "athlon-agu1" "athlon_agu")
110;(define_cpu_unit "athlon-agu2" "athlon_agu")
111;(define_reservation "athlon-agu" "(athlon-agu0 | athlon-agu1 | athlon-agu2)")
;; Likewise "athlon-agu" is a placeholder that reserves nothing.
112(define_reservation "athlon-agu" "nothing")
113
;; Integer multiplier, kept in its own automaton.
114(define_cpu_unit "athlon-mult" "athlon_mult")
115
;; Two load/store ports.  A load is the (placeholder) agu cycle, one cycle
;; on either port, then one idle cycle.
116(define_cpu_unit "athlon-load0" "athlon_load")
117(define_cpu_unit "athlon-load1" "athlon_load")
118(define_reservation "athlon-load" "athlon-agu,
119				   (athlon-load0 | athlon-load1),nothing")
120;; 128bit SSE instructions issue two loads at once
121(define_reservation "athlon-load2" "athlon-agu,
122				   (athlon-load0 + athlon-load1),nothing")
123
;; Stores occupy the same two ports as loads, for a single cycle.
124(define_reservation "athlon-store" "(athlon-load0 | athlon-load1)")
125;; 128bit SSE instructions issue two stores at once
126(define_reservation "athlon-store2" "(athlon-load0 + athlon-load1)")
127
128
129;; The FP operations start to execute at stage 12 in the pipeline, while
130;; integer operations start to execute at stage 9 for Athlon and 11 for K8.
131;; Compensate the difference for Athlon because it results in significantly
132;; smaller automata.
;; athlon-fpsched is three idle cycles placed ahead of the FP pipe usage.
133(define_reservation "athlon-fpsched" "nothing,nothing,nothing")
134;; The floating point loads.
;; The "k8" variants are currently defined identically to the plain ones.
135(define_reservation "athlon-fpload" "(athlon-fpsched + athlon-load)")
136(define_reservation "athlon-fpload2" "(athlon-fpsched + athlon-load2)")
137(define_reservation "athlon-fploadk8" "(athlon-fpsched + athlon-load)")
138(define_reservation "athlon-fpload2k8" "(athlon-fpsched + athlon-load2)")
139
140
141;; The three fp units are fully pipelined with latency of 3
142(define_cpu_unit "athlon-fadd" "athlon_fp")
143(define_cpu_unit "athlon-fmul" "athlon_fp")
144(define_cpu_unit "athlon-fstore" "athlon_fp")
;; Convenience alternatives: any one of the three pipes, or fadd/fmul only.
145(define_reservation "athlon-fany" "(athlon-fstore | athlon-fmul | athlon-fadd)")
146(define_reservation "athlon-faddmul" "(athlon-fadd | athlon-fmul)")
147
148;; Vector operations usually consume many of the pipes; model them as
;; taking all three at once.
149(define_reservation "athlon-fvector" "(athlon-fadd + athlon-fmul + athlon-fstore)")
150
151
152;; Jump instructions are executed in the branch unit completely transparent to us
;; Branches and calls are given latency 0 and only occupy a decoder.
153(define_insn_reservation "athlon_branch" 0
154			 (and (eq_attr "cpu" "athlon,k8,amdfam10")
155			      (eq_attr "type" "ibr"))
156			 "athlon-direct,athlon-ieu")
;; Calls use the vector decoder on athlon/k8 ...
157(define_insn_reservation "athlon_call" 0
158			 (and (eq_attr "cpu" "athlon,k8")
159			      (eq_attr "type" "call,callv"))
160			 "athlon-vector,athlon-ieu")
;; ... and the double decode reservation on amdfam10.
161(define_insn_reservation "athlon_call_amdfam10" 0
162			 (and (eq_attr "cpu" "amdfam10")
163			      (eq_attr "type" "call,callv"))
164			 "athlon-double,athlon-ieu")
165
166;; Latency of push operation is 3 cycles, but ESP value is available
167;; earlier, so 2 is used as the modelled latency.
;; The same reservation serves athlon, k8 and amdfam10.
168(define_insn_reservation "athlon_push" 2
169			 (and (eq_attr "cpu" "athlon,k8,amdfam10")
170			      (eq_attr "type" "push"))
171			 "athlon-direct,athlon-agu,athlon-store")
;; Pop on the original Athlon: vector decoded, latency 4.
;; The condition is restricted to cpu "athlon".  It used to list "k8" as
;; well, which made it overlap with athlon_pop_k8 for every K8 pop, so the
;; double-decoded, latency 3 K8 reservation was shadowed by this one.
172(define_insn_reservation "athlon_pop" 4
173			 (and (eq_attr "cpu" "athlon")
174			      (eq_attr "type" "pop"))
175			 "athlon-vector,athlon-load,athlon-ieu")
;; Pop on K8: double decoded, latency 3; load and ieu start together.
176(define_insn_reservation "athlon_pop_k8" 3
177			 (and (eq_attr "cpu" "k8")
178			      (eq_attr "type" "pop"))
179			 "athlon-double,(athlon-ieu+athlon-load)")
;; Pop on amdfam10: direct decoded, latency 3.
180(define_insn_reservation "athlon_pop_amdfam10" 3
181			 (and (eq_attr "cpu" "amdfam10")
182			      (eq_attr "type" "pop"))
183			 "athlon-direct,(athlon-ieu+athlon-load)")
;; Leave: vector decoded on athlon, double decoded on k8/amdfam10;
;; latency 3 in both cases.
184(define_insn_reservation "athlon_leave" 3
185			 (and (eq_attr "cpu" "athlon")
186			      (eq_attr "type" "leave"))
187			 "athlon-vector,(athlon-ieu+athlon-load)")
188(define_insn_reservation "athlon_leave_k8" 3
189			 (and (eq_attr "cpu" "k8,amdfam10")
190			      (eq_attr "type" "leave"))
191			 "athlon-double,(athlon-ieu+athlon-load)")
192
193;; Lea executes in AGU unit with 2 cycles latency on athlon and k8.
;; Both lea reservations use the same unit string; only the latency differs.
194(define_insn_reservation "athlon_lea" 2
195			 (and (eq_attr "cpu" "athlon,k8")
196			      (eq_attr "type" "lea"))
197			 "athlon-direct,athlon-agu,nothing")
198;; Lea executes in AGU unit with 1 cycle latency on AMDFAM10
199(define_insn_reservation "athlon_lea_amdfam10" 1
200			 (and (eq_attr "cpu" "amdfam10")
201			      (eq_attr "type" "lea"))
202			 "athlon-direct,athlon-agu,nothing")
203
204;; Mul executes in special multiplier unit attached to IEU0
;; Register-only multiply on athlon: vector decoded, latency 5; ieu0 is
;; taken at the start and again at the end of the reservation.
205(define_insn_reservation "athlon_imul" 5
206			 (and (eq_attr "cpu" "athlon")
207			      (and (eq_attr "type" "imul")
208				   (eq_attr "memory" "none,unknown")))
209			 "athlon-vector,athlon-ieu0,athlon-mult,nothing,nothing,athlon-ieu0")
210;; ??? Widening multiply is vector or double.
;; k8/amdfam10 register-only multiply: DImode has latency 4, other modes 3.
;; Both are restricted to decoder 0 via athlon-direct0.
211(define_insn_reservation "athlon_imul_k8_DI" 4
212			 (and (eq_attr "cpu" "k8,amdfam10")
213			      (and (eq_attr "type" "imul")
214				   (and (eq_attr "mode" "DI")
215					(eq_attr "memory" "none,unknown"))))
216			 "athlon-direct0,athlon-ieu0,athlon-mult,nothing,athlon-ieu0")
217(define_insn_reservation "athlon_imul_k8" 3
218			 (and (eq_attr "cpu" "k8,amdfam10")
219			      (and (eq_attr "type" "imul")
220				   (eq_attr "memory" "none,unknown")))
221			 "athlon-direct0,athlon-ieu0,athlon-mult,athlon-ieu0")
;; amdfam10 HImode register-only multiply: vector decoded, latency 4.
;; NOTE(review): the condition of athlon_imul_k8 above also accepts
;; amdfam10 register-only imul in any mode, so the two conditions overlap
;; for HImode -- confirm which reservation is actually selected.
222(define_insn_reservation "athlon_imul_amdfam10_HI" 4
223			 (and (eq_attr "cpu" "amdfam10")
224			      (and (eq_attr "type" "imul")
225				   (and (eq_attr "mode" "HI")
226					(eq_attr "memory" "none,unknown"))))
227			 "athlon-vector,athlon-ieu0,athlon-mult,nothing,athlon-ieu0")
;; Multiplies with a memory source: vector decoded on all three cpus, with
;; latency 3 higher than the register-only counterparts above.  These use
;; the placeholder athlon-ieu rather than the real ieu0 unit.
228(define_insn_reservation "athlon_imul_mem" 8
229			 (and (eq_attr "cpu" "athlon")
230			      (and (eq_attr "type" "imul")
231				   (eq_attr "memory" "load,both")))
232			 "athlon-vector,athlon-load,athlon-ieu,athlon-mult,nothing,nothing,athlon-ieu")
233(define_insn_reservation "athlon_imul_mem_k8_DI" 7
234			 (and (eq_attr "cpu" "k8,amdfam10")
235			      (and (eq_attr "type" "imul")
236				   (and (eq_attr "mode" "DI")
237					(eq_attr "memory" "load,both"))))
238			 "athlon-vector,athlon-load,athlon-ieu,athlon-mult,nothing,athlon-ieu")
239(define_insn_reservation "athlon_imul_mem_k8" 6
240			 (and (eq_attr "cpu" "k8,amdfam10")
241			      (and (eq_attr "type" "imul")
242				   (eq_attr "memory" "load,both")))
243			 "athlon-vector,athlon-load,athlon-ieu,athlon-mult,athlon-ieu")
244
245;; Idiv cannot execute in parallel with other instructions.  Dealing with it
246;; as with a short latency vector instruction is a good approximation that
247;; keeps the scheduler from trying too hard to hide its latency by overlap
248;; with other instructions.
249;; ??? Experiments show that the idiv can overlap with roughly 6 cycles
250;; of the other code
251;; Using the same heuristics for amdfam10 as K8 with idiv
252
;; Register-only divide: holds ieu0 for 6 cycles and, in parallel, the FP
;; scheduler delay followed by all three FP pipes.
253(define_insn_reservation "athlon_idiv" 6
254			 (and (eq_attr "cpu" "athlon,k8,amdfam10")
255			      (and (eq_attr "type" "idiv")
256				   (eq_attr "memory" "none,unknown")))
257			 "athlon-vector,(athlon-ieu0*6+(athlon-fpsched,athlon-fvector))")
;; Divide with a memory operand: the load precedes the 6 ieu0 cycles.
258(define_insn_reservation "athlon_idiv_mem" 9
259			 (and (eq_attr "cpu" "athlon,k8,amdfam10")
260			      (and (eq_attr "type" "idiv")
261				   (eq_attr "memory" "load,both")))
262			 "athlon-vector,((athlon-load,athlon-ieu0*6)+(athlon-fpsched,athlon-fvector))")
263;; The parallelism of string instructions is not documented.  Model it the
264;; same way as idiv to create smaller automata.  This probably does not
265;; matter much.  Using the same heuristics for amdfam10 as K8 with idiv
266(define_insn_reservation "athlon_str" 6
267			 (and (eq_attr "cpu" "athlon,k8,amdfam10")
268			      (and (eq_attr "type" "str")
269				   (eq_attr "memory" "load,both,store")))
270			 "athlon-vector,athlon-load,athlon-ieu0*6")
271
;; Generic integer insns without a memory operand.  Direct-path insns have
;; latency 1, vector-path insns latency 2.  athlon/k8 select on the
;; athlon_decode attribute, amdfam10 on amdfam10_decode; the unit strings
;; are otherwise the same.
272(define_insn_reservation "athlon_idirect" 1
273			 (and (eq_attr "cpu" "athlon,k8")
274			      (and (eq_attr "athlon_decode" "direct")
275				   (and (eq_attr "unit" "integer,unknown")
276					(eq_attr "memory" "none,unknown"))))
277			 "athlon-direct,athlon-ieu")
278(define_insn_reservation "athlon_idirect_amdfam10" 1
279			 (and (eq_attr "cpu" "amdfam10")
280			      (and (eq_attr "amdfam10_decode" "direct")
281				   (and (eq_attr "unit" "integer,unknown")
282					(eq_attr "memory" "none,unknown"))))
283			 "athlon-direct,athlon-ieu")
284(define_insn_reservation "athlon_ivector" 2
285			 (and (eq_attr "cpu" "athlon,k8")
286			      (and (eq_attr "athlon_decode" "vector")
287				   (and (eq_attr "unit" "integer,unknown")
288					(eq_attr "memory" "none,unknown"))))
289			 "athlon-vector,athlon-ieu,athlon-ieu")
290(define_insn_reservation "athlon_ivector_amdfam10" 2
291			 (and (eq_attr "cpu" "amdfam10")
292			      (and (eq_attr "amdfam10_decode" "vector")
293				   (and (eq_attr "unit" "integer,unknown")
294					(eq_attr "memory" "none,unknown"))))
295			 "athlon-vector,athlon-ieu,athlon-ieu")
296
;; Plain integer move from memory: latency 3, decoder plus load only.
297(define_insn_reservation "athlon_idirect_loadmov" 3
298			 (and (eq_attr "cpu" "athlon,k8,amdfam10")
299			      (and (eq_attr "type" "imov")
300				   (eq_attr "memory" "load")))
301			 "athlon-direct,athlon-load")
302
;; Integer insns with a memory source: the load is followed by the ieu
;; step(s).  Direct-path insns have latency 4, vector-path insns latency 6.
303(define_insn_reservation "athlon_idirect_load" 4
304			 (and (eq_attr "cpu" "athlon,k8")
305			      (and (eq_attr "athlon_decode" "direct")
306				   (and (eq_attr "unit" "integer,unknown")
307					(eq_attr "memory" "load"))))
308			 "athlon-direct,athlon-load,athlon-ieu")
309(define_insn_reservation "athlon_idirect_load_amdfam10" 4
310			 (and (eq_attr "cpu" "amdfam10")
311			      (and (eq_attr "amdfam10_decode" "direct")
312				   (and (eq_attr "unit" "integer,unknown")
313					(eq_attr "memory" "load"))))
314			 "athlon-direct,athlon-load,athlon-ieu")
315(define_insn_reservation "athlon_ivector_load" 6
316			 (and (eq_attr "cpu" "athlon,k8")
317			      (and (eq_attr "athlon_decode" "vector")
318				   (and (eq_attr "unit" "integer,unknown")
319					(eq_attr "memory" "load"))))
320			 "athlon-vector,athlon-load,athlon-ieu,athlon-ieu")
321(define_insn_reservation "athlon_ivector_load_amdfam10" 6
322			 (and (eq_attr "cpu" "amdfam10")
323			      (and (eq_attr "amdfam10_decode" "vector")
324				   (and (eq_attr "unit" "integer,unknown")
325					(eq_attr "memory" "load"))))
326			 "athlon-vector,athlon-load,athlon-ieu,athlon-ieu")
327
;; Plain integer move to memory: latency 1.
328(define_insn_reservation "athlon_idirect_movstore" 1
329			 (and (eq_attr "cpu" "athlon,k8,amdfam10")
330			      (and (eq_attr "type" "imov")
331				   (eq_attr "memory" "store")))
332			 "athlon-direct,athlon-agu,athlon-store")
333
;; Integer insns that both load and store (memory "both").  Direct-path
;; insns have latency 4 and list athlon-store in two consecutive cycles.
334(define_insn_reservation "athlon_idirect_both" 4
335			 (and (eq_attr "cpu" "athlon,k8")
336			      (and (eq_attr "athlon_decode" "direct")
337				   (and (eq_attr "unit" "integer,unknown")
338					(eq_attr "memory" "both"))))
339			 "athlon-direct,athlon-load,
340			  athlon-ieu,athlon-store,
341			  athlon-store")
342(define_insn_reservation "athlon_idirect_both_amdfam10" 4
343			 (and (eq_attr "cpu" "amdfam10")
344			      (and (eq_attr "amdfam10_decode" "direct")
345				   (and (eq_attr "unit" "integer,unknown")
346					(eq_attr "memory" "both"))))
347			 "athlon-direct,athlon-load,
348			  athlon-ieu,athlon-store,
349			  athlon-store")
350
;; Vector-path load+store insns: latency 6, two ieu steps and one store cycle.
351(define_insn_reservation "athlon_ivector_both" 6
352			 (and (eq_attr "cpu" "athlon,k8")
353			      (and (eq_attr "athlon_decode" "vector")
354				   (and (eq_attr "unit" "integer,unknown")
355					(eq_attr "memory" "both"))))
356			 "athlon-vector,athlon-load,
357			  athlon-ieu,
358			  athlon-ieu,
359			  athlon-store")
360(define_insn_reservation "athlon_ivector_both_amdfam10" 6
361			 (and (eq_attr "cpu" "amdfam10")
362			      (and (eq_attr "amdfam10_decode" "vector")
363				   (and (eq_attr "unit" "integer,unknown")
364					(eq_attr "memory" "both"))))
365			 "athlon-vector,athlon-load,
366			  athlon-ieu,
367			  athlon-ieu,
368			  athlon-store")
369
;; Integer insns with a memory destination only.  Direct-path insns have
;; latency 1; the ieu and agu steps share a cycle ahead of the store.
370(define_insn_reservation "athlon_idirect_store" 1
371			 (and (eq_attr "cpu" "athlon,k8")
372			      (and (eq_attr "athlon_decode" "direct")
373				   (and (eq_attr "unit" "integer,unknown")
374					(eq_attr "memory" "store"))))
375			 "athlon-direct,(athlon-ieu+athlon-agu),
376			  athlon-store")
377(define_insn_reservation "athlon_idirect_store_amdfam10" 1
378			 (and (eq_attr "cpu" "amdfam10")
379			      (and (eq_attr "amdfam10_decode" "direct")
380				   (and (eq_attr "unit" "integer,unknown")
381					(eq_attr "memory" "store"))))
382			 "athlon-direct,(athlon-ieu+athlon-agu),
383			  athlon-store")
384
;; Vector-path store insns: latency 2, with an extra ieu step before the store.
385(define_insn_reservation "athlon_ivector_store" 2
386			 (and (eq_attr "cpu" "athlon,k8")
387			      (and (eq_attr "athlon_decode" "vector")
388				   (and (eq_attr "unit" "integer,unknown")
389					(eq_attr "memory" "store"))))
390			 "athlon-vector,(athlon-ieu+athlon-agu),athlon-ieu,
391			  athlon-store")
392(define_insn_reservation "athlon_ivector_store_amdfam10" 2
393			 (and (eq_attr "cpu" "amdfam10")
394			      (and (eq_attr "amdfam10_decode" "vector")
395				   (and (eq_attr "unit" "integer,unknown")
396					(eq_attr "memory" "store"))))
397			 "athlon-vector,(athlon-ieu+athlon-agu),athlon-ieu,
398			  athlon-store")
399
400;; Athlon floating point unit
;; XFmode loads: vector decoded, both load ports, then all three FP pipes
;; held for 9 cycles.  Latency is 12 on athlon and 13 on k8/amdfam10.
401(define_insn_reservation "athlon_fldxf" 12
402			 (and (eq_attr "cpu" "athlon")
403			      (and (eq_attr "type" "fmov")
404				   (and (eq_attr "memory" "load")
405					(eq_attr "mode" "XF"))))
406			 "athlon-vector,athlon-fpload2,athlon-fvector*9")
407(define_insn_reservation "athlon_fldxf_k8" 13
408			 (and (eq_attr "cpu" "k8,amdfam10")
409			      (and (eq_attr "type" "fmov")
410				   (and (eq_attr "memory" "load")
411					(eq_attr "mode" "XF"))))
412			 "athlon-vector,athlon-fpload2k8,athlon-fvector*9")
413;; Assume superforwarding to take place so effective latency of fany op is 0.
;; The conditions of the two generic fmov load reservations below do not
;; test the mode, so they also accept the XFmode loads handled above.
414(define_insn_reservation "athlon_fld" 0
415			 (and (eq_attr "cpu" "athlon")
416			      (and (eq_attr "type" "fmov")
417				   (eq_attr "memory" "load")))
418			 "athlon-direct,athlon-fpload,athlon-fany")
;; On k8/amdfam10 the load has latency 2 and goes through the fstore pipe.
419(define_insn_reservation "athlon_fld_k8" 2
420			 (and (eq_attr "cpu" "k8,amdfam10")
421			      (and (eq_attr "type" "fmov")
422				   (eq_attr "memory" "load")))
423			 "athlon-direct,athlon-fploadk8,athlon-fstore")
;; XFmode stores: vector decoded, both store ports, all three FP pipes held
;; for 7 cycles (athlon, latency 10) or 6 cycles (k8/amdfam10, latency 8).
425(define_insn_reservation "athlon_fstxf" 10
426			 (and (eq_attr "cpu" "athlon")
427			      (and (eq_attr "type" "fmov")
428				   (and (eq_attr "memory" "store,both")
429					(eq_attr "mode" "XF"))))
430			 "athlon-vector,(athlon-fpsched+athlon-agu),(athlon-store2+(athlon-fvector*7))")
431(define_insn_reservation "athlon_fstxf_k8" 8
432			 (and (eq_attr "cpu" "k8,amdfam10")
433			      (and (eq_attr "type" "fmov")
434				   (and (eq_attr "memory" "store,both")
435					(eq_attr "mode" "XF"))))
436			 "athlon-vector,(athlon-fpsched+athlon-agu),(athlon-store2+(athlon-fvector*6))")
;; Other fmov stores: direct decoded, fstore pipe plus one store port.
;; Latency 4 on athlon, 2 on k8/amdfam10.
437(define_insn_reservation "athlon_fst" 4
438			 (and (eq_attr "cpu" "athlon")
439			      (and (eq_attr "type" "fmov")
440				   (eq_attr "memory" "store,both")))
441			 "athlon-direct,(athlon-fpsched+athlon-agu),(athlon-fstore+athlon-store)")
442(define_insn_reservation "athlon_fst_k8" 2
443			 (and (eq_attr "cpu" "k8,amdfam10")
444			      (and (eq_attr "type" "fmov")
445				   (eq_attr "memory" "store,both")))
446			 "athlon-direct,(athlon-fpsched+athlon-agu),(athlon-fstore+athlon-store)")
;; Integer stores from the FP stack (fistp/fisttp): latency 4 on all cpus.
447(define_insn_reservation "athlon_fist" 4
448			 (and (eq_attr "cpu" "athlon,k8,amdfam10")
449			      (eq_attr "type" "fistp,fisttp"))
450			 "athlon-direct,(athlon-fpsched+athlon-agu),(athlon-fstore+athlon-store)")
;; Catch-all for fmov: latency 2 on either the fadd or the fmul pipe.
;; The condition does not test the memory attribute.
451(define_insn_reservation "athlon_fmov" 2
452			 (and (eq_attr "cpu" "athlon,k8,amdfam10")
453			      (eq_attr "type" "fmov"))
454			 "athlon-direct,athlon-fpsched,athlon-faddmul")
;; x87 arithmetic.  Each operation has a "_load" form for a memory source
;; (separate athlon and k8/amdfam10 entries) followed by a catch-all whose
;; condition does not test the memory attribute.
;; fop: fadd pipe, latency 4 (6 with a load on k8/amdfam10).
455(define_insn_reservation "athlon_fadd_load" 4
456			 (and (eq_attr "cpu" "athlon")
457			      (and (eq_attr "type" "fop")
458				   (eq_attr "memory" "load")))
459			 "athlon-direct,athlon-fpload,athlon-fadd")
460(define_insn_reservation "athlon_fadd_load_k8" 6
461			 (and (eq_attr "cpu" "k8,amdfam10")
462			      (and (eq_attr "type" "fop")
463				   (eq_attr "memory" "load")))
464			 "athlon-direct,athlon-fploadk8,athlon-fadd")
465(define_insn_reservation "athlon_fadd" 4
466			 (and (eq_attr "cpu" "athlon,k8,amdfam10")
467			      (eq_attr "type" "fop"))
468			 "athlon-direct,athlon-fpsched,athlon-fadd")
;; fmul: fmul pipe, latency 4 (6 with a load on k8/amdfam10).
469(define_insn_reservation "athlon_fmul_load" 4
470			 (and (eq_attr "cpu" "athlon")
471			      (and (eq_attr "type" "fmul")
472				   (eq_attr "memory" "load")))
473			 "athlon-direct,athlon-fpload,athlon-fmul")
474(define_insn_reservation "athlon_fmul_load_k8" 6
475			 (and (eq_attr "cpu" "k8,amdfam10")
476			      (and (eq_attr "type" "fmul")
477				   (eq_attr "memory" "load")))
478			 "athlon-direct,athlon-fploadk8,athlon-fmul")
479(define_insn_reservation "athlon_fmul" 4
480			 (and (eq_attr "cpu" "athlon,k8,amdfam10")
481			      (eq_attr "type" "fmul"))
482			 "athlon-direct,athlon-fpsched,athlon-fmul")
;; fsgn: fmul pipe, latency 2.
483(define_insn_reservation "athlon_fsgn" 2
484			 (and (eq_attr "cpu" "athlon,k8,amdfam10")
485			      (eq_attr "type" "fsgn"))
486			 "athlon-direct,athlon-fpsched,athlon-fmul")
;; fdiv: fmul pipe reserved for a single cycle; latency 24 on athlon,
;; 11 on k8/amdfam10 (13 with a load).
487(define_insn_reservation "athlon_fdiv_load" 24
488			 (and (eq_attr "cpu" "athlon")
489			      (and (eq_attr "type" "fdiv")
490				   (eq_attr "memory" "load")))
491			 "athlon-direct,athlon-fpload,athlon-fmul")
492(define_insn_reservation "athlon_fdiv_load_k8" 13
493			 (and (eq_attr "cpu" "k8,amdfam10")
494			      (and (eq_attr "type" "fdiv")
495				   (eq_attr "memory" "load")))
496			 "athlon-direct,athlon-fploadk8,athlon-fmul")
497(define_insn_reservation "athlon_fdiv" 24
498			 (and (eq_attr "cpu" "athlon")
499			      (eq_attr "type" "fdiv"))
500			 "athlon-direct,athlon-fpsched,athlon-fmul")
501(define_insn_reservation "athlon_fdiv_k8" 11
502			 (and (eq_attr "cpu" "k8,amdfam10")
503			      (eq_attr "type" "fdiv"))
504			 "athlon-direct,athlon-fpsched,athlon-fmul")
;; fpspc insns: vector decoded, latency 100 (103 with a load); all three
;; FP pipes are reserved for one cycle.
505(define_insn_reservation "athlon_fpspc_load" 103
506			 (and (eq_attr "cpu" "athlon,k8,amdfam10")
507			      (and (eq_attr "type" "fpspc")
508				   (eq_attr "memory" "load")))
509			 "athlon-vector,athlon-fpload,athlon-fvector")
510(define_insn_reservation "athlon_fpspc" 100
511			 (and (eq_attr "cpu" "athlon,k8,amdfam10")
512			      (eq_attr "type" "fpspc"))
513			 "athlon-vector,athlon-fpsched,athlon-fvector")
;; fcmov: vector decoded; latency 7 on athlon, 15 on k8/amdfam10
;; (17 with a load).
514(define_insn_reservation "athlon_fcmov_load" 7
515			 (and (eq_attr "cpu" "athlon")
516			      (and (eq_attr "type" "fcmov")
517				   (eq_attr "memory" "load")))
518			 "athlon-vector,athlon-fpload,athlon-fvector")
519(define_insn_reservation "athlon_fcmov" 7
520			 (and (eq_attr "cpu" "athlon")
521			      (eq_attr "type" "fcmov"))
522			 "athlon-vector,athlon-fpsched,athlon-fvector")
523(define_insn_reservation "athlon_fcmov_load_k8" 17
524			 (and (eq_attr "cpu" "k8,amdfam10")
525			      (and (eq_attr "type" "fcmov")
526				   (eq_attr "memory" "load")))
527			 "athlon-vector,athlon-fploadk8,athlon-fvector")
528(define_insn_reservation "athlon_fcmov_k8" 15
529			 (and (eq_attr "cpu" "k8,amdfam10")
530			      (eq_attr "type" "fcmov"))
531			 "athlon-vector,athlon-fpsched,athlon-fvector")
532;; fcomi is vector decoded but uses only one pipe.
;; These three reservations pick out fcmp insns whose athlon_decode
;; attribute is "vector"; they use the fadd pipe.
533(define_insn_reservation "athlon_fcomi_load" 3
534			 (and (eq_attr "cpu" "athlon")
535			      (and (eq_attr "type" "fcmp")
536				   (and (eq_attr "athlon_decode" "vector")
537				        (eq_attr "memory" "load"))))
538			 "athlon-vector,athlon-fpload,athlon-fadd")
539(define_insn_reservation "athlon_fcomi_load_k8" 5
540			 (and (eq_attr "cpu" "k8,amdfam10")
541			      (and (eq_attr "type" "fcmp")
542				   (and (eq_attr "athlon_decode" "vector")
543				        (eq_attr "memory" "load"))))
544			 "athlon-vector,athlon-fploadk8,athlon-fadd")
545(define_insn_reservation "athlon_fcomi" 3
546			 (and (eq_attr "cpu" "athlon,k8,amdfam10")
547			      (and (eq_attr "athlon_decode" "vector")
548				   (eq_attr "type" "fcmp")))
549			 "athlon-vector,athlon-fpsched,athlon-fadd")
;; Remaining fcmp insns: direct decoded, fadd pipe, latency 2
;; (4 with a load on k8/amdfam10).
550(define_insn_reservation "athlon_fcom_load" 2
551			 (and (eq_attr "cpu" "athlon")
552			      (and (eq_attr "type" "fcmp")
553				   (eq_attr "memory" "load")))
554			 "athlon-direct,athlon-fpload,athlon-fadd")
555(define_insn_reservation "athlon_fcom_load_k8" 4
556			 (and (eq_attr "cpu" "k8,amdfam10")
557			      (and (eq_attr "type" "fcmp")
558				   (eq_attr "memory" "load")))
559			 "athlon-direct,athlon-fploadk8,athlon-fadd")
560(define_insn_reservation "athlon_fcom" 2
561			 (and (eq_attr "cpu" "athlon,k8,amdfam10")
562			      (eq_attr "type" "fcmp"))
563			 "athlon-direct,athlon-fpsched,athlon-fadd")
564;; Never seen by the scheduler because we still don't do post reg-stack
565;; scheduling.
566;(define_insn_reservation "athlon_fxch" 2
567;			 (and (eq_attr "cpu" "athlon,k8,amdfam10")
568;			      (eq_attr "type" "fxch"))
569;			 "athlon-direct,athlon-fpsched,athlon-fany")
570
571;; Athlon handles MMX operations in the FPU unit with shorter latencies
572
;; ssemov whose operand 1 is a DFmode memory operand.
573(define_insn_reservation "athlon_movlpd_load" 0
574			 (and (eq_attr "cpu" "athlon")
575			      (and (eq_attr "type" "ssemov")
576				   (match_operand:DF 1 "memory_operand")))
577			 "athlon-direct,athlon-fpload,athlon-fany")
578(define_insn_reservation "athlon_movlpd_load_k8" 2
579			 (and (eq_attr "cpu" "k8")
580			      (and (eq_attr "type" "ssemov")
581				   (match_operand:DF 1 "memory_operand")))
582			 "athlon-direct,athlon-fploadk8,athlon-fstore")
;; 128-bit (V4SF/V2DF/TI) loads use both load ports: double decoded on k8,
;; vector decoded on athlon.
583(define_insn_reservation "athlon_movaps_load_k8" 2
584			 (and (eq_attr "cpu" "k8")
585			      (and (eq_attr "type" "ssemov")
586				   (and (eq_attr "mode" "V4SF,V2DF,TI")
587					(eq_attr "memory" "load"))))
588			 "athlon-double,athlon-fpload2k8,athlon-fstore,athlon-fstore")
589(define_insn_reservation "athlon_movaps_load" 0
590			 (and (eq_attr "cpu" "athlon")
591			      (and (eq_attr "type" "ssemov")
592				   (and (eq_attr "mode" "V4SF,V2DF,TI")
593					(eq_attr "memory" "load"))))
594			 "athlon-vector,athlon-fpload2,(athlon-fany+athlon-fany)")
;; SFmode/DImode ssemov loads.
595(define_insn_reservation "athlon_movss_load" 1
596			 (and (eq_attr "cpu" "athlon")
597			      (and (eq_attr "type" "ssemov")
598				   (and (eq_attr "mode" "SF,DI")
599					(eq_attr "memory" "load"))))
600			 "athlon-vector,athlon-fpload,(athlon-fany*2)")
601(define_insn_reservation "athlon_movss_load_k8" 1
602			 (and (eq_attr "cpu" "k8")
603			      (and (eq_attr "type" "ssemov")
604				   (and (eq_attr "mode" "SF,DI")
605					(eq_attr "memory" "load"))))
606			 "athlon-double,athlon-fploadk8,(athlon-fstore+athlon-fany)")
;; Catch-all for mmxmov/ssemov loads; the conditions do not test the mode.
607(define_insn_reservation "athlon_mmxsseld" 0
608			 (and (eq_attr "cpu" "athlon")
609			      (and (eq_attr "type" "mmxmov,ssemov")
610				   (eq_attr "memory" "load")))
611			 "athlon-direct,athlon-fpload,athlon-fany")
612(define_insn_reservation "athlon_mmxsseld_k8" 2
613			 (and (eq_attr "cpu" "k8")
614			      (and (eq_attr "type" "mmxmov,ssemov")
615				   (eq_attr "memory" "load")))
616			 "athlon-direct,athlon-fploadk8,athlon-fstore")
617;; On AMDFAM10 all double, single and integer packed and scalar SSEx data
618;; loads generated are direct path, have a latency of 2 and do not use any
619;; FP execution units.  There are no separate entries for movlpx/movhpx
620;; loads, which are direct path, latency of 4 and use the FADD/FMUL FP
621;; execution units, as they will not be generated.
622(define_insn_reservation "athlon_sseld_amdfam10" 2
623			 (and (eq_attr "cpu" "amdfam10")
624			      (and (eq_attr "type" "ssemov")
625				   (eq_attr "memory" "load")))
626			 "athlon-direct,athlon-fploadk8")
627;; On AMDFAM10 MMX data loads generated are direct path, latency of 4
628;; and can use any of the FP execution units
629(define_insn_reservation "athlon_mmxld_amdfam10" 4
630			 (and (eq_attr "cpu" "amdfam10")
631			      (and (eq_attr "type" "mmxmov")
632				   (eq_attr "memory" "load")))
633			 "athlon-direct,athlon-fploadk8, athlon-fany")
;; 128-bit (V4SF/V2DF/TI) MMX/SSE stores on the original Athlon: vector
;; decoded, latency 3, fstore pipe plus both store ports for two cycles.
;; The condition selects cpu "athlon".  It used to select "k8", which made
;; it identical to the condition of athlon_mmxssest_k8 (the double-decoded
;; K8 form) and left Athlon 128-bit stores to the generic
;; athlon_mmxssest_short reservation.  This matches the athlon/k8 split of
;; athlon_movaps and athlon_movaps_k8.
634(define_insn_reservation "athlon_mmxssest" 3
635			 (and (eq_attr "cpu" "athlon")
636			      (and (eq_attr "type" "mmxmov,ssemov")
637				   (and (eq_attr "mode" "V4SF,V2DF,TI")
638					(eq_attr "memory" "store,both"))))
639			 "athlon-vector,(athlon-fpsched+athlon-agu),((athlon-fstore+athlon-store2)*2)")
;; 128-bit (V4SF/V2DF/TI) MMX/SSE stores on k8: double decoded, latency 3.
640(define_insn_reservation "athlon_mmxssest_k8" 3
641			 (and (eq_attr "cpu" "k8")
642			      (and (eq_attr "type" "mmxmov,ssemov")
643				   (and (eq_attr "mode" "V4SF,V2DF,TI")
644					(eq_attr "memory" "store,both"))))
645			 "athlon-double,(athlon-fpsched+athlon-agu),((athlon-fstore+athlon-store2)*2)")
;; Catch-all for MMX/SSE stores on athlon/k8; the condition does not test
;; the mode.  Direct decoded, latency 2.
646(define_insn_reservation "athlon_mmxssest_short" 2
647			 (and (eq_attr "cpu" "athlon,k8")
648			      (and (eq_attr "type" "mmxmov,ssemov")
649				   (eq_attr "memory" "store,both")))
650			 "athlon-direct,(athlon-fpsched+athlon-agu),(athlon-fstore+athlon-store)")
651;; On AMDFAM10 all double, single and integer packed SSEx data stores
652;; generated are all double path, latency of 2 and use the FSTORE FP
653;; execution unit.  There are no separate entries for movupx/movdqu, which
654;; are vector path, latency of 3 and use the FSTORE*2 FP execution unit,
655;; as they will not be generated.
656(define_insn_reservation "athlon_ssest_amdfam10" 2
657			 (and (eq_attr "cpu" "amdfam10")
658			      (and (eq_attr "type" "ssemov")
659				   (and (eq_attr "mode" "V4SF,V2DF,TI")
660					(eq_attr "memory" "store,both"))))
661			 "athlon-double,(athlon-fpsched+athlon-agu),((athlon-fstore+athlon-store)*2)")
662;; On AMDFAM10 all double, single and integer scalar SSEx and MMX
663;; data stores generated are all direct path, latency of 2 and use
664;; the FSTORE FP execution unit
665(define_insn_reservation "athlon_mmxssest_short_amdfam10" 2
666			 (and (eq_attr "cpu" "amdfam10")
667			      (and (eq_attr "type" "mmxmov,ssemov")
668				   (eq_attr "memory" "store,both")))
669			 "athlon-direct,(athlon-fpsched+athlon-agu),(athlon-fstore+athlon-store)")
670(define_insn_reservation "athlon_movaps_k8" 2
671			 (and (eq_attr "cpu" "k8")
672			      (and (eq_attr "type" "ssemov")
673				   (eq_attr "mode" "V4SF,V2DF,TI")))
674			 "athlon-double,athlon-fpsched,((athlon-faddmul+athlon-faddmul) | (athlon-faddmul, athlon-faddmul))")
675(define_insn_reservation "athlon_movaps" 2
676			 (and (eq_attr "cpu" "athlon")
677			      (and (eq_attr "type" "ssemov")
678				   (eq_attr "mode" "V4SF,V2DF,TI")))
679			 "athlon-vector,athlon-fpsched,(athlon-faddmul+athlon-faddmul)")
680(define_insn_reservation "athlon_mmxssemov" 2
681			 (and (eq_attr "cpu" "athlon,k8")
682			      (eq_attr "type" "mmxmov,ssemov"))
683			 "athlon-direct,athlon-fpsched,athlon-faddmul")
684(define_insn_reservation "athlon_mmxmul_load" 4
685			 (and (eq_attr "cpu" "athlon,k8")
686			      (and (eq_attr "type" "mmxmul")
687				   (eq_attr "memory" "load")))
688			 "athlon-direct,athlon-fpload,athlon-fmul")
689(define_insn_reservation "athlon_mmxmul" 3
690			 (and (eq_attr "cpu" "athlon,k8")
691			      (eq_attr "type" "mmxmul"))
692			 "athlon-direct,athlon-fpsched,athlon-fmul")
693(define_insn_reservation "athlon_mmx_load" 3
694			 (and (eq_attr "cpu" "athlon,k8")
695			      (and (eq_attr "unit" "mmx")
696				   (eq_attr "memory" "load")))
697			 "athlon-direct,athlon-fpload,athlon-faddmul")
;; Athlon/K8: any insn whose "unit" attribute is mmx, with no memory test;
;; latency 2.
(define_insn_reservation "athlon_mmx" 2
			 (and (eq_attr "cpu" "athlon,k8")
			      (eq_attr "unit" "mmx"))
			 "athlon-direct,athlon-fpsched,athlon-faddmul")
;; SSE operations are handled by the i387 unit as well.  For scalar
;; operations the latency is the same as for the i387 operations.
704
;; Athlon: sselog/sselog1/sseshuf/sseshuf1 with memory "load"; latency 3,
;; athlon-vector decode, athlon-fpload2, then athlon-fmul for 2 cycles.
(define_insn_reservation "athlon_sselog_load" 3
			 (and (eq_attr "cpu" "athlon")
			      (and (eq_attr "type" "sselog,sselog1,sseshuf,sseshuf1")
				   (eq_attr "memory" "load")))
			 "athlon-vector,athlon-fpload2,(athlon-fmul*2)")
;; K8: sselog/sselog1/sseshuf/sseshuf1 with memory "load"; latency 5,
;; athlon-double decode, athlon-fpload2k8, then athlon-fmul for 2 cycles.
(define_insn_reservation "athlon_sselog_load_k8" 5
			 (and (eq_attr "cpu" "k8")
			      (and (eq_attr "type" "sselog,sselog1,sseshuf,sseshuf1")
				   (eq_attr "memory" "load")))
			 "athlon-double,athlon-fpload2k8,(athlon-fmul*2)")
;; AMDFAM10: sselog/sselog1/sseshuf/sseshuf1 with memory "load"; latency 4,
;; athlon-direct decode, athlon-fploadk8, then either athlon-fadd or
;; athlon-fmul.
(define_insn_reservation "athlon_sselog_load_amdfam10" 4
			 (and (eq_attr "cpu" "amdfam10")
			      (and (eq_attr "type" "sselog,sselog1,sseshuf,sseshuf1")
				   (eq_attr "memory" "load")))
			 "athlon-direct,athlon-fploadk8,(athlon-fadd|athlon-fmul)")
;; Athlon: sselog/sselog1/sseshuf/sseshuf1 with no memory test; latency 3,
;; athlon-vector decode, athlon-fmul for 2 cycles.
(define_insn_reservation "athlon_sselog" 3
			 (and (eq_attr "cpu" "athlon")
			      (eq_attr "type" "sselog,sselog1,sseshuf,sseshuf1"))
			 "athlon-vector,athlon-fpsched,athlon-fmul*2")
;; K8: sselog/sselog1/sseshuf/sseshuf1 with no memory test; latency 3,
;; athlon-double decode.
;; NOTE(review): athlon-fmul is reserved once here, whereas
;; athlon_sselog_load_k8 reserves it for 2 cycles -- confirm the difference
;; is intended.
(define_insn_reservation "athlon_sselog_k8" 3
			 (and (eq_attr "cpu" "k8")
			      (eq_attr "type" "sselog,sselog1,sseshuf,sseshuf1"))
			 "athlon-double,athlon-fpsched,athlon-fmul")
;; AMDFAM10: sselog/sselog1/sseshuf/sseshuf1 with no memory test; latency 2,
;; athlon-direct decode, either athlon-fadd or athlon-fmul.
(define_insn_reservation "athlon_sselog_amdfam10" 2
			 (and (eq_attr "cpu" "amdfam10")
			      (eq_attr "type" "sselog,sselog1,sseshuf,sseshuf1"))
			 "athlon-direct,athlon-fpsched,(athlon-fadd|athlon-fmul)")
732
733;; ??? pcmp executes in addmul, probably not worthwhile to bother about that.
;; Athlon: ssecmp in modes SF/DF/DI with memory "load"; latency 2,
;; athlon-direct decode, athlon-fpload then athlon-fadd.
(define_insn_reservation "athlon_ssecmp_load" 2
			 (and (eq_attr "cpu" "athlon")
			      (and (eq_attr "type" "ssecmp")
				   (and (eq_attr "mode" "SF,DF,DI")
					(eq_attr "memory" "load"))))
			 "athlon-direct,athlon-fpload,athlon-fadd")
;; K8/AMDFAM10: ssecmp in modes SF/DF/DI/TI with memory "load"; latency 4,
;; athlon-direct decode, athlon-fploadk8 then athlon-fadd.
;; NOTE(review): the mode list includes TI here but not in the Athlon
;; variant athlon_ssecmp_load -- confirm that is intended.
(define_insn_reservation "athlon_ssecmp_load_k8" 4
			 (and (eq_attr "cpu" "k8,amdfam10")
			      (and (eq_attr "type" "ssecmp")
				   (and (eq_attr "mode" "SF,DF,DI,TI")
					(eq_attr "memory" "load"))))
			 "athlon-direct,athlon-fploadk8,athlon-fadd")
;; Athlon/K8/AMDFAM10: ssecmp in modes SF/DF/DI/TI with no memory test;
;; latency 2, athlon-direct decode, athlon-fadd.
(define_insn_reservation "athlon_ssecmp" 2
			 (and (eq_attr "cpu" "athlon,k8,amdfam10")
			      (and (eq_attr "type" "ssecmp")
				   (eq_attr "mode" "SF,DF,DI,TI")))
			 "athlon-direct,athlon-fpsched,athlon-fadd")
;; Athlon: ssecmp with memory "load" and no mode test; latency 3,
;; athlon-vector decode, athlon-fpload2, athlon-fadd for 2 cycles.
;; The condition has no mode test, so it also holds for the modes listed in
;; athlon_ssecmp_load; presumably it is meant for the remaining (vector)
;; modes only -- verify how overlapping reservations are resolved.
(define_insn_reservation "athlon_ssecmpvector_load" 3
			 (and (eq_attr "cpu" "athlon")
			      (and (eq_attr "type" "ssecmp")
				   (eq_attr "memory" "load")))
			 "athlon-vector,athlon-fpload2,(athlon-fadd*2)")
;; K8: ssecmp with memory "load" and no mode test; latency 5, athlon-double
;; decode, athlon-fpload2k8, athlon-fadd for 2 cycles.
(define_insn_reservation "athlon_ssecmpvector_load_k8" 5
			 (and (eq_attr "cpu" "k8")
			      (and (eq_attr "type" "ssecmp")
				   (eq_attr "memory" "load")))
			 "athlon-double,athlon-fpload2k8,(athlon-fadd*2)")
;; AMDFAM10: ssecmp with memory "load" and no mode test; latency 4,
;; athlon-direct decode, athlon-fploadk8, a single athlon-fadd.
(define_insn_reservation "athlon_ssecmpvector_load_amdfam10" 4
			 (and (eq_attr "cpu" "amdfam10")
			      (and (eq_attr "type" "ssecmp")
				   (eq_attr "memory" "load")))
			 "athlon-direct,athlon-fploadk8,athlon-fadd")
;; Athlon: ssecmp with no mode or memory test; latency 3, athlon-vector
;; decode, athlon-fadd for 2 cycles.
(define_insn_reservation "athlon_ssecmpvector" 3
			 (and (eq_attr "cpu" "athlon")
			      (eq_attr "type" "ssecmp"))
			 "athlon-vector,athlon-fpsched,(athlon-fadd*2)")
;; K8: ssecmp with no mode or memory test; latency 3, athlon-double decode,
;; athlon-fadd for 2 cycles.
(define_insn_reservation "athlon_ssecmpvector_k8" 3
			 (and (eq_attr "cpu" "k8")
			      (eq_attr "type" "ssecmp"))
			 "athlon-double,athlon-fpsched,(athlon-fadd*2)")
;; AMDFAM10: ssecmp with no mode or memory test; latency 2, athlon-direct
;; decode, a single athlon-fadd.
(define_insn_reservation "athlon_ssecmpvector_amdfam10" 2
			 (and (eq_attr "cpu" "amdfam10")
			      (eq_attr "type" "ssecmp"))
			 "athlon-direct,athlon-fpsched,athlon-fadd")
;; Athlon: ssecomi with memory "load"; latency 4, athlon-vector decode,
;; athlon-fpload then athlon-fadd.
(define_insn_reservation "athlon_ssecomi_load" 4
			 (and (eq_attr "cpu" "athlon")
			      (and (eq_attr "type" "ssecomi")
				   (eq_attr "memory" "load")))
			 "athlon-vector,athlon-fpload,athlon-fadd")
;; K8: ssecomi with memory "load"; latency 6, athlon-vector decode,
;; athlon-fploadk8 then athlon-fadd.
(define_insn_reservation "athlon_ssecomi_load_k8" 6
			 (and (eq_attr "cpu" "k8")
			      (and (eq_attr "type" "ssecomi")
				   (eq_attr "memory" "load")))
			 "athlon-vector,athlon-fploadk8,athlon-fadd")
;; AMDFAM10: ssecomi with memory "load"; latency 5, athlon-direct decode,
;; athlon-fploadk8 then athlon-fadd.
(define_insn_reservation "athlon_ssecomi_load_amdfam10" 5
			 (and (eq_attr "cpu" "amdfam10")
			      (and (eq_attr "type" "ssecomi")
				   (eq_attr "memory" "load")))
			 "athlon-direct,athlon-fploadk8,athlon-fadd")
;; Athlon/K8: ssecomi with no memory test; latency 4, athlon-vector decode,
;; athlon-fadd.
(define_insn_reservation "athlon_ssecomi" 4
			 (and (eq_attr "cpu" "athlon,k8")
			      (eq_attr "type" "ssecomi"))
			 "athlon-vector,athlon-fpsched,athlon-fadd")
;; AMDFAM10: ssecomi with no memory test; latency 3, athlon-direct decode,
;; athlon-fadd.
;; NOTE(review): a remark formerly embedded in this condition said that
;; athlon_ssecomi had a bug in its "type" test which was fixed only for
;; amdfam10.  athlon_ssecomi now tests the same type "ssecomi" as this
;; reservation, so that remark appears to be stale.
(define_insn_reservation "athlon_ssecomi_amdfam10" 3
			 (and (eq_attr "cpu" "amdfam10")
			      (eq_attr "type" "ssecomi"))
			 "athlon-direct,athlon-fpsched,athlon-fadd")
;; Athlon: sseadd/sseadd1 in modes SF/DF/DI with memory "load"; latency 4,
;; athlon-direct decode, athlon-fpload then athlon-fadd.
(define_insn_reservation "athlon_sseadd_load" 4
			 (and (eq_attr "cpu" "athlon")
			      (and (eq_attr "type" "sseadd,sseadd1")
				   (and (eq_attr "mode" "SF,DF,DI")
					(eq_attr "memory" "load"))))
			 "athlon-direct,athlon-fpload,athlon-fadd")
;; K8/AMDFAM10: sseadd/sseadd1 in modes SF/DF/DI with memory "load";
;; latency 6, athlon-direct decode, athlon-fploadk8 then athlon-fadd.
(define_insn_reservation "athlon_sseadd_load_k8" 6
			 (and (eq_attr "cpu" "k8,amdfam10")
			      (and (eq_attr "type" "sseadd,sseadd1")
				   (and (eq_attr "mode" "SF,DF,DI")
					(eq_attr "memory" "load"))))
			 "athlon-direct,athlon-fploadk8,athlon-fadd")
;; Athlon/K8/AMDFAM10: sseadd/sseadd1 in modes SF/DF/DI with no memory test;
;; latency 4, athlon-direct decode, athlon-fadd.
(define_insn_reservation "athlon_sseadd" 4
			 (and (eq_attr "cpu" "athlon,k8,amdfam10")
			      (and (eq_attr "type" "sseadd,sseadd1")
				   (eq_attr "mode" "SF,DF,DI")))
			 "athlon-direct,athlon-fpsched,athlon-fadd")
;; Athlon: sseadd/sseadd1 with memory "load" and no mode test; latency 5,
;; athlon-vector decode, athlon-fpload2, athlon-fadd for 2 cycles.
(define_insn_reservation "athlon_sseaddvector_load" 5
			 (and (eq_attr "cpu" "athlon")
			      (and (eq_attr "type" "sseadd,sseadd1")
				   (eq_attr "memory" "load")))
			 "athlon-vector,athlon-fpload2,(athlon-fadd*2)")
;; K8: sseadd/sseadd1 with memory "load" and no mode test; latency 7,
;; athlon-double decode, athlon-fpload2k8, athlon-fadd for 2 cycles.
(define_insn_reservation "athlon_sseaddvector_load_k8" 7
			 (and (eq_attr "cpu" "k8")
			      (and (eq_attr "type" "sseadd,sseadd1")
				   (eq_attr "memory" "load")))
			 "athlon-double,athlon-fpload2k8,(athlon-fadd*2)")
;; AMDFAM10: sseadd/sseadd1 with memory "load" and no mode test; latency 6,
;; athlon-direct decode, athlon-fploadk8, a single athlon-fadd.
(define_insn_reservation "athlon_sseaddvector_load_amdfam10" 6
			 (and (eq_attr "cpu" "amdfam10")
			      (and (eq_attr "type" "sseadd,sseadd1")
				   (eq_attr "memory" "load")))
			 "athlon-direct,athlon-fploadk8,athlon-fadd")
;; Athlon: sseadd/sseadd1 with no mode or memory test; latency 5,
;; athlon-vector decode, athlon-fadd for 2 cycles.
(define_insn_reservation "athlon_sseaddvector" 5
			 (and (eq_attr "cpu" "athlon")
			      (eq_attr "type" "sseadd,sseadd1"))
			 "athlon-vector,athlon-fpsched,(athlon-fadd*2)")
;; K8: sseadd/sseadd1 with no mode or memory test; latency 5, athlon-double
;; decode, athlon-fadd for 2 cycles.
(define_insn_reservation "athlon_sseaddvector_k8" 5
			 (and (eq_attr "cpu" "k8")
			      (eq_attr "type" "sseadd,sseadd1"))
			 "athlon-double,athlon-fpsched,(athlon-fadd*2)")
;; AMDFAM10: sseadd/sseadd1 with no mode or memory test; latency 4,
;; athlon-direct decode, a single athlon-fadd.
(define_insn_reservation "athlon_sseaddvector_amdfam10" 4
			 (and (eq_attr "cpu" "amdfam10")
			      (eq_attr "type" "sseadd,sseadd1"))
			 "athlon-direct,athlon-fpsched,athlon-fadd")
846
;; Conversions behave very irregularly and the scheduling is critical here.
;; Take each instruction separately.  Assume that the mode is always set to the
;; destination one and athlon_decode is set to the K8 versions.
850
851;; cvtss2sd
;; K8/Athlon: ssecvt, athlon_decode "direct", mode DF, memory "load";
;; latency 4, athlon-fploadk8 then athlon-fstore.
(define_insn_reservation "athlon_ssecvt_cvtss2sd_load_k8" 4
			 (and (eq_attr "cpu" "k8,athlon")
			      (and (eq_attr "type" "ssecvt")
				   (and (eq_attr "athlon_decode" "direct")
					(and (eq_attr "mode" "DF")
					     (eq_attr "memory" "load")))))
			 "athlon-direct,athlon-fploadk8,athlon-fstore")
;; AMDFAM10: ssecvt, amdfam10_decode "double", mode DF, memory "load";
;; latency 7, athlon-fploadk8, then athlon-faddmul together with
;; athlon-fstore.
(define_insn_reservation "athlon_ssecvt_cvtss2sd_load_amdfam10" 7
			 (and (eq_attr "cpu" "amdfam10")
			      (and (eq_attr "type" "ssecvt")
				   (and (eq_attr "amdfam10_decode" "double")
					(and (eq_attr "mode" "DF")
					     (eq_attr "memory" "load")))))
			 "athlon-double,athlon-fploadk8,(athlon-faddmul+athlon-fstore)")
;; Athlon/K8: ssecvt, athlon_decode "direct", mode DF, no memory test;
;; latency 2, athlon-fpsched then athlon-fstore.
(define_insn_reservation "athlon_ssecvt_cvtss2sd" 2
			 (and (eq_attr "cpu" "athlon,k8")
			      (and (eq_attr "type" "ssecvt")
				   (and (eq_attr "athlon_decode" "direct")
					(eq_attr "mode" "DF"))))
			 "athlon-direct,athlon-fpsched,athlon-fstore")
;; AMDFAM10: ssecvt, amdfam10_decode "vector", mode DF, no memory test;
;; latency 7, athlon-vector decode, athlon-faddmul, then athlon-fstore for
;; 2 cycles.
(define_insn_reservation "athlon_ssecvt_cvtss2sd_amdfam10" 7
			 (and (eq_attr "cpu" "amdfam10")
			      (and (eq_attr "type" "ssecvt")
				   (and (eq_attr "amdfam10_decode" "vector")
					(eq_attr "mode" "DF"))))
			 "athlon-vector,athlon-fpsched,athlon-faddmul,(athlon-fstore*2)")
;; cvtps2pd.  Model it the same way as the other double decoded FP conversions.
;; K8/Athlon: ssecvt, athlon_decode "double", modes V2DF/V4SF/TI, memory
;; "load"; latency 5, athlon-fpload2k8, then athlon-fstore for 2 cycles.
(define_insn_reservation "athlon_ssecvt_cvtps2pd_load_k8" 5
			 (and (eq_attr "cpu" "k8,athlon")
			      (and (eq_attr "type" "ssecvt")
				   (and (eq_attr "athlon_decode" "double")
					(and (eq_attr "mode" "V2DF,V4SF,TI")
					     (eq_attr "memory" "load")))))
			 "athlon-double,athlon-fpload2k8,(athlon-fstore*2)")
;; AMDFAM10: ssecvt, amdfam10_decode "direct", modes V2DF/V4SF/TI, memory
;; "load"; latency 4, athlon-fploadk8 then a single athlon-fstore.
(define_insn_reservation "athlon_ssecvt_cvtps2pd_load_amdfam10" 4
			 (and (eq_attr "cpu" "amdfam10")
			      (and (eq_attr "type" "ssecvt")
				   (and (eq_attr "amdfam10_decode" "direct")
					(and (eq_attr "mode" "V2DF,V4SF,TI")
					     (eq_attr "memory" "load")))))
			 "athlon-direct,athlon-fploadk8,athlon-fstore")
;; K8/Athlon: ssecvt, athlon_decode "double", modes V2DF/V4SF/TI, no memory
;; test; latency 3, athlon-fstore in two consecutive cycles.
(define_insn_reservation "athlon_ssecvt_cvtps2pd_k8" 3
			 (and (eq_attr "cpu" "k8,athlon")
			      (and (eq_attr "type" "ssecvt")
				   (and (eq_attr "athlon_decode" "double")
					(eq_attr "mode" "V2DF,V4SF,TI"))))
			 "athlon-double,athlon-fpsched,athlon-fstore,athlon-fstore")
;; AMDFAM10: ssecvt, amdfam10_decode "direct", modes V2DF/V4SF/TI, no memory
;; test; latency 2, a single athlon-fstore.
(define_insn_reservation "athlon_ssecvt_cvtps2pd_amdfam10" 2
			 (and (eq_attr "cpu" "amdfam10")
			      (and (eq_attr "type" "ssecvt")
				   (and (eq_attr "amdfam10_decode" "direct")
					(eq_attr "mode" "V2DF,V4SF,TI"))))
			 "athlon-direct,athlon-fpsched,athlon-fstore")
;; cvtsi2sd mem,reg is directpath (cvtsi2sd reg,reg is doublepath)
;; cvtsi2sd has throughput 1 and is executed in store unit with latency of 6
;; Athlon/K8: sseicvt, athlon_decode "direct", modes SF/DF, memory "load";
;; latency 6, athlon-fploadk8 then athlon-fstore.
(define_insn_reservation "athlon_sseicvt_cvtsi2sd_load" 6
			 (and (eq_attr "cpu" "athlon,k8")
			      (and (eq_attr "type" "sseicvt")
				   (and (eq_attr "athlon_decode" "direct")
					(and (eq_attr "mode" "SF,DF")
					     (eq_attr "memory" "load")))))
			 "athlon-direct,athlon-fploadk8,athlon-fstore")
;; AMDFAM10: cvtsi2sd with a memory source (sseicvt, amdfam10_decode
;; "double", memory "load"); latency 9, athlon-fploadk8, then athlon-faddmul
;; together with athlon-fstore.
;; The mode test is DF only, the destination mode of cvtsi2sd (this section
;; assumes the mode attribute is the destination mode).  With "SF,DF" the
;; condition was identical to that of athlon_sseicvt_cvtsi2ss_load_amdfam10,
;; and two reservations must not be true for the same insn.  The SF case is
;; still covered by that reservation, which has the same latency and units,
;; so the generated schedule is unchanged.
(define_insn_reservation "athlon_sseicvt_cvtsi2sd_load_amdfam10" 9
			 (and (eq_attr "cpu" "amdfam10")
			      (and (eq_attr "type" "sseicvt")
				   (and (eq_attr "amdfam10_decode" "double")
					(and (eq_attr "mode" "DF")
					     (eq_attr "memory" "load")))))
			 "athlon-double,athlon-fploadk8,(athlon-faddmul+athlon-fstore)")
921;; cvtsi2ss mem, reg is doublepath
;; Athlon: sseicvt, athlon_decode "double", modes SF/DF, memory "load";
;; latency 9.  The reservation uses athlon-vector decode although the
;; condition tests athlon_decode "double" (athlon_decode holds the K8
;; values), then athlon-fpload and athlon-fstore for 2 cycles.
(define_insn_reservation "athlon_sseicvt_cvtsi2ss_load" 9
			 (and (eq_attr "cpu" "athlon")
			      (and (eq_attr "type" "sseicvt")
				   (and (eq_attr "athlon_decode" "double")
					(and (eq_attr "mode" "SF,DF")
					     (eq_attr "memory" "load")))))
			 "athlon-vector,athlon-fpload,(athlon-fstore*2)")
;; K8: sseicvt, athlon_decode "double", modes SF/DF, memory "load";
;; latency 9, athlon-double decode, athlon-fploadk8, athlon-fstore for
;; 2 cycles.
(define_insn_reservation "athlon_sseicvt_cvtsi2ss_load_k8" 9
			 (and (eq_attr "cpu" "k8")
			      (and (eq_attr "type" "sseicvt")
				   (and (eq_attr "athlon_decode" "double")
					(and (eq_attr "mode" "SF,DF")
					     (eq_attr "memory" "load")))))
			 "athlon-double,athlon-fploadk8,(athlon-fstore*2)")
;; AMDFAM10: cvtsi2ss with a memory source (sseicvt, amdfam10_decode
;; "double", memory "load"); latency 9, athlon-fploadk8, then athlon-faddmul
;; together with athlon-fstore.
;; The mode test is SF only, the destination mode of cvtsi2ss (this section
;; assumes the mode attribute is the destination mode).  With "SF,DF" the
;; condition duplicated athlon_sseicvt_cvtsi2sd_load_amdfam10, and two
;; reservations must not be true for the same insn.  DF insns are matched by
;; that reservation, which has the same latency and units, so the generated
;; schedule is unchanged.
(define_insn_reservation "athlon_sseicvt_cvtsi2ss_load_amdfam10" 9
			 (and (eq_attr "cpu" "amdfam10")
			      (and (eq_attr "type" "sseicvt")
				   (and (eq_attr "amdfam10_decode" "double")
					(and (eq_attr "mode" "SF")
					     (eq_attr "memory" "load")))))
			 "athlon-double,athlon-fploadk8,(athlon-faddmul+athlon-fstore)")
943;; cvtsi2sd reg,reg is double decoded (vector on Athlon)
;; K8/Athlon: sseicvt, athlon_decode "double", modes SF/DF, memory "none";
;; latency 11, athlon-double decode.
;; NOTE(review): the reservation goes through athlon-fploadk8 even though
;; the condition requires memory "none" -- presumably deliberate for the
;; integer-to-FP register transfer; confirm.
(define_insn_reservation "athlon_sseicvt_cvtsi2sd_k8" 11
			 (and (eq_attr "cpu" "k8,athlon")
			      (and (eq_attr "type" "sseicvt")
				   (and (eq_attr "athlon_decode" "double")
					(and (eq_attr "mode" "SF,DF")
					     (eq_attr "memory" "none")))))
			 "athlon-double,athlon-fploadk8,athlon-fstore")
;; AMDFAM10: cvtsi2sd reg,reg (sseicvt, amdfam10_decode "vector", memory
;; "none"); latency 14, athlon-vector decode, athlon-fploadk8, then
;; athlon-faddmul together with athlon-fstore.
;; The mode test is DF only, the destination mode of cvtsi2sd (this section
;; assumes the mode attribute is the destination mode).  With "SF,DF" the
;; condition was identical to that of athlon_sseicvt_cvtsi2ss_amdfam10, and
;; two reservations must not be true for the same insn.  The SF case is
;; still covered by that reservation, which has the same latency and units,
;; so the generated schedule is unchanged.
(define_insn_reservation "athlon_sseicvt_cvtsi2sd_amdfam10" 14
			 (and (eq_attr "cpu" "amdfam10")
			      (and (eq_attr "type" "sseicvt")
				   (and (eq_attr "amdfam10_decode" "vector")
					(and (eq_attr "mode" "DF")
					     (eq_attr "memory" "none")))))
			 "athlon-vector,athlon-fploadk8,(athlon-faddmul+athlon-fstore)")
958;; cvtsi2ss reg, reg is doublepath
;; Athlon/K8: sseicvt, athlon_decode "vector", modes SF/DF, memory "none";
;; latency 14, athlon-vector decode, athlon-fploadk8, then athlon-fvector
;; for 2 cycles.
(define_insn_reservation "athlon_sseicvt_cvtsi2ss" 14
			 (and (eq_attr "cpu" "athlon,k8")
			      (and (eq_attr "type" "sseicvt")
				   (and (eq_attr "athlon_decode" "vector")
					(and (eq_attr "mode" "SF,DF")
					     (eq_attr "memory" "none")))))
			 "athlon-vector,athlon-fploadk8,(athlon-fvector*2)")
;; AMDFAM10: cvtsi2ss reg,reg (sseicvt, amdfam10_decode "vector", memory
;; "none"); latency 14, athlon-vector decode, athlon-fploadk8, then
;; athlon-faddmul together with athlon-fstore.
;; The mode test is SF only, the destination mode of cvtsi2ss (this section
;; assumes the mode attribute is the destination mode).  With "SF,DF" the
;; condition duplicated athlon_sseicvt_cvtsi2sd_amdfam10, and two
;; reservations must not be true for the same insn.  DF insns are matched by
;; that reservation, which has the same latency and units, so the generated
;; schedule is unchanged.
(define_insn_reservation "athlon_sseicvt_cvtsi2ss_amdfam10" 14
			 (and (eq_attr "cpu" "amdfam10")
			      (and (eq_attr "type" "sseicvt")
				   (and (eq_attr "amdfam10_decode" "vector")
					(and (eq_attr "mode" "SF")
					     (eq_attr "memory" "none")))))
			 "athlon-vector,athlon-fploadk8,(athlon-faddmul+athlon-fstore)")
;; cvtsd2ss mem,reg is doublepath, throughput unknown, latency 9
;; K8/Athlon: ssecvt, athlon_decode "double", mode SF, memory "load";
;; latency 9, athlon-double decode, athlon-fploadk8, athlon-fstore for
;; 3 cycles.
(define_insn_reservation "athlon_ssecvt_cvtsd2ss_load_k8" 9
			 (and (eq_attr "cpu" "k8,athlon")
			      (and (eq_attr "type" "ssecvt")
				   (and (eq_attr "athlon_decode" "double")
					(and (eq_attr "mode" "SF")
					     (eq_attr "memory" "load")))))
			 "athlon-double,athlon-fploadk8,(athlon-fstore*3)")
;; AMDFAM10: ssecvt, amdfam10_decode "double", mode SF, memory "load";
;; latency 9, athlon-fploadk8, then athlon-faddmul together with
;; athlon-fstore.
(define_insn_reservation "athlon_ssecvt_cvtsd2ss_load_amdfam10" 9
			 (and (eq_attr "cpu" "amdfam10")
			      (and (eq_attr "type" "ssecvt")
				   (and (eq_attr "amdfam10_decode" "double")
					(and (eq_attr "mode" "SF")
					     (eq_attr "memory" "load")))))
			 "athlon-double,athlon-fploadk8,(athlon-faddmul+athlon-fstore)")
;; cvtsd2ss reg,reg is vectorpath, throughput unknown, latency 12
;; Athlon/K8: ssecvt, athlon_decode "vector", mode SF, memory "none";
;; latency 12, athlon-vector decode, athlon-fvector for 3 cycles.
(define_insn_reservation "athlon_ssecvt_cvtsd2ss" 12
			 (and (eq_attr "cpu" "athlon,k8")
			      (and (eq_attr "type" "ssecvt")
				   (and (eq_attr "athlon_decode" "vector")
					(and (eq_attr "mode" "SF")
					     (eq_attr "memory" "none")))))
			 "athlon-vector,athlon-fpsched,(athlon-fvector*3)")
;; AMDFAM10: ssecvt, amdfam10_decode "vector", mode SF, memory "none";
;; latency 8, athlon-vector decode, athlon-faddmul, then athlon-fstore for
;; 2 cycles.
(define_insn_reservation "athlon_ssecvt_cvtsd2ss_amdfam10" 8
			 (and (eq_attr "cpu" "amdfam10")
			      (and (eq_attr "type" "ssecvt")
				   (and (eq_attr "amdfam10_decode" "vector")
					(and (eq_attr "mode" "SF")
					     (eq_attr "memory" "none")))))
			 "athlon-vector,athlon-fpsched,athlon-faddmul,(athlon-fstore*2)")
;; Athlon/K8: ssecvt, athlon_decode "vector", modes V4SF/V2DF/TI, memory
;; "load"; latency 8, athlon-fpload2k8, athlon-fstore for 3 cycles.
;; NOTE(review): the condition tests athlon_decode "vector" but the
;; reservation starts with athlon-double, while the register form
;; athlon_ssecvt_cvtpd2ps uses athlon-vector -- confirm which is intended.
(define_insn_reservation "athlon_ssecvt_cvtpd2ps_load_k8" 8
			 (and (eq_attr "cpu" "athlon,k8")
			      (and (eq_attr "type" "ssecvt")
				   (and (eq_attr "athlon_decode" "vector")
					(and (eq_attr "mode" "V4SF,V2DF,TI")
					     (eq_attr "memory" "load")))))
			 "athlon-double,athlon-fpload2k8,(athlon-fstore*3)")
;; AMDFAM10: ssecvt, amdfam10_decode "double", modes V4SF/V2DF/TI, memory
;; "load"; latency 9, athlon-fploadk8, then athlon-faddmul together with
;; athlon-fstore.
(define_insn_reservation "athlon_ssecvt_cvtpd2ps_load_amdfam10" 9
			 (and (eq_attr "cpu" "amdfam10")
			      (and (eq_attr "type" "ssecvt")
				   (and (eq_attr "amdfam10_decode" "double")
					(and (eq_attr "mode" "V4SF,V2DF,TI")
					     (eq_attr "memory" "load")))))
			 "athlon-double,athlon-fploadk8,(athlon-faddmul+athlon-fstore)")
;; cvtpd2ps mem,reg is vectorpath, throughput unknown, latency 10
;; ??? Why is it faster than cvtsd2ss?
;; Athlon/K8: ssecvt, athlon_decode "vector", modes V4SF/V2DF/TI, memory
;; "none"; latency 8, athlon-vector decode, athlon-fvector for 2 cycles.
;; NOTE(review): the comment preceding this definition describes the
;; "mem,reg" form with latency 10, but this condition requires memory
;; "none" and gives latency 8 -- confirm which values are intended.
(define_insn_reservation "athlon_ssecvt_cvtpd2ps" 8
			 (and (eq_attr "cpu" "athlon,k8")
			      (and (eq_attr "type" "ssecvt")
				   (and (eq_attr "athlon_decode" "vector")
					(and (eq_attr "mode" "V4SF,V2DF,TI")
					     (eq_attr "memory" "none")))))
			 "athlon-vector,athlon-fpsched,athlon-fvector*2")
;; AMDFAM10: ssecvt, amdfam10_decode "double", modes V4SF/V2DF/TI, memory
;; "none"; latency 7, athlon-faddmul together with athlon-fstore.
(define_insn_reservation "athlon_ssecvt_cvtpd2ps_amdfam10" 7
			 (and (eq_attr "cpu" "amdfam10")
			      (and (eq_attr "type" "ssecvt")
				   (and (eq_attr "amdfam10_decode" "double")
					(and (eq_attr "mode" "V4SF,V2DF,TI")
					     (eq_attr "memory" "none")))))
			 "athlon-double,athlon-fpsched,(athlon-faddmul+athlon-fstore)")
;; cvtsd2si mem,reg is doublepath, throughput 1, latency 9
;; Athlon/K8: sseicvt, athlon_decode "vector", modes SI/DI, memory "load";
;; latency 9, athlon-vector decode, athlon-fploadk8, athlon-fvector.
;; NOTE(review): the name is spelled "secvt" where sibling reservations use
;; "ssecvt"; left unchanged here because the name may be referenced
;; elsewhere in the file.
(define_insn_reservation "athlon_secvt_cvtsX2si_load" 9
			 (and (eq_attr "cpu" "athlon,k8")
			      (and (eq_attr "type" "sseicvt")
				   (and (eq_attr "athlon_decode" "vector")
					(and (eq_attr "mode" "SI,DI")
					     (eq_attr "memory" "load")))))
			 "athlon-vector,athlon-fploadk8,athlon-fvector")
;; AMDFAM10: sseicvt, amdfam10_decode "double", modes SI/DI, memory "load";
;; latency 10, athlon-fploadk8, then athlon-fadd together with
;; athlon-fstore.
(define_insn_reservation "athlon_secvt_cvtsX2si_load_amdfam10" 10
			 (and (eq_attr "cpu" "amdfam10")
			      (and (eq_attr "type" "sseicvt")
				   (and (eq_attr "amdfam10_decode" "double")
					(and (eq_attr "mode" "SI,DI")
					     (eq_attr "memory" "load")))))
			 "athlon-double,athlon-fploadk8,(athlon-fadd+athlon-fstore)")
;; cvtsd2si reg,reg is doublepath, throughput 1, latency 9
;; Athlon: sseicvt, athlon_decode "double", modes SI/DI, memory "none";
;; latency 9.  The reservation uses athlon-vector decode although the
;; condition tests athlon_decode "double" (athlon_decode holds the K8
;; values), then athlon-fvector.
(define_insn_reservation "athlon_ssecvt_cvtsX2si" 9
			 (and (eq_attr "cpu" "athlon")
			      (and (eq_attr "type" "sseicvt")
				   (and (eq_attr "athlon_decode" "double")
					(and (eq_attr "mode" "SI,DI")
					     (eq_attr "memory" "none")))))
			 "athlon-vector,athlon-fpsched,athlon-fvector")
;; K8: sseicvt, athlon_decode "double", modes SI/DI, memory "none";
;; latency 9, athlon-double decode, athlon-fstore.
(define_insn_reservation "athlon_ssecvt_cvtsX2si_k8" 9
			 (and (eq_attr "cpu" "k8")
			      (and (eq_attr "type" "sseicvt")
				   (and (eq_attr "athlon_decode" "double")
					(and (eq_attr "mode" "SI,DI")
					     (eq_attr "memory" "none")))))
			 "athlon-double,athlon-fpsched,athlon-fstore")
;; AMDFAM10: sseicvt, amdfam10_decode "double", modes SI/DI, memory "none";
;; latency 8, athlon-fadd together with athlon-fstore.
(define_insn_reservation "athlon_ssecvt_cvtsX2si_amdfam10" 8
			 (and (eq_attr "cpu" "amdfam10")
			      (and (eq_attr "type" "sseicvt")
				   (and (eq_attr "amdfam10_decode" "double")
					(and (eq_attr "mode" "SI,DI")
					     (eq_attr "memory" "none")))))
			 "athlon-double,athlon-fpsched,(athlon-fadd+athlon-fstore)")
;; cvtpd2dq reg,mem is doublepath, throughput 1, latency 9 on amdfam10
;; AMDFAM10: sseicvt, amdfam10_decode "double", mode TI, memory "load";
;; latency 9, athlon-fploadk8, then athlon-faddmul together with
;; athlon-fstore.
(define_insn_reservation "athlon_sseicvt_cvtpd2dq_load_amdfam10" 9
			 (and (eq_attr "cpu" "amdfam10")
			      (and (eq_attr "type" "sseicvt")
				   (and (eq_attr "amdfam10_decode" "double")
					(and (eq_attr "mode" "TI")
					     (eq_attr "memory" "load")))))
			 "athlon-double,athlon-fploadk8,(athlon-faddmul+athlon-fstore)")
;; cvtpd2dq reg,reg is doublepath, throughput 1, latency 7 on amdfam10
;; AMDFAM10: sseicvt, amdfam10_decode "double", mode TI, memory "none";
;; latency 7, athlon-faddmul together with athlon-fstore.
(define_insn_reservation "athlon_sseicvt_cvtpd2dq_amdfam10" 7
			 (and (eq_attr "cpu" "amdfam10")
			      (and (eq_attr "type" "sseicvt")
				   (and (eq_attr "amdfam10_decode" "double")
					(and (eq_attr "mode" "TI")
					     (eq_attr "memory" "none")))))
			 "athlon-double,athlon-fpsched,(athlon-faddmul+athlon-fstore)")
1086
1087
;; Athlon: ssemul in modes SF/DF with memory "load"; latency 4,
;; athlon-direct decode, athlon-fpload then athlon-fmul.
(define_insn_reservation "athlon_ssemul_load" 4
			 (and (eq_attr "cpu" "athlon")
			      (and (eq_attr "type" "ssemul")
				   (and (eq_attr "mode" "SF,DF")
					(eq_attr "memory" "load"))))
			 "athlon-direct,athlon-fpload,athlon-fmul")
;; K8/AMDFAM10: ssemul in modes SF/DF with memory "load"; latency 6,
;; athlon-direct decode, athlon-fploadk8 then athlon-fmul.
(define_insn_reservation "athlon_ssemul_load_k8" 6
			 (and (eq_attr "cpu" "k8,amdfam10")
			      (and (eq_attr "type" "ssemul")
				   (and (eq_attr "mode" "SF,DF")
					(eq_attr "memory" "load"))))
			 "athlon-direct,athlon-fploadk8,athlon-fmul")
;; Athlon/K8/AMDFAM10: ssemul in modes SF/DF with no memory test; latency 4,
;; athlon-direct decode, athlon-fmul.
(define_insn_reservation "athlon_ssemul" 4
			 (and (eq_attr "cpu" "athlon,k8,amdfam10")
			      (and (eq_attr "type" "ssemul")
				   (eq_attr "mode" "SF,DF")))
			 "athlon-direct,athlon-fpsched,athlon-fmul")
;; Athlon: ssemul with memory "load" and no mode test; latency 5,
;; athlon-vector decode, athlon-fpload2, athlon-fmul for 2 cycles.
(define_insn_reservation "athlon_ssemulvector_load" 5
			 (and (eq_attr "cpu" "athlon")
			      (and (eq_attr "type" "ssemul")
				   (eq_attr "memory" "load")))
			 "athlon-vector,athlon-fpload2,(athlon-fmul*2)")
;; K8: ssemul with memory "load" and no mode test; latency 7, athlon-double
;; decode, athlon-fpload2k8, athlon-fmul for 2 cycles.
(define_insn_reservation "athlon_ssemulvector_load_k8" 7
			 (and (eq_attr "cpu" "k8")
			      (and (eq_attr "type" "ssemul")
				   (eq_attr "memory" "load")))
			 "athlon-double,athlon-fpload2k8,(athlon-fmul*2)")
;; AMDFAM10: ssemul with memory "load" and no mode test; latency 6,
;; athlon-direct decode, athlon-fploadk8, a single athlon-fmul.
(define_insn_reservation "athlon_ssemulvector_load_amdfam10" 6
			 (and (eq_attr "cpu" "amdfam10")
			      (and (eq_attr "type" "ssemul")
				   (eq_attr "memory" "load")))
			 "athlon-direct,athlon-fploadk8,athlon-fmul")
;; Athlon: ssemul with no mode or memory test; latency 5, athlon-vector
;; decode, athlon-fmul for 2 cycles.
(define_insn_reservation "athlon_ssemulvector" 5
			 (and (eq_attr "cpu" "athlon")
			      (eq_attr "type" "ssemul"))
			 "athlon-vector,athlon-fpsched,(athlon-fmul*2)")
;; K8: ssemul with no mode or memory test; latency 5, athlon-double decode,
;; athlon-fmul for 2 cycles.
(define_insn_reservation "athlon_ssemulvector_k8" 5
			 (and (eq_attr "cpu" "k8")
			      (eq_attr "type" "ssemul"))
			 "athlon-double,athlon-fpsched,(athlon-fmul*2)")
;; AMDFAM10: ssemul with no mode or memory test; latency 4, athlon-direct
;; decode, a single athlon-fmul.
(define_insn_reservation "athlon_ssemulvector_amdfam10" 4
			 (and (eq_attr "cpu" "amdfam10")
			      (eq_attr "type" "ssemul"))
			 "athlon-direct,athlon-fpsched,athlon-fmul")
1132;; divsd timings.  divss is faster
;; Athlon: ssediv in modes SF/DF with memory "load"; latency 20,
;; athlon-direct decode, athlon-fpload, then athlon-fmul for 17 cycles.
(define_insn_reservation "athlon_ssediv_load" 20
			 (and (eq_attr "cpu" "athlon")
			      (and (eq_attr "type" "ssediv")
				   (and (eq_attr "mode" "SF,DF")
					(eq_attr "memory" "load"))))
			 "athlon-direct,athlon-fpload,athlon-fmul*17")
;; K8/AMDFAM10: ssediv in modes SF/DF with memory "load"; latency 22,
;; athlon-direct decode, athlon-fploadk8, then athlon-fmul for 17 cycles.
(define_insn_reservation "athlon_ssediv_load_k8" 22
			 (and (eq_attr "cpu" "k8,amdfam10")
			      (and (eq_attr "type" "ssediv")
				   (and (eq_attr "mode" "SF,DF")
					(eq_attr "memory" "load"))))
			 "athlon-direct,athlon-fploadk8,athlon-fmul*17")
;; Athlon/K8/AMDFAM10: ssediv in modes SF/DF with no memory test;
;; latency 20, athlon-direct decode, athlon-fmul for 17 cycles.
(define_insn_reservation "athlon_ssediv" 20
			 (and (eq_attr "cpu" "athlon,k8,amdfam10")
			      (and (eq_attr "type" "ssediv")
				   (eq_attr "mode" "SF,DF")))
			 "athlon-direct,athlon-fpsched,athlon-fmul*17")
;; Athlon: ssediv with memory "load" and no mode test; latency 39,
;; athlon-vector decode, athlon-fpload2, then athlon-fmul for 34 cycles.
(define_insn_reservation "athlon_ssedivvector_load" 39
			 (and (eq_attr "cpu" "athlon")
			      (and (eq_attr "type" "ssediv")
				   (eq_attr "memory" "load")))
			 "athlon-vector,athlon-fpload2,athlon-fmul*34")
;; K8: ssediv with memory "load" and no mode test; latency 35, athlon-double
;; decode, athlon-fpload2k8, then athlon-fmul for 34 cycles.
;; NOTE(review): latency 35 is lower than the 39 given for the non-load
;; form athlon_ssedivvector_k8, unlike every other load/non-load pair in
;; this section -- confirm the value.
(define_insn_reservation "athlon_ssedivvector_load_k8" 35
			 (and (eq_attr "cpu" "k8")
			      (and (eq_attr "type" "ssediv")
				   (eq_attr "memory" "load")))
			 "athlon-double,athlon-fpload2k8,athlon-fmul*34")
;; AMDFAM10: ssediv with memory "load" and no mode test; latency 22,
;; athlon-direct decode, athlon-fploadk8, then athlon-fmul for 17 cycles.
(define_insn_reservation "athlon_ssedivvector_load_amdfam10" 22
			 (and (eq_attr "cpu" "amdfam10")
			      (and (eq_attr "type" "ssediv")
				   (eq_attr "memory" "load")))
			 "athlon-direct,athlon-fploadk8,athlon-fmul*17")
;; Athlon: ssediv with no mode or memory test; latency 39, athlon-vector
;; decode, then athlon-fmul for 34 cycles.
;; NOTE(review): unlike the other non-load reservations in this section
;; (e.g. athlon_ssediv), there is no athlon-fpsched step -- confirm.
(define_insn_reservation "athlon_ssedivvector" 39
			 (and (eq_attr "cpu" "athlon")
			      (eq_attr "type" "ssediv"))
			 "athlon-vector,athlon-fmul*34")
;; K8: ssediv with no mode or memory test; latency 39, athlon-double decode,
;; then athlon-fmul for 34 cycles.
;; NOTE(review): unlike the other non-load reservations in this section
;; (e.g. athlon_ssediv), there is no athlon-fpsched step -- confirm.
(define_insn_reservation "athlon_ssedivvector_k8" 39
			 (and (eq_attr "cpu" "k8")
			      (eq_attr "type" "ssediv"))
			 "athlon-double,athlon-fmul*34")
;; AMDFAM10: ssediv with no mode or memory test; latency 20, athlon-direct
;; decode, then athlon-fmul for 17 cycles.
;; NOTE(review): unlike the other non-load reservations in this section
;; (e.g. athlon_ssediv), there is no athlon-fpsched step -- confirm.
(define_insn_reservation "athlon_ssedivvector_amdfam10" 20
			 (and (eq_attr "cpu" "amdfam10")
			      (eq_attr "type" "ssediv"))
			 "athlon-direct,athlon-fmul*17")
;; AMDFAM10: sseins in mode TI; latency 5, athlon-vector decode,
;; athlon-fpsched then athlon-faddmul.
(define_insn_reservation "athlon_sseins_amdfam10" 5
			 (and (eq_attr "cpu" "amdfam10")
			      (and (eq_attr "type" "sseins")
				   (eq_attr "mode" "TI")))
			 "athlon-vector,athlon-fpsched,athlon-faddmul")
1182