1;; Scheduling for Haswell and derived processors.
2;; Copyright (C) 2004-2021 Free Software Foundation, Inc.
3;;
4;; This file is part of GCC.
5;;
6;; GCC is free software; you can redistribute it and/or modify
7;; it under the terms of the GNU General Public License as published by
8;; the Free Software Foundation; either version 3, or (at your option)
9;; any later version.
10;;
11;; GCC is distributed in the hope that it will be useful,
12;; but WITHOUT ANY WARRANTY; without even the implied warranty of
13;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14;; GNU General Public License for more details.
15;;
16;; You should have received a copy of the GNU General Public License
17;; along with GCC; see the file COPYING3.  If not see
;; <http://www.gnu.org/licenses/>.
19
;; This scheduling description is derived from core2.md.  The main
;; difference from the CORE2 pipeline is the memory subsystem: HASWELL
;; has two memory units for loads and one memory unit for stores.
;;
;; Automata used by the model: decode and the general execution ports,
;; then the three dividers, then the load and store ports.
(define_automaton "haswell_decoder,haswell_core")
(define_automaton "haswell_idiv,haswell_fdiv,haswell_ssediv")
(define_automaton "haswell_load,haswell_store")
24
;; The CPU domain, used for HASWELL bypass latencies.
;;   - x87 insn types are "float".
;;   - SSE insn types are classified by mode: the listed FP modes are
;;     "float", SImode is "int", every other mode is "simd".
;;   - MMX insn types are "simd".
;;   - Anything else is "int".
(define_attr "hsw_domain" "int,float,simd"
  (cond [(eq_attr "type" "fmov,fop,fsgn,fmul,fdiv,fpspc,fcmov,fcmp,fxch,fistp,fisttp,frndint")
	   (const_string "float")

	 (eq_attr "type" "sselog,sselog1,sseiadd,sseiadd1,sseishft,
			  sseishft1,sseimul,sse,ssemov,sseadd,sseadd1,
			  ssemul,ssecmp,ssecomi,ssecvt,ssecvt1,sseicvt,
			  ssediv,sseins,ssemuladd,sse4arg")
	   (cond [(eq_attr "mode" "V4DF,V8SF,V2DF,V4SF,SF,DF")
		    (const_string "float")
		  (eq_attr "mode" "SI")
		    (const_string "int")]
		 (const_string "simd"))

	 (eq_attr "type" "mmx,mmxmov,mmxadd,mmxmul,mmxcmp,mmxcvt,mmxshft")
	   (const_string "simd")]
	(const_string "int")))
40
;; The four instruction decoders; all of them live in the
;; haswell_decoder automaton.
(define_cpu_unit "hsw_decoder0,hsw_decoder1,hsw_decoder2,hsw_decoder3"
		 "haswell_decoder")
45
;; hsw_decoder0 has to be filled first: each of the remaining decoders
;; may only be reserved in a cycle in which hsw_decoder0 is reserved
;; as well.
(presence_set "hsw_decoder1,hsw_decoder2,hsw_decoder3" "hsw_decoder0")
52
;; Most instructions can be decoded on any of the four decoders.
(define_reservation "hsw_decodern"
		    "hsw_decoder0|hsw_decoder1|hsw_decoder2|hsw_decoder3")
55
;; The out-of-order core has eight pipelines (ports 0-7).  These are
;; similar to the Pentium Pro's five pipelines.  Ports 2 and 3 handle
;; memory loads, port 7 store address calculations, port 4 memory
;; stores, and ports 0, 1, 5 and 6 everything else.

;; General execution ports.
(define_cpu_unit "hsw_p0,hsw_p1,hsw_p5,hsw_p6" "haswell_core")

;; Load ports.
(define_cpu_unit "hsw_p2,hsw_p3" "haswell_load")

;; Store data (port 4) and store address (port 7).
(define_cpu_unit "hsw_p4,hsw_p7" "haswell_store")

;; Dividers; each one sits in an automaton of its own.
(define_cpu_unit "hsw_idiv" "haswell_idiv")
(define_cpu_unit "hsw_fdiv" "haswell_fdiv")
(define_cpu_unit "hsw_ssediv" "haswell_ssediv")
67
;; Shorthand for port groups.  In a reservation string "a|b" means
;; either unit and "a+b" means both units in the same cycle; the names
;; below follow that: digits run together for "any one of", "p"
;; repeated for "all of".

;; Any one general execution port.
(define_reservation "hsw_p0156" "hsw_p0|hsw_p1|hsw_p5|hsw_p6")
;; All four general execution ports at once.
(define_reservation "hsw_p0p1p5p6" "hsw_p0+hsw_p1+hsw_p5+hsw_p6")
;; Either load port.
(define_reservation "hsw_p23" "hsw_p2|hsw_p3")
;; Store data and store address ports together.
(define_reservation "hsw_p4p7" "hsw_p4+hsw_p7")
;; One of ports 2, 3 or 7.
(define_reservation "hsw_p237" "hsw_p2|hsw_p3|hsw_p7")
;; One of ports 0, 1 or 5.
(define_reservation "hsw_p015" "hsw_p0|hsw_p1|hsw_p5")
;; Port 0 or port 1.
(define_reservation "hsw_p01" "hsw_p0|hsw_p1")
75
;; Insns of type other, multi and str: latency 6, and only
;; hsw_decoder0 is reserved for them.
(define_insn_reservation "hsw_complex_insn" 6
  (and (eq_attr "cpu" "generic,haswell")
       (eq_attr "type" "other,multi,str"))
  "hsw_decoder0")

;; Calls: latency 1, again reserving only hsw_decoder0.
(define_insn_reservation "hsw_call" 1
  (and (eq_attr "cpu" "generic,haswell")
       (eq_attr "type" "call,callv"))
  "hsw_decoder0")
85
;; Integer moves.  With a memory operand imov does not use the integer
;; units.  imovx always decodes to one uop and likewise does not use
;; the integer units when it has a memory operand.

;; Register-to-register form: any general execution port.
(define_insn_reservation "hsw_imov" 1
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "none")
	    (eq_attr "type" "imov,imovx")))
  "hsw_decodern,hsw_p0156")

;; Load form: one load port only.
(define_insn_reservation "hsw_imov_load" 2
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "load")
	    (eq_attr "type" "imov,imovx")))
  "hsw_decodern,hsw_p23")

;; Store form (imov only): port 4 together with one of ports 2, 3, 7.
(define_insn_reservation "hsw_imov_store" 3
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "store")
	    (eq_attr "type" "imov")))
  "hsw_decodern,hsw_p4+hsw_p237")
106
;; Conditional moves: two consecutive cycles on a general execution
;; port.
(define_insn_reservation "hsw_icmov" 2
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "none")
	    (eq_attr "type" "icmov")))
  "hsw_decodern,hsw_p0156,hsw_p0156")

;; Same, with a load port reserved alongside the first execution cycle.
(define_insn_reservation "hsw_icmov_load" 2
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "load")
	    (eq_attr "type" "icmov")))
  "hsw_decodern,hsw_p23+hsw_p0156,hsw_p0156")
118
;; push with "store" memory: port 4 plus one of ports 2, 3, 7.
(define_insn_reservation "hsw_push_reg" 3
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "store")
	    (eq_attr "type" "push")))
  "hsw_decodern,hsw_p4+hsw_p237")

;; push with "both" memory: as above, followed by one more cycle on
;; one of ports 2, 3, 7.
(define_insn_reservation "hsw_push_mem" 3
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "both")
	    (eq_attr "type" "push")))
  "hsw_decodern,hsw_p4+hsw_p237,hsw_p237")
130
;; Consider lea latency as having 2 components.
;; The reservation itself is a single cycle on port 1 or port 5.
(define_insn_reservation "hsw_lea" 1
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "none")
	    (eq_attr "type" "lea")))
  "hsw_decodern,hsw_p1|hsw_p5")
137
;; Shifts and rotates on registers: port 0 or port 6.
(define_insn_reservation "hsw_shift_rotate" 1
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "none")
	    (eq_attr "type" "ishift,ishift1,rotate,rotate1")))
  "hsw_decodern,hsw_p0|hsw_p6")

;; Any memory form: port 0 or 6, plus one of ports 2/3/7, plus port 4,
;; all in the same cycle.
(define_insn_reservation "hsw_shift_rotate_mem" 1
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "!none")
	    (eq_attr "type" "ishift,ishift1,rotate,rotate1")))
  "hsw_decodern,(hsw_p0|hsw_p6)+hsw_p237+hsw_p4")
149
;; Branches without a memory operand execute on port 6.
(define_insn_reservation "hsw_branch" 1
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "none")
	    (eq_attr "type" "ibr")))
  "hsw_decodern,hsw_p6")

;; Branches with a memory operand: hsw_decoder0 only, then a load port
;; together with port 6.
(define_insn_reservation "hsw_indirect_branch" 2
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "!none")
	    (eq_attr "type" "ibr")))
  "hsw_decoder0,hsw_p23+hsw_p6")
161
;; leave: hsw_decoder0 only; a load port plus a general execution port,
;; followed by one more cycle on a general execution port.
(define_insn_reservation "hsw_leave" 4
  (and (eq_attr "cpu" "generic,haswell")
       (eq_attr "type" "leave"))
  "hsw_decoder0,hsw_p23+hsw_p0156,hsw_p0156")
166
;; Integer multiply.  imul is modelled on port 1 only; imulx is
;; modelled as two consecutive cycles on any general execution port.
(define_insn_reservation "hsw_imul" 3
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "none")
	    (eq_attr "type" "imul")))
  "hsw_decodern,hsw_p1")

(define_insn_reservation "hsw_imul_mem" 3
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "!none")
	    (eq_attr "type" "imul")))
  "hsw_decodern,hsw_p23+hsw_p1")

(define_insn_reservation "hsw_imulx" 4
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "none")
	    (eq_attr "type" "imulx")))
  "hsw_decodern,hsw_p0156,hsw_p0156")
185
;; imulx with a memory operand.  This mirrors hsw_imulx (two cycles on
;; any general execution port) with a load port added to the first
;; cycle.  The second cycle used to read "(hsw_p0|hsw_p6|hsw_p6)",
;; which listed port 6 twice and did not match the register form; it
;; now uses hsw_p0156 like hsw_imulx does.
(define_insn_reservation "hsw_imulx_mem" 4
			 (and (eq_attr "cpu" "generic,haswell")
			      (and (eq_attr "memory" "!none")
				   (eq_attr "type" "imulx")))
			 "hsw_decodern,hsw_p23+hsw_p0156,hsw_p0156")
191
192
;; div and idiv are very similar, so they share one model.  The same
;; latency is used for all of QImode, HImode and SImode.
;; Both forms go through hsw_decoder0 only and hold all four general
;; execution ports plus the integer divider for 9 cycles.
(define_insn_reservation "hsw_idiv" 23
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "none")
	    (eq_attr "type" "idiv")))
  "hsw_decoder0,(hsw_p0p1p5p6+hsw_idiv)*9")

;; The load form adds a leading cycle on a load port, port 0 and the
;; divider.
(define_insn_reservation "hsw_idiv_load" 23
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "load")
	    (eq_attr "type" "idiv")))
  "hsw_decoder0,hsw_p23+hsw_p0+hsw_idiv,(hsw_p0p1p5p6+hsw_idiv)*9")
206
;; x87 floating point operations.

;; fxch has latency 0 and reserves a decoder only.
(define_insn_reservation "hsw_fxch" 0
  (and (eq_attr "cpu" "generic,haswell")
       (eq_attr "type" "fxch"))
  "hsw_decodern")

;; fop without a memory operand (or with "unknown" memory): port 1.
(define_insn_reservation "hsw_fop" 3
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "none,unknown")
	    (eq_attr "type" "fop")))
  "hsw_decodern,hsw_p1")

;; fop with a load: load port plus port 1, then port 1 again.
(define_insn_reservation "hsw_fop_load" 5
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "load")
	    (eq_attr "type" "fop")))
  "hsw_decodern,hsw_p23+hsw_p1,hsw_p1")

;; fop with a store: three cycles on port 0, the last of them together
;; with ports 4 and 3.
(define_insn_reservation "hsw_fop_store" 3
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "store")
	    (eq_attr "type" "fop")))
  "hsw_decodern,hsw_p0,hsw_p0,hsw_p0+hsw_p4+hsw_p3")

;; fop with load and store: port 2 plus port 0, then port 0 together
;; with ports 4 and 3.
(define_insn_reservation "hsw_fop_both" 5
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "both")
	    (eq_attr "type" "fop")))
  "hsw_decodern,hsw_p2+hsw_p0,hsw_p0+hsw_p4+hsw_p3")
237
;; Sign manipulation: port 0.
(define_insn_reservation "hsw_fsgn" 1
  (and (eq_attr "cpu" "generic,haswell")
       (eq_attr "type" "fsgn"))
  "hsw_decodern,hsw_p0")

;; fistp: hsw_decoder0 only; port 1, port 4 and a load port together.
(define_insn_reservation "hsw_fistp" 7
  (and (eq_attr "cpu" "generic,haswell")
       (eq_attr "type" "fistp"))
  "hsw_decoder0,hsw_p1+hsw_p4+hsw_p23")

;; fcmov: hsw_decoder0 only; ports 0 and 5 together, then port 0.
(define_insn_reservation "hsw_fcmov" 2
  (and (eq_attr "cpu" "generic,haswell")
       (eq_attr "type" "fcmov"))
  "hsw_decoder0,hsw_p0+hsw_p5,hsw_p0")

;; x87 compares: port 1, with a load port added for the load form.
(define_insn_reservation "hsw_fcmp" 1
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "none")
	    (eq_attr "type" "fcmp")))
  "hsw_decodern,hsw_p1")

(define_insn_reservation "hsw_fcmp_load" 1
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "load")
	    (eq_attr "type" "fcmp")))
  "hsw_decodern,hsw_p23+hsw_p1")
264
;; x87 moves.  Loads and stores are split by mode: XFmode gets its own,
;; longer reservation.

;; Register form: port 0 or port 1.
(define_insn_reservation "hsw_fmov" 1
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "none")
	    (eq_attr "type" "fmov")))
  "hsw_decodern,hsw_p01")

;; Non-XF load: a load port.
(define_insn_reservation "hsw_fmov_load" 3
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "load")
	    (and (eq_attr "mode" "!XF")
		 (eq_attr "type" "fmov"))))
  "hsw_decodern,hsw_p23")

;; XF load: a load port plus port 0, for two cycles.
(define_insn_reservation "hsw_fmov_XF_load" 3
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "load")
	    (and (eq_attr "mode" "XF")
		 (eq_attr "type" "fmov"))))
  "hsw_decodern,(hsw_p23+hsw_p0)*2")

;; Non-XF store: ports 4 and 7 together.
(define_insn_reservation "hsw_fmov_store" 1
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "store")
	    (and (eq_attr "mode" "!XF")
		 (eq_attr "type" "fmov"))))
  "hsw_decodern,hsw_p4p7")

;; XF store: ports 4 and 7 together, for two consecutive cycles.
(define_insn_reservation "hsw_fmov_XF_store" 3
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "store")
	    (and (eq_attr "mode" "XF")
		 (eq_attr "type" "fmov"))))
  "hsw_decodern,hsw_p4p7,hsw_p4p7")
298
;; x87 multiply: port 0 or port 1; the load form also takes a load
;; port in the same cycle.
(define_insn_reservation "hsw_fmul" 4
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "none")
	    (eq_attr "type" "fmul")))
  "hsw_decodern,hsw_p01")

(define_insn_reservation "hsw_fmul_load" 4
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "load")
	    (eq_attr "type" "fmul")))
  "hsw_decodern,hsw_p23+hsw_p01")
310
;; fdiv latencies depend on the mode of the operands.  XFmode gives
;; a latency of 38 cycles, DFmode gives 32, and SFmode gives latency 18.
;; Division by a power of 2 takes only 9 cycles, but we cannot model
;; that.  Throughput is equal to latency - 1, which we model by keeping
;; the hsw_fdiv unit (haswell_fdiv automaton) reserved for that long.
;; x87 divide and fpspc, by operand mode.  The first cycle takes port 0
;; and the hsw_fdiv unit (plus a load port in the load forms); the
;; divider then stays reserved for latency - 2 further cycles of the
;; register form.  The load forms add one cycle of latency.

;; SFmode.
(define_insn_reservation "hsw_fdiv_SF" 18
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "none")
	    (and (eq_attr "mode" "SF")
		 (eq_attr "type" "fdiv,fpspc"))))
  "hsw_decodern,hsw_p0+hsw_fdiv,hsw_fdiv*16")

(define_insn_reservation "hsw_fdiv_SF_load" 19
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "load")
	    (and (eq_attr "mode" "SF")
		 (eq_attr "type" "fdiv,fpspc"))))
  "hsw_decodern,hsw_p23+hsw_p0+hsw_fdiv,hsw_fdiv*16")

;; DFmode.
(define_insn_reservation "hsw_fdiv_DF" 32
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "none")
	    (and (eq_attr "mode" "DF")
		 (eq_attr "type" "fdiv,fpspc"))))
  "hsw_decodern,hsw_p0+hsw_fdiv,hsw_fdiv*30")

(define_insn_reservation "hsw_fdiv_DF_load" 33
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "load")
	    (and (eq_attr "mode" "DF")
		 (eq_attr "type" "fdiv,fpspc"))))
  "hsw_decodern,hsw_p23+hsw_p0+hsw_fdiv,hsw_fdiv*30")

;; XFmode.
(define_insn_reservation "hsw_fdiv_XF" 38
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "none")
	    (and (eq_attr "mode" "XF")
		 (eq_attr "type" "fdiv,fpspc"))))
  "hsw_decodern,hsw_p0+hsw_fdiv,hsw_fdiv*36")
350
;; XFmode x87 divide/fpspc with a load.  The first cycle takes a load
;; port, port 0 and the hsw_fdiv unit; the divider then stays reserved
;; for 36 more cycles.  The load may issue on either load port
;; (hsw_p23), as in the SFmode and DFmode load forms; this reservation
;; used to pin it to hsw_p2 alone.
(define_insn_reservation "hsw_fdiv_XF_load" 39
			 (and (eq_attr "cpu" "generic,haswell")
			      (and (eq_attr "memory" "load")
				   (and (eq_attr "mode" "XF")
					(eq_attr "type" "fdiv,fpspc"))))
			 "hsw_decodern,hsw_p23+hsw_p0+hsw_fdiv,hsw_fdiv*36")
357
;; MMX instructions.

;; MMX/SSE integer add: port 1 or port 5.
(define_insn_reservation "hsw_mmx_add" 1
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "none")
	    (eq_attr "type" "mmxadd,sseiadd")))
  "hsw_decodern,hsw_p1|hsw_p5")

(define_insn_reservation "hsw_mmx_add_load" 2
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "load")
	    (eq_attr "type" "mmxadd,sseiadd")))
  "hsw_decodern,hsw_p23+(hsw_p1|hsw_p5)")

;; MMX shift: port 0.
(define_insn_reservation "hsw_mmx_shft" 1
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "none")
	    (eq_attr "type" "mmxshft")))
  "hsw_decodern,hsw_p0")

(define_insn_reservation "hsw_mmx_shft_load" 2
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "load")
	    (eq_attr "type" "mmxshft")))
  "hsw_decodern,hsw_p23+hsw_p0")
383
;; SSE integer shifts (type sseishft), split on the length_immediate
;; attribute.  Both variants use port 0 or port 1; they differ only in
;; latency.

;; length_immediate != 0.
(define_insn_reservation "hsw_mmx_sse_shft" 1
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "none")
	    (and (eq_attr "type" "sseishft")
		 (eq_attr "length_immediate" "!0"))))
  "hsw_decodern,hsw_p01")

(define_insn_reservation "hsw_mmx_sse_shft_load" 2
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "load")
	    (and (eq_attr "type" "sseishft")
		 (eq_attr "length_immediate" "!0"))))
  "hsw_decodern,hsw_p01+hsw_p23")

;; length_immediate == 0.
(define_insn_reservation "hsw_mmx_sse_shft1" 2
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "none")
	    (and (eq_attr "type" "sseishft")
		 (eq_attr "length_immediate" "0"))))
  "hsw_decodern,hsw_p01")

(define_insn_reservation "hsw_mmx_sse_shft1_load" 3
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "load")
	    (and (eq_attr "type" "sseishft")
		 (eq_attr "length_immediate" "0"))))
  "hsw_decodern,hsw_p01+hsw_p23")
411
;; MMX/SSE integer multiply: port 0 or port 1.
(define_insn_reservation "hsw_mmx_mul" 5
			 (and (eq_attr "cpu" "generic,haswell")
			      (and (eq_attr "memory" "none")
				   (eq_attr "type" "mmxmul,sseimul")))
			 "hsw_decodern,hsw_p01")

;; Load form: a load port together with port 0 or port 1.  The memory
;; test must be "load"; with "none" this reservation had the same
;; condition as hsw_mmx_mul and could never be selected.
(define_insn_reservation "hsw_mmx_mul_load" 5
			 (and (eq_attr "cpu" "generic,haswell")
			      (and (eq_attr "memory" "load")
				   (eq_attr "type" "mmxmul,sseimul")))
			 "hsw_decodern,hsw_p23+hsw_p01")
423
;; DImode mmxcvt: port 1.
(define_insn_reservation "hsw_sse_mmxcvt" 4
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "mode" "DI")
	    (eq_attr "type" "mmxcvt")))
  "hsw_decodern,hsw_p1")
429
430;; (define_insn_reservation "hsw_sse_mmxshft" 2
431;;			 (and (eq_attr "cpu" "generic,haswell")
432;;			      (and (eq_attr "mode" "TI")
433;;				   (eq_attr "type" "mmxshft")))
434;;			 "hsw_decodern,hsw_p01")
435
;; The sfence instruction: type sse with "unknown" memory.  It goes
;; through hsw_decoder0 only and takes a load port together with
;; port 4.
(define_insn_reservation "hsw_sse_sfence" 2
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "unknown")
	    (eq_attr "type" "sse")))
  "hsw_decoder0,hsw_p23+hsw_p4")

;; Remaining insns of type sse, by mode.  All use port 0 or port 1.

;; Scalar SFmode/DFmode.
(define_insn_reservation "hsw_sse_SFDF" 3
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "mode" "SF,DF")
	    (eq_attr "type" "sse")))
  "hsw_decodern,hsw_p01")

;; V4SFmode.
(define_insn_reservation "hsw_sse_V4SF" 4
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "mode" "V4SF")
	    (eq_attr "type" "sse")))
  "hsw_decodern,hsw_p01")

;; V8SFmode and V4DFmode.
(define_insn_reservation "hsw_sse_V8SF" 4
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "mode" "V8SF,V4DF")
	    (eq_attr "type" "sse")))
  "hsw_decodern,hsw_p01")
460
;; sseadd1 and the SSE compares: port 0 or port 1.
(define_insn_reservation "hsw_sse_addcmp" 3
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "none")
	    (eq_attr "type" "sseadd1,ssecmp,ssecomi")))
  "hsw_decodern,hsw_p01")

(define_insn_reservation "hsw_sse_addcmp_load" 3
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "load")
	    (eq_attr "type" "sseadd1,ssecmp,ssecomi")))
  "hsw_decodern,hsw_p23+hsw_p01")

;; SSE logic: one of ports 0, 1, 5.
(define_insn_reservation "hsw_sse_logic" 1
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "none")
	    (eq_attr "type" "sselog,sselog1")))
  "hsw_decodern,hsw_p015")

(define_insn_reservation "hsw_sse_logic_load" 2
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "load")
	    (eq_attr "type" "sselog,sselog1")))
  "hsw_decodern,hsw_p015+hsw_p23")
484
;; SSE add: port 1 or port 5.
(define_insn_reservation "hsw_sse_add" 3
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "none")
	    (eq_attr "type" "sseadd")))
  "hsw_decodern,hsw_p1|hsw_p5")

(define_insn_reservation "hsw_sse_add_load" 3
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "load")
	    (eq_attr "type" "sseadd")))
  "hsw_decodern,(hsw_p1|hsw_p5)+hsw_p23")

;; SSE multiply: port 0.
(define_insn_reservation "hsw_sse_mul" 5
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "none")
	    (eq_attr "type" "ssemul")))
  "hsw_decodern,hsw_p0")

(define_insn_reservation "hsw_sse_mul_load" 5
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "load")
	    (eq_attr "type" "ssemul")))
  "hsw_decodern,hsw_p0+hsw_p23")

;; Multiply-add (type ssemuladd).  Use skylake pipeline: port 0 or
;; port 1, with a load port added for the load form.
(define_insn_reservation "hsw_sse_muladd" 5
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "none")
	    (eq_attr "type" "ssemuladd")))
  "hsw_decodern,hsw_p01")

(define_insn_reservation "hsw_sse_muladd_load" 5
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "load")
	    (eq_attr "type" "ssemuladd")))
  "hsw_decodern,hsw_p01+hsw_p23")
520
;; Single-precision SSE divide (scalar and vector modes): one cycle on
;; port 0, then the hsw_ssediv unit for 14 cycles.
(define_insn_reservation "hsw_sse_div_SF" 18
			 (and (eq_attr "cpu" "generic,haswell")
			      (and (eq_attr "memory" "none")
				   (and (eq_attr "mode" "SF,V4SF,V8SF")
					(eq_attr "type" "ssediv"))))
			 "hsw_decodern,hsw_p0,hsw_ssediv*14")

;; Load form: a load port is reserved together with port 0.  The
;; memory test must be "load"; with "none" this reservation had the
;; same condition as hsw_sse_div_SF and could never be selected.
(define_insn_reservation "hsw_sse_div_SF_load" 18
			 (and (eq_attr "cpu" "generic,haswell")
			      (and (eq_attr "memory" "load")
				   (and (eq_attr "mode" "SF,V4SF,V8SF")
					(eq_attr "type" "ssediv"))))
			 "hsw_decodern,(hsw_p23+hsw_p0),hsw_ssediv*14")
534
;; Double-precision SSE divide (scalar and vector modes): one cycle on
;; port 0, then the hsw_ssediv unit for 20 cycles.
(define_insn_reservation "hsw_sse_div_DF" 28
			 (and (eq_attr "cpu" "generic,haswell")
			      (and (eq_attr "memory" "none")
				   (and (eq_attr "mode" "DF,V2DF,V4DF")
					(eq_attr "type" "ssediv"))))
			 "hsw_decodern,hsw_p0,hsw_ssediv*20")

;; Load form: a load port is reserved together with port 0.  The
;; memory test must be "load"; with "none" this reservation had the
;; same condition as hsw_sse_div_DF and could never be selected.
(define_insn_reservation "hsw_sse_div_DF_load" 28
			 (and (eq_attr "cpu" "generic,haswell")
			      (and (eq_attr "memory" "load")
				   (and (eq_attr "mode" "DF,V2DF,V4DF")
					(eq_attr "type" "ssediv"))))
			 "hsw_decodern,(hsw_p23+hsw_p0),hsw_ssediv*20")
548
;; sseicvt conversions: port 1, plus a load port for the memory forms.
;;
;; When an insn satisfies the conditions of more than one reservation,
;; the one defined first is used.  The SImode special cases (latency 3)
;; therefore have to precede the mode-independent reservations
;; (latency 4); in the opposite order the general reservations match
;; every sseicvt insn and the SImode ones are never selected.

(define_insn_reservation "hsw_sse_icvt_SI" 3
			 (and (eq_attr "cpu" "generic,haswell")
			      (and (eq_attr "memory" "none")
				   (and (eq_attr "mode" "SI")
					(eq_attr "type" "sseicvt"))))
			 "hsw_decodern,hsw_p1")

(define_insn_reservation "hsw_sse_icvt_SI_load" 3
			 (and (eq_attr "cpu" "generic,haswell")
			      (and (eq_attr "memory" "!none")
				   (and (eq_attr "mode" "SI")
					(eq_attr "type" "sseicvt"))))
			 "hsw_decodern,hsw_p23+hsw_p1")

;; All remaining modes.
(define_insn_reservation "hsw_sse_icvt" 4
			 (and (eq_attr "cpu" "generic,haswell")
			      (and (eq_attr "memory" "none")
				   (eq_attr "type" "sseicvt")))
			 "hsw_decodern,hsw_p1")

(define_insn_reservation "hsw_sse_icvt_load" 4
			 (and (eq_attr "cpu" "generic,haswell")
			      (and (eq_attr "memory" "!none")
				   (eq_attr "type" "sseicvt")))
			 "hsw_decodern,hsw_p23+hsw_p1")
575
;; SSE moves.  Register form: one of ports 0, 1, 5.
(define_insn_reservation "hsw_sse_mov" 1
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "none")
	    (eq_attr "type" "ssemov")))
  "hsw_decodern,hsw_p015")

;; Load form: a load port.
(define_insn_reservation "hsw_sse_mov_load" 2
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "load")
	    (eq_attr "type" "ssemov")))
  "hsw_decodern,hsw_p23")

;; Store form: ports 4 and 7 together.
(define_insn_reservation "hsw_sse_mov_store" 1
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "store")
	    (eq_attr "type" "ssemov")))
  "hsw_decodern,hsw_p4p7")
593
;; Simple ALU-style insns.  The register form (memory "none" or
;; "unknown") takes any general execution port.
(define_insn_reservation "hsw_insn" 1
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "none,unknown")
	    (eq_attr "type" "alu,alu1,negnot,incdec,icmp,test,setcc,sseishft1,mmx,mmxcmp")))
  "hsw_decodern,hsw_p0156")

;; Load form; the type list additionally contains pop.  A load port is
;; reserved together with the execution port.
(define_insn_reservation "hsw_insn_load" 1
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "load")
	    (eq_attr "type" "alu,alu1,negnot,incdec,icmp,test,setcc,pop,sseishft1,mmx,mmxcmp")))
  "hsw_decodern,hsw_p23+hsw_p0156")

;; Store form: the execution port together with ports 4 and 7.
(define_insn_reservation "hsw_insn_store" 1
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "store")
	    (eq_attr "type" "alu,alu1,negnot,incdec,icmp,test,setcc,sseishft1,mmx,mmxcmp")))
  "hsw_decodern,hsw_p0156+hsw_p4p7")
611
;; Read-modify-store forms of the simple insns: a load port, a general
;; execution port and ports 4 and 7 are all reserved in one cycle.
;; NOTE(review): the comment that used to sit here said these insns
;; produce 4 uops and so "have to be decoded on hsw_decoder0", but the
;; reservation accepts any decoder (hsw_decodern).  Confirm which of
;; the two is intended.
(define_insn_reservation "hsw_insn_both" 4
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "both")
	    (eq_attr "type" "alu,alu1,negnot,incdec,icmp,test,setcc,pop,sseishft1,mmx,mmxcmp")))
  "hsw_decodern,hsw_p23+hsw_p0156+hsw_p4p7")
619