1 /* Subroutines used for code generation on IA-32.
2    Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3    2002, 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
4 
5 This file is part of GCC.
6 
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
11 
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 GNU General Public License for more details.
16 
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING.  If not, write to
19 the Free Software Foundation, 51 Franklin Street, Fifth Floor,
20 Boston, MA 02110-1301, USA.  */
21 
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "tm_p.h"
29 #include "regs.h"
30 #include "hard-reg-set.h"
31 #include "real.h"
32 #include "insn-config.h"
33 #include "conditions.h"
34 #include "output.h"
35 #include "insn-codes.h"
36 #include "insn-attr.h"
37 #include "flags.h"
38 #include "except.h"
39 #include "function.h"
40 #include "recog.h"
41 #include "expr.h"
42 #include "optabs.h"
43 #include "toplev.h"
44 #include "basic-block.h"
45 #include "ggc.h"
46 #include "target.h"
47 #include "target-def.h"
48 #include "langhooks.h"
49 #include "cgraph.h"
50 #include "tree-gimple.h"
51 #include "dwarf2.h"
52 #include "tm-constrs.h"
53 
54 #ifndef CHECK_STACK_LIMIT
55 #define CHECK_STACK_LIMIT (-1)
56 #endif
57 
58 /* Return index of given mode in mult and division cost tables.  */
59 #define MODE_INDEX(mode)					\
60   ((mode) == QImode ? 0						\
61    : (mode) == HImode ? 1					\
62    : (mode) == SImode ? 2					\
63    : (mode) == DImode ? 3					\
64    : 4)
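/* A minimal usage sketch: MODE_INDEX selects the row of the per-mode cost
   arrays in struct processor_costs, so the cost of starting an SImode
   multiply on the current tuning would be looked up roughly as

     ix86_cost->mult_init[MODE_INDEX (SImode)]

   (the mult_init field name is assumed from the initializer comments below;
   the structure itself is declared in i386.h).  */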
65 
66 /* Processor costs (relative to an add) */
67 /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes.  */
68 #define COSTS_N_BYTES(N) ((N) * 2)
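/* A quick check on the scale, assuming COSTS_N_INSNS (N) really is (N) * 4
   as stated above: COSTS_N_BYTES (2) == 4 == COSTS_N_INSNS (1), so a
   two-byte add in the size table below costs the same as a one-insn add in
   the speed tables, keeping the two kinds of tables directly comparable.  */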
69 
70 static const
71 struct processor_costs size_cost = {	/* costs for tuning for size */
72   COSTS_N_BYTES (2),			/* cost of an add instruction */
73   COSTS_N_BYTES (3),			/* cost of a lea instruction */
74   COSTS_N_BYTES (2),			/* variable shift costs */
75   COSTS_N_BYTES (3),			/* constant shift costs */
76   {COSTS_N_BYTES (3),			/* cost of starting multiply for QI */
77    COSTS_N_BYTES (3),			/*                               HI */
78    COSTS_N_BYTES (3),			/*                               SI */
79    COSTS_N_BYTES (3),			/*                               DI */
80    COSTS_N_BYTES (5)},			/*                            other */
81   0,					/* cost of multiply per each bit set */
82   {COSTS_N_BYTES (3),			/* cost of a divide/mod for QI */
83    COSTS_N_BYTES (3),			/*                          HI */
84    COSTS_N_BYTES (3),			/*                          SI */
85    COSTS_N_BYTES (3),			/*                          DI */
86    COSTS_N_BYTES (5)},			/*                       other */
87   COSTS_N_BYTES (3),			/* cost of movsx */
88   COSTS_N_BYTES (3),			/* cost of movzx */
89   0,					/* "large" insn */
90   2,					/* MOVE_RATIO */
91   2,					/* cost for loading QImode using movzbl */
92   {2, 2, 2},				/* cost of loading integer registers
93 					   in QImode, HImode and SImode.
94 					   Relative to reg-reg move (2).  */
95   {2, 2, 2},				/* cost of storing integer registers */
96   2,					/* cost of reg,reg fld/fst */
97   {2, 2, 2},				/* cost of loading fp registers
98 					   in SFmode, DFmode and XFmode */
99   {2, 2, 2},				/* cost of storing fp registers
100 					   in SFmode, DFmode and XFmode */
101   3,					/* cost of moving MMX register */
102   {3, 3},				/* cost of loading MMX registers
103 					   in SImode and DImode */
104   {3, 3},				/* cost of storing MMX registers
105 					   in SImode and DImode */
106   3,					/* cost of moving SSE register */
107   {3, 3, 3},				/* cost of loading SSE registers
108 					   in SImode, DImode and TImode */
109   {3, 3, 3},				/* cost of storing SSE registers
110 					   in SImode, DImode and TImode */
111   3,					/* MMX or SSE register to integer */
112   0,					/* size of prefetch block */
113   0,					/* number of parallel prefetches */
114   2,					/* Branch cost */
115   COSTS_N_BYTES (2),			/* cost of FADD and FSUB insns.  */
116   COSTS_N_BYTES (2),			/* cost of FMUL instruction.  */
117   COSTS_N_BYTES (2),			/* cost of FDIV instruction.  */
118   COSTS_N_BYTES (2),			/* cost of FABS instruction.  */
119   COSTS_N_BYTES (2),			/* cost of FCHS instruction.  */
120   COSTS_N_BYTES (2),			/* cost of FSQRT instruction.  */
121 };
122 
123 /* Processor costs (relative to an add) */
124 static const
125 struct processor_costs i386_cost = {	/* 386 specific costs */
126   COSTS_N_INSNS (1),			/* cost of an add instruction */
127   COSTS_N_INSNS (1),			/* cost of a lea instruction */
128   COSTS_N_INSNS (3),			/* variable shift costs */
129   COSTS_N_INSNS (2),			/* constant shift costs */
130   {COSTS_N_INSNS (6),			/* cost of starting multiply for QI */
131    COSTS_N_INSNS (6),			/*                               HI */
132    COSTS_N_INSNS (6),			/*                               SI */
133    COSTS_N_INSNS (6),			/*                               DI */
134    COSTS_N_INSNS (6)},			/*                               other */
135   COSTS_N_INSNS (1),			/* cost of multiply per each bit set */
136   {COSTS_N_INSNS (23),			/* cost of a divide/mod for QI */
137    COSTS_N_INSNS (23),			/*                          HI */
138    COSTS_N_INSNS (23),			/*                          SI */
139    COSTS_N_INSNS (23),			/*                          DI */
140    COSTS_N_INSNS (23)},			/*                          other */
141   COSTS_N_INSNS (3),			/* cost of movsx */
142   COSTS_N_INSNS (2),			/* cost of movzx */
143   15,					/* "large" insn */
144   3,					/* MOVE_RATIO */
145   4,					/* cost for loading QImode using movzbl */
146   {2, 4, 2},				/* cost of loading integer registers
147 					   in QImode, HImode and SImode.
148 					   Relative to reg-reg move (2).  */
149   {2, 4, 2},				/* cost of storing integer registers */
150   2,					/* cost of reg,reg fld/fst */
151   {8, 8, 8},				/* cost of loading fp registers
152 					   in SFmode, DFmode and XFmode */
153   {8, 8, 8},				/* cost of storing fp registers
154 					   in SFmode, DFmode and XFmode */
155   2,					/* cost of moving MMX register */
156   {4, 8},				/* cost of loading MMX registers
157 					   in SImode and DImode */
158   {4, 8},				/* cost of storing MMX registers
159 					   in SImode and DImode */
160   2,					/* cost of moving SSE register */
161   {4, 8, 16},				/* cost of loading SSE registers
162 					   in SImode, DImode and TImode */
163   {4, 8, 16},				/* cost of storing SSE registers
164 					   in SImode, DImode and TImode */
165   3,					/* MMX or SSE register to integer */
166   0,					/* size of prefetch block */
167   0,					/* number of parallel prefetches */
168   1,					/* Branch cost */
169   COSTS_N_INSNS (23),			/* cost of FADD and FSUB insns.  */
170   COSTS_N_INSNS (27),			/* cost of FMUL instruction.  */
171   COSTS_N_INSNS (88),			/* cost of FDIV instruction.  */
172   COSTS_N_INSNS (22),			/* cost of FABS instruction.  */
173   COSTS_N_INSNS (24),			/* cost of FCHS instruction.  */
174   COSTS_N_INSNS (122),			/* cost of FSQRT instruction.  */
175 };
176 
177 static const
178 struct processor_costs i486_cost = {	/* 486 specific costs */
179   COSTS_N_INSNS (1),			/* cost of an add instruction */
180   COSTS_N_INSNS (1),			/* cost of a lea instruction */
181   COSTS_N_INSNS (3),			/* variable shift costs */
182   COSTS_N_INSNS (2),			/* constant shift costs */
183   {COSTS_N_INSNS (12),			/* cost of starting multiply for QI */
184    COSTS_N_INSNS (12),			/*                               HI */
185    COSTS_N_INSNS (12),			/*                               SI */
186    COSTS_N_INSNS (12),			/*                               DI */
187    COSTS_N_INSNS (12)},			/*                               other */
188   1,					/* cost of multiply per each bit set */
189   {COSTS_N_INSNS (40),			/* cost of a divide/mod for QI */
190    COSTS_N_INSNS (40),			/*                          HI */
191    COSTS_N_INSNS (40),			/*                          SI */
192    COSTS_N_INSNS (40),			/*                          DI */
193    COSTS_N_INSNS (40)},			/*                          other */
194   COSTS_N_INSNS (3),			/* cost of movsx */
195   COSTS_N_INSNS (2),			/* cost of movzx */
196   15,					/* "large" insn */
197   3,					/* MOVE_RATIO */
198   4,					/* cost for loading QImode using movzbl */
199   {2, 4, 2},				/* cost of loading integer registers
200 					   in QImode, HImode and SImode.
201 					   Relative to reg-reg move (2).  */
202   {2, 4, 2},				/* cost of storing integer registers */
203   2,					/* cost of reg,reg fld/fst */
204   {8, 8, 8},				/* cost of loading fp registers
205 					   in SFmode, DFmode and XFmode */
206   {8, 8, 8},				/* cost of storing fp registers
207 					   in SFmode, DFmode and XFmode */
208   2,					/* cost of moving MMX register */
209   {4, 8},				/* cost of loading MMX registers
210 					   in SImode and DImode */
211   {4, 8},				/* cost of storing MMX registers
212 					   in SImode and DImode */
213   2,					/* cost of moving SSE register */
214   {4, 8, 16},				/* cost of loading SSE registers
215 					   in SImode, DImode and TImode */
216   {4, 8, 16},				/* cost of storing SSE registers
217 					   in SImode, DImode and TImode */
218   3,					/* MMX or SSE register to integer */
219   0,					/* size of prefetch block */
220   0,					/* number of parallel prefetches */
221   1,					/* Branch cost */
222   COSTS_N_INSNS (8),			/* cost of FADD and FSUB insns.  */
223   COSTS_N_INSNS (16),			/* cost of FMUL instruction.  */
224   COSTS_N_INSNS (73),			/* cost of FDIV instruction.  */
225   COSTS_N_INSNS (3),			/* cost of FABS instruction.  */
226   COSTS_N_INSNS (3),			/* cost of FCHS instruction.  */
227   COSTS_N_INSNS (83),			/* cost of FSQRT instruction.  */
228 };
229 
230 static const
231 struct processor_costs pentium_cost = {
232   COSTS_N_INSNS (1),			/* cost of an add instruction */
233   COSTS_N_INSNS (1),			/* cost of a lea instruction */
234   COSTS_N_INSNS (4),			/* variable shift costs */
235   COSTS_N_INSNS (1),			/* constant shift costs */
236   {COSTS_N_INSNS (11),			/* cost of starting multiply for QI */
237    COSTS_N_INSNS (11),			/*                               HI */
238    COSTS_N_INSNS (11),			/*                               SI */
239    COSTS_N_INSNS (11),			/*                               DI */
240    COSTS_N_INSNS (11)},			/*                               other */
241   0,					/* cost of multiply per each bit set */
242   {COSTS_N_INSNS (25),			/* cost of a divide/mod for QI */
243    COSTS_N_INSNS (25),			/*                          HI */
244    COSTS_N_INSNS (25),			/*                          SI */
245    COSTS_N_INSNS (25),			/*                          DI */
246    COSTS_N_INSNS (25)},			/*                          other */
247   COSTS_N_INSNS (3),			/* cost of movsx */
248   COSTS_N_INSNS (2),			/* cost of movzx */
249   8,					/* "large" insn */
250   6,					/* MOVE_RATIO */
251   6,					/* cost for loading QImode using movzbl */
252   {2, 4, 2},				/* cost of loading integer registers
253 					   in QImode, HImode and SImode.
254 					   Relative to reg-reg move (2).  */
255   {2, 4, 2},				/* cost of storing integer registers */
256   2,					/* cost of reg,reg fld/fst */
257   {2, 2, 6},				/* cost of loading fp registers
258 					   in SFmode, DFmode and XFmode */
259   {4, 4, 6},				/* cost of storing fp registers
260 					   in SFmode, DFmode and XFmode */
261   8,					/* cost of moving MMX register */
262   {8, 8},				/* cost of loading MMX registers
263 					   in SImode and DImode */
264   {8, 8},				/* cost of storing MMX registers
265 					   in SImode and DImode */
266   2,					/* cost of moving SSE register */
267   {4, 8, 16},				/* cost of loading SSE registers
268 					   in SImode, DImode and TImode */
269   {4, 8, 16},				/* cost of storing SSE registers
270 					   in SImode, DImode and TImode */
271   3,					/* MMX or SSE register to integer */
272   0,					/* size of prefetch block */
273   0,					/* number of parallel prefetches */
274   2,					/* Branch cost */
275   COSTS_N_INSNS (3),			/* cost of FADD and FSUB insns.  */
276   COSTS_N_INSNS (3),			/* cost of FMUL instruction.  */
277   COSTS_N_INSNS (39),			/* cost of FDIV instruction.  */
278   COSTS_N_INSNS (1),			/* cost of FABS instruction.  */
279   COSTS_N_INSNS (1),			/* cost of FCHS instruction.  */
280   COSTS_N_INSNS (70),			/* cost of FSQRT instruction.  */
281 };
282 
283 static const
284 struct processor_costs pentiumpro_cost = {
285   COSTS_N_INSNS (1),			/* cost of an add instruction */
286   COSTS_N_INSNS (1),			/* cost of a lea instruction */
287   COSTS_N_INSNS (1),			/* variable shift costs */
288   COSTS_N_INSNS (1),			/* constant shift costs */
289   {COSTS_N_INSNS (4),			/* cost of starting multiply for QI */
290    COSTS_N_INSNS (4),			/*                               HI */
291    COSTS_N_INSNS (4),			/*                               SI */
292    COSTS_N_INSNS (4),			/*                               DI */
293    COSTS_N_INSNS (4)},			/*                               other */
294   0,					/* cost of multiply per each bit set */
295   {COSTS_N_INSNS (17),			/* cost of a divide/mod for QI */
296    COSTS_N_INSNS (17),			/*                          HI */
297    COSTS_N_INSNS (17),			/*                          SI */
298    COSTS_N_INSNS (17),			/*                          DI */
299    COSTS_N_INSNS (17)},			/*                          other */
300   COSTS_N_INSNS (1),			/* cost of movsx */
301   COSTS_N_INSNS (1),			/* cost of movzx */
302   8,					/* "large" insn */
303   6,					/* MOVE_RATIO */
304   2,					/* cost for loading QImode using movzbl */
305   {4, 4, 4},				/* cost of loading integer registers
306 					   in QImode, HImode and SImode.
307 					   Relative to reg-reg move (2).  */
308   {2, 2, 2},				/* cost of storing integer registers */
309   2,					/* cost of reg,reg fld/fst */
310   {2, 2, 6},				/* cost of loading fp registers
311 					   in SFmode, DFmode and XFmode */
312   {4, 4, 6},				/* cost of storing fp registers
313 					   in SFmode, DFmode and XFmode */
314   2,					/* cost of moving MMX register */
315   {2, 2},				/* cost of loading MMX registers
316 					   in SImode and DImode */
317   {2, 2},				/* cost of storing MMX registers
318 					   in SImode and DImode */
319   2,					/* cost of moving SSE register */
320   {2, 2, 8},				/* cost of loading SSE registers
321 					   in SImode, DImode and TImode */
322   {2, 2, 8},				/* cost of storing SSE registers
323 					   in SImode, DImode and TImode */
324   3,					/* MMX or SSE register to integer */
325   32,					/* size of prefetch block */
326   6,					/* number of parallel prefetches */
327   2,					/* Branch cost */
328   COSTS_N_INSNS (3),			/* cost of FADD and FSUB insns.  */
329   COSTS_N_INSNS (5),			/* cost of FMUL instruction.  */
330   COSTS_N_INSNS (56),			/* cost of FDIV instruction.  */
331   COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
332   COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
333   COSTS_N_INSNS (56),			/* cost of FSQRT instruction.  */
334 };
335 
336 static const
337 struct processor_costs k6_cost = {
338   COSTS_N_INSNS (1),			/* cost of an add instruction */
339   COSTS_N_INSNS (2),			/* cost of a lea instruction */
340   COSTS_N_INSNS (1),			/* variable shift costs */
341   COSTS_N_INSNS (1),			/* constant shift costs */
342   {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
343    COSTS_N_INSNS (3),			/*                               HI */
344    COSTS_N_INSNS (3),			/*                               SI */
345    COSTS_N_INSNS (3),			/*                               DI */
346    COSTS_N_INSNS (3)},			/*                               other */
347   0,					/* cost of multiply per each bit set */
348   {COSTS_N_INSNS (18),			/* cost of a divide/mod for QI */
349    COSTS_N_INSNS (18),			/*                          HI */
350    COSTS_N_INSNS (18),			/*                          SI */
351    COSTS_N_INSNS (18),			/*                          DI */
352    COSTS_N_INSNS (18)},			/*                          other */
353   COSTS_N_INSNS (2),			/* cost of movsx */
354   COSTS_N_INSNS (2),			/* cost of movzx */
355   8,					/* "large" insn */
356   4,					/* MOVE_RATIO */
357   3,					/* cost for loading QImode using movzbl */
358   {4, 5, 4},				/* cost of loading integer registers
359 					   in QImode, HImode and SImode.
360 					   Relative to reg-reg move (2).  */
361   {2, 3, 2},				/* cost of storing integer registers */
362   4,					/* cost of reg,reg fld/fst */
363   {6, 6, 6},				/* cost of loading fp registers
364 					   in SFmode, DFmode and XFmode */
365   {4, 4, 4},				/* cost of storing fp registers
366 					   in SFmode, DFmode and XFmode */
367   2,					/* cost of moving MMX register */
368   {2, 2},				/* cost of loading MMX registers
369 					   in SImode and DImode */
370   {2, 2},				/* cost of storing MMX registers
371 					   in SImode and DImode */
372   2,					/* cost of moving SSE register */
373   {2, 2, 8},				/* cost of loading SSE registers
374 					   in SImode, DImode and TImode */
375   {2, 2, 8},				/* cost of storing SSE registers
376 					   in SImode, DImode and TImode */
377   6,					/* MMX or SSE register to integer */
378   32,					/* size of prefetch block */
379   1,					/* number of parallel prefetches */
380   1,					/* Branch cost */
381   COSTS_N_INSNS (2),			/* cost of FADD and FSUB insns.  */
382   COSTS_N_INSNS (2),			/* cost of FMUL instruction.  */
383   COSTS_N_INSNS (56),			/* cost of FDIV instruction.  */
384   COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
385   COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
386   COSTS_N_INSNS (56),			/* cost of FSQRT instruction.  */
387 };
388 
389 static const
390 struct processor_costs athlon_cost = {
391   COSTS_N_INSNS (1),			/* cost of an add instruction */
392   COSTS_N_INSNS (2),			/* cost of a lea instruction */
393   COSTS_N_INSNS (1),			/* variable shift costs */
394   COSTS_N_INSNS (1),			/* constant shift costs */
395   {COSTS_N_INSNS (5),			/* cost of starting multiply for QI */
396    COSTS_N_INSNS (5),			/*                               HI */
397    COSTS_N_INSNS (5),			/*                               SI */
398    COSTS_N_INSNS (5),			/*                               DI */
399    COSTS_N_INSNS (5)},			/*                               other */
400   0,					/* cost of multiply per each bit set */
401   {COSTS_N_INSNS (18),			/* cost of a divide/mod for QI */
402    COSTS_N_INSNS (26),			/*                          HI */
403    COSTS_N_INSNS (42),			/*                          SI */
404    COSTS_N_INSNS (74),			/*                          DI */
405    COSTS_N_INSNS (74)},			/*                          other */
406   COSTS_N_INSNS (1),			/* cost of movsx */
407   COSTS_N_INSNS (1),			/* cost of movzx */
408   8,					/* "large" insn */
409   9,					/* MOVE_RATIO */
410   4,					/* cost for loading QImode using movzbl */
411   {3, 4, 3},				/* cost of loading integer registers
412 					   in QImode, HImode and SImode.
413 					   Relative to reg-reg move (2).  */
414   {3, 4, 3},				/* cost of storing integer registers */
415   4,					/* cost of reg,reg fld/fst */
416   {4, 4, 12},				/* cost of loading fp registers
417 					   in SFmode, DFmode and XFmode */
418   {6, 6, 8},				/* cost of storing fp registers
419 					   in SFmode, DFmode and XFmode */
420   2,					/* cost of moving MMX register */
421   {4, 4},				/* cost of loading MMX registers
422 					   in SImode and DImode */
423   {4, 4},				/* cost of storing MMX registers
424 					   in SImode and DImode */
425   2,					/* cost of moving SSE register */
426   {4, 4, 6},				/* cost of loading SSE registers
427 					   in SImode, DImode and TImode */
428   {4, 4, 5},				/* cost of storing SSE registers
429 					   in SImode, DImode and TImode */
430   5,					/* MMX or SSE register to integer */
431   64,					/* size of prefetch block */
432   6,					/* number of parallel prefetches */
433   5,					/* Branch cost */
434   COSTS_N_INSNS (4),			/* cost of FADD and FSUB insns.  */
435   COSTS_N_INSNS (4),			/* cost of FMUL instruction.  */
436   COSTS_N_INSNS (24),			/* cost of FDIV instruction.  */
437   COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
438   COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
439   COSTS_N_INSNS (35),			/* cost of FSQRT instruction.  */
440 };
441 
442 static const
443 struct processor_costs k8_cost = {
444   COSTS_N_INSNS (1),			/* cost of an add instruction */
445   COSTS_N_INSNS (2),			/* cost of a lea instruction */
446   COSTS_N_INSNS (1),			/* variable shift costs */
447   COSTS_N_INSNS (1),			/* constant shift costs */
448   {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
449    COSTS_N_INSNS (4),			/*                               HI */
450    COSTS_N_INSNS (3),			/*                               SI */
451    COSTS_N_INSNS (4),			/*                               DI */
452    COSTS_N_INSNS (5)},			/*                               other */
453   0,					/* cost of multiply per each bit set */
454   {COSTS_N_INSNS (18),			/* cost of a divide/mod for QI */
455    COSTS_N_INSNS (26),			/*                          HI */
456    COSTS_N_INSNS (42),			/*                          SI */
457    COSTS_N_INSNS (74),			/*                          DI */
458    COSTS_N_INSNS (74)},			/*                          other */
459   COSTS_N_INSNS (1),			/* cost of movsx */
460   COSTS_N_INSNS (1),			/* cost of movzx */
461   8,					/* "large" insn */
462   9,					/* MOVE_RATIO */
463   4,					/* cost for loading QImode using movzbl */
464   {3, 4, 3},				/* cost of loading integer registers
465 					   in QImode, HImode and SImode.
466 					   Relative to reg-reg move (2).  */
467   {3, 4, 3},				/* cost of storing integer registers */
468   4,					/* cost of reg,reg fld/fst */
469   {4, 4, 12},				/* cost of loading fp registers
470 					   in SFmode, DFmode and XFmode */
471   {6, 6, 8},				/* cost of storing fp registers
472 					   in SFmode, DFmode and XFmode */
473   2,					/* cost of moving MMX register */
474   {3, 3},				/* cost of loading MMX registers
475 					   in SImode and DImode */
476   {4, 4},				/* cost of storing MMX registers
477 					   in SImode and DImode */
478   2,					/* cost of moving SSE register */
479   {4, 3, 6},				/* cost of loading SSE registers
480 					   in SImode, DImode and TImode */
481   {4, 4, 5},				/* cost of storing SSE registers
482 					   in SImode, DImode and TImode */
483   5,					/* MMX or SSE register to integer */
484   64,					/* size of prefetch block */
485   6,					/* number of parallel prefetches */
486   5,					/* Branch cost */
487   COSTS_N_INSNS (4),			/* cost of FADD and FSUB insns.  */
488   COSTS_N_INSNS (4),			/* cost of FMUL instruction.  */
489   COSTS_N_INSNS (19),			/* cost of FDIV instruction.  */
490   COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
491   COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
492   COSTS_N_INSNS (35),			/* cost of FSQRT instruction.  */
493 };
494 
495 static const
496 struct processor_costs pentium4_cost = {
497   COSTS_N_INSNS (1),			/* cost of an add instruction */
498   COSTS_N_INSNS (3),			/* cost of a lea instruction */
499   COSTS_N_INSNS (4),			/* variable shift costs */
500   COSTS_N_INSNS (4),			/* constant shift costs */
501   {COSTS_N_INSNS (15),			/* cost of starting multiply for QI */
502    COSTS_N_INSNS (15),			/*                               HI */
503    COSTS_N_INSNS (15),			/*                               SI */
504    COSTS_N_INSNS (15),			/*                               DI */
505    COSTS_N_INSNS (15)},			/*                               other */
506   0,					/* cost of multiply per each bit set */
507   {COSTS_N_INSNS (56),			/* cost of a divide/mod for QI */
508    COSTS_N_INSNS (56),			/*                          HI */
509    COSTS_N_INSNS (56),			/*                          SI */
510    COSTS_N_INSNS (56),			/*                          DI */
511    COSTS_N_INSNS (56)},			/*                          other */
512   COSTS_N_INSNS (1),			/* cost of movsx */
513   COSTS_N_INSNS (1),			/* cost of movzx */
514   16,					/* "large" insn */
515   6,					/* MOVE_RATIO */
516   2,					/* cost for loading QImode using movzbl */
517   {4, 5, 4},				/* cost of loading integer registers
518 					   in QImode, HImode and SImode.
519 					   Relative to reg-reg move (2).  */
520   {2, 3, 2},				/* cost of storing integer registers */
521   2,					/* cost of reg,reg fld/fst */
522   {2, 2, 6},				/* cost of loading fp registers
523 					   in SFmode, DFmode and XFmode */
524   {4, 4, 6},				/* cost of storing fp registers
525 					   in SFmode, DFmode and XFmode */
526   2,					/* cost of moving MMX register */
527   {2, 2},				/* cost of loading MMX registers
528 					   in SImode and DImode */
529   {2, 2},				/* cost of storing MMX registers
530 					   in SImode and DImode */
531   12,					/* cost of moving SSE register */
532   {12, 12, 12},				/* cost of loading SSE registers
533 					   in SImode, DImode and TImode */
534   {2, 2, 8},				/* cost of storing SSE registers
535 					   in SImode, DImode and TImode */
536   10,					/* MMX or SSE register to integer */
537   64,					/* size of prefetch block */
538   6,					/* number of parallel prefetches */
539   2,					/* Branch cost */
540   COSTS_N_INSNS (5),			/* cost of FADD and FSUB insns.  */
541   COSTS_N_INSNS (7),			/* cost of FMUL instruction.  */
542   COSTS_N_INSNS (43),			/* cost of FDIV instruction.  */
543   COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
544   COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
545   COSTS_N_INSNS (43),			/* cost of FSQRT instruction.  */
546 };
547 
548 static const
549 struct processor_costs nocona_cost = {
550   COSTS_N_INSNS (1),			/* cost of an add instruction */
551   COSTS_N_INSNS (1),			/* cost of a lea instruction */
552   COSTS_N_INSNS (1),			/* variable shift costs */
553   COSTS_N_INSNS (1),			/* constant shift costs */
554   {COSTS_N_INSNS (10),			/* cost of starting multiply for QI */
555    COSTS_N_INSNS (10),			/*                               HI */
556    COSTS_N_INSNS (10),			/*                               SI */
557    COSTS_N_INSNS (10),			/*                               DI */
558    COSTS_N_INSNS (10)},			/*                               other */
559   0,					/* cost of multiply per each bit set */
560   {COSTS_N_INSNS (66),			/* cost of a divide/mod for QI */
561    COSTS_N_INSNS (66),			/*                          HI */
562    COSTS_N_INSNS (66),			/*                          SI */
563    COSTS_N_INSNS (66),			/*                          DI */
564    COSTS_N_INSNS (66)},			/*                          other */
565   COSTS_N_INSNS (1),			/* cost of movsx */
566   COSTS_N_INSNS (1),			/* cost of movzx */
567   16,					/* "large" insn */
568   17,					/* MOVE_RATIO */
569   4,					/* cost for loading QImode using movzbl */
570   {4, 4, 4},				/* cost of loading integer registers
571 					   in QImode, HImode and SImode.
572 					   Relative to reg-reg move (2).  */
573   {4, 4, 4},				/* cost of storing integer registers */
574   3,					/* cost of reg,reg fld/fst */
575   {12, 12, 12},				/* cost of loading fp registers
576 					   in SFmode, DFmode and XFmode */
577   {4, 4, 4},				/* cost of storing fp registers
578 					   in SFmode, DFmode and XFmode */
579   6,					/* cost of moving MMX register */
580   {12, 12},				/* cost of loading MMX registers
581 					   in SImode and DImode */
582   {12, 12},				/* cost of storing MMX registers
583 					   in SImode and DImode */
584   6,					/* cost of moving SSE register */
585   {12, 12, 12},				/* cost of loading SSE registers
586 					   in SImode, DImode and TImode */
587   {12, 12, 12},				/* cost of storing SSE registers
588 					   in SImode, DImode and TImode */
589   8,					/* MMX or SSE register to integer */
590   128,					/* size of prefetch block */
591   8,					/* number of parallel prefetches */
592   1,					/* Branch cost */
593   COSTS_N_INSNS (6),			/* cost of FADD and FSUB insns.  */
594   COSTS_N_INSNS (8),			/* cost of FMUL instruction.  */
595   COSTS_N_INSNS (40),			/* cost of FDIV instruction.  */
596   COSTS_N_INSNS (3),			/* cost of FABS instruction.  */
597   COSTS_N_INSNS (3),			/* cost of FCHS instruction.  */
598   COSTS_N_INSNS (44),			/* cost of FSQRT instruction.  */
599 };
600 
601 /* Generic64 should produce code tuned for Nocona and K8.  */
602 static const
603 struct processor_costs generic64_cost = {
604   COSTS_N_INSNS (1),			/* cost of an add instruction */
605   /* On all chips taken into consideration lea is 2 cycles or more.  With
606      this cost, however, our current implementation of synth_mult results in
607      the use of unnecessary temporary registers, causing regressions on several
608      SPECfp benchmarks.  */
609   COSTS_N_INSNS (1) + 1,		/* cost of a lea instruction */
610   COSTS_N_INSNS (1),			/* variable shift costs */
611   COSTS_N_INSNS (1),			/* constant shift costs */
612   {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
613    COSTS_N_INSNS (4),			/*                               HI */
614    COSTS_N_INSNS (3),			/*                               SI */
615    COSTS_N_INSNS (4),			/*                               DI */
616    COSTS_N_INSNS (2)},			/*                               other */
617   0,					/* cost of multiply per each bit set */
618   {COSTS_N_INSNS (18),			/* cost of a divide/mod for QI */
619    COSTS_N_INSNS (26),			/*                          HI */
620    COSTS_N_INSNS (42),			/*                          SI */
621    COSTS_N_INSNS (74),			/*                          DI */
622    COSTS_N_INSNS (74)},			/*                          other */
623   COSTS_N_INSNS (1),			/* cost of movsx */
624   COSTS_N_INSNS (1),			/* cost of movzx */
625   8,					/* "large" insn */
626   17,					/* MOVE_RATIO */
627   4,					/* cost for loading QImode using movzbl */
628   {4, 4, 4},				/* cost of loading integer registers
629 					   in QImode, HImode and SImode.
630 					   Relative to reg-reg move (2).  */
631   {4, 4, 4},				/* cost of storing integer registers */
632   4,					/* cost of reg,reg fld/fst */
633   {12, 12, 12},				/* cost of loading fp registers
634 					   in SFmode, DFmode and XFmode */
635   {6, 6, 8},				/* cost of storing fp registers
636 					   in SFmode, DFmode and XFmode */
637   2,					/* cost of moving MMX register */
638   {8, 8},				/* cost of loading MMX registers
639 					   in SImode and DImode */
640   {8, 8},				/* cost of storing MMX registers
641 					   in SImode and DImode */
642   2,					/* cost of moving SSE register */
643   {8, 8, 8},				/* cost of loading SSE registers
644 					   in SImode, DImode and TImode */
645   {8, 8, 8},				/* cost of storing SSE registers
646 					   in SImode, DImode and TImode */
647   5,					/* MMX or SSE register to integer */
648   64,					/* size of prefetch block */
649   6,					/* number of parallel prefetches */
650   /* Benchmarks show large regressions on the K8 sixtrack benchmark when this
651      value is increased to the perhaps more appropriate value of 5.  */
652   3,					/* Branch cost */
653   COSTS_N_INSNS (8),			/* cost of FADD and FSUB insns.  */
654   COSTS_N_INSNS (8),			/* cost of FMUL instruction.  */
655   COSTS_N_INSNS (20),			/* cost of FDIV instruction.  */
656   COSTS_N_INSNS (8),			/* cost of FABS instruction.  */
657   COSTS_N_INSNS (8),			/* cost of FCHS instruction.  */
658   COSTS_N_INSNS (40),			/* cost of FSQRT instruction.  */
659 };
660 
661 /* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona and K8.  */
662 static const
663 struct processor_costs generic32_cost = {
664   COSTS_N_INSNS (1),			/* cost of an add instruction */
665   COSTS_N_INSNS (1) + 1,		/* cost of a lea instruction */
666   COSTS_N_INSNS (1),			/* variable shift costs */
667   COSTS_N_INSNS (1),			/* constant shift costs */
668   {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
669    COSTS_N_INSNS (4),			/*                               HI */
670    COSTS_N_INSNS (3),			/*                               SI */
671    COSTS_N_INSNS (4),			/*                               DI */
672    COSTS_N_INSNS (2)},			/*                               other */
673   0,					/* cost of multiply per each bit set */
674   {COSTS_N_INSNS (18),			/* cost of a divide/mod for QI */
675    COSTS_N_INSNS (26),			/*                          HI */
676    COSTS_N_INSNS (42),			/*                          SI */
677    COSTS_N_INSNS (74),			/*                          DI */
678    COSTS_N_INSNS (74)},			/*                          other */
679   COSTS_N_INSNS (1),			/* cost of movsx */
680   COSTS_N_INSNS (1),			/* cost of movzx */
681   8,					/* "large" insn */
682   17,					/* MOVE_RATIO */
683   4,					/* cost for loading QImode using movzbl */
684   {4, 4, 4},				/* cost of loading integer registers
685 					   in QImode, HImode and SImode.
686 					   Relative to reg-reg move (2).  */
687   {4, 4, 4},				/* cost of storing integer registers */
688   4,					/* cost of reg,reg fld/fst */
689   {12, 12, 12},				/* cost of loading fp registers
690 					   in SFmode, DFmode and XFmode */
691   {6, 6, 8},				/* cost of storing fp registers
692 					   in SFmode, DFmode and XFmode */
693   2,					/* cost of moving MMX register */
694   {8, 8},				/* cost of loading MMX registers
695 					   in SImode and DImode */
696   {8, 8},				/* cost of storing MMX registers
697 					   in SImode and DImode */
698   2,					/* cost of moving SSE register */
699   {8, 8, 8},				/* cost of loading SSE registers
700 					   in SImode, DImode and TImode */
701   {8, 8, 8},				/* cost of storing SSE registers
702 					   in SImode, DImode and TImode */
703   5,					/* MMX or SSE register to integer */
704   64,					/* size of prefetch block */
705   6,					/* number of parallel prefetches */
706   3,					/* Branch cost */
707   COSTS_N_INSNS (8),			/* cost of FADD and FSUB insns.  */
708   COSTS_N_INSNS (8),			/* cost of FMUL instruction.  */
709   COSTS_N_INSNS (20),			/* cost of FDIV instruction.  */
710   COSTS_N_INSNS (8),			/* cost of FABS instruction.  */
711   COSTS_N_INSNS (8),			/* cost of FCHS instruction.  */
712   COSTS_N_INSNS (40),			/* cost of FSQRT instruction.  */
713 };
714 
715 const struct processor_costs *ix86_cost = &pentium_cost;
716 
717 /* Processor feature/optimization bitmasks.  */
718 #define m_386 (1<<PROCESSOR_I386)
719 #define m_486 (1<<PROCESSOR_I486)
720 #define m_PENT (1<<PROCESSOR_PENTIUM)
721 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
722 #define m_K6  (1<<PROCESSOR_K6)
723 #define m_ATHLON  (1<<PROCESSOR_ATHLON)
724 #define m_PENT4  (1<<PROCESSOR_PENTIUM4)
725 #define m_K8  (1<<PROCESSOR_K8)
726 #define m_ATHLON_K8  (m_K8 | m_ATHLON)
727 #define m_NOCONA  (1<<PROCESSOR_NOCONA)
728 #define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
729 #define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
730 #define m_GENERIC (m_GENERIC32 | m_GENERIC64)
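/* A minimal sketch of how these masks are consumed, assuming the
   TUNEMASK/TARGET_* convention in i386.h: each x86_* tuning flag below is a
   bitmask over processors, and a feature is active when the bit for the CPU
   being tuned for is set, roughly

     #define TUNEMASK (1 << ix86_tune)
     #define TARGET_USE_LEAVE (x86_use_leave & TUNEMASK)

   so a value such as m_386 | m_K6 | m_ATHLON_K8 | m_GENERIC64 enables the
   feature only when tuning for those CPUs.  */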
731 
732 /* Generic instruction choice should be the common subset of the supported
733    CPUs (PPro/PENT4/NOCONA/Athlon/K8).  */
734 
735 /* Leave does not affect Nocona SPEC2000 results negatively, so enabling it for
736    Generic64 seems like a good code size tradeoff.  We can't enable it for 32bit
737    generic because it does not work well with PPro based chips.  */
738 const int x86_use_leave = m_386 | m_K6 | m_ATHLON_K8 | m_GENERIC64;
739 const int x86_push_memory = m_386 | m_K6 | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
740 const int x86_zero_extend_with_and = m_486 | m_PENT;
741 const int x86_movx = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA | m_GENERIC /* m_386 | m_K6 */;
742 const int x86_double_with_add = ~m_386;
743 const int x86_use_bit_test = m_386;
744 const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8 | m_K6 | m_GENERIC;
745 const int x86_cmove = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
746 const int x86_3dnow_a = m_ATHLON_K8;
747 const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
748 /* Branch hints were put in P4 based on simulation results.  But
749    after P4 was made, no performance benefit was observed with
750    branch hints; they also increase the code size.  As a result,
751    icc never generates branch hints.  */
752 const int x86_branch_hints = 0;
753 const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4 | m_NOCONA | m_GENERIC32; /*m_GENERIC | m_ATHLON_K8 ? */
754 /* We probably ought to watch for partial register stalls on the Generic32
755    compilation setting as well.  However, in the current implementation the
756    partial register stalls are not eliminated very well - they can
757    be introduced via subregs synthesized by combine and can happen
758    in caller/callee saving sequences.
759    Because this option pays back little on PPro based chips and conflicts
760    with the partial register dependencies used by Athlon/P4 based chips, it is
761    better to leave it off for generic32 for now.  */
762 const int x86_partial_reg_stall = m_PPRO;
763 const int x86_partial_flag_reg_stall = m_GENERIC;
764 const int x86_use_himode_fiop = m_386 | m_486 | m_K6;
765 const int x86_use_simode_fiop = ~(m_PPRO | m_ATHLON_K8 | m_PENT | m_GENERIC);
766 const int x86_use_mov0 = m_K6;
767 const int x86_use_cltd = ~(m_PENT | m_K6 | m_GENERIC);
768 const int x86_read_modify_write = ~m_PENT;
769 const int x86_read_modify = ~(m_PENT | m_PPRO);
770 const int x86_split_long_moves = m_PPRO;
771 const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON_K8 | m_GENERIC; /* m_PENT4 ? */
772 const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
773 const int x86_single_stringop = m_386 | m_PENT4 | m_NOCONA;
774 const int x86_qimode_math = ~(0);
775 const int x86_promote_qi_regs = 0;
776 /* On PPro this flag is meant to avoid partial register stalls.  Just like
777    x86_partial_reg_stall, this option might be considered for Generic32
778    if our scheme for avoiding partial stalls were more effective.  */
779 const int x86_himode_math = ~(m_PPRO);
780 const int x86_promote_hi_regs = m_PPRO;
781 const int x86_sub_esp_4 = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA | m_GENERIC;
782 const int x86_sub_esp_8 = m_ATHLON_K8 | m_PPRO | m_386 | m_486 | m_PENT4 | m_NOCONA | m_GENERIC;
783 const int x86_add_esp_4 = m_ATHLON_K8 | m_K6 | m_PENT4 | m_NOCONA | m_GENERIC;
784 const int x86_add_esp_8 = m_ATHLON_K8 | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4 | m_NOCONA | m_GENERIC;
785 const int x86_integer_DFmode_moves = ~(m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_GENERIC);
786 const int x86_partial_reg_dependency = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
787 const int x86_memory_mismatch_stall = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
788 const int x86_accumulate_outgoing_args = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_GENERIC;
789 const int x86_prologue_using_move = m_ATHLON_K8 | m_PPRO | m_GENERIC;
790 const int x86_epilogue_using_move = m_ATHLON_K8 | m_PPRO | m_GENERIC;
791 const int x86_shift1 = ~m_486;
792 const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
793 /* In the Generic model we have a conflict here between PPro/Pentium4 based
794    chips that treat 128bit SSE registers as single units and K8 based chips
795    that divide SSE registers into two 64bit halves.
796    x86_sse_partial_reg_dependency promotes all store destinations to 128bit
797    to allow register renaming on 128bit SSE units, but usually results in one
798    extra microop on 64bit SSE units.  Experimental results show that disabling
799    this option on P4 brings over 20% SPECfp regression, while enabling it on
800    K8 brings roughly a 2.4% regression that can be partly masked by careful
801    scheduling of moves.  */
802 const int x86_sse_partial_reg_dependency = m_PENT4 | m_NOCONA | m_PPRO | m_GENERIC;
803 /* Set for machines where the type and dependencies are resolved on SSE
804    register parts instead of whole registers, so we may maintain just the
805    lower part of scalar values in the proper format, leaving the upper part
806    undefined.  */
807 const int x86_sse_split_regs = m_ATHLON_K8;
808 const int x86_sse_typeless_stores = m_ATHLON_K8;
809 const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4 | m_NOCONA;
810 const int x86_use_ffreep = m_ATHLON_K8;
811 const int x86_rep_movl_optimal = m_386 | m_PENT | m_PPRO | m_K6;
812 const int x86_use_incdec = ~(m_PENT4 | m_NOCONA | m_GENERIC);
813 
814 /* ??? Allowing interunit moves makes it all too easy for the compiler to put
815    integer data in xmm registers, which results in pretty abysmal code.  */
816 const int x86_inter_unit_moves = 0 /* ~(m_ATHLON_K8) */;
817 
818 const int x86_ext_80387_constants = m_K6 | m_ATHLON | m_PENT4 | m_NOCONA | m_PPRO | m_GENERIC32;
819 /* Some CPU cores are not able to predict more than 4 branch instructions in
820    the 16 byte window.  */
821 const int x86_four_jump_limit = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
822 const int x86_schedule = m_PPRO | m_ATHLON_K8 | m_K6 | m_PENT | m_GENERIC;
823 const int x86_use_bt = m_ATHLON_K8;
824 /* Compare and exchange was added for 80486.  */
825 const int x86_cmpxchg = ~m_386;
826 /* Compare and exchange 8 bytes was added for pentium.  */
827 const int x86_cmpxchg8b = ~(m_386 | m_486);
828 /* Compare and exchange 16 bytes was added for nocona.  */
829 const int x86_cmpxchg16b = m_NOCONA;
830 /* Exchange and add was added for 80486.  */
831 const int x86_xadd = ~m_386;
832 const int x86_pad_returns = m_ATHLON_K8 | m_GENERIC;
833 
834 /* In case the average insn count for a single function invocation is
835    lower than this constant, emit fast (but longer) prologue and
836    epilogue code.  */
837 #define FAST_PROLOGUE_INSN_COUNT 20
838 
839 /* Names for the 8-bit (low), 8-bit (high), and 16-bit registers, respectively.  */
840 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
841 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
842 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
843 
844 /* Array of the smallest class containing reg number REGNO, indexed by
845    REGNO.  Used by REGNO_REG_CLASS in i386.h.  */
846 
847 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
848 {
849   /* ax, dx, cx, bx */
850   AREG, DREG, CREG, BREG,
851   /* si, di, bp, sp */
852   SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
853   /* FP registers */
854   FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
855   FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
856   /* arg pointer */
857   NON_Q_REGS,
858   /* flags, fpsr, dirflag, frame */
859   NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
860   SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
861   SSE_REGS, SSE_REGS,
862   MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
863   MMX_REGS, MMX_REGS,
864   NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
865   NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
866   SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
867   SSE_REGS, SSE_REGS,
868 };
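/* For example, regclass_map[0] is AREG (the class containing only %eax) and
   regclass_map[7] is NON_Q_REGS, since %esp has no QImode part, so
   REGNO_REG_CLASS picks the tightest usable class for each hard register.  */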
869 
870 /* The "default" register map used in 32bit mode.  */
871 
872 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
873 {
874   0, 2, 1, 3, 6, 7, 4, 5,		/* general regs */
875   12, 13, 14, 15, 16, 17, 18, 19,	/* fp regs */
876   -1, -1, -1, -1, -1,			/* arg, flags, fpsr, dir, frame */
877   21, 22, 23, 24, 25, 26, 27, 28,	/* SSE */
878   29, 30, 31, 32, 33, 34, 35, 36,       /* MMX */
879   -1, -1, -1, -1, -1, -1, -1, -1,	/* extended integer registers */
880   -1, -1, -1, -1, -1, -1, -1, -1,	/* extended SSE registers */
881 };
882 
883 static int const x86_64_int_parameter_registers[6] =
884 {
885   5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
886   FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
887 };
888 
889 static int const x86_64_int_return_registers[4] =
890 {
891   0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
892 };
893 
894 /* The "default" register map used in 64bit mode.  */
895 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
896 {
897   0, 1, 2, 3, 4, 5, 6, 7,		/* general regs */
898   33, 34, 35, 36, 37, 38, 39, 40,	/* fp regs */
899   -1, -1, -1, -1, -1,			/* arg, flags, fpsr, dir, frame */
900   17, 18, 19, 20, 21, 22, 23, 24,	/* SSE */
901   41, 42, 43, 44, 45, 46, 47, 48,       /* MMX */
902   8,9,10,11,12,13,14,15,		/* extended integer registers */
903   25, 26, 27, 28, 29, 30, 31, 32,	/* extended SSE registers */
904 };
905 
906 /* Define the register numbers to be used in Dwarf debugging information.
907    The SVR4 reference port C compiler uses the following register numbers
908    in its Dwarf output code:
909 	0 for %eax (gcc regno = 0)
910 	1 for %ecx (gcc regno = 2)
911 	2 for %edx (gcc regno = 1)
912 	3 for %ebx (gcc regno = 3)
913 	4 for %esp (gcc regno = 7)
914 	5 for %ebp (gcc regno = 6)
915 	6 for %esi (gcc regno = 4)
916 	7 for %edi (gcc regno = 5)
917    The following three DWARF register numbers are never generated by
918    the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
919    believes these numbers have these meanings.
920 	8  for %eip    (no gcc equivalent)
921 	9  for %eflags (gcc regno = 17)
922 	10 for %trapno (no gcc equivalent)
923    It is not at all clear how we should number the FP stack registers
924    for the x86 architecture.  If the version of SDB on x86/svr4 were
925    a bit less brain dead with respect to floating-point then we would
926    have a precedent to follow with respect to DWARF register numbers
927    for x86 FP registers, but the SDB on x86/svr4 is so completely
928    broken with respect to FP registers that it is hardly worth thinking
929    of it as something to strive for compatibility with.
930    The version of x86/svr4 SDB I have at the moment does (partially)
931    seem to believe that DWARF register number 11 is associated with
932    the x86 register %st(0), but that's about all.  Higher DWARF
933    register numbers don't seem to be associated with anything in
934    particular, and even for DWARF regno 11, SDB only seems to under-
935    stand that it should say that a variable lives in %st(0) (when
936    asked via an `=' command) if we said it was in DWARF regno 11,
937    but SDB still prints garbage when asked for the value of the
938    variable in question (via a `/' command).
939    (Also note that the labels SDB prints for various FP stack regs
940    when doing an `x' command are all wrong.)
941    Note that these problems generally don't affect the native SVR4
942    C compiler because it doesn't allow the use of -O with -g and
943    because when it is *not* optimizing, it allocates a memory
944    location for each floating-point variable, and the memory
945    location is what gets described in the DWARF AT_location
946    attribute for the variable in question.
947    Regardless of the severe mental illness of the x86/svr4 SDB, we
948    do something sensible here and we use the following DWARF
949    register numbers.  Note that these are all stack-top-relative
950    numbers.
951 	11 for %st(0) (gcc regno = 8)
952 	12 for %st(1) (gcc regno = 9)
953 	13 for %st(2) (gcc regno = 10)
954 	14 for %st(3) (gcc regno = 11)
955 	15 for %st(4) (gcc regno = 12)
956 	16 for %st(5) (gcc regno = 13)
957 	17 for %st(6) (gcc regno = 14)
958 	18 for %st(7) (gcc regno = 15)
959 */
960 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
961 {
962   0, 2, 1, 3, 6, 7, 5, 4,		/* general regs */
963   11, 12, 13, 14, 15, 16, 17, 18,	/* fp regs */
964   -1, 9, -1, -1, -1,			/* arg, flags, fpsr, dir, frame */
965   21, 22, 23, 24, 25, 26, 27, 28,	/* SSE registers */
966   29, 30, 31, 32, 33, 34, 35, 36,	/* MMX registers */
967   -1, -1, -1, -1, -1, -1, -1, -1,	/* extended integer registers */
968   -1, -1, -1, -1, -1, -1, -1, -1,	/* extended SSE registers */
969 };
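/* A small example of reading this map: gcc regno 1 is %edx (see the comment
   above), and svr4_dbx_register_map[1] == 2, which is exactly the DWARF
   number the SVR4 compiler uses for %edx.  */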
970 
971 /* Test and compare insns in i386.md store the information needed to
972    generate branch and scc insns here.  */
973 
974 rtx ix86_compare_op0 = NULL_RTX;
975 rtx ix86_compare_op1 = NULL_RTX;
976 rtx ix86_compare_emitted = NULL_RTX;
977 
978 /* Size of the register save area.  */
979 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
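/* Worked size, assuming the usual 64bit values REGPARM_MAX == 6,
   UNITS_PER_WORD == 8 and SSE_REGPARM_MAX == 8:
   6 * 8 + 8 * 16 == 176 bytes, matching the register save area the psABI
   prescribes for varargs functions.  */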
980 
981 /* Define the structure for the machine field in struct function.  */
982 
983 struct stack_local_entry GTY(())
984 {
985   unsigned short mode;
986   unsigned short n;
987   rtx rtl;
988   struct stack_local_entry *next;
989 };
990 
991 /* Structure describing stack frame layout.
992    Stack grows downward:
993 
994    [arguments]
995 					      <- ARG_POINTER
996    saved pc
997 
998    saved frame pointer if frame_pointer_needed
999 					      <- HARD_FRAME_POINTER
1000    [-msave-args]
1001 
1002    [padding0]
1003 
1004    [saved regs]
1005 
1006    [padding1]          \
1007 		        )
1008    [va_arg registers]  (
1009 		        > to_allocate	      <- FRAME_POINTER
1010    [frame]	       (
1011 		        )
1012    [padding2]	       /
1013   */
1014 struct ix86_frame
1015 {
1016   int nmsave_args;
1017   int padding0;
1018   int nregs;
1019   int padding1;
1020   int va_arg_size;
1021   HOST_WIDE_INT frame;
1022   int padding2;
1023   int outgoing_arguments_size;
1024   int red_zone_size;
1025 
1026   HOST_WIDE_INT to_allocate;
1027   /* The offsets relative to ARG_POINTER.  */
1028   HOST_WIDE_INT frame_pointer_offset;
1029   HOST_WIDE_INT hard_frame_pointer_offset;
1030   HOST_WIDE_INT stack_pointer_offset;
1031 
1032   HOST_WIDE_INT local_size;
1033 
1034   /* When save_regs_using_mov is set, emit prologue using
1035      move instead of push instructions.  */
1036   bool save_regs_using_mov;
1037 };
1038 
1039 /* Code model option.  */
1040 enum cmodel ix86_cmodel;
1041 /* Asm dialect.  */
1042 enum asm_dialect ix86_asm_dialect = ASM_ATT;
1043 /* TLS dialects.  */
1044 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
1045 
1046 /* Which unit we are generating floating point math for.  */
1047 enum fpmath_unit ix86_fpmath;
1048 
1049 /* Which CPU we are scheduling for.  */
1050 enum processor_type ix86_tune;
1051 /* Which instruction set architecture to use.  */
1052 enum processor_type ix86_arch;
1053 
1054 /* True if the SSE prefetch instruction is not a NOOP.  */
1055 int x86_prefetch_sse;
1056 
1057 /* ix86_regparm_string as a number */
1058 static int ix86_regparm;
1059 
1060 /* -mstackrealign option */
1061 extern int ix86_force_align_arg_pointer;
1062 static const char ix86_force_align_arg_pointer_string[] = "force_align_arg_pointer";
1063 
1064 /* Preferred alignment for stack boundary in bits.  */
1065 unsigned int ix86_preferred_stack_boundary;
1066 
1067 /* Values 1-5: see jump.c */
1068 int ix86_branch_cost;
1069 
1070 /* Variables which are this size or smaller are put in the data/bss
1071    or ldata/lbss sections.  */
1072 
1073 int ix86_section_threshold = 65536;
1074 
1075 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL.  */
1076 char internal_label_prefix[16];
1077 int internal_label_prefix_len;
1078 
1079 static bool ix86_handle_option (size_t, const char *, int);
1080 static void output_pic_addr_const (FILE *, rtx, int);
1081 static void put_condition_code (enum rtx_code, enum machine_mode,
1082 				int, int, FILE *);
1083 static const char *get_some_local_dynamic_name (void);
1084 static int get_some_local_dynamic_name_1 (rtx *, void *);
1085 static rtx ix86_expand_int_compare (enum rtx_code, rtx, rtx);
1086 static enum rtx_code ix86_prepare_fp_compare_args (enum rtx_code, rtx *,
1087 						   rtx *);
1088 static bool ix86_fixed_condition_code_regs (unsigned int *, unsigned int *);
1089 static enum machine_mode ix86_cc_modes_compatible (enum machine_mode,
1090 						   enum machine_mode);
1091 static rtx get_thread_pointer (int);
1092 static rtx legitimize_tls_address (rtx, enum tls_model, int);
1093 static void get_pc_thunk_name (char [32], unsigned int);
1094 static rtx gen_push (rtx);
1095 static int ix86_flags_dependent (rtx, rtx, enum attr_type);
1096 static int ix86_agi_dependent (rtx, rtx, enum attr_type);
1097 static struct machine_function * ix86_init_machine_status (void);
1098 static int ix86_split_to_parts (rtx, rtx *, enum machine_mode);
1099 static int ix86_nsaved_regs (void);
1100 static void ix86_emit_save_regs (void);
1101 static void ix86_emit_save_regs_using_mov (rtx, HOST_WIDE_INT);
1102 static void ix86_emit_restore_regs_using_mov (rtx, HOST_WIDE_INT, int);
1103 static void ix86_output_function_epilogue (FILE *, HOST_WIDE_INT);
1104 static HOST_WIDE_INT ix86_GOT_alias_set (void);
1105 static void ix86_adjust_counter (rtx, HOST_WIDE_INT);
1106 static rtx ix86_expand_aligntest (rtx, int);
1107 static void ix86_expand_strlensi_unroll_1 (rtx, rtx, rtx);
1108 static int ix86_issue_rate (void);
1109 static int ix86_adjust_cost (rtx, rtx, rtx, int);
1110 static int ia32_multipass_dfa_lookahead (void);
1111 static void ix86_init_mmx_sse_builtins (void);
1112 static rtx x86_this_parameter (tree);
1113 static void x86_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
1114 				 HOST_WIDE_INT, tree);
1115 static bool x86_can_output_mi_thunk (tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
1116 static void x86_file_start (void);
1117 static void ix86_reorg (void);
1118 static bool ix86_expand_carry_flag_compare (enum rtx_code, rtx, rtx, rtx*);
1119 static tree ix86_build_builtin_va_list (void);
1120 static void ix86_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
1121 					 tree, int *, int);
1122 static tree ix86_gimplify_va_arg (tree, tree, tree *, tree *);
1123 static bool ix86_scalar_mode_supported_p (enum machine_mode);
1124 static bool ix86_vector_mode_supported_p (enum machine_mode);
1125 
1126 static int ix86_address_cost (rtx);
1127 static bool ix86_cannot_force_const_mem (rtx);
1128 static rtx ix86_delegitimize_address (rtx);
1129 
1130 static void i386_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
1131 
1132 struct builtin_description;
1133 static rtx ix86_expand_sse_comi (const struct builtin_description *,
1134 				 tree, rtx);
1135 static rtx ix86_expand_sse_compare (const struct builtin_description *,
1136 				    tree, rtx);
1137 static rtx ix86_expand_unop1_builtin (enum insn_code, tree, rtx);
1138 static rtx ix86_expand_unop_builtin (enum insn_code, tree, rtx, int);
1139 static rtx ix86_expand_binop_builtin (enum insn_code, tree, rtx);
1140 static rtx ix86_expand_store_builtin (enum insn_code, tree);
1141 static rtx safe_vector_operand (rtx, enum machine_mode);
1142 static rtx ix86_expand_fp_compare (enum rtx_code, rtx, rtx, rtx, rtx *, rtx *);
1143 static int ix86_fp_comparison_arithmetics_cost (enum rtx_code code);
1144 static int ix86_fp_comparison_fcomi_cost (enum rtx_code code);
1145 static int ix86_fp_comparison_sahf_cost (enum rtx_code code);
1146 static int ix86_fp_comparison_cost (enum rtx_code code);
1147 static unsigned int ix86_select_alt_pic_regnum (void);
1148 static int ix86_save_reg (unsigned int, int);
1149 static void ix86_compute_frame_layout (struct ix86_frame *);
1150 static int ix86_comp_type_attributes (tree, tree);
1151 static int ix86_function_regparm (tree, tree);
1152 const struct attribute_spec ix86_attribute_table[];
1153 static bool ix86_function_ok_for_sibcall (tree, tree);
1154 static tree ix86_handle_cconv_attribute (tree *, tree, tree, int, bool *);
1155 static int ix86_value_regno (enum machine_mode, tree, tree);
1156 static bool contains_128bit_aligned_vector_p (tree);
1157 static rtx ix86_struct_value_rtx (tree, int);
1158 static bool ix86_ms_bitfield_layout_p (tree);
1159 static tree ix86_handle_struct_attribute (tree *, tree, tree, int, bool *);
1160 static int extended_reg_mentioned_1 (rtx *, void *);
1161 static bool ix86_rtx_costs (rtx, int, int, int *);
1162 static int min_insn_size (rtx);
1163 static tree ix86_md_asm_clobbers (tree outputs, tree inputs, tree clobbers);
1164 static bool ix86_must_pass_in_stack (enum machine_mode mode, tree type);
1165 static bool ix86_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
1166 				    tree, bool);
1167 static void ix86_init_builtins (void);
1168 static rtx ix86_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
1169 static const char *ix86_mangle_fundamental_type (tree);
1170 static tree ix86_stack_protect_fail (void);
1171 static rtx ix86_internal_arg_pointer (void);
1172 static void ix86_dwarf_handle_frame_unspec (const char *, rtx, int);
1173 static void pro_epilogue_adjust_stack (rtx, rtx, rtx, int);
1174 
1175 /* This function is only used on Solaris.  */
1176 static void i386_solaris_elf_named_section (const char *, unsigned int, tree)
1177   ATTRIBUTE_UNUSED;
1178 
1179 /* Register class used for passing a given 64-bit part of the argument.
1180    These represent classes as documented by the psABI, except for the
1181    SSESF and SSEDF classes, which are basically the SSE class; gcc just
1182    uses SF or DFmode moves instead of DImode to avoid reformatting penalties.
1183 
1184    Similarly we play games with the INTEGERSI_CLASS to use cheaper SImode
1185    moves whenever possible (the upper half contains padding).
1186  */
1187 enum x86_64_reg_class
1188   {
1189     X86_64_NO_CLASS,
1190     X86_64_INTEGER_CLASS,
1191     X86_64_INTEGERSI_CLASS,
1192     X86_64_SSE_CLASS,
1193     X86_64_SSESF_CLASS,
1194     X86_64_SSEDF_CLASS,
1195     X86_64_SSEUP_CLASS,
1196     X86_64_X87_CLASS,
1197     X86_64_X87UP_CLASS,
1198     X86_64_COMPLEX_X87_CLASS,
1199     X86_64_MEMORY_CLASS
1200   };
1201 static const char * const x86_64_reg_class_name[] = {
1202   "no", "integer", "integerSI", "sse", "sseSF", "sseDF",
1203   "sseup", "x87", "x87up", "cplx87", "no"
1204 };
1205 
1206 #define MAX_CLASSES 4
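/* Illustrative sketch (an example only, not a full statement of the psABI):
   a 16-byte aggregate such as

       struct s { double d; int i; };

   is classified one eightbyte at a time, giving roughly
   { X86_64_SSEDF_CLASS, X86_64_INTEGERSI_CLASS }, so the double is passed
   in an SSE register and the int in a general purpose register.  */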
1207 
1208 /* Table of constants used by fldpi, fldln2, etc....  */
1209 static REAL_VALUE_TYPE ext_80387_constants_table [5];
1210 static bool ext_80387_constants_init = 0;
1211 static void init_ext_80387_constants (void);
1212 static bool ix86_in_large_data_p (tree) ATTRIBUTE_UNUSED;
1213 static void ix86_encode_section_info (tree, rtx, int) ATTRIBUTE_UNUSED;
1214 static void x86_64_elf_unique_section (tree decl, int reloc) ATTRIBUTE_UNUSED;
1215 static section *x86_64_elf_select_section (tree decl, int reloc,
1216 					   unsigned HOST_WIDE_INT align)
1217 					     ATTRIBUTE_UNUSED;
1218 
1219 /* Initialize the GCC target structure.  */
1220 #undef TARGET_ATTRIBUTE_TABLE
1221 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
1222 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
1223 #  undef TARGET_MERGE_DECL_ATTRIBUTES
1224 #  define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
1225 #endif
1226 
1227 #undef TARGET_COMP_TYPE_ATTRIBUTES
1228 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
1229 
1230 #undef TARGET_INIT_BUILTINS
1231 #define TARGET_INIT_BUILTINS ix86_init_builtins
1232 #undef TARGET_EXPAND_BUILTIN
1233 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
1234 
1235 #undef TARGET_ASM_FUNCTION_EPILOGUE
1236 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
1237 
1238 #undef TARGET_ENCODE_SECTION_INFO
1239 #ifndef SUBTARGET_ENCODE_SECTION_INFO
1240 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
1241 #else
1242 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
1243 #endif
1244 
1245 #undef TARGET_ASM_OPEN_PAREN
1246 #define TARGET_ASM_OPEN_PAREN ""
1247 #undef TARGET_ASM_CLOSE_PAREN
1248 #define TARGET_ASM_CLOSE_PAREN ""
1249 
1250 #undef TARGET_ASM_ALIGNED_HI_OP
1251 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
1252 #undef TARGET_ASM_ALIGNED_SI_OP
1253 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
1254 #ifdef ASM_QUAD
1255 #undef TARGET_ASM_ALIGNED_DI_OP
1256 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
1257 #endif
1258 
1259 #undef TARGET_ASM_UNALIGNED_HI_OP
1260 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
1261 #undef TARGET_ASM_UNALIGNED_SI_OP
1262 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
1263 #undef TARGET_ASM_UNALIGNED_DI_OP
1264 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
1265 
1266 #undef TARGET_SCHED_ADJUST_COST
1267 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
1268 #undef TARGET_SCHED_ISSUE_RATE
1269 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
1270 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
1271 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
1272   ia32_multipass_dfa_lookahead
1273 
1274 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
1275 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
1276 
1277 #ifdef HAVE_AS_TLS
1278 #undef TARGET_HAVE_TLS
1279 #define TARGET_HAVE_TLS true
1280 #endif
1281 #undef TARGET_CANNOT_FORCE_CONST_MEM
1282 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
1283 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
1284 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_rtx_true
1285 
1286 #undef TARGET_DELEGITIMIZE_ADDRESS
1287 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
1288 
1289 #undef TARGET_MS_BITFIELD_LAYOUT_P
1290 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
1291 
1292 #if TARGET_MACHO
1293 #undef TARGET_BINDS_LOCAL_P
1294 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
1295 #endif
1296 
1297 #undef TARGET_ASM_OUTPUT_MI_THUNK
1298 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
1299 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1300 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
1301 
1302 #undef TARGET_ASM_FILE_START
1303 #define TARGET_ASM_FILE_START x86_file_start
1304 
1305 #undef TARGET_DEFAULT_TARGET_FLAGS
1306 #define TARGET_DEFAULT_TARGET_FLAGS	\
1307   (TARGET_DEFAULT			\
1308    | TARGET_64BIT_DEFAULT		\
1309    | TARGET_SUBTARGET_DEFAULT		\
1310    | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
1311 
1312 #undef TARGET_HANDLE_OPTION
1313 #define TARGET_HANDLE_OPTION ix86_handle_option
1314 
1315 #undef TARGET_RTX_COSTS
1316 #define TARGET_RTX_COSTS ix86_rtx_costs
1317 #undef TARGET_ADDRESS_COST
1318 #define TARGET_ADDRESS_COST ix86_address_cost
1319 
1320 #undef TARGET_FIXED_CONDITION_CODE_REGS
1321 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
1322 #undef TARGET_CC_MODES_COMPATIBLE
1323 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
1324 
1325 #undef TARGET_MACHINE_DEPENDENT_REORG
1326 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
1327 
1328 #undef TARGET_BUILD_BUILTIN_VA_LIST
1329 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
1330 
1331 #undef TARGET_MD_ASM_CLOBBERS
1332 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
1333 
1334 #undef TARGET_PROMOTE_PROTOTYPES
1335 #define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
1336 #undef TARGET_STRUCT_VALUE_RTX
1337 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
1338 #undef TARGET_SETUP_INCOMING_VARARGS
1339 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
1340 #undef TARGET_MUST_PASS_IN_STACK
1341 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
1342 #undef TARGET_PASS_BY_REFERENCE
1343 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
1344 #undef TARGET_INTERNAL_ARG_POINTER
1345 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
1346 #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
1347 #define TARGET_DWARF_HANDLE_FRAME_UNSPEC ix86_dwarf_handle_frame_unspec
1348 
1349 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
1350 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
1351 
1352 #undef TARGET_SCALAR_MODE_SUPPORTED_P
1353 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
1354 
1355 #undef TARGET_VECTOR_MODE_SUPPORTED_P
1356 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
1357 
1358 #ifdef HAVE_AS_TLS
1359 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
1360 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
1361 #endif
1362 
1363 #ifdef SUBTARGET_INSERT_ATTRIBUTES
1364 #undef TARGET_INSERT_ATTRIBUTES
1365 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
1366 #endif
1367 
1368 #undef TARGET_MANGLE_FUNDAMENTAL_TYPE
1369 #define TARGET_MANGLE_FUNDAMENTAL_TYPE ix86_mangle_fundamental_type
1370 
1371 #undef TARGET_STACK_PROTECT_FAIL
1372 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
1373 
1374 #undef TARGET_FUNCTION_VALUE
1375 #define TARGET_FUNCTION_VALUE ix86_function_value
1376 
1377 struct gcc_target targetm = TARGET_INITIALIZER;
1378 
1379 
1380 /* The svr4 ABI for the i386 says that records and unions are returned
1381    in memory.  */
1382 #ifndef DEFAULT_PCC_STRUCT_RETURN
1383 #define DEFAULT_PCC_STRUCT_RETURN 1
1384 #endif
1385 
1386 /* Implement TARGET_HANDLE_OPTION.  */
1387 
1388 static bool
1389 ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
1390 {
1391   switch (code)
1392     {
1393     case OPT_m3dnow:
1394       if (!value)
1395 	{
1396 	  target_flags &= ~MASK_3DNOW_A;
1397 	  target_flags_explicit |= MASK_3DNOW_A;
1398 	}
1399       return true;
1400 
1401     case OPT_mmmx:
1402       if (!value)
1403 	{
1404 	  target_flags &= ~(MASK_3DNOW | MASK_3DNOW_A);
1405 	  target_flags_explicit |= MASK_3DNOW | MASK_3DNOW_A;
1406 	}
1407       return true;
1408 
1409     case OPT_msse:
1410       if (!value)
1411 	{
1412 	  target_flags &= ~(MASK_SSE2 | MASK_SSE3);
1413 	  target_flags_explicit |= MASK_SSE2 | MASK_SSE3;
1414 	}
1415       return true;
1416 
1417     case OPT_msse2:
1418       if (!value)
1419 	{
1420 	  target_flags &= ~MASK_SSE3;
1421 	  target_flags_explicit |= MASK_SSE3;
1422 	}
1423       return true;
1424 
1425     default:
1426       return true;
1427     }
1428 }
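/* Illustrative note: the cases above only propagate *disabling* of a base
   ISA down to its extensions (e.g. -mno-sse also clears the SSE2 and SSE3
   masks).  The converse direction -- an extension pulling in its baseline,
   e.g. -msse3 implying SSE2 and SSE -- is handled later in override_options.  */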
1429 
1430 /* Sometimes certain combinations of command options do not make
1431    sense on a particular target machine.  You can define a macro
1432    `OVERRIDE_OPTIONS' to take account of this.  This macro, if
1433    defined, is executed once just after all the command options have
1434    been parsed.
1435 
1436    Don't use this macro to turn on various extra optimizations for
1437    `-O'.  That is what `OPTIMIZATION_OPTIONS' is for.  */
1438 
1439 void
1440 override_options (void)
1441 {
1442   int i;
1443   int ix86_tune_defaulted = 0;
1444 
1445   /* Comes from final.c -- no real reason to change it.  */
1446 #define MAX_CODE_ALIGN 16
1447 
1448   static struct ptt
1449     {
1450       const struct processor_costs *cost;	/* Processor costs */
1451       const int target_enable;			/* Target flags to enable.  */
1452       const int target_disable;			/* Target flags to disable.  */
1453       const int align_loop;			/* Default alignments.  */
1454       const int align_loop_max_skip;
1455       const int align_jump;
1456       const int align_jump_max_skip;
1457       const int align_func;
1458     }
1459   const processor_target_table[PROCESSOR_max] =
1460     {
1461       {&i386_cost, 0, 0, 4, 3, 4, 3, 4},
1462       {&i486_cost, 0, 0, 16, 15, 16, 15, 16},
1463       {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
1464       {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
1465       {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
1466       {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
1467       {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
1468       {&k8_cost, 0, 0, 16, 7, 16, 7, 16},
1469       {&nocona_cost, 0, 0, 0, 0, 0, 0, 0},
1470       {&generic32_cost, 0, 0, 16, 7, 16, 7, 16},
1471       {&generic64_cost, 0, 0, 16, 7, 16, 7, 16}
1472     };
1473 
1474   static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
1475   static struct pta
1476     {
1477       const char *const name;		/* processor name or nickname.  */
1478       const enum processor_type processor;
1479       const enum pta_flags
1480 	{
1481 	  PTA_SSE = 1,
1482 	  PTA_SSE2 = 2,
1483 	  PTA_SSE3 = 4,
1484 	  PTA_MMX = 8,
1485 	  PTA_PREFETCH_SSE = 16,
1486 	  PTA_3DNOW = 32,
1487 	  PTA_3DNOW_A = 64,
1488 	  PTA_64BIT = 128
1489 	} flags;
1490     }
1491   const processor_alias_table[] =
1492     {
1493       {"i386", PROCESSOR_I386, 0},
1494       {"i486", PROCESSOR_I486, 0},
1495       {"i586", PROCESSOR_PENTIUM, 0},
1496       {"pentium", PROCESSOR_PENTIUM, 0},
1497       {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
1498       {"winchip-c6", PROCESSOR_I486, PTA_MMX},
1499       {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1500       {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1501       {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_PREFETCH_SSE | PTA_SSE},
1502       {"i686", PROCESSOR_PENTIUMPRO, 0},
1503       {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
1504       {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
1505       {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1506       {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1507       {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE | PTA_SSE2},
1508       {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1509 				       | PTA_MMX | PTA_PREFETCH_SSE},
1510       {"pentium4m", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1511 				        | PTA_MMX | PTA_PREFETCH_SSE},
1512       {"prescott", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3
1513 				        | PTA_MMX | PTA_PREFETCH_SSE},
1514       {"nocona", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_64BIT
1515 				        | PTA_MMX | PTA_PREFETCH_SSE},
1516       {"k6", PROCESSOR_K6, PTA_MMX},
1517       {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1518       {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1519       {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1520 				   | PTA_3DNOW_A},
1521       {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
1522 					 | PTA_3DNOW | PTA_3DNOW_A},
1523       {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1524 				    | PTA_3DNOW_A | PTA_SSE},
1525       {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1526 				      | PTA_3DNOW_A | PTA_SSE},
1527       {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1528 				      | PTA_3DNOW_A | PTA_SSE},
1529       {"x86-64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_64BIT
1530 			       | PTA_SSE | PTA_SSE2 },
1531       {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1532 				      | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1533       {"opteron", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1534 				      | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1535       {"athlon64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1536 				      | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1537       {"athlon-fx", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1538 				      | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1539       {"generic32", PROCESSOR_GENERIC32, 0 /* flags are only used for -march switch.  */ },
1540       {"generic64", PROCESSOR_GENERIC64, PTA_64BIT /* flags are only used for -march switch.  */ },
1541     };
1542 
1543   int const pta_size = ARRAY_SIZE (processor_alias_table);
1544 
1545 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1546   SUBTARGET_OVERRIDE_OPTIONS;
1547 #endif
1548 
1549 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
1550   SUBSUBTARGET_OVERRIDE_OPTIONS;
1551 #endif
1552 
1553   /* -fPIC is the default for x86_64 Darwin (Mach-O); enforce it here.  */
1554   if (TARGET_MACHO && TARGET_64BIT)
1555     flag_pic = 2;
1556 
1557   /* Set the default values for switches whose default depends on TARGET_64BIT
1558      in case they weren't overwritten by command line options.  */
1559   if (TARGET_64BIT)
1560     {
1561       /* Mach-O doesn't support omitting the frame pointer for now.  */
1562       if (flag_omit_frame_pointer == 2)
1563 	flag_omit_frame_pointer = (TARGET_MACHO ? 0 : 1);
1564       if (flag_asynchronous_unwind_tables == 2)
1565 	flag_asynchronous_unwind_tables = 1;
1566       if (flag_pcc_struct_return == 2)
1567 	flag_pcc_struct_return = 0;
1568     }
1569   else
1570     {
1571       if (flag_omit_frame_pointer == 2)
1572 	flag_omit_frame_pointer = 0;
1573       if (flag_asynchronous_unwind_tables == 2)
1574 	flag_asynchronous_unwind_tables = 0;
1575       if (flag_pcc_struct_return == 2)
1576 	flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
1577     }
1578 
1579   /* Need to check -mtune=generic first.  */
1580   if (ix86_tune_string)
1581     {
1582       if (!strcmp (ix86_tune_string, "generic")
1583 	  || !strcmp (ix86_tune_string, "i686")
1584 	  /* As special support for cross compilers we read -mtune=native
1585 	     as -mtune=generic.  With native compilers we won't see the
1586 	     -mtune=native, as it was changed by the driver.  */
1587 	  || !strcmp (ix86_tune_string, "native"))
1588 	{
1589 	  if (TARGET_64BIT)
1590 	    ix86_tune_string = "generic64";
1591 	  else
1592 	    ix86_tune_string = "generic32";
1593 	}
1594       else if (!strncmp (ix86_tune_string, "generic", 7))
1595 	error ("bad value (%s) for -mtune= switch", ix86_tune_string);
1596     }
1597   else
1598     {
1599       if (ix86_arch_string)
1600 	ix86_tune_string = ix86_arch_string;
1601       if (!ix86_tune_string)
1602 	{
1603 	  ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT];
1604 	  ix86_tune_defaulted = 1;
1605 	}
1606 
1607       /* ix86_tune_string is set to ix86_arch_string or defaulted.  We
1608 	 need to use a sensible tune option.  */
1609       if (!strcmp (ix86_tune_string, "generic")
1610 	  || !strcmp (ix86_tune_string, "x86-64")
1611 	  || !strcmp (ix86_tune_string, "i686"))
1612 	{
1613 	  if (TARGET_64BIT)
1614 	    ix86_tune_string = "generic64";
1615 	  else
1616 	    ix86_tune_string = "generic32";
1617 	}
1618     }
1619   if (!strcmp (ix86_tune_string, "x86-64"))
1620     warning (OPT_Wdeprecated, "-mtune=x86-64 is deprecated.  Use -mtune=k8 or "
1621 	     "-mtune=generic instead as appropriate.");
1622 
1623   if (!ix86_arch_string)
1624     ix86_arch_string = TARGET_64BIT ? "x86-64" : "i486";
1625   if (!strcmp (ix86_arch_string, "generic"))
1626     error ("generic CPU can be used only for -mtune= switch");
1627   if (!strncmp (ix86_arch_string, "generic", 7))
1628     error ("bad value (%s) for -march= switch", ix86_arch_string);
1629 
1630   if (ix86_cmodel_string != 0)
1631     {
1632       if (!strcmp (ix86_cmodel_string, "small"))
1633 	ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1634       else if (!strcmp (ix86_cmodel_string, "medium"))
1635 	ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
1636       else if (flag_pic)
1637 	sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
1638       else if (!strcmp (ix86_cmodel_string, "32"))
1639 	ix86_cmodel = CM_32;
1640       else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
1641 	ix86_cmodel = CM_KERNEL;
1642       else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
1643 	ix86_cmodel = CM_LARGE;
1644       else
1645 	error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
1646     }
1647   else
1648     {
1649       ix86_cmodel = CM_32;
1650       if (TARGET_64BIT)
1651 	ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1652     }
1653   if (ix86_asm_string != 0)
1654     {
1655       if (! TARGET_MACHO
1656 	  && !strcmp (ix86_asm_string, "intel"))
1657 	ix86_asm_dialect = ASM_INTEL;
1658       else if (!strcmp (ix86_asm_string, "att"))
1659 	ix86_asm_dialect = ASM_ATT;
1660       else
1661 	error ("bad value (%s) for -masm= switch", ix86_asm_string);
1662     }
1663   if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
1664     error ("code model %qs not supported in the %s bit mode",
1665 	   ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
1666   if (ix86_cmodel == CM_LARGE)
1667     sorry ("code model %<large%> not supported yet");
1668   if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
1669     sorry ("%i-bit mode not compiled in",
1670 	   (target_flags & MASK_64BIT) ? 64 : 32);
1671 
1672   for (i = 0; i < pta_size; i++)
1673     if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
1674       {
1675 	ix86_arch = processor_alias_table[i].processor;
1676 	/* Default cpu tuning to the architecture.  */
1677 	ix86_tune = ix86_arch;
1678 	if (processor_alias_table[i].flags & PTA_MMX
1679 	    && !(target_flags_explicit & MASK_MMX))
1680 	  target_flags |= MASK_MMX;
1681 	if (processor_alias_table[i].flags & PTA_3DNOW
1682 	    && !(target_flags_explicit & MASK_3DNOW))
1683 	  target_flags |= MASK_3DNOW;
1684 	if (processor_alias_table[i].flags & PTA_3DNOW_A
1685 	    && !(target_flags_explicit & MASK_3DNOW_A))
1686 	  target_flags |= MASK_3DNOW_A;
1687 	if (processor_alias_table[i].flags & PTA_SSE
1688 	    && !(target_flags_explicit & MASK_SSE))
1689 	  target_flags |= MASK_SSE;
1690 	if (processor_alias_table[i].flags & PTA_SSE2
1691 	    && !(target_flags_explicit & MASK_SSE2))
1692 	  target_flags |= MASK_SSE2;
1693 	if (processor_alias_table[i].flags & PTA_SSE3
1694 	    && !(target_flags_explicit & MASK_SSE3))
1695 	  target_flags |= MASK_SSE3;
1696 	if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1697 	  x86_prefetch_sse = true;
1698 	if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1699 	  error ("CPU you selected does not support x86-64 "
1700 		 "instruction set");
1701 	break;
1702       }
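  /* Illustrative note for the loop above: e.g. -march=pentium3 selects
     PROCESSOR_PENTIUMPRO and, through its PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE
     flags, turns on MASK_MMX and MASK_SSE (unless explicitly disabled on the
     command line) and sets x86_prefetch_sse.  */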
1703 
1704   if (i == pta_size)
1705     error ("bad value (%s) for -march= switch", ix86_arch_string);
1706 
1707   for (i = 0; i < pta_size; i++)
1708     if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
1709       {
1710 	ix86_tune = processor_alias_table[i].processor;
1711 	if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1712 	  {
1713 	    if (ix86_tune_defaulted)
1714 	      {
1715 		ix86_tune_string = "x86-64";
1716 		for (i = 0; i < pta_size; i++)
1717 		  if (! strcmp (ix86_tune_string,
1718 				processor_alias_table[i].name))
1719 		    break;
1720 		ix86_tune = processor_alias_table[i].processor;
1721 	      }
1722 	    else
1723 	      error ("CPU you selected does not support x86-64 "
1724 		     "instruction set");
1725 	  }
1726         /* Intel CPUs have always interpreted SSE prefetch instructions as
1727 	   NOPs; so, we can enable SSE prefetch instructions even when
1728 	   -mtune (rather than -march) points us to a processor that has them.
1729 	   However, the VIA C3 gives a SIGILL, so we only do that for i686 and
1730 	   higher processors.  */
1731 	if (TARGET_CMOVE && (processor_alias_table[i].flags & PTA_PREFETCH_SSE))
1732 	  x86_prefetch_sse = true;
1733 	break;
1734       }
1735   if (i == pta_size)
1736     error ("bad value (%s) for -mtune= switch", ix86_tune_string);
1737 
1738   if (optimize_size)
1739     ix86_cost = &size_cost;
1740   else
1741     ix86_cost = processor_target_table[ix86_tune].cost;
1742   target_flags |= processor_target_table[ix86_tune].target_enable;
1743   target_flags &= ~processor_target_table[ix86_tune].target_disable;
1744 
1745   /* Arrange to set up i386_stack_locals for all functions.  */
1746   init_machine_status = ix86_init_machine_status;
1747 
1748   /* Validate -mregparm= value.  */
1749   if (ix86_regparm_string)
1750     {
1751       i = atoi (ix86_regparm_string);
1752       if (i < 0 || i > REGPARM_MAX)
1753 	error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
1754       else
1755 	ix86_regparm = i;
1756     }
1757   else
1758    if (TARGET_64BIT)
1759      ix86_regparm = REGPARM_MAX;
1760 
1761   /* If the user has provided any of the -malign-* options,
1762      warn and use that value only if -falign-* is not set.
1763      Remove this code in GCC 3.2 or later.  */
1764   if (ix86_align_loops_string)
1765     {
1766       warning (0, "-malign-loops is obsolete, use -falign-loops");
1767       if (align_loops == 0)
1768 	{
1769 	  i = atoi (ix86_align_loops_string);
1770 	  if (i < 0 || i > MAX_CODE_ALIGN)
1771 	    error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1772 	  else
1773 	    align_loops = 1 << i;
1774 	}
1775     }
1776 
1777   if (ix86_align_jumps_string)
1778     {
1779       warning (0, "-malign-jumps is obsolete, use -falign-jumps");
1780       if (align_jumps == 0)
1781 	{
1782 	  i = atoi (ix86_align_jumps_string);
1783 	  if (i < 0 || i > MAX_CODE_ALIGN)
1784 	    error ("-malign-jumps=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1785 	  else
1786 	    align_jumps = 1 << i;
1787 	}
1788     }
1789 
1790   if (ix86_align_funcs_string)
1791     {
1792       warning (0, "-malign-functions is obsolete, use -falign-functions");
1793       if (align_functions == 0)
1794 	{
1795 	  i = atoi (ix86_align_funcs_string);
1796 	  if (i < 0 || i > MAX_CODE_ALIGN)
1797 	    error ("-malign-functions=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1798 	  else
1799 	    align_functions = 1 << i;
1800 	}
1801     }
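  /* Illustrative note: the -malign-* values are log2 exponents, so e.g.
     -malign-loops=4 yields align_loops = 1 << 4 = 16 bytes.  */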
1802 
1803   /* Default align_* from the processor table.  */
1804   if (align_loops == 0)
1805     {
1806       align_loops = processor_target_table[ix86_tune].align_loop;
1807       align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
1808     }
1809   if (align_jumps == 0)
1810     {
1811       align_jumps = processor_target_table[ix86_tune].align_jump;
1812       align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
1813     }
1814   if (align_functions == 0)
1815     {
1816       align_functions = processor_target_table[ix86_tune].align_func;
1817     }
1818 
1819   /* Validate -mbranch-cost= value, or provide default.  */
1820   ix86_branch_cost = ix86_cost->branch_cost;
1821   if (ix86_branch_cost_string)
1822     {
1823       i = atoi (ix86_branch_cost_string);
1824       if (i < 0 || i > 5)
1825 	error ("-mbranch-cost=%d is not between 0 and 5", i);
1826       else
1827 	ix86_branch_cost = i;
1828     }
1829   if (ix86_section_threshold_string)
1830     {
1831       i = atoi (ix86_section_threshold_string);
1832       if (i < 0)
1833 	error ("-mlarge-data-threshold=%d is negative", i);
1834       else
1835 	ix86_section_threshold = i;
1836     }
1837 
1838   if (ix86_tls_dialect_string)
1839     {
1840       if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
1841 	ix86_tls_dialect = TLS_DIALECT_GNU;
1842       else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
1843 	ix86_tls_dialect = TLS_DIALECT_GNU2;
1844       else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
1845 	ix86_tls_dialect = TLS_DIALECT_SUN;
1846       else
1847 	error ("bad value (%s) for -mtls-dialect= switch",
1848 	       ix86_tls_dialect_string);
1849     }
1850 
1851   /* Keep nonleaf frame pointers.  */
1852   if (flag_omit_frame_pointer)
1853     target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
1854   else if (TARGET_OMIT_LEAF_FRAME_POINTER)
1855     flag_omit_frame_pointer = 1;
1856 
1857   /* If we're doing fast math, we don't care about comparison order
1858      wrt NaNs.  This lets us use a shorter comparison sequence.  */
1859   if (flag_finite_math_only)
1860     target_flags &= ~MASK_IEEE_FP;
1861 
1862   /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
1863      since the insns won't need emulation.  */
1864   if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
1865     target_flags &= ~MASK_NO_FANCY_MATH_387;
1866 
1867   /* Likewise, if the target doesn't have a 387, or we've specified
1868      software floating point, don't use 387 inline intrinsics.  */
1869   if (!TARGET_80387)
1870     target_flags |= MASK_NO_FANCY_MATH_387;
1871 
1872   /* Turn on SSE2 builtins for -msse3.  */
1873   if (TARGET_SSE3)
1874     target_flags |= MASK_SSE2;
1875 
1876   /* Turn on SSE builtins for -msse2.  */
1877   if (TARGET_SSE2)
1878     target_flags |= MASK_SSE;
1879 
1880   /* Turn on MMX builtins for -msse.  */
1881   if (TARGET_SSE)
1882     {
1883       target_flags |= MASK_MMX & ~target_flags_explicit;
1884       x86_prefetch_sse = true;
1885     }
1886 
1887   /* Turn on MMX builtins for 3Dnow.  */
1888   if (TARGET_3DNOW)
1889     target_flags |= MASK_MMX;
1890 
1891   if (TARGET_64BIT)
1892     {
1893       if (TARGET_ALIGN_DOUBLE)
1894 	error ("-malign-double makes no sense in the 64bit mode");
1895       if (TARGET_RTD)
1896 	error ("-mrtd calling convention not supported in the 64bit mode");
1897 
1898       /* Enable by default the SSE and MMX builtins.  Do allow the user to
1899 	 explicitly disable any of these.  In particular, disabling SSE and
1900 	 MMX for kernel code is extremely useful.  */
1901       target_flags
1902 	|= ((MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE)
1903 	    & ~target_flags_explicit);
1904      }
1905   else
1906     {
1907       if (TARGET_SAVE_ARGS)
1908         error ("-msave-args makes no sense in the 32-bit mode");
1909       /* The i386 ABI does not specify a red zone.  It still makes sense to use
1910          it when the programmer takes care to keep the stack from being destroyed.  */
1911       if (!(target_flags_explicit & MASK_NO_RED_ZONE))
1912         target_flags |= MASK_NO_RED_ZONE;
1913     }
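  /* Illustrative note: the x86-64 ABI provides a 128-byte red zone below the
     stack pointer that leaf functions may use without adjusting %rsp; the
     i386 ABI has no such area, which is why MASK_NO_RED_ZONE is forced on
     above unless the user explicitly said otherwise.  */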
1914 
1915   /* Validate -mpreferred-stack-boundary= value, or provide default.
1916      The default of 128 bits is for Pentium III's SSE __m128.  We can't base
1917      the default on optimize_size, since otherwise we could not mix object
1918      files compiled with -Os and -On.  */
1919   ix86_preferred_stack_boundary = 128;
1920   if (ix86_preferred_stack_boundary_string)
1921     {
1922       i = atoi (ix86_preferred_stack_boundary_string);
1923       if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
1924 	error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
1925 	       TARGET_64BIT ? 4 : 2);
1926       else
1927 	ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
1928     }
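  /* Illustrative note: the -mpreferred-stack-boundary= value is a log2
     exponent in bytes, so e.g. -mpreferred-stack-boundary=4 gives
     (1 << 4) * BITS_PER_UNIT = 128 bits, i.e. 16-byte stack alignment.  */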
1929 
1930   /* Accept -msseregparm only if at least SSE support is enabled.  */
1931   if (TARGET_SSEREGPARM
1932       && ! TARGET_SSE)
1933     error ("-msseregparm used without SSE enabled");
1934 
1935   ix86_fpmath = TARGET_FPMATH_DEFAULT;
1936 
1937   if (ix86_fpmath_string != 0)
1938     {
1939       if (! strcmp (ix86_fpmath_string, "387"))
1940 	ix86_fpmath = FPMATH_387;
1941       else if (! strcmp (ix86_fpmath_string, "sse"))
1942 	{
1943 	  if (!TARGET_SSE)
1944 	    {
1945 	      warning (0, "SSE instruction set disabled, using 387 arithmetics");
1946 	      ix86_fpmath = FPMATH_387;
1947 	    }
1948 	  else
1949 	    ix86_fpmath = FPMATH_SSE;
1950 	}
1951       else if (! strcmp (ix86_fpmath_string, "387,sse")
1952 	       || ! strcmp (ix86_fpmath_string, "sse,387"))
1953 	{
1954 	  if (!TARGET_SSE)
1955 	    {
1956 	      warning (0, "SSE instruction set disabled, using 387 arithmetics");
1957 	      ix86_fpmath = FPMATH_387;
1958 	    }
1959 	  else if (!TARGET_80387)
1960 	    {
1961 	      warning (0, "387 instruction set disabled, using SSE arithmetics");
1962 	      ix86_fpmath = FPMATH_SSE;
1963 	    }
1964 	  else
1965 	    ix86_fpmath = FPMATH_SSE | FPMATH_387;
1966 	}
1967       else
1968 	error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
1969     }
1970 
1971   /* If the i387 is disabled, then do not return values in it. */
1972   if (!TARGET_80387)
1973     target_flags &= ~MASK_FLOAT_RETURNS;
1974 
1975   if ((x86_accumulate_outgoing_args & TUNEMASK)
1976       && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
1977       && !optimize_size)
1978     target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
1979 
1980   /* ??? Unwind info is not correct around the CFG unless either a frame
1981      pointer is present or M_A_O_A is set.  Fixing this requires rewriting
1982      unwind info generation to be aware of the CFG and propagating states
1983      around edges.  */
1984   if ((flag_unwind_tables || flag_asynchronous_unwind_tables
1985        || flag_exceptions || flag_non_call_exceptions)
1986       && flag_omit_frame_pointer
1987       && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
1988     {
1989       if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
1990 	warning (0, "unwind tables currently require either a frame pointer "
1991 		 "or -maccumulate-outgoing-args for correctness");
1992       target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
1993     }
1994 
1995   /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix.  */
1996   {
1997     char *p;
1998     ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
1999     p = strchr (internal_label_prefix, 'X');
2000     internal_label_prefix_len = p - internal_label_prefix;
2001     *p = '\0';
2002   }
2003 
2004   /* When no scheduling description is available, disable the scheduler pass
2005      so it won't slow down compilation and make x87 code slower.  */
2006   if (!TARGET_SCHEDULE)
2007     flag_schedule_insns_after_reload = flag_schedule_insns = 0;
2008 }
2009 
2010 /* Switch to the appropriate section for output of DECL.
2011    DECL is either a `VAR_DECL' node or a constant of some sort.
2012    RELOC indicates whether forming the initial value of DECL requires
2013    link-time relocations.  */
2014 
2015 static section *
2016 x86_64_elf_select_section (tree decl, int reloc,
2017 			   unsigned HOST_WIDE_INT align)
2018 {
2019   if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2020       && ix86_in_large_data_p (decl))
2021     {
2022       const char *sname = NULL;
2023       unsigned int flags = SECTION_WRITE;
2024       switch (categorize_decl_for_section (decl, reloc))
2025 	{
2026 	case SECCAT_DATA:
2027 	  sname = ".ldata";
2028 	  break;
2029 	case SECCAT_DATA_REL:
2030 	  sname = ".ldata.rel";
2031 	  break;
2032 	case SECCAT_DATA_REL_LOCAL:
2033 	  sname = ".ldata.rel.local";
2034 	  break;
2035 	case SECCAT_DATA_REL_RO:
2036 	  sname = ".ldata.rel.ro";
2037 	  break;
2038 	case SECCAT_DATA_REL_RO_LOCAL:
2039 	  sname = ".ldata.rel.ro.local";
2040 	  break;
2041 	case SECCAT_BSS:
2042 	  sname = ".lbss";
2043 	  flags |= SECTION_BSS;
2044 	  break;
2045 	case SECCAT_RODATA:
2046 	case SECCAT_RODATA_MERGE_STR:
2047 	case SECCAT_RODATA_MERGE_STR_INIT:
2048 	case SECCAT_RODATA_MERGE_CONST:
2049 	  sname = ".lrodata";
2050 	  flags = 0;
2051 	  break;
2052 	case SECCAT_SRODATA:
2053 	case SECCAT_SDATA:
2054 	case SECCAT_SBSS:
2055 	  gcc_unreachable ();
2056 	case SECCAT_TEXT:
2057 	case SECCAT_TDATA:
2058 	case SECCAT_TBSS:
2059 	  /* We don't split these for the medium model.  Place them into
2060 	     the default sections and hope for the best.  */
2061 	  break;
2062 	}
2063       if (sname)
2064 	{
2065 	  /* We might get called with string constants, but get_named_section
2066 	     doesn't like them as they are not DECLs.  Also, we need to set
2067 	     flags in that case.  */
2068 	  if (!DECL_P (decl))
2069 	    return get_section (sname, flags, NULL);
2070 	  return get_named_section (decl, sname, reloc);
2071 	}
2072     }
2073   return default_elf_select_section (decl, reloc, align);
2074 }
2075 
2076 /* Build up a unique section name, expressed as a
2077    STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
2078    RELOC indicates whether the initial value of DECL requires
2079    link-time relocations.  */
2080 
2081 static void
2082 x86_64_elf_unique_section (tree decl, int reloc)
2083 {
2084   if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2085       && ix86_in_large_data_p (decl))
2086     {
2087       const char *prefix = NULL;
2088       /* We only need to use .gnu.linkonce if we don't have COMDAT groups.  */
2089       bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
2090 
2091       switch (categorize_decl_for_section (decl, reloc))
2092 	{
2093 	case SECCAT_DATA:
2094 	case SECCAT_DATA_REL:
2095 	case SECCAT_DATA_REL_LOCAL:
2096 	case SECCAT_DATA_REL_RO:
2097 	case SECCAT_DATA_REL_RO_LOCAL:
2098           prefix = one_only ? ".gnu.linkonce.ld." : ".ldata.";
2099 	  break;
2100 	case SECCAT_BSS:
2101           prefix = one_only ? ".gnu.linkonce.lb." : ".lbss.";
2102 	  break;
2103 	case SECCAT_RODATA:
2104 	case SECCAT_RODATA_MERGE_STR:
2105 	case SECCAT_RODATA_MERGE_STR_INIT:
2106 	case SECCAT_RODATA_MERGE_CONST:
2107           prefix = one_only ? ".gnu.linkonce.lr." : ".lrodata.";
2108 	  break;
2109 	case SECCAT_SRODATA:
2110 	case SECCAT_SDATA:
2111 	case SECCAT_SBSS:
2112 	  gcc_unreachable ();
2113 	case SECCAT_TEXT:
2114 	case SECCAT_TDATA:
2115 	case SECCAT_TBSS:
2116 	  /* We don't split these for the medium model.  Place them into
2117 	     the default sections and hope for the best.  */
2118 	  break;
2119 	}
2120       if (prefix)
2121 	{
2122 	  const char *name;
2123 	  size_t nlen, plen;
2124 	  char *string;
2125 	  plen = strlen (prefix);
2126 
2127 	  name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
2128 	  name = targetm.strip_name_encoding (name);
2129 	  nlen = strlen (name);
2130 
2131 	  string = alloca (nlen + plen + 1);
2132 	  memcpy (string, prefix, plen);
2133 	  memcpy (string + plen, name, nlen + 1);
2134 
2135 	  DECL_SECTION_NAME (decl) = build_string (nlen + plen, string);
2136 	  return;
2137 	}
2138     }
2139   default_unique_section (decl, reloc);
2140 }
2141 
2142 #ifdef COMMON_ASM_OP
2143 /* This says how to output assembler code to declare an
2144    uninitialized external linkage data object.
2145 
2146    For medium model x86-64 we need to use .largecomm opcode for
2147    For the medium model on x86-64 we need to use the .largecomm directive for
2148 void
2149 x86_elf_aligned_common (FILE *file,
2150 			const char *name, unsigned HOST_WIDE_INT size,
2151 			int align)
2152 {
2153   if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2154       && size > (unsigned int)ix86_section_threshold)
2155     fprintf (file, ".largecomm\t");
2156   else
2157     fprintf (file, "%s", COMMON_ASM_OP);
2158   assemble_name (file, name);
2159   fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
2160 	   size, align / BITS_PER_UNIT);
2161 }
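/* Illustrative note: for an object `buf' of 4096 bytes aligned to 32 bytes,
   compiled with -mcmodel=medium and -mlarge-data-threshold below 4096, the
   function above emits something like

       .largecomm	buf,4096,32

   while smaller objects fall back to the usual COMMON_ASM_OP (.comm).  */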
2162 
2163 /* Utility function for targets to use in implementing
2164    ASM_OUTPUT_ALIGNED_BSS.  */
2165 
2166 void
2167 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
2168 			const char *name, unsigned HOST_WIDE_INT size,
2169 			int align)
2170 {
2171   if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2172       && size > (unsigned int)ix86_section_threshold)
2173     switch_to_section (get_named_section (decl, ".lbss", 0));
2174   else
2175     switch_to_section (bss_section);
2176   ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
2177 #ifdef ASM_DECLARE_OBJECT_NAME
2178   last_assemble_variable_decl = decl;
2179   ASM_DECLARE_OBJECT_NAME (file, name, decl);
2180 #else
2181   /* Standard thing is just output label for the object.  */
2182   ASM_OUTPUT_LABEL (file, name);
2183 #endif /* ASM_DECLARE_OBJECT_NAME */
2184   ASM_OUTPUT_SKIP (file, size ? size : 1);
2185 }
2186 #endif
2187 
2188 void
2189 optimization_options (int level, int size ATTRIBUTE_UNUSED)
2190 {
2191   /* For -O2 and beyond, turn off -fschedule-insns by default.  It tends to
2192      make the problem with not enough registers even worse.  */
2193 #ifdef INSN_SCHEDULING
2194   if (level > 1)
2195     flag_schedule_insns = 0;
2196 #endif
2197 
2198   if (TARGET_MACHO)
2199     /* The Darwin libraries never set errno, so we might as well
2200        avoid calling them when that's the only reason we would.  */
2201     flag_errno_math = 0;
2202 
2203   /* The default values of these switches depend on TARGET_64BIT, which is
2204      not known at this moment.  Mark these values with 2 and let the user
2205      override them.  If there is no command line option specifying them,
2206      we will set the defaults in override_options.  */
2207   if (optimize >= 1)
2208     flag_omit_frame_pointer = 2;
2209   flag_pcc_struct_return = 2;
2210   flag_asynchronous_unwind_tables = 2;
2211 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
2212   SUBTARGET_OPTIMIZATION_OPTIONS;
2213 #endif
2214 }
2215 
2216 /* Table of valid machine attributes.  */
2217 const struct attribute_spec ix86_attribute_table[] =
2218 {
2219   /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
2220   /* Stdcall attribute says callee is responsible for popping arguments
2221      if they are not variable.  */
2222   { "stdcall",   0, 0, false, true,  true,  ix86_handle_cconv_attribute },
2223   /* Fastcall attribute says callee is responsible for popping arguments
2224      if they are not variable.  */
2225   { "fastcall",  0, 0, false, true,  true,  ix86_handle_cconv_attribute },
2226   /* Cdecl attribute says the callee is a normal C declaration */
2227   { "cdecl",     0, 0, false, true,  true,  ix86_handle_cconv_attribute },
2228   /* Regparm attribute specifies how many integer arguments are to be
2229      passed in registers.  */
2230   { "regparm",   1, 1, false, true,  true,  ix86_handle_cconv_attribute },
2231   /* Sseregparm attribute says we are using x86_64 calling conventions
2232      for FP arguments.  */
2233   { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2234   /* force_align_arg_pointer says this function realigns the stack at entry.  */
2235   { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
2236     false, true,  true, ix86_handle_cconv_attribute },
2237 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
2238   { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
2239   { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
2240   { "shared",    0, 0, true,  false, false, ix86_handle_shared_attribute },
2241 #endif
2242   { "ms_struct", 0, 0, false, false,  false, ix86_handle_struct_attribute },
2243   { "gcc_struct", 0, 0, false, false,  false, ix86_handle_struct_attribute },
2244 #ifdef SUBTARGET_ATTRIBUTE_TABLE
2245   SUBTARGET_ATTRIBUTE_TABLE,
2246 #endif
2247   { NULL,        0, 0, false, false, false, NULL }
2248 };
2249 
2250 /* Decide whether we can make a sibling call to a function.  DECL is the
2251    declaration of the function being targeted by the call and EXP is the
2252    CALL_EXPR representing the call.  */
2253 
2254 static bool
2255 ix86_function_ok_for_sibcall (tree decl, tree exp)
2256 {
2257   tree func;
2258   rtx a, b;
2259 
2260   /* If we are generating position-independent code, we cannot sibcall
2261      optimize any indirect call, or a direct call to a global function,
2262      as the PLT requires %ebx be live.  */
2263   if (!TARGET_64BIT && flag_pic && (!decl || !targetm.binds_local_p (decl)))
2264     return false;
2265 
2266   if (decl)
2267     func = decl;
2268   else
2269     {
2270       func = TREE_TYPE (TREE_OPERAND (exp, 0));
2271       if (POINTER_TYPE_P (func))
2272         func = TREE_TYPE (func);
2273     }
2274 
2275   /* Check that the return value locations are the same.  For example,
2276      if we are returning floats on the 80387 register stack, we cannot
2277      make a sibcall from a function that doesn't return a float to a
2278      function that does or, conversely, from a function that does return
2279      a float to a function that doesn't; the necessary stack adjustment
2280      would not be executed.  This is also the place we notice
2281      differences in the return value ABI.  Note that it is ok for one
2282      of the functions to have void return type as long as the return
2283      value of the other is passed in a register.  */
2284   a = ix86_function_value (TREE_TYPE (exp), func, false);
2285   b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
2286 			   cfun->decl, false);
2287   if (STACK_REG_P (a) || STACK_REG_P (b))
2288     {
2289       if (!rtx_equal_p (a, b))
2290 	return false;
2291     }
2292   else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
2293     ;
2294   else if (!rtx_equal_p (a, b))
2295     return false;
2296 
2297   /* If this call is indirect, we'll need to be able to use a call-clobbered
2298      register for the address of the target function.  Make sure that all
2299      such registers are not used for passing parameters.  */
2300   if (!decl && !TARGET_64BIT)
2301     {
2302       tree type;
2303 
2304       /* We're looking at the CALL_EXPR, we need the type of the function.  */
2305       type = TREE_OPERAND (exp, 0);		/* pointer expression */
2306       type = TREE_TYPE (type);			/* pointer type */
2307       type = TREE_TYPE (type);			/* function type */
2308 
2309       if (ix86_function_regparm (type, NULL) >= 3)
2310 	{
2311 	  /* ??? Need to count the actual number of registers to be used,
2312 	     not the possible number of registers.  Fix later.  */
2313 	  return false;
2314 	}
2315     }
2316 
2317 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
2318   /* Dllimport'd functions are also called indirectly.  */
2319   if (decl && DECL_DLLIMPORT_P (decl)
2320       && ix86_function_regparm (TREE_TYPE (decl), NULL) >= 3)
2321     return false;
2322 #endif
2323 
2324   /* If we force-aligned the stack, then sibcalling would unalign the
2325      stack, which may break the called function.  */
2326   if (cfun->machine->force_align_arg_pointer)
2327     return false;
2328 
2329   /* Otherwise okay.  That also includes certain types of indirect calls.  */
2330   return true;
2331 }
2332 
2333 /* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm"
2334    calling convention attributes;
2335    arguments as in struct attribute_spec.handler.  */
2336 
2337 static tree
2338 ix86_handle_cconv_attribute (tree *node, tree name,
2339 				   tree args,
2340 				   int flags ATTRIBUTE_UNUSED,
2341 				   bool *no_add_attrs)
2342 {
2343   if (TREE_CODE (*node) != FUNCTION_TYPE
2344       && TREE_CODE (*node) != METHOD_TYPE
2345       && TREE_CODE (*node) != FIELD_DECL
2346       && TREE_CODE (*node) != TYPE_DECL)
2347     {
2348       warning (OPT_Wattributes, "%qs attribute only applies to functions",
2349 	       IDENTIFIER_POINTER (name));
2350       *no_add_attrs = true;
2351       return NULL_TREE;
2352     }
2353 
2354   /* Can combine regparm with all attributes but fastcall.  */
2355   if (is_attribute_p ("regparm", name))
2356     {
2357       tree cst;
2358 
2359       if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2360         {
2361 	  error ("fastcall and regparm attributes are not compatible");
2362 	}
2363 
2364       cst = TREE_VALUE (args);
2365       if (TREE_CODE (cst) != INTEGER_CST)
2366 	{
2367 	  warning (OPT_Wattributes,
2368 		   "%qs attribute requires an integer constant argument",
2369 		   IDENTIFIER_POINTER (name));
2370 	  *no_add_attrs = true;
2371 	}
2372       else if (compare_tree_int (cst, REGPARM_MAX) > 0)
2373 	{
2374 	  warning (OPT_Wattributes, "argument to %qs attribute larger than %d",
2375 		   IDENTIFIER_POINTER (name), REGPARM_MAX);
2376 	  *no_add_attrs = true;
2377 	}
2378 
2379       if (!TARGET_64BIT
2380 	  && lookup_attribute (ix86_force_align_arg_pointer_string,
2381 			       TYPE_ATTRIBUTES (*node))
2382 	  && compare_tree_int (cst, REGPARM_MAX-1) > 0)
2383 	{
2384 	  error ("%s functions limited to %d register parameters",
2385 		 ix86_force_align_arg_pointer_string, REGPARM_MAX-1);
2386 	}
2387 
2388       return NULL_TREE;
2389     }
2390 
2391   if (TARGET_64BIT)
2392     {
2393       warning (OPT_Wattributes, "%qs attribute ignored",
2394 	       IDENTIFIER_POINTER (name));
2395       *no_add_attrs = true;
2396       return NULL_TREE;
2397     }
2398 
2399   /* Can combine fastcall with stdcall (redundant) and sseregparm.  */
2400   if (is_attribute_p ("fastcall", name))
2401     {
2402       if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
2403         {
2404 	  error ("fastcall and cdecl attributes are not compatible");
2405 	}
2406       if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
2407         {
2408 	  error ("fastcall and stdcall attributes are not compatible");
2409 	}
2410       if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
2411         {
2412 	  error ("fastcall and regparm attributes are not compatible");
2413 	}
2414     }
2415 
2416   /* Can combine stdcall with fastcall (redundant), regparm and
2417      sseregparm.  */
2418   else if (is_attribute_p ("stdcall", name))
2419     {
2420       if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
2421         {
2422 	  error ("stdcall and cdecl attributes are not compatible");
2423 	}
2424       if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2425         {
2426 	  error ("stdcall and fastcall attributes are not compatible");
2427 	}
2428     }
2429 
2430   /* Can combine cdecl with regparm and sseregparm.  */
2431   else if (is_attribute_p ("cdecl", name))
2432     {
2433       if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
2434         {
2435 	  error ("stdcall and cdecl attributes are not compatible");
2436 	}
2437       if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2438         {
2439 	  error ("fastcall and cdecl attributes are not compatible");
2440 	}
2441     }
2442 
2443   /* Can combine sseregparm with all attributes.  */
2444 
2445   return NULL_TREE;
2446 }
2447 
2448 /* Return 0 if the attributes for two types are incompatible, 1 if they
2449    are compatible, and 2 if they are nearly compatible (which causes a
2450    warning to be generated).  */
2451 
2452 static int
2453 ix86_comp_type_attributes (tree type1, tree type2)
2454 {
2455   /* Check for mismatch of non-default calling convention.  */
2456   const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
2457 
2458   if (TREE_CODE (type1) != FUNCTION_TYPE)
2459     return 1;
2460 
2461   /* Check for mismatched fastcall/regparm types.  */
2462   if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
2463        != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
2464       || (ix86_function_regparm (type1, NULL)
2465 	  != ix86_function_regparm (type2, NULL)))
2466     return 0;
2467 
2468   /* Check for mismatched sseregparm types.  */
2469   if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
2470       != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
2471     return 0;
2472 
2473   /* Check for mismatched return types (cdecl vs stdcall).  */
2474   if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
2475       != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
2476     return 0;
2477 
2478   return 1;
2479 }
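/* Illustrative note: with the default -mno-rtd, the last check above treats
   a stdcall-attributed function type and a plain one as incompatible
   (returning 0), so mixing callee-pop and caller-pop conventions through an
   unattributed function pointer is flagged rather than silently accepted.  */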
2480 
2481 /* Return the regparm value for a function with the indicated TYPE and DECL.
2482    DECL may be NULL when calling a function indirectly
2483    or considering a libcall.  */
2484 
2485 static int
2486 ix86_function_regparm (tree type, tree decl)
2487 {
2488   tree attr;
2489   int regparm = ix86_regparm;
2490   bool user_convention = false;
2491 
2492   if (!TARGET_64BIT)
2493     {
2494       attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
2495       if (attr)
2496 	{
2497 	  regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
2498 	  user_convention = true;
2499 	}
2500 
2501       if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
2502 	{
2503 	  regparm = 2;
2504 	  user_convention = true;
2505 	}
2506 
2507       /* Use register calling convention for local functions when possible.  */
2508       if (!TARGET_64BIT && !user_convention && decl
2509 	  && flag_unit_at_a_time && !profile_flag)
2510 	{
2511 	  struct cgraph_local_info *i = cgraph_local_info (decl);
2512 	  if (i && i->local)
2513 	    {
2514 	      int local_regparm, globals = 0, regno;
2515 
2516 	      /* Make sure no regparm register is taken by a global register
2517 		 variable.  */
2518 	      for (local_regparm = 0; local_regparm < 3; local_regparm++)
2519 		if (global_regs[local_regparm])
2520 		  break;
2521 	      /* We can't use regparm(3) for nested functions as these use
2522 		 the static chain pointer in the third argument.  */
2523 	      if (local_regparm == 3
2524 		  && decl_function_context (decl)
2525 		  && !DECL_NO_STATIC_CHAIN (decl))
2526 		local_regparm = 2;
2527 	      /* If the function realigns its stack pointer, the
2528 		 prologue will clobber %ecx.  If we've already
2529 		 generated code for the callee, the callee
2530 		 DECL_STRUCT_FUNCTION is gone, so we fall back to
2531 		 scanning the attributes for the self-realigning
2532 		 property.  */
2533 	      if ((DECL_STRUCT_FUNCTION (decl)
2534 		   && DECL_STRUCT_FUNCTION (decl)->machine->force_align_arg_pointer)
2535 		  || (!DECL_STRUCT_FUNCTION (decl)
2536 		      && lookup_attribute (ix86_force_align_arg_pointer_string,
2537 					   TYPE_ATTRIBUTES (TREE_TYPE (decl)))))
2538 		local_regparm = 2;
2539 	      /* Each global register variable increases register pressure,
2540 		 so the more global register variables there are, the less useful
2541 		 the regparm optimization is, unless explicitly requested by the user.  */
2542 	      for (regno = 0; regno < 6; regno++)
2543 		if (global_regs[regno])
2544 		  globals++;
2545 	      local_regparm
2546 		= globals < local_regparm ? local_regparm - globals : 0;
2547 
2548 	      if (local_regparm > regparm)
2549 		regparm = local_regparm;
2550 	    }
2551 	}
2552     }
2553   return regparm;
2554 }
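/* Illustrative note: on 32-bit targets regparm arguments are passed in
   %eax, %edx and %ecx, in that order, so e.g.

       int __attribute__((regparm(3))) f (int a, int b, int c);

   receives a in %eax, b in %edx and c in %ecx; fastcall (forced to
   regparm 2 above) uses %ecx and %edx instead.  */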
2555 
2556 /* Return 1 or 2 if we can pass up to SSE_REGPARM_MAX SFmode (1) or both
2557    SFmode and DFmode (2) arguments in SSE registers for a function with the
2558    indicated TYPE and DECL.  DECL may be NULL when calling a function
2559    indirectly or considering a libcall.  Otherwise return 0.  */
2560 
2561 static int
2562 ix86_function_sseregparm (tree type, tree decl)
2563 {
2564   /* Use SSE registers to pass SFmode and DFmode arguments if requested
2565      by the sseregparm attribute.  */
2566   if (TARGET_SSEREGPARM
2567       || (type
2568 	  && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
2569     {
2570       if (!TARGET_SSE)
2571 	{
2572 	  if (decl)
2573 	    error ("Calling %qD with attribute sseregparm without "
2574 		   "SSE/SSE2 enabled", decl);
2575 	  else
2576 	    error ("Calling %qT with attribute sseregparm without "
2577 		   "SSE/SSE2 enabled", type);
2578 	  return 0;
2579 	}
2580 
2581       return 2;
2582     }
2583 
2584   /* For local functions, pass up to SSE_REGPARM_MAX SFmode
2585      (and DFmode for SSE2) arguments in SSE registers,
2586      even for 32-bit targets.  */
2587   if (!TARGET_64BIT && decl
2588       && TARGET_SSE_MATH && flag_unit_at_a_time && !profile_flag)
2589     {
2590       struct cgraph_local_info *i = cgraph_local_info (decl);
2591       if (i && i->local)
2592 	return TARGET_SSE2 ? 2 : 1;
2593     }
2594 
2595   return 0;
2596 }
2597 
2598 /* Return true if EAX is live at the start of the function.  Used by
2599    ix86_expand_prologue to determine if we need special help before
2600    calling allocate_stack_worker.  */
2601 
2602 static bool
2603 ix86_eax_live_at_start_p (void)
2604 {
2605   /* Cheat.  Don't bother working forward from ix86_function_regparm
2606      to the function type to determine whether an actual argument is located in
2607      eax.  Instead just look at cfg info, which is still close enough
2608      to correct at this point.  This gives false positives for broken
2609      functions that might use uninitialized data that happens to be
2610      allocated in eax, but who cares?  */
2611   return REGNO_REG_SET_P (ENTRY_BLOCK_PTR->il.rtl->global_live_at_end, 0);
2612 }
2613 
2614 /* Value is the number of bytes of arguments automatically
2615    popped when returning from a subroutine call.
2616    FUNDECL is the declaration node of the function (as a tree),
2617    FUNTYPE is the data type of the function (as a tree),
2618    or for a library call it is an identifier node for the subroutine name.
2619    SIZE is the number of bytes of arguments passed on the stack.
2620 
2621    On the 80386, the RTD insn may be used to pop them if the number
2622      of args is fixed, but if the number is variable then the caller
2623      must pop them all.  RTD can't be used for library calls now
2624      because the library is compiled with the Unix compiler.
2625    Use of RTD is a selectable option, since it is incompatible with
2626    standard Unix calling sequences.  If the option is not selected,
2627    the caller must always pop the args.
2628 
2629    The attribute stdcall is equivalent to RTD on a per module basis.  */
2630 
2631 int
2632 ix86_return_pops_args (tree fundecl, tree funtype, int size)
2633 {
2634   int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
2635 
2636   /* Cdecl functions override -mrtd, and never pop the stack.  */
2637   if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
2638 
2639     /* Stdcall and fastcall functions will pop the stack if not
2640        variable args.  */
2641     if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
2642         || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
2643       rtd = 1;
2644 
2645     if (rtd
2646         && (TYPE_ARG_TYPES (funtype) == NULL_TREE
2647 	    || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
2648 		== void_type_node)))
2649       return size;
2650   }
2651 
2652   /* Lose any fake structure return argument if it is passed on the stack.  */
2653   if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
2654       && !TARGET_64BIT
2655       && !KEEP_AGGREGATE_RETURN_POINTER)
2656     {
2657       int nregs = ix86_function_regparm (funtype, fundecl);
2658 
2659       if (!nregs)
2660 	return GET_MODE_SIZE (Pmode);
2661     }
2662 
2663   return 0;
2664 }
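/* Illustrative note: a non-variadic stdcall function taking two ints has
   8 bytes of stack arguments, so the function above returns 8 and the
   callee's epilogue pops them (e.g. `ret $8'); a plain cdecl or variadic
   function returns 0 and the caller pops the arguments instead.  */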
2665 
2666 /* Argument support functions.  */
2667 
2668 /* Return true when register may be used to pass function parameters.  */
2669 bool
2670 ix86_function_arg_regno_p (int regno)
2671 {
2672   int i;
2673   if (!TARGET_64BIT)
2674     {
2675       if (TARGET_MACHO)
2676         return (regno < REGPARM_MAX
2677                 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
2678       else
2679         return (regno < REGPARM_MAX
2680 	        || (TARGET_MMX && MMX_REGNO_P (regno)
2681 	  	    && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
2682 	        || (TARGET_SSE && SSE_REGNO_P (regno)
2683 		    && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
2684     }
2685 
2686   if (TARGET_MACHO)
2687     {
2688       if (SSE_REGNO_P (regno) && TARGET_SSE)
2689         return true;
2690     }
2691   else
2692     {
2693       if (TARGET_SSE && SSE_REGNO_P (regno)
2694           && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
2695         return true;
2696     }
2697   /* RAX is used as hidden argument to va_arg functions.  */
2698   if (!regno)
2699     return true;
2700   for (i = 0; i < REGPARM_MAX; i++)
2701     if (regno == x86_64_int_parameter_registers[i])
2702       return true;
2703   return false;
2704 }
2705 
2706 /* Return true if we do not know how to pass TYPE solely in registers.  */
2707 
2708 static bool
2709 ix86_must_pass_in_stack (enum machine_mode mode, tree type)
2710 {
2711   if (must_pass_in_stack_var_size_or_pad (mode, type))
2712     return true;
2713 
2714   /* For 32-bit, we want TImode aggregates to go on the stack.  But watch out!
2715      The layout_type routine is crafty and tries to trick us into passing
2716      currently unsupported vector types on the stack by using TImode.  */
2717   return (!TARGET_64BIT && mode == TImode
2718 	  && type && TREE_CODE (type) != VECTOR_TYPE);
2719 }
2720 
2721 /* Initialize a variable CUM of type CUMULATIVE_ARGS
2722    for a call to a function whose data type is FNTYPE.
2723    For a library call, FNTYPE is 0.  */
2724 
2725 void
2726 init_cumulative_args (CUMULATIVE_ARGS *cum,  /* Argument info to initialize */
2727 		      tree fntype,	/* tree ptr for function decl */
2728 		      rtx libname,	/* SYMBOL_REF of library name or 0 */
2729 		      tree fndecl)
2730 {
2731   static CUMULATIVE_ARGS zero_cum;
2732   tree param, next_param;
2733 
2734   if (TARGET_DEBUG_ARG)
2735     {
2736       fprintf (stderr, "\ninit_cumulative_args (");
2737       if (fntype)
2738 	fprintf (stderr, "fntype code = %s, ret code = %s",
2739 		 tree_code_name[(int) TREE_CODE (fntype)],
2740 		 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
2741       else
2742 	fprintf (stderr, "no fntype");
2743 
2744       if (libname)
2745 	fprintf (stderr, ", libname = %s", XSTR (libname, 0));
2746     }
2747 
2748   *cum = zero_cum;
2749 
2750   /* Set up the number of registers to use for passing arguments.  */
2751   cum->nregs = ix86_regparm;
2752   if (TARGET_SSE)
2753     cum->sse_nregs = SSE_REGPARM_MAX;
2754   if (TARGET_MMX)
2755     cum->mmx_nregs = MMX_REGPARM_MAX;
2756   cum->warn_sse = true;
2757   cum->warn_mmx = true;
2758   cum->maybe_vaarg = false;
2759 
2760   /* Use the ecx and edx registers if the function has the fastcall attribute;
2761      otherwise look for regparm information.  */
2762   if (fntype && !TARGET_64BIT)
2763     {
2764       if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
2765 	{
2766 	  cum->nregs = 2;
2767 	  cum->fastcall = 1;
2768 	}
2769       else
2770 	cum->nregs = ix86_function_regparm (fntype, fndecl);
2771     }
2772 
2773   /* Set up the number of SSE registers used for passing SFmode
2774      and DFmode arguments.  Warn for mismatching ABI.  */
2775   cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl);
2776 
2777   /* Determine whether this function has variable arguments.  The absence of
2778      variable arguments is indicated by the last argument type being
2779      'void_type_node'.  If there are variable arguments, then we won't pass
2780      anything in registers in 32-bit mode. */
2781 
2782   if (cum->nregs || cum->mmx_nregs || cum->sse_nregs)
2783     {
2784       for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
2785 	   param != 0; param = next_param)
2786 	{
2787 	  next_param = TREE_CHAIN (param);
2788 	  if (next_param == 0 && TREE_VALUE (param) != void_type_node)
2789 	    {
2790 	      if (!TARGET_64BIT)
2791 		{
2792 		  cum->nregs = 0;
2793 		  cum->sse_nregs = 0;
2794 		  cum->mmx_nregs = 0;
2795 		  cum->warn_sse = 0;
2796 		  cum->warn_mmx = 0;
2797 		  cum->fastcall = 0;
2798 		  cum->float_in_sse = 0;
2799 		}
2800 	      cum->maybe_vaarg = true;
2801 	    }
2802 	}
2803     }
2804   if ((!fntype && !libname)
2805       || (fntype && !TYPE_ARG_TYPES (fntype)))
2806     cum->maybe_vaarg = true;
2807 
2808   if (TARGET_DEBUG_ARG)
2809     fprintf (stderr, ", nregs=%d )\n", cum->nregs);
2810 
2811   return;
2812 }
2813 
2814 /* Return the "natural" mode for TYPE.  In most cases, this is just TYPE_MODE.
2815    But in the case of vector types, it is some vector mode.
2816 
2817    When we have only some of our vector isa extensions enabled, then there
2818    are some modes for which vector_mode_supported_p is false.  For these
2819    modes, the generic vector support in gcc will choose some non-vector mode
2820    in order to implement the type.  By computing the natural mode, we'll
2821    select the proper ABI location for the operand and not depend on whatever
2822    the middle-end decides to do with these vector types.  */
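/* A hedged illustration (assumed example, not from the original sources):
   for a hypothetical type

     typedef float v4sf __attribute__ ((vector_size (16)));

   whose TYPE_MODE was chosen as a non-vector mode because the relevant
   vector ISA is disabled, the loop below walks the MODE_VECTOR_FLOAT
   modes and returns V4SFmode, the mode with four SFmode units.  */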
2823 
2824 static enum machine_mode
2825 type_natural_mode (tree type)
2826 {
2827   enum machine_mode mode = TYPE_MODE (type);
2828 
2829   if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
2830     {
2831       HOST_WIDE_INT size = int_size_in_bytes (type);
2832       if ((size == 8 || size == 16)
2833 	  /* ??? Generic code allows us to create width 1 vectors.  Ignore.  */
2834 	  && TYPE_VECTOR_SUBPARTS (type) > 1)
2835 	{
2836 	  enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
2837 
2838 	  if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
2839 	    mode = MIN_MODE_VECTOR_FLOAT;
2840 	  else
2841 	    mode = MIN_MODE_VECTOR_INT;
2842 
2843 	  /* Get the mode which has this inner mode and number of units.  */
2844 	  for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
2845 	    if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
2846 		&& GET_MODE_INNER (mode) == innermode)
2847 	      return mode;
2848 
2849 	  gcc_unreachable ();
2850 	}
2851     }
2852 
2853   return mode;
2854 }
2855 
2856 /* We want to pass a value in REGNO whose "natural" mode is MODE.  However,
2857    this may not agree with the mode that the type system has chosen for the
2858    register, which is ORIG_MODE.  If ORIG_MODE is not BLKmode, then we can
2859    go ahead and use it.  Otherwise we have to build a PARALLEL instead.  */
2860 
2861 static rtx
2862 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
2863 		     unsigned int regno)
2864 {
2865   rtx tmp;
2866 
2867   if (orig_mode != BLKmode)
2868     tmp = gen_rtx_REG (orig_mode, regno);
2869   else
2870     {
2871       tmp = gen_rtx_REG (mode, regno);
2872       tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
2873       tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
2874     }
2875 
2876   return tmp;
2877 }
2878 
2879 /* x86-64 register passing implementation.  See the x86-64 ABI for details.
2880    The goal of this code is to classify each 8-byte chunk of the incoming
2881    argument by register class and assign registers accordingly.  */
2882 
2883 /* Return the union class of CLASS1 and CLASS2.
2884    See the x86-64 PS ABI for details.  */
2885 
2886 static enum x86_64_reg_class
2887 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
2888 {
2889   /* Rule #1: If both classes are equal, this is the resulting class.  */
2890   if (class1 == class2)
2891     return class1;
2892 
2893   /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
2894      the other class.  */
2895   if (class1 == X86_64_NO_CLASS)
2896     return class2;
2897   if (class2 == X86_64_NO_CLASS)
2898     return class1;
2899 
2900   /* Rule #3: If one of the classes is MEMORY, the result is MEMORY.  */
2901   if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
2902     return X86_64_MEMORY_CLASS;
2903 
2904   /* Rule #4: If one of the classes is INTEGER, the result is INTEGER.  */
2905   if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
2906       || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
2907     return X86_64_INTEGERSI_CLASS;
2908   if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
2909       || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
2910     return X86_64_INTEGER_CLASS;
2911 
2912   /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
2913      MEMORY is used.  */
2914   if (class1 == X86_64_X87_CLASS
2915       || class1 == X86_64_X87UP_CLASS
2916       || class1 == X86_64_COMPLEX_X87_CLASS
2917       || class2 == X86_64_X87_CLASS
2918       || class2 == X86_64_X87UP_CLASS
2919       || class2 == X86_64_COMPLEX_X87_CLASS)
2920     return X86_64_MEMORY_CLASS;
2921 
2922   /* Rule #6: Otherwise class SSE is used.  */
2923   return X86_64_SSE_CLASS;
2924 }
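/* Hedged worked example (not from the original sources): merging
   X86_64_INTEGERSI_CLASS with X86_64_SSESF_CLASS yields
   X86_64_INTEGERSI_CLASS (rule #4), while merging X86_64_SSE_CLASS with
   X86_64_X87_CLASS yields X86_64_MEMORY_CLASS (rule #5).  */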
2925 
2926 /* Classify the argument of type TYPE and mode MODE.
2927    CLASSES will be filled by the register class used to pass each word
2928    of the operand.  The number of words is returned.  In case the parameter
2929    should be passed in memory, 0 is returned. As a special case for zero
2930    sized containers, classes[0] will be NO_CLASS and 1 is returned.
2931 
2932    BIT_OFFSET is used internally for handling records; it specifies the
2933    offset in bits, modulo 256, to avoid overflow cases.
2934 
2935    See the x86-64 PS ABI for details.
2936 */
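/* Hedged worked example (assumed, not from the original sources): a
   hypothetical

     struct s { double d; int i; };

   occupies two eightbytes; the first is classified X86_64_SSEDF_CLASS
   (the double) and the second X86_64_INTEGER_CLASS (the int at bit
   offset 64), so examine_argument below reports one SSE and one
   integer register.  */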
2937 
2938 static int
2939 classify_argument (enum machine_mode mode, tree type,
2940 		   enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
2941 {
2942   HOST_WIDE_INT bytes =
2943     (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2944   int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2945 
2946   /* Variable sized entities are always passed/returned in memory.  */
2947   if (bytes < 0)
2948     return 0;
2949 
2950   if (mode != VOIDmode
2951       && targetm.calls.must_pass_in_stack (mode, type))
2952     return 0;
2953 
2954   if (type && AGGREGATE_TYPE_P (type))
2955     {
2956       int i;
2957       tree field;
2958       enum x86_64_reg_class subclasses[MAX_CLASSES];
2959 
2960       /* On x86-64 we pass structures larger than 16 bytes on the stack.  */
2961       if (bytes > 16)
2962 	return 0;
2963 
2964       for (i = 0; i < words; i++)
2965 	classes[i] = X86_64_NO_CLASS;
2966 
2967       /* Zero sized arrays or structures are NO_CLASS.  We return 0 to
2968 	 signal the memory class, so handle this as a special case.  */
2969       if (!words)
2970 	{
2971 	  classes[0] = X86_64_NO_CLASS;
2972 	  return 1;
2973 	}
2974 
2975       /* Classify each field of record and merge classes.  */
2976       switch (TREE_CODE (type))
2977 	{
2978 	case RECORD_TYPE:
2979 	  /* For classes first merge in the field of the subclasses.  */
2980 	  if (TYPE_BINFO (type))
2981 	    {
2982 	      tree binfo, base_binfo;
2983 	      int basenum;
2984 
2985 	      for (binfo = TYPE_BINFO (type), basenum = 0;
2986 		   BINFO_BASE_ITERATE (binfo, basenum, base_binfo); basenum++)
2987 		{
2988 		   int num;
2989 		   int offset = tree_low_cst (BINFO_OFFSET (base_binfo), 0) * 8;
2990 		   tree type = BINFO_TYPE (base_binfo);
2991 
2992 		   num = classify_argument (TYPE_MODE (type),
2993 					    type, subclasses,
2994 					    (offset + bit_offset) % 256);
2995 		   if (!num)
2996 		     return 0;
2997 		   for (i = 0; i < num; i++)
2998 		     {
2999 		       int pos = (offset + (bit_offset % 64)) / 8 / 8;
3000 		       classes[i + pos] =
3001 			 merge_classes (subclasses[i], classes[i + pos]);
3002 		     }
3003 		}
3004 	    }
3005 	  /* And now merge the fields of the structure.  */
3006 	  for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3007 	    {
3008 	      if (TREE_CODE (field) == FIELD_DECL)
3009 		{
3010 		  int num;
3011 
3012 		  if (TREE_TYPE (field) == error_mark_node)
3013 		    continue;
3014 
3015 		  /* Bitfields are always classified as integer.  Handle them
3016 		     early, since later code would consider them to be
3017 		     misaligned integers.  */
3018 		  if (DECL_BIT_FIELD (field))
3019 		    {
3020 		      for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
3021 			   i < ((int_bit_position (field) + (bit_offset % 64))
3022 			        + tree_low_cst (DECL_SIZE (field), 0)
3023 				+ 63) / 8 / 8; i++)
3024 			classes[i] =
3025 			  merge_classes (X86_64_INTEGER_CLASS,
3026 					 classes[i]);
3027 		    }
3028 		  else
3029 		    {
3030 		      num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3031 					       TREE_TYPE (field), subclasses,
3032 					       (int_bit_position (field)
3033 						+ bit_offset) % 256);
3034 		      if (!num)
3035 			return 0;
3036 		      for (i = 0; i < num; i++)
3037 			{
3038 			  int pos =
3039 			    (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
3040 			  classes[i + pos] =
3041 			    merge_classes (subclasses[i], classes[i + pos]);
3042 			}
3043 		    }
3044 		}
3045 	    }
3046 	  break;
3047 
3048 	case ARRAY_TYPE:
3049 	  /* Arrays are handled as small records.  */
3050 	  {
3051 	    int num;
3052 	    num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
3053 				     TREE_TYPE (type), subclasses, bit_offset);
3054 	    if (!num)
3055 	      return 0;
3056 
3057 	    /* The partial classes are now full classes.  */
3058 	    if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
3059 	      subclasses[0] = X86_64_SSE_CLASS;
3060 	    if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
3061 	      subclasses[0] = X86_64_INTEGER_CLASS;
3062 
3063 	    for (i = 0; i < words; i++)
3064 	      classes[i] = subclasses[i % num];
3065 
3066 	    break;
3067 	  }
3068 	case UNION_TYPE:
3069 	case QUAL_UNION_TYPE:
3070 	  /* Unions are similar to RECORD_TYPE, but the offset is
3071 	     always 0.  */
3072 
3073 	  /* Unions are not derived.  */
3074 	  gcc_assert (!TYPE_BINFO (type)
3075 		      || !BINFO_N_BASE_BINFOS (TYPE_BINFO (type)));
3076 	  for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3077 	    {
3078 	      if (TREE_CODE (field) == FIELD_DECL)
3079 		{
3080 		  int num;
3081 
3082 		  if (TREE_TYPE (field) == error_mark_node)
3083 		    continue;
3084 
3085 		  num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3086 					   TREE_TYPE (field), subclasses,
3087 					   bit_offset);
3088 		  if (!num)
3089 		    return 0;
3090 		  for (i = 0; i < num; i++)
3091 		    classes[i] = merge_classes (subclasses[i], classes[i]);
3092 		}
3093 	    }
3094 	  break;
3095 
3096 	default:
3097 	  gcc_unreachable ();
3098 	}
3099 
3100       /* Final merger cleanup.  */
3101       for (i = 0; i < words; i++)
3102 	{
3103 	  /* If one class is MEMORY, everything should be passed in
3104 	     memory.  */
3105 	  if (classes[i] == X86_64_MEMORY_CLASS)
3106 	    return 0;
3107 
3108 	  /* The X86_64_SSEUP_CLASS should always be preceded by
3109 	     X86_64_SSE_CLASS.  */
3110 	  if (classes[i] == X86_64_SSEUP_CLASS
3111 	      && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
3112 	    classes[i] = X86_64_SSE_CLASS;
3113 
3114 	  /*  X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS.  */
3115 	  if (classes[i] == X86_64_X87UP_CLASS
3116 	      && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
3117 	    classes[i] = X86_64_SSE_CLASS;
3118 	}
3119       return words;
3120     }
3121 
3122   /* Compute the alignment needed.  We align all types to their natural
3123      boundaries, with the exception of XFmode, which is aligned to 64 bits.  */
3124   if (mode != VOIDmode && mode != BLKmode)
3125     {
3126       int mode_alignment = GET_MODE_BITSIZE (mode);
3127 
3128       if (mode == XFmode)
3129 	mode_alignment = 128;
3130       else if (mode == XCmode)
3131 	mode_alignment = 256;
3132       if (COMPLEX_MODE_P (mode))
3133 	mode_alignment /= 2;
3134       /* Misaligned fields are always returned in memory.  */
3135       if (bit_offset % mode_alignment)
3136 	return 0;
3137     }
3138 
3139   /* For V1xx modes, just use the base mode.  */
3140   if (VECTOR_MODE_P (mode)
3141       && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
3142     mode = GET_MODE_INNER (mode);
3143 
3144   /* Classification of atomic types.  */
3145   switch (mode)
3146     {
3147     case SDmode:
3148     case DDmode:
3149       classes[0] = X86_64_SSE_CLASS;
3150       return 1;
3151     case TDmode:
3152       classes[0] = X86_64_SSE_CLASS;
3153       classes[1] = X86_64_SSEUP_CLASS;
3154       return 2;
3155     case DImode:
3156     case SImode:
3157     case HImode:
3158     case QImode:
3159     case CSImode:
3160     case CHImode:
3161     case CQImode:
3162       if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3163 	classes[0] = X86_64_INTEGERSI_CLASS;
3164       else
3165 	classes[0] = X86_64_INTEGER_CLASS;
3166       return 1;
3167     case CDImode:
3168     case TImode:
3169       classes[0] = classes[1] = X86_64_INTEGER_CLASS;
3170       return 2;
3171     case CTImode:
3172       return 0;
3173     case SFmode:
3174       if (!(bit_offset % 64))
3175 	classes[0] = X86_64_SSESF_CLASS;
3176       else
3177 	classes[0] = X86_64_SSE_CLASS;
3178       return 1;
3179     case DFmode:
3180       classes[0] = X86_64_SSEDF_CLASS;
3181       return 1;
3182     case XFmode:
3183       classes[0] = X86_64_X87_CLASS;
3184       classes[1] = X86_64_X87UP_CLASS;
3185       return 2;
3186     case TFmode:
3187       classes[0] = X86_64_SSE_CLASS;
3188       classes[1] = X86_64_SSEUP_CLASS;
3189       return 2;
3190     case SCmode:
3191       classes[0] = X86_64_SSE_CLASS;
3192       return 1;
3193     case DCmode:
3194       classes[0] = X86_64_SSEDF_CLASS;
3195       classes[1] = X86_64_SSEDF_CLASS;
3196       return 2;
3197     case XCmode:
3198       classes[0] = X86_64_COMPLEX_X87_CLASS;
3199       return 1;
3200     case TCmode:
3201       /* This mode is larger than 16 bytes.  */
3202       return 0;
3203     case V4SFmode:
3204     case V4SImode:
3205     case V16QImode:
3206     case V8HImode:
3207     case V2DFmode:
3208     case V2DImode:
3209       classes[0] = X86_64_SSE_CLASS;
3210       classes[1] = X86_64_SSEUP_CLASS;
3211       return 2;
3212     case V2SFmode:
3213     case V2SImode:
3214     case V4HImode:
3215     case V8QImode:
3216       classes[0] = X86_64_SSE_CLASS;
3217       return 1;
3218     case BLKmode:
3219     case VOIDmode:
3220       return 0;
3221     default:
3222       gcc_assert (VECTOR_MODE_P (mode));
3223 
3224       if (bytes > 16)
3225 	return 0;
3226 
3227       gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
3228 
3229       if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3230 	classes[0] = X86_64_INTEGERSI_CLASS;
3231       else
3232 	classes[0] = X86_64_INTEGER_CLASS;
3233       classes[1] = X86_64_INTEGER_CLASS;
3234       return 1 + (bytes > 8);
3235     }
3236 }
3237 
3238 /* Examine the argument and set the number of registers required in each
3239    class.  Return 0 iff the parameter should be passed in memory.  */
3240 static int
3241 examine_argument (enum machine_mode mode, tree type, int in_return,
3242 		  int *int_nregs, int *sse_nregs)
3243 {
3244   enum x86_64_reg_class class[MAX_CLASSES];
3245   int n = classify_argument (mode, type, class, 0);
3246 
3247   *int_nregs = 0;
3248   *sse_nregs = 0;
3249   if (!n)
3250     return 0;
3251   for (n--; n >= 0; n--)
3252     switch (class[n])
3253       {
3254       case X86_64_INTEGER_CLASS:
3255       case X86_64_INTEGERSI_CLASS:
3256 	(*int_nregs)++;
3257 	break;
3258       case X86_64_SSE_CLASS:
3259       case X86_64_SSESF_CLASS:
3260       case X86_64_SSEDF_CLASS:
3261 	(*sse_nregs)++;
3262 	break;
3263       case X86_64_NO_CLASS:
3264       case X86_64_SSEUP_CLASS:
3265 	break;
3266       case X86_64_X87_CLASS:
3267       case X86_64_X87UP_CLASS:
3268 	if (!in_return)
3269 	  return 0;
3270 	break;
3271       case X86_64_COMPLEX_X87_CLASS:
3272 	return in_return ? 2 : 0;
3273       case X86_64_MEMORY_CLASS:
3274 	gcc_unreachable ();
3275       }
3276   return 1;
3277 }
3278 
3279 /* Construct container for the argument used by GCC interface.  See
3280    FUNCTION_ARG for the detailed description.  */
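/* Hedged illustration (assumed example, not from the original sources):
   for the struct { double d; int i; } case sketched above, passed as the
   first argument, this builds a PARALLEL whose entries place a DFmode
   value in %xmm0 at byte offset 0 and a DImode value in %rdi at byte
   offset 8.  */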
3281 
3282 static rtx
3283 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
3284 		     tree type, int in_return, int nintregs, int nsseregs,
3285 		     const int *intreg, int sse_regno)
3286 {
3287   /* The following variables hold the static issued_error state.  */
3288   static bool issued_sse_arg_error;
3289   static bool issued_sse_ret_error;
3290   static bool issued_x87_ret_error;
3291 
3292   enum machine_mode tmpmode;
3293   int bytes =
3294     (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3295   enum x86_64_reg_class class[MAX_CLASSES];
3296   int n;
3297   int i;
3298   int nexps = 0;
3299   int needed_sseregs, needed_intregs;
3300   rtx exp[MAX_CLASSES];
3301   rtx ret;
3302 
3303   n = classify_argument (mode, type, class, 0);
3304   if (TARGET_DEBUG_ARG)
3305     {
3306       if (!n)
3307 	fprintf (stderr, "Memory class\n");
3308       else
3309 	{
3310 	  fprintf (stderr, "Classes:");
3311 	  for (i = 0; i < n; i++)
3312 	    {
3313 	      fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
3314 	    }
3315 	   fprintf (stderr, "\n");
3316 	}
3317     }
3318   if (!n)
3319     return NULL;
3320   if (!examine_argument (mode, type, in_return, &needed_intregs,
3321 			 &needed_sseregs))
3322     return NULL;
3323   if (needed_intregs > nintregs || needed_sseregs > nsseregs)
3324     return NULL;
3325 
3326   /* We allowed the user to turn off SSE for kernel mode.  Don't crash if
3327      some less clueful developer tries to use floating-point anyway.  */
3328   if (needed_sseregs && !TARGET_SSE)
3329     {
3330       if (in_return)
3331 	{
3332 	  if (!issued_sse_ret_error)
3333 	    {
3334 	      error ("SSE register return with SSE disabled");
3335 	      issued_sse_ret_error = true;
3336 	    }
3337 	}
3338       else if (!issued_sse_arg_error)
3339 	{
3340 	  error ("SSE register argument with SSE disabled");
3341 	  issued_sse_arg_error = true;
3342 	}
3343       return NULL;
3344     }
3345 
3346   /* Likewise, error if the ABI requires us to return values in the
3347      x87 registers and the user specified -mno-80387.  */
3348   if (!TARGET_80387 && in_return)
3349     for (i = 0; i < n; i++)
3350       if (class[i] == X86_64_X87_CLASS
3351 	  || class[i] == X86_64_X87UP_CLASS
3352 	  || class[i] == X86_64_COMPLEX_X87_CLASS)
3353 	{
3354 	  if (!issued_x87_ret_error)
3355 	    {
3356 	      error ("x87 register return with x87 disabled");
3357 	      issued_x87_ret_error = true;
3358 	    }
3359 	  return NULL;
3360 	}
3361 
3362   /* First construct the simple cases.  Avoid SCmode, since we want to use
3363      a single register to pass this type.  */
3364   if (n == 1 && mode != SCmode)
3365     switch (class[0])
3366       {
3367       case X86_64_INTEGER_CLASS:
3368       case X86_64_INTEGERSI_CLASS:
3369 	return gen_rtx_REG (mode, intreg[0]);
3370       case X86_64_SSE_CLASS:
3371       case X86_64_SSESF_CLASS:
3372       case X86_64_SSEDF_CLASS:
3373 	return gen_reg_or_parallel (mode, orig_mode, SSE_REGNO (sse_regno));
3374       case X86_64_X87_CLASS:
3375       case X86_64_COMPLEX_X87_CLASS:
3376 	return gen_rtx_REG (mode, FIRST_STACK_REG);
3377       case X86_64_NO_CLASS:
3378 	/* Zero sized array, struct or class.  */
3379 	return NULL;
3380       default:
3381 	gcc_unreachable ();
3382       }
3383   if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS
3384       && mode != BLKmode)
3385     return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
3386   if (n == 2
3387       && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
3388     return gen_rtx_REG (XFmode, FIRST_STACK_REG);
3389   if (n == 2 && class[0] == X86_64_INTEGER_CLASS
3390       && class[1] == X86_64_INTEGER_CLASS
3391       && (mode == CDImode || mode == TImode || mode == TFmode)
3392       && intreg[0] + 1 == intreg[1])
3393     return gen_rtx_REG (mode, intreg[0]);
3394 
3395   /* Otherwise figure out the entries of the PARALLEL.  */
3396   for (i = 0; i < n; i++)
3397     {
3398       switch (class[i])
3399         {
3400 	  case X86_64_NO_CLASS:
3401 	    break;
3402 	  case X86_64_INTEGER_CLASS:
3403 	  case X86_64_INTEGERSI_CLASS:
3404 	    /* Merge TImodes on aligned occasions here too.  */
3405 	    if (i * 8 + 8 > bytes)
3406 	      tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
3407 	    else if (class[i] == X86_64_INTEGERSI_CLASS)
3408 	      tmpmode = SImode;
3409 	    else
3410 	      tmpmode = DImode;
3411 	    /* We've requested 24 bytes for which we don't have a mode.  Use DImode.  */
3412 	    if (tmpmode == BLKmode)
3413 	      tmpmode = DImode;
3414 	    exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3415 					       gen_rtx_REG (tmpmode, *intreg),
3416 					       GEN_INT (i*8));
3417 	    intreg++;
3418 	    break;
3419 	  case X86_64_SSESF_CLASS:
3420 	    exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3421 					       gen_rtx_REG (SFmode,
3422 							    SSE_REGNO (sse_regno)),
3423 					       GEN_INT (i*8));
3424 	    sse_regno++;
3425 	    break;
3426 	  case X86_64_SSEDF_CLASS:
3427 	    exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3428 					       gen_rtx_REG (DFmode,
3429 							    SSE_REGNO (sse_regno)),
3430 					       GEN_INT (i*8));
3431 	    sse_regno++;
3432 	    break;
3433 	  case X86_64_SSE_CLASS:
3434 	    if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
3435 	      tmpmode = TImode;
3436 	    else
3437 	      tmpmode = DImode;
3438 	    exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3439 					       gen_rtx_REG (tmpmode,
3440 							    SSE_REGNO (sse_regno)),
3441 					       GEN_INT (i*8));
3442 	    if (tmpmode == TImode)
3443 	      i++;
3444 	    sse_regno++;
3445 	    break;
3446 	  default:
3447 	    gcc_unreachable ();
3448 	}
3449     }
3450 
3451   /* Empty aligned struct, union or class.  */
3452   if (nexps == 0)
3453     return NULL;
3454 
3455   ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
3456   for (i = 0; i < nexps; i++)
3457     XVECEXP (ret, 0, i) = exp [i];
3458   return ret;
3459 }
3460 
3461 /* Update the data in CUM to advance over an argument
3462    of mode MODE and data type TYPE.
3463    (TYPE is null for libcalls where that information may not be available.)  */
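/* Hedged illustration (not from the original sources): advancing over a
   DFmode argument on a 64-bit target consumes one SSE register
   (sse_nregs goes down by one, sse_regno up by one); on a 32-bit target
   with cum->float_in_sse < 2 the DFmode case simply breaks out and no
   register is consumed, so the value is passed on the stack.  */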
3464 
3465 void
3466 function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3467 		      tree type, int named)
3468 {
3469   int bytes =
3470     (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3471   int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3472 
3473   if (type)
3474     mode = type_natural_mode (type);
3475 
3476   if (TARGET_DEBUG_ARG)
3477     fprintf (stderr, "function_adv (sz=%d, wds=%2d, nregs=%d, ssenregs=%d, "
3478 	     "mode=%s, named=%d)\n\n",
3479 	     words, cum->words, cum->nregs, cum->sse_nregs,
3480 	     GET_MODE_NAME (mode), named);
3481 
3482   if (TARGET_64BIT)
3483     {
3484       int int_nregs, sse_nregs;
3485       if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
3486 	cum->words += words;
3487       else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
3488 	{
3489 	  cum->nregs -= int_nregs;
3490 	  cum->sse_nregs -= sse_nregs;
3491 	  cum->regno += int_nregs;
3492 	  cum->sse_regno += sse_nregs;
3493 	}
3494       else
3495 	cum->words += words;
3496     }
3497   else
3498     {
3499       switch (mode)
3500 	{
3501 	default:
3502 	  break;
3503 
3504 	case BLKmode:
3505 	  if (bytes < 0)
3506 	    break;
3507 	  /* FALLTHRU */
3508 
3509 	case DImode:
3510 	case SImode:
3511 	case HImode:
3512 	case QImode:
3513 	  cum->words += words;
3514 	  cum->nregs -= words;
3515 	  cum->regno += words;
3516 
3517 	  if (cum->nregs <= 0)
3518 	    {
3519 	      cum->nregs = 0;
3520 	      cum->regno = 0;
3521 	    }
3522 	  break;
3523 
3524 	case DFmode:
3525 	  if (cum->float_in_sse < 2)
3526 	    break;
3527 	case SFmode:
3528 	  if (cum->float_in_sse < 1)
3529 	    break;
3530 	  /* FALLTHRU */
3531 
3532 	case TImode:
3533 	case V16QImode:
3534 	case V8HImode:
3535 	case V4SImode:
3536 	case V2DImode:
3537 	case V4SFmode:
3538 	case V2DFmode:
3539 	  if (!type || !AGGREGATE_TYPE_P (type))
3540 	    {
3541 	      cum->sse_words += words;
3542 	      cum->sse_nregs -= 1;
3543 	      cum->sse_regno += 1;
3544 	      if (cum->sse_nregs <= 0)
3545 		{
3546 		  cum->sse_nregs = 0;
3547 		  cum->sse_regno = 0;
3548 		}
3549 	    }
3550 	  break;
3551 
3552 	case V8QImode:
3553 	case V4HImode:
3554 	case V2SImode:
3555 	case V2SFmode:
3556 	  if (!type || !AGGREGATE_TYPE_P (type))
3557 	    {
3558 	      cum->mmx_words += words;
3559 	      cum->mmx_nregs -= 1;
3560 	      cum->mmx_regno += 1;
3561 	      if (cum->mmx_nregs <= 0)
3562 		{
3563 		  cum->mmx_nregs = 0;
3564 		  cum->mmx_regno = 0;
3565 		}
3566 	    }
3567 	  break;
3568 	}
3569     }
3570 }
3571 
3572 /* Define where to put the arguments to a function.
3573    Value is zero to push the argument on the stack,
3574    or a hard register in which to store the argument.
3575 
3576    MODE is the argument's machine mode.
3577    TYPE is the data type of the argument (as a tree).
3578     This is null for libcalls where that information may
3579     not be available.
3580    CUM is a variable of type CUMULATIVE_ARGS which gives info about
3581     the preceding args and about the function being called.
3582    NAMED is nonzero if this argument is a named parameter
3583     (otherwise it is an extra parameter matching an ellipsis).  */
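/* Hedged illustration (assumed example, not from the original sources):
   for a hypothetical 32-bit regparm function, the first SImode argument
   comes back as (reg:SI 0), i.e. %eax; with the fastcall convention the
   register is switched to %ecx (regno 2) instead.  On 64-bit targets the
   decision is delegated to construct_container above.  */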
3584 
3585 rtx
3586 function_arg (CUMULATIVE_ARGS *cum, enum machine_mode orig_mode,
3587 	      tree type, int named)
3588 {
3589   enum machine_mode mode = orig_mode;
3590   rtx ret = NULL_RTX;
3591   int bytes =
3592     (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3593   int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3594   static bool warnedsse, warnedmmx;
3595 
3596   /* To simplify the code below, represent vector types with a vector mode
3597      even if MMX/SSE are not active.  */
3598   if (type && TREE_CODE (type) == VECTOR_TYPE)
3599     mode = type_natural_mode (type);
3600 
3601   /* Handle a hidden AL argument containing the number of SSE registers used
3602      by varargs x86-64 functions.  For the i386 ABI just return constm1_rtx
3603      to avoid any AL settings.  */
3604   if (mode == VOIDmode)
3605     {
3606       if (TARGET_64BIT)
3607 	return GEN_INT (cum->maybe_vaarg
3608 			? (cum->sse_nregs < 0
3609 			   ? SSE_REGPARM_MAX
3610 			   : cum->sse_regno)
3611 			: -1);
3612       else
3613 	return constm1_rtx;
3614     }
3615   if (TARGET_64BIT)
3616     ret = construct_container (mode, orig_mode, type, 0, cum->nregs,
3617 			       cum->sse_nregs,
3618 			       &x86_64_int_parameter_registers [cum->regno],
3619 			       cum->sse_regno);
3620   else
3621     switch (mode)
3622       {
3623 	/* For now, pass fp/complex values on the stack.  */
3624       default:
3625 	break;
3626 
3627       case BLKmode:
3628 	if (bytes < 0)
3629 	  break;
3630 	/* FALLTHRU */
3631       case DImode:
3632       case SImode:
3633       case HImode:
3634       case QImode:
3635 	if (words <= cum->nregs)
3636 	  {
3637 	    int regno = cum->regno;
3638 
3639 	    /* Fastcall allocates the first two DWORD (SImode) or
3640 	       smaller arguments to ECX and EDX.  */
3641 	    if (cum->fastcall)
3642 	      {
3643 	        if (mode == BLKmode || mode == DImode)
3644 	          break;
3645 
3646 	        /* ECX, not EAX, is the first allocated register.  */
3647 	        if (regno == 0)
3648 		  regno = 2;
3649 	      }
3650 	    ret = gen_rtx_REG (mode, regno);
3651 	  }
3652 	break;
3653       case DFmode:
3654 	if (cum->float_in_sse < 2)
3655 	  break;
3656       case SFmode:
3657 	if (cum->float_in_sse < 1)
3658 	  break;
3659 	/* FALLTHRU */
3660       case TImode:
3661       case V16QImode:
3662       case V8HImode:
3663       case V4SImode:
3664       case V2DImode:
3665       case V4SFmode:
3666       case V2DFmode:
3667 	if (!type || !AGGREGATE_TYPE_P (type))
3668 	  {
3669 	    if (!TARGET_SSE && !warnedsse && cum->warn_sse)
3670 	      {
3671 		warnedsse = true;
3672 		warning (0, "SSE vector argument without SSE enabled "
3673 			 "changes the ABI");
3674 	      }
3675 	    if (cum->sse_nregs)
3676 	      ret = gen_reg_or_parallel (mode, orig_mode,
3677 					 cum->sse_regno + FIRST_SSE_REG);
3678 	  }
3679 	break;
3680       case V8QImode:
3681       case V4HImode:
3682       case V2SImode:
3683       case V2SFmode:
3684 	if (!type || !AGGREGATE_TYPE_P (type))
3685 	  {
3686 	    if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
3687 	      {
3688 		warnedmmx = true;
3689 		warning (0, "MMX vector argument without MMX enabled "
3690 			 "changes the ABI");
3691 	      }
3692 	    if (cum->mmx_nregs)
3693 	      ret = gen_reg_or_parallel (mode, orig_mode,
3694 					 cum->mmx_regno + FIRST_MMX_REG);
3695 	  }
3696 	break;
3697       }
3698 
3699   if (TARGET_DEBUG_ARG)
3700     {
3701       fprintf (stderr,
3702 	       "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
3703 	       words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
3704 
3705       if (ret)
3706 	print_simple_rtl (stderr, ret);
3707       else
3708 	fprintf (stderr, ", stack");
3709 
3710       fprintf (stderr, " )\n");
3711     }
3712 
3713   return ret;
3714 }
3715 
3716 /* A C expression that indicates when an argument must be passed by
3717    reference.  If nonzero for an argument, a copy of that argument is
3718    made in memory and a pointer to the argument is passed instead of
3719    the argument itself.  The pointer is passed in whatever way is
3720    appropriate for passing a pointer to that type.  */
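/* Hedged illustration (not from the original sources): on a 64-bit
   target a hypothetical variable-sized type, for which int_size_in_bytes
   returns -1, is passed by reference; on 32-bit targets this hook always
   answers false.  */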
3721 
3722 static bool
3723 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
3724 			enum machine_mode mode ATTRIBUTE_UNUSED,
3725 			tree type, bool named ATTRIBUTE_UNUSED)
3726 {
3727   if (!TARGET_64BIT)
3728     return 0;
3729 
3730   if (type && int_size_in_bytes (type) == -1)
3731     {
3732       if (TARGET_DEBUG_ARG)
3733 	fprintf (stderr, "function_arg_pass_by_reference\n");
3734       return 1;
3735     }
3736 
3737   return 0;
3738 }
3739 
3740 /* Return true when TYPE should be 128-bit aligned for the 32-bit argument
3741    passing ABI.  Only called if TARGET_SSE.  */
3742 static bool
3743 contains_128bit_aligned_vector_p (tree type)
3744 {
3745   enum machine_mode mode = TYPE_MODE (type);
3746   if (SSE_REG_MODE_P (mode)
3747       && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
3748     return true;
3749   if (TYPE_ALIGN (type) < 128)
3750     return false;
3751 
3752   if (AGGREGATE_TYPE_P (type))
3753     {
3754       /* Walk the aggregates recursively.  */
3755       switch (TREE_CODE (type))
3756 	{
3757 	case RECORD_TYPE:
3758 	case UNION_TYPE:
3759 	case QUAL_UNION_TYPE:
3760 	  {
3761 	    tree field;
3762 
3763 	    if (TYPE_BINFO (type))
3764 	      {
3765 		tree binfo, base_binfo;
3766 		int i;
3767 
3768 		for (binfo = TYPE_BINFO (type), i = 0;
3769 		     BINFO_BASE_ITERATE (binfo, i, base_binfo); i++)
3770 		  if (contains_128bit_aligned_vector_p
3771 		      (BINFO_TYPE (base_binfo)))
3772 		    return true;
3773 	      }
3774 	    /* And now merge the fields of the structure.  */
3775 	    for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3776 	      {
3777 		if (TREE_CODE (field) == FIELD_DECL
3778 		    && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
3779 		  return true;
3780 	      }
3781 	    break;
3782 	  }
3783 
3784 	case ARRAY_TYPE:
3785 	  /* Just in case some languages pass arrays by value.  */
3786 	  if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
3787 	    return true;
3788 	  break;
3789 
3790 	default:
3791 	  gcc_unreachable ();
3792 	}
3793     }
3794   return false;
3795 }
3796 
3797 /* Gives the alignment boundary, in bits, of an argument with the
3798    specified mode and type.  */
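/* Hedged illustration (assumed example, not from the original sources):
   with SSE enabled, a hypothetical __m128 (V4SFmode) argument is given a
   128-bit boundary by the code below, while on a 32-bit target a plain
   int argument keeps the default PARM_BOUNDARY alignment.  */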
3799 
3800 int
3801 ix86_function_arg_boundary (enum machine_mode mode, tree type)
3802 {
3803   int align;
3804   if (type)
3805     align = TYPE_ALIGN (type);
3806   else
3807     align = GET_MODE_ALIGNMENT (mode);
3808   if (align < PARM_BOUNDARY)
3809     align = PARM_BOUNDARY;
3810   if (!TARGET_64BIT)
3811     {
3812       /* The i386 ABI defines all arguments to be 4-byte aligned.  We have to
3813 	 make an exception for SSE modes since these require 128-bit
3814 	 alignment.
3815 
3816 	 The handling here differs from field_alignment.  ICC aligns MMX
3817 	 arguments to 4 byte boundaries, while structure fields are aligned
3818 	 to 8 byte boundaries.  */
3819       if (!TARGET_SSE)
3820 	align = PARM_BOUNDARY;
3821       else if (!type)
3822 	{
3823 	  if (!SSE_REG_MODE_P (mode))
3824 	    align = PARM_BOUNDARY;
3825 	}
3826       else
3827 	{
3828 	  if (!contains_128bit_aligned_vector_p (type))
3829 	    align = PARM_BOUNDARY;
3830 	}
3831     }
3832   if (align > 128)
3833     align = 128;
3834   return align;
3835 }
3836 
3837 /* Return true if N is a possible register number of function value.  */
3838 bool
3839 ix86_function_value_regno_p (int regno)
3840 {
3841   if (TARGET_MACHO)
3842     {
3843       if (!TARGET_64BIT)
3844         {
3845           return ((regno) == 0
3846                   || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
3847                   || ((regno) == FIRST_SSE_REG && TARGET_SSE));
3848         }
3849       return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
3850               || ((regno) == FIRST_SSE_REG && TARGET_SSE)
3851               || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
3852       }
3853   else
3854     {
3855       if (regno == 0
3856           || (regno == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
3857           || (regno == FIRST_SSE_REG && TARGET_SSE))
3858         return true;
3859 
3860       if (!TARGET_64BIT
3861           && (regno == FIRST_MMX_REG && TARGET_MMX))
3862 	    return true;
3863 
3864       return false;
3865     }
3866 }
3867 
3868 /* Define how to find the value returned by a function.
3869    VALTYPE is the data type of the value (as a tree).
3870    If the precise function being called is known, FUNC is its FUNCTION_DECL;
3871    otherwise, FUNC is 0.  */
3872 rtx
3873 ix86_function_value (tree valtype, tree fntype_or_decl,
3874 		     bool outgoing ATTRIBUTE_UNUSED)
3875 {
3876   enum machine_mode natmode = type_natural_mode (valtype);
3877 
3878   if (TARGET_64BIT)
3879     {
3880       rtx ret = construct_container (natmode, TYPE_MODE (valtype), valtype,
3881 				     1, REGPARM_MAX, SSE_REGPARM_MAX,
3882 				     x86_64_int_return_registers, 0);
3883       /* For zero sized structures, construct_container returns NULL, but we
3884 	 need to keep the rest of the compiler happy by returning a meaningful value.  */
3885       if (!ret)
3886 	ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
3887       return ret;
3888     }
3889   else
3890     {
3891       tree fn = NULL_TREE, fntype;
3892       if (fntype_or_decl
3893 	  && DECL_P (fntype_or_decl))
3894         fn = fntype_or_decl;
3895       fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
3896       return gen_rtx_REG (TYPE_MODE (valtype),
3897 			  ix86_value_regno (natmode, fn, fntype));
3898     }
3899 }
3900 
3901 /* Return true iff type is returned in memory.  */
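/* Hedged illustration (not from the original sources): on a 32-bit
   target a hypothetical 16-byte struct of four ints has BLKmode and is
   returned in memory, while an XFmode long double is returned in %st(0);
   on 64-bit targets anything classify_argument rejects (for instance a
   struct larger than 16 bytes) is returned in memory.  */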
3902 int
3903 ix86_return_in_memory (tree type)
3904 {
3905   int needed_intregs, needed_sseregs, size;
3906   enum machine_mode mode = type_natural_mode (type);
3907 
3908   if (TARGET_64BIT)
3909     return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
3910 
3911   if (mode == BLKmode)
3912     return 1;
3913 
3914   size = int_size_in_bytes (type);
3915 
3916   if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
3917     return 0;
3918 
3919   if (VECTOR_MODE_P (mode) || mode == TImode)
3920     {
3921       /* User-created vectors small enough to fit in EAX.  */
3922       if (size < 8)
3923 	return 0;
3924 
3925       /* MMX/3dNow values are returned in MM0,
3926 	 except when it doesn't exist.  */
3927       if (size == 8)
3928 	return (TARGET_MMX ? 0 : 1);
3929 
3930       /* SSE values are returned in XMM0, except when it doesn't exist.  */
3931       if (size == 16)
3932 	return (TARGET_SSE ? 0 : 1);
3933     }
3934 
3935   if (mode == XFmode)
3936     return 0;
3937 
3938   if (mode == TDmode)
3939     return 1;
3940 
3941   if (size > 12)
3942     return 1;
3943   return 0;
3944 }
3945 
3946 /* When returning SSE vector types, we have a choice of either
3947      (1) being abi incompatible with a -march switch, or
3948      (2) generating an error.
3949    Given no good solution, I think the safest thing is one warning.
3950    The user won't be able to use -Werror, but....
3951 
3952    Choose the STRUCT_VALUE_RTX hook because that's (at present) only
3953    called in response to actually generating a caller or callee that
3954    uses such a type.  As opposed to RETURN_IN_MEMORY, which is called
3955    via aggregate_value_p for general type probing from tree-ssa.  */
3956 
3957 static rtx
3958 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
3959 {
3960   static bool warnedsse, warnedmmx;
3961 
3962   if (type)
3963     {
3964       /* Look at the return type of the function, not the function type.  */
3965       enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
3966 
3967       if (!TARGET_SSE && !warnedsse)
3968 	{
3969 	  if (mode == TImode
3970 	      || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
3971 	    {
3972 	      warnedsse = true;
3973 	      warning (0, "SSE vector return without SSE enabled "
3974 		       "changes the ABI");
3975 	    }
3976 	}
3977 
3978       if (!TARGET_MMX && !warnedmmx)
3979 	{
3980 	  if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
3981 	    {
3982 	      warnedmmx = true;
3983 	      warning (0, "MMX vector return without MMX enabled "
3984 		       "changes the ABI");
3985 	    }
3986 	}
3987     }
3988 
3989   return NULL;
3990 }
3991 
3992 /* Define how to find the value returned by a library function
3993    assuming the value has mode MODE.  */
3994 rtx
3995 ix86_libcall_value (enum machine_mode mode)
3996 {
3997   if (TARGET_64BIT)
3998     {
3999       switch (mode)
4000 	{
4001 	case SFmode:
4002 	case SCmode:
4003 	case DFmode:
4004 	case DCmode:
4005 	case TFmode:
4006 	case SDmode:
4007 	case DDmode:
4008 	case TDmode:
4009 	  return gen_rtx_REG (mode, FIRST_SSE_REG);
4010 	case XFmode:
4011 	case XCmode:
4012 	  return gen_rtx_REG (mode, FIRST_FLOAT_REG);
4013 	case TCmode:
4014 	  return NULL;
4015 	default:
4016 	  return gen_rtx_REG (mode, 0);
4017 	}
4018     }
4019   else
4020     return gen_rtx_REG (mode, ix86_value_regno (mode, NULL, NULL));
4021 }
4022 
4023 /* Given a mode, return the register to use for a return value.  */
4024 
4025 static int
4026 ix86_value_regno (enum machine_mode mode, tree func, tree fntype)
4027 {
4028   gcc_assert (!TARGET_64BIT);
4029 
4030   /* 8-byte vector modes are returned in %mm0.  See ix86_return_in_memory for
4031      where we normally prevent this case when MMX is not available.  However,
4032      some ABIs may require the result to be returned like DImode.  */
4033   if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
4034     return TARGET_MMX ? FIRST_MMX_REG : 0;
4035 
4036   /* 16-byte vector modes are returned in %xmm0.  See ix86_return_in_memory
4037      for where we prevent this case when SSE is not available.  However, some
4038      ABIs may require the result to be returned like integer TImode.  */
4039   if (mode == TImode || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
4040     return TARGET_SSE ? FIRST_SSE_REG : 0;
4041 
4042   /* Decimal floating point values can go in %eax, unlike other float modes.  */
4043   if (DECIMAL_FLOAT_MODE_P (mode))
4044     return 0;
4045 
4046   /* Most things go in %eax, except (unless -mno-fp-ret-in-387) fp values.  */
4047   if (!SCALAR_FLOAT_MODE_P (mode) || !TARGET_FLOAT_RETURNS_IN_80387)
4048     return 0;
4049 
4050   /* Floating point return values in %st(0), except for local functions when
4051      SSE math is enabled or for functions with sseregparm attribute.  */
4052   if ((func || fntype)
4053       && (mode == SFmode || mode == DFmode))
4054     {
4055       int sse_level = ix86_function_sseregparm (fntype, func);
4056       if ((sse_level >= 1 && mode == SFmode)
4057 	  || (sse_level == 2 && mode == DFmode))
4058         return FIRST_SSE_REG;
4059     }
4060 
4061   return FIRST_FLOAT_REG;
4062 }
4063 
4064 /* Create the va_list data type.  */
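/* On 64-bit targets the record built below corresponds, roughly (hedged
   paraphrase of the fields laid out in this function), to

     typedef struct {
       unsigned int gp_offset;
       unsigned int fp_offset;
       void *overflow_arg_area;
       void *reg_save_area;
     } __va_list_tag[1];

   while 32-bit targets just use a plain char pointer.  */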
4065 
4066 static tree
4067 ix86_build_builtin_va_list (void)
4068 {
4069   tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
4070 
4071   /* For i386 we use plain pointer to argument area.  */
4072   if (!TARGET_64BIT)
4073     return build_pointer_type (char_type_node);
4074 
4075   record = (*lang_hooks.types.make_type) (RECORD_TYPE);
4076   type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
4077 
4078   f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
4079 		      unsigned_type_node);
4080   f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
4081 		      unsigned_type_node);
4082   f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
4083 		      ptr_type_node);
4084   f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
4085 		      ptr_type_node);
4086 
4087   va_list_gpr_counter_field = f_gpr;
4088   va_list_fpr_counter_field = f_fpr;
4089 
4090   DECL_FIELD_CONTEXT (f_gpr) = record;
4091   DECL_FIELD_CONTEXT (f_fpr) = record;
4092   DECL_FIELD_CONTEXT (f_ovf) = record;
4093   DECL_FIELD_CONTEXT (f_sav) = record;
4094 
4095   TREE_CHAIN (record) = type_decl;
4096   TYPE_NAME (record) = type_decl;
4097   TYPE_FIELDS (record) = f_gpr;
4098   TREE_CHAIN (f_gpr) = f_fpr;
4099   TREE_CHAIN (f_fpr) = f_ovf;
4100   TREE_CHAIN (f_ovf) = f_sav;
4101 
4102   layout_type (record);
4103 
4104   /* The correct type is an array type of one element.  */
4105   return build_array_type (record, build_index_type (size_zero_node));
4106 }
4107 
4108 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.  */
4109 
4110 static void
4111 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4112 			     tree type, int *pretend_size ATTRIBUTE_UNUSED,
4113 			     int no_rtl)
4114 {
4115   CUMULATIVE_ARGS next_cum;
4116   rtx save_area = NULL_RTX, mem;
4117   rtx label;
4118   rtx label_ref;
4119   rtx tmp_reg;
4120   rtx nsse_reg;
4121   int set;
4122   tree fntype;
4123   int stdarg_p;
4124   int i;
4125 
4126   if (!TARGET_64BIT)
4127     return;
4128 
4129   if (! cfun->va_list_gpr_size && ! cfun->va_list_fpr_size)
4130     return;
4131 
4132   /* Indicate that space for the varargs save area must be allocated on the stack.  */
4133   ix86_save_varrargs_registers = 1;
4134 
4135   cfun->stack_alignment_needed = 128;
4136 
4137   fntype = TREE_TYPE (current_function_decl);
4138   stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
4139 	      && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
4140 		  != void_type_node));
4141 
4142   /* For varargs, we do not want to skip the dummy va_dcl argument.
4143      For stdargs, we do want to skip the last named argument.  */
4144   next_cum = *cum;
4145   if (stdarg_p)
4146     function_arg_advance (&next_cum, mode, type, 1);
4147 
4148   if (!no_rtl)
4149     save_area = frame_pointer_rtx;
4150 
4151   set = get_varargs_alias_set ();
4152 
4153   for (i = next_cum.regno;
4154        i < ix86_regparm
4155        && i < next_cum.regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
4156        i++)
4157     {
4158       mem = gen_rtx_MEM (Pmode,
4159 			 plus_constant (save_area, i * UNITS_PER_WORD));
4160       MEM_NOTRAP_P (mem) = 1;
4161       set_mem_alias_set (mem, set);
4162       emit_move_insn (mem, gen_rtx_REG (Pmode,
4163 					x86_64_int_parameter_registers[i]));
4164     }
4165 
4166   if (next_cum.sse_nregs && cfun->va_list_fpr_size)
4167     {
4168       /* Now emit code to save SSE registers.  The AX parameter contains the
4169 	 number of SSE parameter registers used to call this function.  We use
4170 	 the sse_prologue_save insn template, which produces a computed jump
4171 	 across the SSE saves.  We need some preparation work to get this working.  */
4172 
4173       label = gen_label_rtx ();
4174       label_ref = gen_rtx_LABEL_REF (Pmode, label);
4175 
4176       /* Compute the address to jump to:
4177          label - eax*4 + nnamed_sse_arguments*4, matching the 4-byte stride used below.  */
4178       tmp_reg = gen_reg_rtx (Pmode);
4179       nsse_reg = gen_reg_rtx (Pmode);
4180       emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
4181       emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
4182 			      gen_rtx_MULT (Pmode, nsse_reg,
4183 					    GEN_INT (4))));
4184       if (next_cum.sse_regno)
4185 	emit_move_insn
4186 	  (nsse_reg,
4187 	   gen_rtx_CONST (DImode,
4188 			  gen_rtx_PLUS (DImode,
4189 					label_ref,
4190 					GEN_INT (next_cum.sse_regno * 4))));
4191       else
4192 	emit_move_insn (nsse_reg, label_ref);
4193       emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
4194 
4195       /* Compute the address of the memory block we save into.  We always use a
4196 	 pointer pointing 127 bytes past the first byte to store; this is needed
4197 	 to keep the instruction size limited to 4 bytes.  */
4198       tmp_reg = gen_reg_rtx (Pmode);
4199       emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
4200 			      plus_constant (save_area,
4201 					     8 * REGPARM_MAX + 127)));
4202       mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
4203       MEM_NOTRAP_P (mem) = 1;
4204       set_mem_alias_set (mem, set);
4205       set_mem_align (mem, BITS_PER_WORD);
4206 
4207       /* And finally do the dirty job!  */
4208       emit_insn (gen_sse_prologue_save (mem, nsse_reg,
4209 					GEN_INT (next_cum.sse_regno), label));
4210     }
4211 
4212 }
4213 
4214 /* Implement va_start.  */
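/* Hedged worked example (assumed, not from the original sources): for a
   hypothetical prototype

     void f (int a, double b, ...);

   one GP and one SSE register are used by the named arguments, so the
   code below sets gp_offset to 8 and fp_offset to REGPARM_MAX*8 + 16
   (64 when REGPARM_MAX is 6).  */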
4215 
4216 void
4217 ix86_va_start (tree valist, rtx nextarg)
4218 {
4219   HOST_WIDE_INT words, n_gpr, n_fpr;
4220   tree f_gpr, f_fpr, f_ovf, f_sav;
4221   tree gpr, fpr, ovf, sav, t;
4222   tree type;
4223 
4224   /* Only 64bit target needs something special.  */
4225   if (!TARGET_64BIT)
4226     {
4227       std_expand_builtin_va_start (valist, nextarg);
4228       return;
4229     }
4230 
4231   f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4232   f_fpr = TREE_CHAIN (f_gpr);
4233   f_ovf = TREE_CHAIN (f_fpr);
4234   f_sav = TREE_CHAIN (f_ovf);
4235 
4236   valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
4237   gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
4238   fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
4239   ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
4240   sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
4241 
4242   /* Count number of gp and fp argument registers used.  */
4243   words = current_function_args_info.words;
4244   n_gpr = current_function_args_info.regno;
4245   n_fpr = current_function_args_info.sse_regno;
4246 
4247   if (TARGET_DEBUG_ARG)
4248     fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
4249 	     (int) words, (int) n_gpr, (int) n_fpr);
4250 
4251   if (cfun->va_list_gpr_size)
4252     {
4253       type = TREE_TYPE (gpr);
4254       t = build2 (MODIFY_EXPR, type, gpr,
4255 		  build_int_cst (type, n_gpr * 8));
4256       TREE_SIDE_EFFECTS (t) = 1;
4257       expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4258     }
4259 
4260   if (cfun->va_list_fpr_size)
4261     {
4262       type = TREE_TYPE (fpr);
4263       t = build2 (MODIFY_EXPR, type, fpr,
4264 		  build_int_cst (type, n_fpr * 16 + 8*REGPARM_MAX));
4265       TREE_SIDE_EFFECTS (t) = 1;
4266       expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4267     }
4268 
4269   /* Find the overflow area.  */
4270   type = TREE_TYPE (ovf);
4271   t = make_tree (type, virtual_incoming_args_rtx);
4272   if (words != 0)
4273     t = build2 (PLUS_EXPR, type, t,
4274 	        build_int_cst (type, words * UNITS_PER_WORD));
4275   t = build2 (MODIFY_EXPR, type, ovf, t);
4276   TREE_SIDE_EFFECTS (t) = 1;
4277   expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4278 
4279   if (cfun->va_list_gpr_size || cfun->va_list_fpr_size)
4280     {
4281       /* Find the register save area.
4282 	 The function prologue saves it right above the stack frame.  */
4283       type = TREE_TYPE (sav);
4284       t = make_tree (type, frame_pointer_rtx);
4285       t = build2 (MODIFY_EXPR, type, sav, t);
4286       TREE_SIDE_EFFECTS (t) = 1;
4287       expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4288     }
4289 }
4290 
4291 /* Implement va_arg.  */
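/* Hedged illustration (not from the original sources): fetching a plain
   int with va_arg needs one integer register, so the gimple emitted
   below compares gp_offset against REGPARM_MAX*8 (48); if the register
   save area is exhausted it falls back to the overflow area, otherwise
   it reads from reg_save_area + gp_offset and bumps gp_offset by 8.  */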
4292 
4293 tree
4294 ix86_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p)
4295 {
4296   static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
4297   tree f_gpr, f_fpr, f_ovf, f_sav;
4298   tree gpr, fpr, ovf, sav, t;
4299   int size, rsize;
4300   tree lab_false, lab_over = NULL_TREE;
4301   tree addr, t2;
4302   rtx container;
4303   int indirect_p = 0;
4304   tree ptrtype;
4305   enum machine_mode nat_mode;
4306 
4307   /* Only 64bit target needs something special.  */
4308   if (!TARGET_64BIT)
4309     return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
4310 
4311   f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4312   f_fpr = TREE_CHAIN (f_gpr);
4313   f_ovf = TREE_CHAIN (f_fpr);
4314   f_sav = TREE_CHAIN (f_ovf);
4315 
4316   valist = build_va_arg_indirect_ref (valist);
4317   gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
4318   fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
4319   ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
4320   sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
4321 
4322   indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
4323   if (indirect_p)
4324     type = build_pointer_type (type);
4325   size = int_size_in_bytes (type);
4326   rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4327 
4328   nat_mode = type_natural_mode (type);
4329   container = construct_container (nat_mode, TYPE_MODE (type), type, 0,
4330 				   REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
4331 
4332   /* Pull the value out of the saved registers.  */
4333 
4334   addr = create_tmp_var (ptr_type_node, "addr");
4335   DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
4336 
4337   if (container)
4338     {
4339       int needed_intregs, needed_sseregs;
4340       bool need_temp;
4341       tree int_addr, sse_addr;
4342 
4343       lab_false = create_artificial_label ();
4344       lab_over = create_artificial_label ();
4345 
4346       examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
4347 
4348       need_temp = (!REG_P (container)
4349 		   && ((needed_intregs && TYPE_ALIGN (type) > 64)
4350 		       || TYPE_ALIGN (type) > 128));
4351 
4352       /* In case we are passing a structure, verify that it is a consecutive
4353          block in the register save area.  If not, we need to do moves.  */
4354       if (!need_temp && !REG_P (container))
4355 	{
4356 	  /* Verify that all registers are strictly consecutive.  */
4357 	  if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
4358 	    {
4359 	      int i;
4360 
4361 	      for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
4362 		{
4363 		  rtx slot = XVECEXP (container, 0, i);
4364 		  if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
4365 		      || INTVAL (XEXP (slot, 1)) != i * 16)
4366 		    need_temp = 1;
4367 		}
4368 	    }
4369 	  else
4370 	    {
4371 	      int i;
4372 
4373 	      for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
4374 		{
4375 		  rtx slot = XVECEXP (container, 0, i);
4376 		  if (REGNO (XEXP (slot, 0)) != (unsigned int) i
4377 		      || INTVAL (XEXP (slot, 1)) != i * 8)
4378 		    need_temp = 1;
4379 		}
4380 	    }
4381 	}
4382       if (!need_temp)
4383 	{
4384 	  int_addr = addr;
4385 	  sse_addr = addr;
4386 	}
4387       else
4388 	{
4389 	  int_addr = create_tmp_var (ptr_type_node, "int_addr");
4390 	  DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set ();
4391 	  sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
4392 	  DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set ();
4393 	}
4394 
4395       /* First ensure that we fit completely in registers.  */
4396       if (needed_intregs)
4397 	{
4398 	  t = build_int_cst (TREE_TYPE (gpr),
4399 			     (REGPARM_MAX - needed_intregs + 1) * 8);
4400 	  t = build2 (GE_EXPR, boolean_type_node, gpr, t);
4401 	  t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
4402 	  t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
4403 	  gimplify_and_add (t, pre_p);
4404 	}
4405       if (needed_sseregs)
4406 	{
4407 	  t = build_int_cst (TREE_TYPE (fpr),
4408 			     (SSE_REGPARM_MAX - needed_sseregs + 1) * 16
4409 			     + REGPARM_MAX * 8);
4410 	  t = build2 (GE_EXPR, boolean_type_node, fpr, t);
4411 	  t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
4412 	  t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
4413 	  gimplify_and_add (t, pre_p);
4414 	}
4415 
4416       /* Compute index to start of area used for integer regs.  */
4417       if (needed_intregs)
4418 	{
4419 	  /* int_addr = gpr + sav; */
4420 	  t = fold_convert (ptr_type_node, gpr);
4421 	  t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
4422 	  t = build2 (MODIFY_EXPR, void_type_node, int_addr, t);
4423 	  gimplify_and_add (t, pre_p);
4424 	}
4425       if (needed_sseregs)
4426 	{
4427 	  /* sse_addr = fpr + sav; */
4428 	  t = fold_convert (ptr_type_node, fpr);
4429 	  t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
4430 	  t = build2 (MODIFY_EXPR, void_type_node, sse_addr, t);
4431 	  gimplify_and_add (t, pre_p);
4432 	}
4433       if (need_temp)
4434 	{
4435 	  int i;
4436 	  tree temp = create_tmp_var (type, "va_arg_tmp");
4437 
4438 	  /* addr = &temp; */
4439 	  t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
4440 	  t = build2 (MODIFY_EXPR, void_type_node, addr, t);
4441 	  gimplify_and_add (t, pre_p);
4442 
4443 	  for (i = 0; i < XVECLEN (container, 0); i++)
4444 	    {
4445 	      rtx slot = XVECEXP (container, 0, i);
4446 	      rtx reg = XEXP (slot, 0);
4447 	      enum machine_mode mode = GET_MODE (reg);
4448 	      tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
4449 	      tree addr_type = build_pointer_type (piece_type);
4450 	      tree src_addr, src;
4451 	      int src_offset;
4452 	      tree dest_addr, dest;
4453 
4454 	      if (SSE_REGNO_P (REGNO (reg)))
4455 		{
4456 		  src_addr = sse_addr;
4457 		  src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
4458 		}
4459 	      else
4460 		{
4461 		  src_addr = int_addr;
4462 		  src_offset = REGNO (reg) * 8;
4463 		}
4464 	      src_addr = fold_convert (addr_type, src_addr);
4465 	      src_addr = fold (build2 (PLUS_EXPR, addr_type, src_addr,
4466 				       size_int (src_offset)));
4467 	      src = build_va_arg_indirect_ref (src_addr);
4468 
4469 	      dest_addr = fold_convert (addr_type, addr);
4470 	      dest_addr = fold (build2 (PLUS_EXPR, addr_type, dest_addr,
4471 					size_int (INTVAL (XEXP (slot, 1)))));
4472 	      dest = build_va_arg_indirect_ref (dest_addr);
4473 
4474 	      t = build2 (MODIFY_EXPR, void_type_node, dest, src);
4475 	      gimplify_and_add (t, pre_p);
4476 	    }
4477 	}
4478 
4479       if (needed_intregs)
4480 	{
4481 	  t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
4482 		      build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
4483 	  t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
4484 	  gimplify_and_add (t, pre_p);
4485 	}
4486       if (needed_sseregs)
4487 	{
4488 	  t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
4489 		      build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
4490 	  t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
4491 	  gimplify_and_add (t, pre_p);
4492 	}
4493 
4494       t = build1 (GOTO_EXPR, void_type_node, lab_over);
4495       gimplify_and_add (t, pre_p);
4496 
4497       t = build1 (LABEL_EXPR, void_type_node, lab_false);
4498       append_to_statement_list (t, pre_p);
4499     }
4500 
4501   /* ... otherwise out of the overflow area.  */
4502 
4503   /* Care for on-stack alignment if needed.  */
4504   if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64
4505       || integer_zerop (TYPE_SIZE (type)))
4506     t = ovf;
4507   else
4508     {
4509       HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
4510       t = build2 (PLUS_EXPR, TREE_TYPE (ovf), ovf,
4511 		  build_int_cst (TREE_TYPE (ovf), align - 1));
4512       t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
4513 		  build_int_cst (TREE_TYPE (t), -align));
4514     }
4515   gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
4516 
4517   t2 = build2 (MODIFY_EXPR, void_type_node, addr, t);
4518   gimplify_and_add (t2, pre_p);
4519 
4520   t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
4521 	      build_int_cst (TREE_TYPE (t), rsize * UNITS_PER_WORD));
4522   t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
4523   gimplify_and_add (t, pre_p);
4524 
4525   if (container)
4526     {
4527       t = build1 (LABEL_EXPR, void_type_node, lab_over);
4528       append_to_statement_list (t, pre_p);
4529     }
4530 
4531   ptrtype = build_pointer_type (type);
4532   addr = fold_convert (ptrtype, addr);
4533 
4534   if (indirect_p)
4535     addr = build_va_arg_indirect_ref (addr);
4536   return build_va_arg_indirect_ref (addr);
4537 }
4538 
4539 /* Return nonzero if OPNUM's MEM should be matched
4540    in movabs* patterns.  */
4541 
4542 int
4543 ix86_check_movabs (rtx insn, int opnum)
4544 {
4545   rtx set, mem;
4546 
4547   set = PATTERN (insn);
4548   if (GET_CODE (set) == PARALLEL)
4549     set = XVECEXP (set, 0, 0);
4550   gcc_assert (GET_CODE (set) == SET);
4551   mem = XEXP (set, opnum);
4552   while (GET_CODE (mem) == SUBREG)
4553     mem = SUBREG_REG (mem);
4554   gcc_assert (GET_CODE (mem) == MEM);
4555   return (volatile_ok || !MEM_VOLATILE_P (mem));
4556 }
4557 
4558 /* Initialize the table of extra 80387 mathematical constants.  */
4559 
4560 static void
4561 init_ext_80387_constants (void)
4562 {
4563   static const char * cst[5] =
4564   {
4565     "0.3010299956639811952256464283594894482",  /* 0: fldlg2  */
4566     "0.6931471805599453094286904741849753009",  /* 1: fldln2  */
4567     "1.4426950408889634073876517827983434472",  /* 2: fldl2e  */
4568     "3.3219280948873623478083405569094566090",  /* 3: fldl2t  */
4569     "3.1415926535897932385128089594061862044",  /* 4: fldpi   */
4570   };
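  /* These are, respectively, log10(2), ln(2), log2(e), log2(10) and pi,
     i.e. the values loaded by the fldlg2, fldln2, fldl2e, fldl2t and
     fldpi instructions named above.  */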
4571   int i;
4572 
4573   for (i = 0; i < 5; i++)
4574     {
4575       real_from_string (&ext_80387_constants_table[i], cst[i]);
4576       /* Ensure each constant is rounded to XFmode precision.  */
4577       real_convert (&ext_80387_constants_table[i],
4578 		    XFmode, &ext_80387_constants_table[i]);
4579     }
4580 
4581   ext_80387_constants_init = 1;
4582 }
4583 
4584 /* Return a nonzero index if the constant can be loaded with a special
4585    80387 instruction: 1 for 0.0 (fldz), 2 for 1.0 (fld1), 3 through 7 for
        the extended constants.  Return 0 otherwise, or -1 if X is not a
        floating-point CONST_DOUBLE.  */
4586 
4587 int
4588 standard_80387_constant_p (rtx x)
4589 {
4590   if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
4591     return -1;
4592 
4593   if (x == CONST0_RTX (GET_MODE (x)))
4594     return 1;
4595   if (x == CONST1_RTX (GET_MODE (x)))
4596     return 2;
4597 
4598   /* For XFmode constants, try to find a special 80387 instruction when
4599      optimizing for size or on those CPUs that benefit from them.  */
4600   if (GET_MODE (x) == XFmode
4601       && (optimize_size || x86_ext_80387_constants & TUNEMASK))
4602     {
4603       REAL_VALUE_TYPE r;
4604       int i;
4605 
4606       if (! ext_80387_constants_init)
4607 	init_ext_80387_constants ();
4608 
4609       REAL_VALUE_FROM_CONST_DOUBLE (r, x);
4610       for (i = 0; i < 5; i++)
4611         if (real_identical (&r, &ext_80387_constants_table[i]))
4612 	  return i + 3;
4613     }
4614 
4615   return 0;
4616 }
4617 
4618 /* Return the opcode of the special instruction to be used to load
4619    the constant X.  */
4620 
4621 const char *
4622 standard_80387_constant_opcode (rtx x)
4623 {
4624   switch (standard_80387_constant_p (x))
4625     {
4626     case 1:
4627       return "fldz";
4628     case 2:
4629       return "fld1";
4630     case 3:
4631       return "fldlg2";
4632     case 4:
4633       return "fldln2";
4634     case 5:
4635       return "fldl2e";
4636     case 6:
4637       return "fldl2t";
4638     case 7:
4639       return "fldpi";
4640     default:
4641       gcc_unreachable ();
4642     }
4643 }
4644 
4645 /* Return the CONST_DOUBLE representing the 80387 constant that is
4646    loaded by the specified special instruction.  The argument IDX
4647    matches the return value from standard_80387_constant_p.  */
4648 
4649 rtx
4650 standard_80387_constant_rtx (int idx)
4651 {
4652   int i;
4653 
4654   if (! ext_80387_constants_init)
4655     init_ext_80387_constants ();
4656 
4657   switch (idx)
4658     {
4659     case 3:
4660     case 4:
4661     case 5:
4662     case 6:
4663     case 7:
4664       i = idx - 3;
4665       break;
4666 
4667     default:
4668       gcc_unreachable ();
4669     }
4670 
4671   return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
4672 				       XFmode);
4673 }
4674 
4675 /* Return 1 if MODE is a valid vector mode for SSE.  */
4676 static int
4677 standard_sse_mode_p (enum machine_mode mode)
4678 {
4679   switch (mode)
4680     {
4681     case V16QImode:
4682     case V8HImode:
4683     case V4SImode:
4684     case V2DImode:
4685     case V4SFmode:
4686     case V2DFmode:
4687       return 1;
4688 
4689     default:
4690       return 0;
4691     }
4692 }
4693 
4694 /* Return nonzero if X is an FP constant we can load into an SSE register
4695    without using memory.  */
4696 int
4697 standard_sse_constant_p (rtx x)
4698 {
4699   enum machine_mode mode = GET_MODE (x);
4700 
4701   if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
4702     return 1;
4703   if (vector_all_ones_operand (x, mode)
4704       && standard_sse_mode_p (mode))
4705     return TARGET_SSE2 ? 2 : -1;
4706 
4707   return 0;
4708 }
4709 
4710 /* Return the opcode of the special instruction to be used to load
4711    the constant X.  */
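/* Note that every one of these loads the constant without touching memory:
   xorps/xorpd/pxor of a register with itself yields all-zero bits, and
   pcmpeqd of a register with itself yields all-one bits.  */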
4712 
4713 const char *
4714 standard_sse_constant_opcode (rtx insn, rtx x)
4715 {
4716   switch (standard_sse_constant_p (x))
4717     {
4718     case 1:
4719       if (get_attr_mode (insn) == MODE_V4SF)
4720         return "xorps\t%0, %0";
4721       else if (get_attr_mode (insn) == MODE_V2DF)
4722         return "xorpd\t%0, %0";
4723       else
4724         return "pxor\t%0, %0";
4725     case 2:
4726       return "pcmpeqd\t%0, %0";
4727     }
4728   gcc_unreachable ();
4729 }
4730 
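/* Validate the memory operand of a cmpxchg8b under -fpic on 32-bit targets.
   A guess at the rationale: cmpxchg8b hard-codes %ebx as one of its value
   operands, and with PIC %ebx is also the GOT pointer, so addresses that
   themselves use %ebx (register class BREG) are rejected here to keep the
   two uses from colliding.  */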
4731 int
4732 cmpxchg8b_mem_constraint (rtx op)
4733 {
4734   struct ix86_address parts;
4735 
4736   if (TARGET_64BIT || !flag_pic)
4737     return 1;
4738 
4739   if (GET_CODE (op) != MEM)
4740     return 0;
4741   if (!ix86_decompose_address (XEXP (op, 0), &parts))
4742     return 0;
4743 
4744   if (parts.base && GET_CODE (parts.base) == SUBREG)
4745     parts.base = SUBREG_REG (parts.base);
4746   if (parts.index && GET_CODE (parts.index) == SUBREG)
4747     parts.index = SUBREG_REG (parts.index);
4748 
4749   if (parts.base && REG_P (parts.base)
4750       && REGNO_REG_CLASS (REGNO (parts.base)) == BREG)
4751     return 0;
4752   if (parts.index && REG_P (parts.index)
4753       && REGNO_REG_CLASS (REGNO (parts.index)) == BREG)
4754     return 0;
4755 
4756   return 1;
4757 }
4758 
4759 /* Return 1 if OP contains a symbol reference.  */
4760 
4761 int
4762 symbolic_reference_mentioned_p (rtx op)
4763 {
4764   const char *fmt;
4765   int i;
4766 
4767   if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
4768     return 1;
4769 
4770   fmt = GET_RTX_FORMAT (GET_CODE (op));
4771   for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
4772     {
4773       if (fmt[i] == 'E')
4774 	{
4775 	  int j;
4776 
4777 	  for (j = XVECLEN (op, i) - 1; j >= 0; j--)
4778 	    if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
4779 	      return 1;
4780 	}
4781 
4782       else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
4783 	return 1;
4784     }
4785 
4786   return 0;
4787 }
4788 
4789 /* Return 1 if it is appropriate to emit `ret' instructions in the
4790    body of a function.  Do this only if the epilogue is simple, needing a
4791    couple of insns.  Prior to reloading, we can't tell how many registers
4792    must be saved, so return 0 then.  Return 0 if there is no frame
4793    marker to de-allocate.  */
4794 
4795 int
4796 ix86_can_use_return_insn_p (void)
4797 {
4798   struct ix86_frame frame;
4799 
4800   if (! reload_completed || frame_pointer_needed)
4801     return 0;
4802 
4803   /* Don't allow more than 32k of pop, since that's all we can do
4804      with one instruction.  */
4805   if (current_function_pops_args
4806       && current_function_args_size >= 32768)
4807     return 0;
4808 
4809   ix86_compute_frame_layout (&frame);
4810   return frame.to_allocate == 0 && frame.nmsave_args == 0 && frame.nregs == 0;
4811 }
4812 
4813 /* Value should be nonzero if functions must have frame pointers.
4814    Zero means the frame pointer need not be set up (and parms may
4815    be accessed via the stack pointer) in functions that seem suitable.  */
4816 
4817 int
4818 ix86_frame_pointer_required (void)
4819 {
4820   /* If we accessed previous frames, then the generated code expects
4821      to be able to access the saved ebp value in our frame.  */
4822   if (cfun->machine->accesses_prev_frame)
4823     return 1;
4824 
4825   /* Several x86 OSes need a frame pointer for other reasons,
4826      usually pertaining to setjmp.  */
4827   if (SUBTARGET_FRAME_POINTER_REQUIRED)
4828     return 1;
4829 
4830   if (TARGET_SAVE_ARGS)
4831     return 1;
4832 
4833   /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
4834      the frame pointer by default.  Turn it back on now if we've not
4835      got a leaf function.  */
4836   if (TARGET_OMIT_LEAF_FRAME_POINTER
4837       && (!current_function_is_leaf
4838 	  || ix86_current_function_calls_tls_descriptor))
4839     return 1;
4840 
4841   if (current_function_profile)
4842     return 1;
4843 
4844   return 0;
4845 }
4846 
4847 /* Record that the current function accesses previous call frames.  */
4848 
4849 void
4850 ix86_setup_frame_addresses (void)
4851 {
4852   cfun->machine->accesses_prev_frame = 1;
4853 }
4854 
4855 #if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
4856 # define USE_HIDDEN_LINKONCE 1
4857 #else
4858 # define USE_HIDDEN_LINKONCE 0
4859 #endif
4860 
4861 static int pic_labels_used;
4862 
4863 /* Fills in the label name that should be used for a pc thunk for
4864    the given register.  */
4865 
4866 static void
4867 get_pc_thunk_name (char name[32], unsigned int regno)
4868 {
4869   gcc_assert (!TARGET_64BIT);
4870 
4871   if (USE_HIDDEN_LINKONCE)
4872     sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
4873   else
4874     ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
4875 }
4876 
4877 
4878 /* Emit the pc thunks used by -fpic code.  Each thunk loads its register
4879    (not only %ebx) with the return address of the caller and then returns.  */
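/* Concretely, the thunk body emitted below for register %reg is just

	movl	(%esp), %reg
	ret

   i.e. it copies the caller's return address, which the call left on the
   stack, into the register.  */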
4880 
4881 void
4882 ix86_file_end (void)
4883 {
4884   rtx xops[2];
4885   int regno;
4886 
4887   for (regno = 0; regno < 8; ++regno)
4888     {
4889       char name[32];
4890 
4891       if (! ((pic_labels_used >> regno) & 1))
4892 	continue;
4893 
4894       get_pc_thunk_name (name, regno);
4895 
4896 #if TARGET_MACHO
4897       if (TARGET_MACHO)
4898 	{
4899 	  switch_to_section (darwin_sections[text_coal_section]);
4900 	  fputs ("\t.weak_definition\t", asm_out_file);
4901 	  assemble_name (asm_out_file, name);
4902 	  fputs ("\n\t.private_extern\t", asm_out_file);
4903 	  assemble_name (asm_out_file, name);
4904 	  fputs ("\n", asm_out_file);
4905 	  ASM_OUTPUT_LABEL (asm_out_file, name);
4906 	}
4907       else
4908 #endif
4909       if (USE_HIDDEN_LINKONCE)
4910 	{
4911 	  tree decl;
4912 
4913 	  decl = build_decl (FUNCTION_DECL, get_identifier (name),
4914 			     error_mark_node);
4915 	  TREE_PUBLIC (decl) = 1;
4916 	  TREE_STATIC (decl) = 1;
4917 	  DECL_ONE_ONLY (decl) = 1;
4918 
4919 	  (*targetm.asm_out.unique_section) (decl, 0);
4920 	  switch_to_section (get_named_section (decl, NULL, 0));
4921 
4922 	  (*targetm.asm_out.globalize_label) (asm_out_file, name);
4923 	  fputs ("\t.hidden\t", asm_out_file);
4924 	  assemble_name (asm_out_file, name);
4925 	  fputc ('\n', asm_out_file);
4926 	  ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
4927 	}
4928       else
4929 	{
4930 	  switch_to_section (text_section);
4931 	  ASM_OUTPUT_LABEL (asm_out_file, name);
4932 	}
4933 
4934       xops[0] = gen_rtx_REG (SImode, regno);
4935       xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
4936       output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
4937       output_asm_insn ("ret", xops);
4938     }
4939 
4940   if (NEED_INDICATE_EXEC_STACK)
4941     file_end_indicate_exec_stack ();
4942 }
4943 
4944 /* Emit code for the SET_GOT patterns.  */
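/* As a quick orientation (taken from the output_asm_insn strings below),
   two sequences can result.  For PIC without deep branch prediction the
   classic call/pop idiom is emitted:

	call	.L2
   .L2:	popl	%reg
	addl	$_GLOBAL_OFFSET_TABLE_+[.-.L2], %reg

   With deep branch prediction a call to the pc thunk emitted by
   ix86_file_end is used instead, which keeps the CPU's return-address
   predictor balanced:

	call	__i686.get_pc_thunk.bx
	addl	$_GLOBAL_OFFSET_TABLE_, %ebx

   Label and register names are illustrative; %1 is GOT_SYMBOL_NAME.  */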
4945 
4946 const char *
4947 output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
4948 {
4949   rtx xops[3];
4950 
4951   xops[0] = dest;
4952   xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
4953 
4954   if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
4955     {
4956       xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
4957 
4958       if (!flag_pic)
4959 	output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
4960       else
4961 	output_asm_insn ("call\t%a2", xops);
4962 
4963 #if TARGET_MACHO
4964       /* Output the Mach-O "canonical" label name ("Lxx$pb") here too.  This
4965          is what will be referenced by the Mach-O PIC subsystem.  */
4966       if (!label)
4967 	ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
4968 #endif
4969 
4970       (*targetm.asm_out.internal_label) (asm_out_file, "L",
4971 				 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
4972 
4973       if (flag_pic)
4974 	output_asm_insn ("pop{l}\t%0", xops);
4975     }
4976   else
4977     {
4978       char name[32];
4979       get_pc_thunk_name (name, REGNO (dest));
4980       pic_labels_used |= 1 << REGNO (dest);
4981 
4982       xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
4983       xops[2] = gen_rtx_MEM (QImode, xops[2]);
4984       output_asm_insn ("call\t%X2", xops);
4985       /* Output the Mach-O "canonical" label name ("Lxx$pb") here too.  This
4986          is what will be referenced by the Mach-O PIC subsystem.  */
4987 #if TARGET_MACHO
4988       if (!label)
4989 	ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
4990       else
4991         targetm.asm_out.internal_label (asm_out_file, "L",
4992 					   CODE_LABEL_NUMBER (label));
4993 #endif
4994     }
4995 
4996   if (TARGET_MACHO)
4997     return "";
4998 
4999   if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
5000     output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
5001   else
5002     output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
5003 
5004   return "";
5005 }
5006 
5007 /* Generate a "push" pattern for input ARG.  */
5008 
5009 static rtx
5010 gen_push (rtx arg)
5011 {
5012   return gen_rtx_SET (VOIDmode,
5013 		      gen_rtx_MEM (Pmode,
5014 				   gen_rtx_PRE_DEC (Pmode,
5015 						    stack_pointer_rtx)),
5016 		      arg);
5017 }
5018 
5019 /* Return the number of an unused call-clobbered register if one is
5020    available for the entire function, otherwise INVALID_REGNUM.  */
5021 
5022 static unsigned int
5023 ix86_select_alt_pic_regnum (void)
5024 {
5025   if (current_function_is_leaf && !current_function_profile
5026       && !ix86_current_function_calls_tls_descriptor)
5027     {
5028       int i;
5029       for (i = 2; i >= 0; --i)
5030         if (!regs_ever_live[i])
5031 	  return i;
5032     }
5033 
5034   return INVALID_REGNUM;
5035 }
5036 
5037 /* Return 1 if we need to save REGNO.  */
5038 static int
5039 ix86_save_reg (unsigned int regno, int maybe_eh_return)
5040 {
5041   if (pic_offset_table_rtx
5042       && regno == REAL_PIC_OFFSET_TABLE_REGNUM
5043       && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
5044 	  || current_function_profile
5045 	  || current_function_calls_eh_return
5046 	  || current_function_uses_const_pool))
5047     {
5048       if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
5049 	return 0;
5050       return 1;
5051     }
5052 
5053   if (current_function_calls_eh_return && maybe_eh_return)
5054     {
5055       unsigned i;
5056       for (i = 0; ; i++)
5057 	{
5058 	  unsigned test = EH_RETURN_DATA_REGNO (i);
5059 	  if (test == INVALID_REGNUM)
5060 	    break;
5061 	  if (test == regno)
5062 	    return 1;
5063 	}
5064     }
5065 
5066   if (cfun->machine->force_align_arg_pointer
5067       && regno == REGNO (cfun->machine->force_align_arg_pointer))
5068     return 1;
5069 
5070   return (regs_ever_live[regno]
5071 	  && !call_used_regs[regno]
5072 	  && !fixed_regs[regno]
5073 	  && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
5074 }
5075 
5076 /* Return number of registers to be saved on the stack.  */
5077 
5078 static int
5079 ix86_nsaved_regs (void)
5080 {
5081   int nregs = 0;
5082   int regno;
5083 
5084   for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
5085     if (ix86_save_reg (regno, true))
5086       nregs++;
5087   return nregs;
5088 }
5089 
5090 /* Return number of arguments to be saved on the stack with
5091    -msave-args.  */
5092 
5093 static int
5094 ix86_nsaved_args (void)
5095 {
5096   if (TARGET_SAVE_ARGS)
5097     return current_function_args_info.regno - current_function_returns_struct;
5098   else
5099     return 0;
5100 }
5101 
5102 /* Return the offset between two registers, one to be eliminated, and the other
5103    its replacement, at the start of a routine.  */
5104 
5105 HOST_WIDE_INT
5106 ix86_initial_elimination_offset (int from, int to)
5107 {
5108   struct ix86_frame frame;
5109   ix86_compute_frame_layout (&frame);
5110 
5111   if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
5112     return frame.hard_frame_pointer_offset;
5113   else if (from == FRAME_POINTER_REGNUM
5114 	   && to == HARD_FRAME_POINTER_REGNUM)
5115     return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
5116   else
5117     {
5118       gcc_assert (to == STACK_POINTER_REGNUM);
5119 
5120       if (from == ARG_POINTER_REGNUM)
5121 	return frame.stack_pointer_offset;
5122 
5123       gcc_assert (from == FRAME_POINTER_REGNUM);
5124       return frame.stack_pointer_offset - frame.frame_pointer_offset;
5125     }
5126 }
5127 
5128 /* Fill the ix86_frame structure FRAME for the currently compiled function.  */
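/* A rough sketch of the layout computed below, from higher to lower
   addresses (some pieces are absent depending on target flags):

	return address
	saved %ebp (if frame_pointer_needed)	<- hard frame pointer
	argument save area + padding0		(-msave-args only)
	saved general registers (nregs words)
	va-arg register save area		(if used)
	padding1 (aligns the start of the frame)
	local variables (get_frame_size ())
	outgoing arguments			(if ACCUMULATE_OUTGOING_ARGS)
	padding2 (aligns the stack boundary)	<- stack pointer after prologue

   to_allocate is, roughly, the amount the prologue still subtracts from
   the stack pointer beyond what the register pushes already moved it;
   for leaf functions with TARGET_RED_ZONE part of the frame may instead
   live in the red zone below the stack pointer.  */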
5129 
5130 static void
5131 ix86_compute_frame_layout (struct ix86_frame *frame)
5132 {
5133   HOST_WIDE_INT total_size;
5134   unsigned int stack_alignment_needed;
5135   HOST_WIDE_INT offset;
5136   unsigned int preferred_alignment;
5137   HOST_WIDE_INT size = get_frame_size ();
5138 
5139   frame->local_size = size;
5140   frame->nregs = ix86_nsaved_regs ();
5141   frame->nmsave_args = ix86_nsaved_args ();
5142   total_size = size;
5143 
5144   stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
5145   preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
5146 
5147   /* During reload iterations the number of registers saved can change.
5148      Recompute the value as needed.  Do not recompute when the number of
5149      registers didn't change, as reload calls this function multiple times
5150      and does not expect the decision to change within a single iteration.  */
5151   if (!optimize_size
5152       && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
5153     {
5154       int count = frame->nregs;
5155 
5156       cfun->machine->use_fast_prologue_epilogue_nregs = count;
5157       /* The fast prologue uses move instead of push to save registers.  This
5158          is significantly longer, but also executes faster as modern hardware
5159          can execute the moves in parallel, but can't do that for push/pop.
5160 
5161 	 Be careful about choosing which prologue to emit:  when the function
5162 	 takes many instructions to execute, we may as well use the slow
5163 	 version, and likewise when the function is known to be outside a hot
5164 	 spot (this is known only with profile feedback).  Weight the size of
5165 	 the function by the number of registers to save, as it is cheap to use
5166 	 one or two push instructions but very slow to use many of them.  */
5167       if (count)
5168 	count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
5169       if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
5170 	  || (flag_branch_probabilities
5171 	      && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
5172         cfun->machine->use_fast_prologue_epilogue = false;
5173       else
5174         cfun->machine->use_fast_prologue_epilogue
5175 	   = !expensive_function_p (count);
5176     }
5177   if (TARGET_PROLOGUE_USING_MOVE
5178       && cfun->machine->use_fast_prologue_epilogue)
5179     frame->save_regs_using_mov = true;
5180   else
5181     frame->save_regs_using_mov = false;
5182 
5183   if (TARGET_SAVE_ARGS)
5184     {
5185 	cfun->machine->use_fast_prologue_epilogue = true;
5186 	frame->save_regs_using_mov = true;
5187     }
5188 
5189   /* Skip return address and saved base pointer.  */
5190   offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
5191 
5192   frame->hard_frame_pointer_offset = offset;
5193 
5194   /* Do some sanity checking of stack_alignment_needed and
5195      preferred_alignment, since the i386 port is the only one using these
5196      features, and they may break easily.  */
5197 
5198   gcc_assert (!size || stack_alignment_needed);
5199   gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
5200   gcc_assert (preferred_alignment <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
5201   gcc_assert (stack_alignment_needed
5202 	      <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
5203 
5204   if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
5205     stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
5206 
5207   /* Argument save area */
5208   if (TARGET_SAVE_ARGS)
5209     {
5210 	offset += frame->nmsave_args * UNITS_PER_WORD;
5211 	frame->padding0 = (frame->nmsave_args % 2) * UNITS_PER_WORD;
5212 	offset += frame->padding0;
5213     }
5214   else
5215     frame->padding0 = 0;
5216 
5217   /* Register save area */
5218   offset += frame->nregs * UNITS_PER_WORD;
5219 
5220   /* Va-arg area */
5221   if (ix86_save_varrargs_registers)
5222     {
5223       offset += X86_64_VARARGS_SIZE;
5224       frame->va_arg_size = X86_64_VARARGS_SIZE;
5225     }
5226   else
5227     frame->va_arg_size = 0;
5228 
5229   /* Align start of frame for local function.  */
5230   frame->padding1 = ((offset + stack_alignment_needed - 1)
5231 		     & -stack_alignment_needed) - offset;
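  /* For example, with offset == 20 and stack_alignment_needed == 16 the
     expression above is ((20 + 15) & -16) - 20 == 32 - 20, i.e. 12 bytes
     of padding.  */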
5232 
5233   offset += frame->padding1;
5234 
5235   /* Frame pointer points here.  */
5236   frame->frame_pointer_offset = offset;
5237 
5238   offset += size;
5239 
5240   /* Add the outgoing arguments area.  It can be skipped if we eliminated
5241      all the function calls as dead code.
5242      Skipping is however impossible when the function calls alloca, since
5243      the alloca expander assumes that the last
5244      current_function_outgoing_args_size bytes of the stack frame are unused.  */
5245   if (ACCUMULATE_OUTGOING_ARGS
5246       && (!current_function_is_leaf || current_function_calls_alloca
5247 	  || ix86_current_function_calls_tls_descriptor))
5248     {
5249       offset += current_function_outgoing_args_size;
5250       frame->outgoing_arguments_size = current_function_outgoing_args_size;
5251     }
5252   else
5253     frame->outgoing_arguments_size = 0;
5254 
5255   /* Align stack boundary.  Only needed if we're calling another function
5256      or using alloca.  */
5257   if (!current_function_is_leaf || current_function_calls_alloca
5258       || ix86_current_function_calls_tls_descriptor)
5259     frame->padding2 = ((offset + preferred_alignment - 1)
5260 		       & -preferred_alignment) - offset;
5261   else
5262     frame->padding2 = 0;
5263 
5264   offset += frame->padding2;
5265 
5266   /* We've reached end of stack frame.  */
5267   frame->stack_pointer_offset = offset;
5268 
5269   /* Size prologue needs to allocate.  */
5270   frame->to_allocate =
5271     (size + frame->padding1 + frame->padding2
5272      + frame->outgoing_arguments_size + frame->va_arg_size);
5273 
5274   if (!TARGET_SAVE_ARGS
5275       && ((!frame->to_allocate && frame->nregs <= 1)
5276 	  || (TARGET_64BIT
5277 	      && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000)))
5278     frame->save_regs_using_mov = false;
5279 
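  /* The red zone is the area just below the stack pointer that the ABI
     guarantees is not clobbered by signal or interrupt handlers, so a
     leaf function may keep data there without adjusting the stack
     pointer.  RED_ZONE_SIZE and RED_ZONE_RESERVE (defined in the target
     headers; 128 and 8 bytes respectively, if memory serves) bound how
     much of the frame may be moved into it.  */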
5280   if (TARGET_RED_ZONE && current_function_sp_is_unchanging
5281       && current_function_is_leaf
5282       && !ix86_current_function_calls_tls_descriptor)
5283     {
5284       frame->red_zone_size = frame->to_allocate;
5285       if (frame->save_regs_using_mov)
5286       {
5287 	  frame->red_zone_size
5288 	    += (frame->nregs + frame->nmsave_args) * UNITS_PER_WORD;
5289 	  frame->red_zone_size += frame->padding0;
5290       }
5291       if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
5292 	frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
5293     }
5294   else
5295     frame->red_zone_size = 0;
5296   frame->to_allocate -= frame->red_zone_size;
5297   frame->stack_pointer_offset -= frame->red_zone_size;
5298 #if 0
5299   fprintf (stderr, "nmsave_args: %i\n", frame->nmsave_args);
5300   fprintf (stderr, "padding0: %i\n", frame->padding0);
5301   fprintf (stderr, "nregs: %i\n", frame->nregs);
5302   fprintf (stderr, "size: %i\n", size);
5303   fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
5304   fprintf (stderr, "padding1: %i\n", frame->padding1);
5305   fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
5306   fprintf (stderr, "padding2: %i\n", frame->padding2);
5307   fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
5308   fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
5309   fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
5310   fprintf (stderr, "hard_frame_pointer_offset: %i\n",
5311 	   frame->hard_frame_pointer_offset);
5312   fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
5313 #endif
5314 }
5315 
5316 /* Emit code to save registers in the prologue.  */
5317 
5318 static void
5319 ix86_emit_save_regs (void)
5320 {
5321   unsigned int regno;
5322   rtx insn;
5323 
5324   if (TARGET_SAVE_ARGS)
5325     {
5326       int i;
5327       int nsaved = ix86_nsaved_args ();
5328       int start = cfun->returns_struct;
5329       for (i = start; i < start + nsaved; i++)
5330 	{
5331 	  regno = x86_64_int_parameter_registers[i];
5332 	  insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
5333 	  RTX_FRAME_RELATED_P (insn) = 1;
5334 	}
5335       if (nsaved % 2 != 0)
5336 	pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5337 				   GEN_INT (-UNITS_PER_WORD), -1);
5338     }
5339 
5340   for (regno = FIRST_PSEUDO_REGISTER; regno-- > 0; )
5341     if (ix86_save_reg (regno, true))
5342       {
5343 	insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
5344 	RTX_FRAME_RELATED_P (insn) = 1;
5345       }
5346 }
5347 
5348 /* Emit code to save registers using MOV insns.  The first register
5349    is saved at POINTER + OFFSET.  */
5350 static void
5351 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
5352 {
5353   unsigned int regno;
5354   rtx insn;
5355 
5356   for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5357     if (ix86_save_reg (regno, true))
5358       {
5359 	insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
5360 					       Pmode, offset),
5361 			       gen_rtx_REG (Pmode, regno));
5362 	RTX_FRAME_RELATED_P (insn) = 1;
5363 	offset += UNITS_PER_WORD;
5364       }
5365 
5366   if (TARGET_SAVE_ARGS)
5367     {
5368       int i;
5369       int nsaved = ix86_nsaved_args ();
5370       int start = cfun->returns_struct;
5371       if (nsaved % 2 != 0)
5372 	offset += UNITS_PER_WORD;
5373       for (i = start + nsaved - 1; i >= start; i--)
5374 	{
5375 	  regno = x86_64_int_parameter_registers[i];
5376 	  insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
5377 						 Pmode, offset),
5378 				 gen_rtx_REG (Pmode, regno));
5379 	  RTX_FRAME_RELATED_P (insn) = 1;
5380 	  offset += UNITS_PER_WORD;
5381 	}
5382     }
5383 
5384 }
5385 
5386 /* Expand prologue or epilogue stack adjustment.
5387    The pattern exists to put a dependency on all ebp-based memory accesses.
5388    STYLE should be negative if instructions should be marked as frame related,
5389    zero if the %r11 register is live and cannot be freely used, and positive
5390    otherwise.  */
5391 
5392 static void
5393 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
5394 {
5395   rtx insn;
5396 
5397   if (! TARGET_64BIT)
5398     insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
5399   else if (x86_64_immediate_operand (offset, DImode))
5400     insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
5401   else
5402     {
5403       rtx r11;
5404       /* r11 is used by indirect sibcall return as well, set before the
5405 	 epilogue and used after the epilogue.  ATM indirect sibcall
5406 	 shouldn't be used together with huge frame sizes in one
5407 	 function because of the frame_size check in sibcall.c.  */
5408       gcc_assert (style);
5409       r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
5410       insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
5411       if (style < 0)
5412 	RTX_FRAME_RELATED_P (insn) = 1;
5413       insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
5414 							       offset));
5415     }
5416   if (style < 0)
5417     RTX_FRAME_RELATED_P (insn) = 1;
5418 }
5419 
5420 /* Handle the TARGET_INTERNAL_ARG_POINTER hook.  */
5421 
5422 static rtx
5423 ix86_internal_arg_pointer (void)
5424 {
5425   bool has_force_align_arg_pointer =
5426     (0 != lookup_attribute (ix86_force_align_arg_pointer_string,
5427 			    TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))));
5428   if ((FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN
5429        && DECL_NAME (current_function_decl)
5430        && MAIN_NAME_P (DECL_NAME (current_function_decl))
5431        && DECL_FILE_SCOPE_P (current_function_decl))
5432       || ix86_force_align_arg_pointer
5433       || has_force_align_arg_pointer)
5434     {
5435       /* Nested functions can't realign the stack due to a register
5436 	 conflict.  */
5437       if (DECL_CONTEXT (current_function_decl)
5438 	  && TREE_CODE (DECL_CONTEXT (current_function_decl)) == FUNCTION_DECL)
5439 	{
5440 	  if (ix86_force_align_arg_pointer)
5441 	    warning (0, "-mstackrealign ignored for nested functions");
5442 	  if (has_force_align_arg_pointer)
5443 	    error ("%s not supported for nested functions",
5444 		   ix86_force_align_arg_pointer_string);
5445 	  return virtual_incoming_args_rtx;
5446 	}
5447       cfun->machine->force_align_arg_pointer = gen_rtx_REG (Pmode, 2);
5448       return copy_to_reg (cfun->machine->force_align_arg_pointer);
5449     }
5450   else
5451     return virtual_incoming_args_rtx;
5452 }
5453 
5454 /* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook.
5455    This is called from dwarf2out.c to emit call frame instructions
5456    for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs. */
5457 static void
5458 ix86_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
5459 {
5460   rtx unspec = SET_SRC (pattern);
5461   gcc_assert (GET_CODE (unspec) == UNSPEC);
5462 
5463   switch (index)
5464     {
5465     case UNSPEC_REG_SAVE:
5466       dwarf2out_reg_save_reg (label, XVECEXP (unspec, 0, 0),
5467 			      SET_DEST (pattern));
5468       break;
5469     case UNSPEC_DEF_CFA:
5470       dwarf2out_def_cfa (label, REGNO (SET_DEST (pattern)),
5471 			 INTVAL (XVECEXP (unspec, 0, 0)));
5472       break;
5473     default:
5474       gcc_unreachable ();
5475     }
5476 }
5477 
5478 /* Expand the prologue into a bunch of separate insns.  */
5479 
5480 void
5481 ix86_expand_prologue (void)
5482 {
5483   rtx insn;
5484   bool pic_reg_used;
5485   struct ix86_frame frame;
5486   HOST_WIDE_INT allocate;
5487 
5488   ix86_compute_frame_layout (&frame);
5489 
5490   if (cfun->machine->force_align_arg_pointer)
5491     {
5492       rtx x, y;
5493 
5494       /* Grab the argument pointer.  */
5495       x = plus_constant (stack_pointer_rtx, 4);
5496       y = cfun->machine->force_align_arg_pointer;
5497       insn = emit_insn (gen_rtx_SET (VOIDmode, y, x));
5498       RTX_FRAME_RELATED_P (insn) = 1;
5499 
5500       /* The unwind info consists of two parts: install the fafp as the cfa,
5501 	 and record the fafp as the "save register" of the stack pointer.
5502 	 The latter is there so that the unwinder can see where it should
5503 	 restore the stack pointer across the and insn.  */
5504       x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx), UNSPEC_DEF_CFA);
5505       x = gen_rtx_SET (VOIDmode, y, x);
5506       RTX_FRAME_RELATED_P (x) = 1;
5507       y = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, stack_pointer_rtx),
5508 			  UNSPEC_REG_SAVE);
5509       y = gen_rtx_SET (VOIDmode, cfun->machine->force_align_arg_pointer, y);
5510       RTX_FRAME_RELATED_P (y) = 1;
5511       x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y));
5512       x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
5513       REG_NOTES (insn) = x;
5514 
5515       /* Align the stack.  */
5516       emit_insn (gen_andsi3 (stack_pointer_rtx, stack_pointer_rtx,
5517 			     GEN_INT (-16)));
5518 
5519       /* And here we cheat like madmen with the unwind info.  We force the
5520 	 cfa register back to sp+4, which is exactly what it was at the
5521 	 start of the function.  Re-pushing the return address results in
5522 	 the return at the same spot relative to the cfa, and thus is
5523 	 correct wrt the unwind info.  */
5524       x = cfun->machine->force_align_arg_pointer;
5525       x = gen_frame_mem (Pmode, plus_constant (x, -4));
5526       insn = emit_insn (gen_push (x));
5527       RTX_FRAME_RELATED_P (insn) = 1;
5528 
5529       x = GEN_INT (4);
5530       x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, x), UNSPEC_DEF_CFA);
5531       x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
5532       x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
5533       REG_NOTES (insn) = x;
5534     }
5535 
5536   if (warn_stack_larger_than && frame.local_size > stack_larger_than_size)
5537     warning (0, "stack usage is %d bytes", frame.local_size);
5538 
5539   /* Note: AT&T enter does NOT have reversed args.  Enter is probably
5540      slower on all targets.  Also sdb doesn't like it.  */
5541 
5542   if (frame_pointer_needed)
5543     {
5544       insn = emit_insn (gen_push (hard_frame_pointer_rtx));
5545       RTX_FRAME_RELATED_P (insn) = 1;
5546 
5547       insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
5548       RTX_FRAME_RELATED_P (insn) = 1;
5549     }
5550 
5551   allocate = frame.to_allocate;
5552 
5553   if (!frame.save_regs_using_mov)
5554     ix86_emit_save_regs ();
5555   else
5556     allocate += (frame.nregs + frame.nmsave_args) * UNITS_PER_WORD
5557       + frame.padding0;
5558 
5559   /* When using the red zone we may start register saving before allocating
5560      the stack frame, saving one cycle of the prologue.  */
5561   if (TARGET_RED_ZONE && frame.save_regs_using_mov)
5562     ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
5563 				   : stack_pointer_rtx,
5564 				   -(frame.nregs + frame.nmsave_args)
5565 				    * UNITS_PER_WORD - frame.padding0);
5566 
5567   if (allocate == 0)
5568     ;
5569   else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
5570     pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5571 			       GEN_INT (-allocate), -1);
5572   else
5573     {
5574       /* Only valid for Win32.  */
5575       rtx eax = gen_rtx_REG (SImode, 0);
5576       bool eax_live = ix86_eax_live_at_start_p ();
5577       rtx t;
5578 
5579       gcc_assert (!TARGET_64BIT);
5580 
5581       if (eax_live)
5582 	{
5583 	  emit_insn (gen_push (eax));
5584 	  allocate -= 4;
5585 	}
5586 
5587       emit_move_insn (eax, GEN_INT (allocate));
5588 
5589       insn = emit_insn (gen_allocate_stack_worker (eax));
5590       RTX_FRAME_RELATED_P (insn) = 1;
5591       t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
5592       t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
5593       REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
5594 					    t, REG_NOTES (insn));
5595 
5596       if (eax_live)
5597 	{
5598 	  if (frame_pointer_needed)
5599 	    t = plus_constant (hard_frame_pointer_rtx,
5600 			       allocate
5601 			       - frame.to_allocate
5602 			       - (frame.nregs + frame.nmsave_args)
5603 				 * UNITS_PER_WORD - frame.padding0);
5604 	  else
5605 	    t = plus_constant (stack_pointer_rtx, allocate);
5606 	  emit_move_insn (eax, gen_rtx_MEM (SImode, t));
5607 	}
5608     }
5609 
5610   if (frame.save_regs_using_mov && !TARGET_RED_ZONE)
5611     {
5612       if (!TARGET_SAVE_ARGS &&
5613        (!frame_pointer_needed || !frame.to_allocate))
5614         ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
5615       else
5616         ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
5617 				       -(frame.nregs + frame.nmsave_args)
5618 					* UNITS_PER_WORD - frame.padding0);
5619     }
5620 
5621   pic_reg_used = false;
5622   if (pic_offset_table_rtx
5623       && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
5624 	  || current_function_profile))
5625     {
5626       unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
5627 
5628       if (alt_pic_reg_used != INVALID_REGNUM)
5629 	REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
5630 
5631       pic_reg_used = true;
5632     }
5633 
5634   if (pic_reg_used)
5635     {
5636       if (TARGET_64BIT)
5637         insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
5638       else
5639         insn = emit_insn (gen_set_got (pic_offset_table_rtx));
5640 
5641       /* Even with accurate pre-reload life analysis, we can wind up
5642 	 deleting all references to the pic register after reload.
5643 	 Consider if cross-jumping unifies two sides of a branch
5644 	 controlled by a comparison vs the only read from a global.
5645 	 In which case, allow the set_got to be deleted, though we're
5646 	 too late to do anything about the ebx save in the prologue.  */
5647       REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
5648     }
5649 
5650   /* Prevent function calls from being scheduled before the call to mcount.
5651      In the pic_reg_used case, make sure that the got load isn't deleted.  */
5652   if (current_function_profile)
5653     emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
5654 }
5655 
5656 /* Emit code to restore saved registers using MOV insns.  First register
5657    is restored from POINTER + OFFSET.  */
5658 static void
5659 ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
5660 				  int maybe_eh_return)
5661 {
5662   int regno;
5663   rtx base_address = gen_rtx_MEM (Pmode, pointer);
5664 
5665   for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5666     if (ix86_save_reg (regno, maybe_eh_return))
5667       {
5668 	/* Ensure that adjust_address won't be forced to produce a pointer
5669 	   outside the range allowed by the x86-64 instruction set.  */
5670 	if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
5671 	  {
5672 	    rtx r11;
5673 
5674 	    r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
5675 	    emit_move_insn (r11, GEN_INT (offset));
5676 	    emit_insn (gen_adddi3 (r11, r11, pointer));
5677 	    base_address = gen_rtx_MEM (Pmode, r11);
5678 	    offset = 0;
5679 	  }
5680 	emit_move_insn (gen_rtx_REG (Pmode, regno),
5681 			adjust_address (base_address, Pmode, offset));
5682 	offset += UNITS_PER_WORD;
5683       }
5684 }
5685 
5686 /* Restore function stack, frame, and registers.  */
5687 
5688 void
5689 ix86_expand_epilogue (int style)
5690 {
5691   int regno;
5692   int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
5693   struct ix86_frame frame;
5694   HOST_WIDE_INT offset;
5695 
5696   ix86_compute_frame_layout (&frame);
5697 
5698   /* Calculate start of saved registers relative to ebp.  Special care
5699      must be taken for the normal return case of a function using
5700      eh_return: the eax and edx registers are marked as saved, but not
5701      restored along this path.  */
5702   offset = frame.nregs + frame.nmsave_args;
5703   if (current_function_calls_eh_return && style != 2)
5704     offset -= 2;
5705   offset *= -UNITS_PER_WORD;
5706   offset -= frame.padding0;
5707 
5708   /* If we're only restoring one register and sp is not valid then
5709      use a move instruction to restore the register, since it's
5710      less work than reloading sp and popping the register.
5711 
5712      The default code results in a stack adjustment using an add/lea
5713      instruction, while this code results in a LEAVE instruction (or a
5714      discrete equivalent), so it is profitable in some other cases as well,
5715      especially when there are no registers to restore.  We also use this
5716      code when TARGET_USE_LEAVE is set and there is exactly one register to
5717      pop.  This heuristic may need some tuning in the future.  */
5718   if ((!sp_valid && frame.nregs <= 1)
5719       || (TARGET_EPILOGUE_USING_MOVE
5720 	  && cfun->machine->use_fast_prologue_epilogue
5721 	  && (frame.nregs > 1 || frame.to_allocate))
5722       || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
5723       || (frame_pointer_needed && TARGET_USE_LEAVE
5724 	  && cfun->machine->use_fast_prologue_epilogue
5725 	  && frame.nregs == 1)
5726       || current_function_calls_eh_return)
5727     {
5728       /* Restore registers.  We can use ebp or esp to address the memory
5729 	 locations.  If both are available, default to ebp, since offsets
5730 	 are known to be small.  The only exception is when esp points directly
5731 	 to the end of the block of saved registers, where we may simplify the
5732 	 addressing mode.  */
5733 
5734       if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
5735 	ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
5736 					  frame.to_allocate, style == 2);
5737       else
5738 	ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
5739 					  offset, style == 2);
5740 
5741       /* eh_return epilogues need %ecx added to the stack pointer.  */
5742       if (style == 2)
5743 	{
5744 	  rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
5745 
5746 	  if (frame_pointer_needed)
5747 	    {
5748 	      tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
5749 	      tmp = plus_constant (tmp, UNITS_PER_WORD);
5750 	      emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
5751 
5752 	      tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
5753 	      emit_move_insn (hard_frame_pointer_rtx, tmp);
5754 
5755 	      pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
5756 					 const0_rtx, style);
5757 	    }
5758 	  else
5759 	    {
5760 	      tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
5761 	      tmp = plus_constant (tmp, (frame.to_allocate
5762                                          + (frame.nregs + frame.nmsave_args)
5763 					   * UNITS_PER_WORD + frame.padding0));
5764 	      emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
5765 	    }
5766 	}
5767       else if (!frame_pointer_needed)
5768 	pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5769 				   GEN_INT (frame.to_allocate
5770 					    + (frame.nregs + frame.nmsave_args)
5771 					     * UNITS_PER_WORD + frame.padding0),
5772 				   style);
5773       /* If not an i386, mov & pop is faster than "leave".  */
5774       else if (TARGET_USE_LEAVE || optimize_size
5775 	       || !cfun->machine->use_fast_prologue_epilogue)
5776 	emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5777       else
5778 	{
5779 	  pro_epilogue_adjust_stack (stack_pointer_rtx,
5780 				     hard_frame_pointer_rtx,
5781 				     const0_rtx, style);
5782 	  if (TARGET_64BIT)
5783 	    emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5784 	  else
5785 	    emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
5786 	}
5787     }
5788   else
5789     {
5790       /* First step is to deallocate the stack frame so that we can
5791 	 pop the registers.  */
5792       if (!sp_valid)
5793 	{
5794 	  gcc_assert (frame_pointer_needed);
5795 	  pro_epilogue_adjust_stack (stack_pointer_rtx,
5796 				     hard_frame_pointer_rtx,
5797 				     GEN_INT (offset), style);
5798 	}
5799       else if (frame.to_allocate)
5800 	pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5801 				   GEN_INT (frame.to_allocate), style);
5802 
5803       for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5804 	if (ix86_save_reg (regno, false))
5805 	  {
5806 	    if (TARGET_64BIT)
5807 	      emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
5808 	    else
5809 	      emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
5810 	  }
5811       if (frame.nmsave_args)
5812         pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5813 				 GEN_INT (frame.nmsave_args * UNITS_PER_WORD
5814 					  + frame.padding0), style);
5815       if (frame_pointer_needed)
5816 	{
5817 	  /* Leave results in shorter dependency chains on CPUs that are
5818 	     able to grok it fast.  */
5819 	  if (TARGET_USE_LEAVE)
5820 	    emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5821 	  else if (TARGET_64BIT)
5822 	    emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5823 	  else
5824 	    emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
5825 	}
5826     }
5827 
5828   if (cfun->machine->force_align_arg_pointer)
5829     {
5830       emit_insn (gen_addsi3 (stack_pointer_rtx,
5831 			     cfun->machine->force_align_arg_pointer,
5832 			     GEN_INT (-4)));
5833     }
5834 
5835   /* Sibcall epilogues don't want a return instruction.  */
5836   if (style == 0)
5837     return;
5838 
5839   if (current_function_pops_args && current_function_args_size)
5840     {
5841       rtx popc = GEN_INT (current_function_pops_args);
5842 
5843       /* i386 can only pop 64K bytes.  If asked to pop more, pop
5844 	 return address, do explicit add, and jump indirectly to the
5845 	 caller.  */
5846 
5847       if (current_function_pops_args >= 65536)
5848 	{
5849 	  rtx ecx = gen_rtx_REG (SImode, 2);
5850 
5851 	  /* There is no "pascal" calling convention in 64bit ABI.  */
5852 	  gcc_assert (!TARGET_64BIT);
5853 
5854 	  emit_insn (gen_popsi1 (ecx));
5855 	  emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
5856 	  emit_jump_insn (gen_return_indirect_internal (ecx));
5857 	}
5858       else
5859 	emit_jump_insn (gen_return_pop_internal (popc));
5860     }
5861   else
5862     emit_jump_insn (gen_return_internal ());
5863 }
5864 
5865 /* Reset from the function's potential modifications.  */
5866 
5867 static void
5868 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
5869 			       HOST_WIDE_INT size ATTRIBUTE_UNUSED)
5870 {
5871   if (pic_offset_table_rtx)
5872     REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
5873 #if TARGET_MACHO
5874   /* Mach-O doesn't support labels at the end of objects, so if
5875      it looks like we might want one, insert a NOP.  */
5876   {
5877     rtx insn = get_last_insn ();
5878     while (insn
5879 	   && NOTE_P (insn)
5880 	   && NOTE_LINE_NUMBER (insn) != NOTE_INSN_DELETED_LABEL)
5881       insn = PREV_INSN (insn);
5882     if (insn
5883 	&& (LABEL_P (insn)
5884 	    || (NOTE_P (insn)
5885 		&& NOTE_LINE_NUMBER (insn) == NOTE_INSN_DELETED_LABEL)))
5886       fputs ("\tnop\n", file);
5887   }
5888 #endif
5889 
5890 }
5891 
5892 /* Extract the parts of an RTL expression that is a valid memory address
5893    for an instruction.  Return 0 if the structure of the address is
5894    grossly off.  Return -1 if the address contains ASHIFT, so it is not
5895    strictly valid, but still used for computing the length of an lea insn.  */
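/* For orientation: the general form being decomposed is the i386 effective
   address  base + index*scale + disp, optionally with a segment override;
   the hardware only accepts scales of 1, 2, 4 or 8, but that restriction
   is enforced by the address-legitimacy checks rather than here.  */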
5896 
5897 int
5898 ix86_decompose_address (rtx addr, struct ix86_address *out)
5899 {
5900   rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
5901   rtx base_reg, index_reg;
5902   HOST_WIDE_INT scale = 1;
5903   rtx scale_rtx = NULL_RTX;
5904   int retval = 1;
5905   enum ix86_address_seg seg = SEG_DEFAULT;
5906 
5907   if (GET_CODE (addr) == REG || GET_CODE (addr) == SUBREG)
5908     base = addr;
5909   else if (GET_CODE (addr) == PLUS)
5910     {
5911       rtx addends[4], op;
5912       int n = 0, i;
5913 
5914       op = addr;
5915       do
5916 	{
5917 	  if (n >= 4)
5918 	    return 0;
5919 	  addends[n++] = XEXP (op, 1);
5920 	  op = XEXP (op, 0);
5921 	}
5922       while (GET_CODE (op) == PLUS);
5923       if (n >= 4)
5924 	return 0;
5925       addends[n] = op;
5926 
5927       for (i = n; i >= 0; --i)
5928 	{
5929 	  op = addends[i];
5930 	  switch (GET_CODE (op))
5931 	    {
5932 	    case MULT:
5933 	      if (index)
5934 		return 0;
5935 	      index = XEXP (op, 0);
5936 	      scale_rtx = XEXP (op, 1);
5937 	      break;
5938 
5939 	    case UNSPEC:
5940 	      if (XINT (op, 1) == UNSPEC_TP
5941 	          && TARGET_TLS_DIRECT_SEG_REFS
5942 	          && seg == SEG_DEFAULT)
5943 		seg = TARGET_64BIT ? SEG_FS : SEG_GS;
5944 	      else
5945 		return 0;
5946 	      break;
5947 
5948 	    case REG:
5949 	    case SUBREG:
5950 	      if (!base)
5951 		base = op;
5952 	      else if (!index)
5953 		index = op;
5954 	      else
5955 		return 0;
5956 	      break;
5957 
5958 	    case CONST:
5959 	    case CONST_INT:
5960 	    case SYMBOL_REF:
5961 	    case LABEL_REF:
5962 	      if (disp)
5963 		return 0;
5964 	      disp = op;
5965 	      break;
5966 
5967 	    default:
5968 	      return 0;
5969 	    }
5970 	}
5971     }
5972   else if (GET_CODE (addr) == MULT)
5973     {
5974       index = XEXP (addr, 0);		/* index*scale */
5975       scale_rtx = XEXP (addr, 1);
5976     }
5977   else if (GET_CODE (addr) == ASHIFT)
5978     {
5979       rtx tmp;
5980 
5981       /* We're called for lea too, which implements ashift on occasion.  */
5982       index = XEXP (addr, 0);
5983       tmp = XEXP (addr, 1);
5984       if (GET_CODE (tmp) != CONST_INT)
5985 	return 0;
5986       scale = INTVAL (tmp);
5987       if ((unsigned HOST_WIDE_INT) scale > 3)
5988 	return 0;
5989       scale = 1 << scale;
5990       retval = -1;
5991     }
5992   else
5993     disp = addr;			/* displacement */
5994 
5995   /* Extract the integral value of scale.  */
5996   if (scale_rtx)
5997     {
5998       if (GET_CODE (scale_rtx) != CONST_INT)
5999 	return 0;
6000       scale = INTVAL (scale_rtx);
6001     }
6002 
6003   base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
6004   index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
6005 
6006   /* Allow the arg pointer and stack pointer as index if there is no scaling.  */
6007   if (base_reg && index_reg && scale == 1
6008       && (index_reg == arg_pointer_rtx
6009 	  || index_reg == frame_pointer_rtx
6010 	  || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
6011     {
6012       rtx tmp;
6013       tmp = base, base = index, index = tmp;
6014       tmp = base_reg, base_reg = index_reg, index_reg = tmp;
6015     }
6016 
6017   /* Special case: %ebp cannot be encoded as a base without a displacement.  */
6018   if ((base_reg == hard_frame_pointer_rtx
6019        || base_reg == frame_pointer_rtx
6020        || base_reg == arg_pointer_rtx) && !disp)
6021     disp = const0_rtx;
6022 
6023   /* Special case: on the K6, [%esi] forces the instruction to be vector
6024      decoded.  Avoid this by transforming it to [%esi+0].  */
6025   if (ix86_tune == PROCESSOR_K6 && !optimize_size
6026       && base_reg && !index_reg && !disp
6027       && REG_P (base_reg)
6028       && REGNO_REG_CLASS (REGNO (base_reg)) == SIREG)
6029     disp = const0_rtx;
6030 
6031   /* Special case: encode reg+reg instead of reg*2.  */
6032   if (!base && index && scale && scale == 2)
6033     base = index, base_reg = index_reg, scale = 1;
6034 
6035   /* Special case: scaling cannot be encoded without base or displacement.  */
6036   if (!base && !disp && index && scale != 1)
6037     disp = const0_rtx;
6038 
6039   out->base = base;
6040   out->index = index;
6041   out->disp = disp;
6042   out->scale = scale;
6043   out->seg = seg;
6044 
6045   return retval;
6046 }
6047 
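/* For illustration: an address such as
   (plus (plus (mult (reg B) (const_int 4)) (reg A)) (const_int 8))
   decomposes roughly as base = A, index = B, scale = 4, disp = 8 and
   seg = SEG_DEFAULT, i.e. the AT&T form 8(%A,%B,4) with hypothetical
   registers A and B.  */
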
6048 /* Return cost of the memory address x.
6049    For i386, it is better to use a complex address than let gcc copy
6050    the address into a reg and make a new pseudo.  But not if the address
6051    requires two regs - that would mean more pseudos with longer
6052    lifetimes.  */
6053 static int
6054 ix86_address_cost (rtx x)
6055 {
6056   struct ix86_address parts;
6057   int cost = 1;
6058   int ok = ix86_decompose_address (x, &parts);
6059 
6060   gcc_assert (ok);
6061 
6062   if (parts.base && GET_CODE (parts.base) == SUBREG)
6063     parts.base = SUBREG_REG (parts.base);
6064   if (parts.index && GET_CODE (parts.index) == SUBREG)
6065     parts.index = SUBREG_REG (parts.index);
6066 
6067   /* More complex memory references are better.  */
6068   if (parts.disp && parts.disp != const0_rtx)
6069     cost--;
6070   if (parts.seg != SEG_DEFAULT)
6071     cost--;
6072 
6073   /* Attempt to minimize number of registers in the address.  */
6074   if ((parts.base
6075        && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
6076       || (parts.index
6077 	  && (!REG_P (parts.index)
6078 	      || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
6079     cost++;
6080 
6081   if (parts.base
6082       && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
6083       && parts.index
6084       && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
6085       && parts.base != parts.index)
6086     cost++;
6087 
6088   /* The AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
6089      since its predecode logic can't detect the length of such instructions
6090      and they degenerate to vector decoded.  Increase the cost of such
6091      addresses here.  The penalty is at least 2 cycles.  It may be worthwhile
6092      to split such addresses or even refuse them entirely.
6093 
6094      The following addressing modes are affected:
6095       [base+scale*index]
6096       [scale*index+disp]
6097       [base+index]
6098 
6099      The first and last cases may be avoidable by explicitly coding the zero
6100      in the memory address, but I don't have an AMD-K6 machine handy to check
6101      this theory.  */
6102 
6103   if (TARGET_K6
6104       && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
6105 	  || (parts.disp && !parts.base && parts.index && parts.scale != 1)
6106 	  || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
6107     cost += 10;
6108 
6109   return cost;
6110 }
6111 
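/* For example, a plain hard-register (reg) address keeps the default cost
   of 1, while disp(%reg) with a nonzero constant displacement costs roughly
   0, reflecting the preference above for more complex addresses.  */
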
6112 /* If X is a machine specific address (i.e. a symbol or label being
6113    referenced as a displacement from the GOT implemented using an
6114    UNSPEC), then return the base term.  Otherwise return X.  */
6115 
6116 rtx
6117 ix86_find_base_term (rtx x)
6118 {
6119   rtx term;
6120 
6121   if (TARGET_64BIT)
6122     {
6123       if (GET_CODE (x) != CONST)
6124 	return x;
6125       term = XEXP (x, 0);
6126       if (GET_CODE (term) == PLUS
6127 	  && (GET_CODE (XEXP (term, 1)) == CONST_INT
6128 	      || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
6129 	term = XEXP (term, 0);
6130       if (GET_CODE (term) != UNSPEC
6131 	  || XINT (term, 1) != UNSPEC_GOTPCREL)
6132 	return x;
6133 
6134       term = XVECEXP (term, 0, 0);
6135 
6136       if (GET_CODE (term) != SYMBOL_REF
6137 	  && GET_CODE (term) != LABEL_REF)
6138 	return x;
6139 
6140       return term;
6141     }
6142 
6143   term = ix86_delegitimize_address (x);
6144 
6145   if (GET_CODE (term) != SYMBOL_REF
6146       && GET_CODE (term) != LABEL_REF)
6147     return x;
6148 
6149   return term;
6150 }
6151 
6152 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
6153    this is used to form addresses to local data when -fPIC is in
6154    use.  */
6155 
6156 static bool
6157 darwin_local_data_pic (rtx disp)
6158 {
6159   if (GET_CODE (disp) == MINUS)
6160     {
6161       if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
6162           || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
6163         if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
6164           {
6165             const char *sym_name = XSTR (XEXP (disp, 1), 0);
6166             if (! strcmp (sym_name, "<pic base>"))
6167               return true;
6168           }
6169     }
6170 
6171   return false;
6172 }
6173 
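/* For example, (minus (symbol_ref "_foo") (symbol_ref "<pic base>")) is
   accepted above; this is roughly how Mach-O addresses local data relative
   to the per-function PIC base.  */
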
6174 /* Determine if a given RTX is a valid constant.  We already know this
6175    satisfies CONSTANT_P.  */
6176 
6177 bool
6178 legitimate_constant_p (rtx x)
6179 {
6180   switch (GET_CODE (x))
6181     {
6182     case CONST:
6183       x = XEXP (x, 0);
6184 
6185       if (GET_CODE (x) == PLUS)
6186 	{
6187 	  if (GET_CODE (XEXP (x, 1)) != CONST_INT)
6188 	    return false;
6189 	  x = XEXP (x, 0);
6190 	}
6191 
6192       if (TARGET_MACHO && darwin_local_data_pic (x))
6193 	return true;
6194 
6195       /* Only some unspecs are valid as "constants".  */
6196       if (GET_CODE (x) == UNSPEC)
6197 	switch (XINT (x, 1))
6198 	  {
6199 	  case UNSPEC_GOTOFF:
6200 	    return TARGET_64BIT;
6201 	  case UNSPEC_TPOFF:
6202 	  case UNSPEC_NTPOFF:
6203 	    x = XVECEXP (x, 0, 0);
6204 	    return (GET_CODE (x) == SYMBOL_REF
6205 		    && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
6206 	  case UNSPEC_DTPOFF:
6207 	    x = XVECEXP (x, 0, 0);
6208 	    return (GET_CODE (x) == SYMBOL_REF
6209 		    && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
6210 	  default:
6211 	    return false;
6212 	  }
6213 
6214       /* We must have drilled down to a symbol.  */
6215       if (GET_CODE (x) == LABEL_REF)
6216 	return true;
6217       if (GET_CODE (x) != SYMBOL_REF)
6218 	return false;
6219       /* FALLTHRU */
6220 
6221     case SYMBOL_REF:
6222       /* TLS symbols are never valid.  */
6223       if (SYMBOL_REF_TLS_MODEL (x))
6224 	return false;
6225       break;
6226 
6227     case CONST_DOUBLE:
6228       if (GET_MODE (x) == TImode
6229 	  && x != CONST0_RTX (TImode)
6230           && !TARGET_64BIT)
6231 	return false;
6232       break;
6233 
6234     case CONST_VECTOR:
6235       if (x == CONST0_RTX (GET_MODE (x)))
6236 	return true;
6237       return false;
6238 
6239     default:
6240       break;
6241     }
6242 
6243   /* Otherwise we handle everything else in the move patterns.  */
6244   return true;
6245 }
6246 
6247 /* Determine if it's legal to put X into the constant pool.  This
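/* For example, (const (plus (symbol_ref "x") (const_int 4))) is a
   legitimate constant above for a non-TLS symbol x, while any SYMBOL_REF
   carrying a TLS model is rejected.  */
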
6248    is not possible for the address of thread-local symbols, which
6249    is checked above.  */
6250 
6251 static bool
6252 ix86_cannot_force_const_mem (rtx x)
6253 {
6254   /* We can always put integral constants and vectors in memory.  */
6255   switch (GET_CODE (x))
6256     {
6257     case CONST_INT:
6258     case CONST_DOUBLE:
6259     case CONST_VECTOR:
6260       return false;
6261 
6262     default:
6263       break;
6264     }
6265   return !legitimate_constant_p (x);
6266 }
6267 
6268 /* Determine if a given RTX is a valid constant address.  */
6269 
6270 bool
6271 constant_address_p (rtx x)
6272 {
6273   return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
6274 }
6275 
6276 /* Nonzero if the constant value X is a legitimate general operand
6277    when generating PIC code.  It is given that flag_pic is on and
6278    that X satisfies CONSTANT_P or is a CONST_DOUBLE.  */
6279 
6280 bool
6281 legitimate_pic_operand_p (rtx x)
6282 {
6283   rtx inner;
6284 
6285   switch (GET_CODE (x))
6286     {
6287     case CONST:
6288       inner = XEXP (x, 0);
6289       if (GET_CODE (inner) == PLUS
6290 	  && GET_CODE (XEXP (inner, 1)) == CONST_INT)
6291 	inner = XEXP (inner, 0);
6292 
6293       /* Only some unspecs are valid as "constants".  */
6294       if (GET_CODE (inner) == UNSPEC)
6295 	switch (XINT (inner, 1))
6296 	  {
6297 	  case UNSPEC_GOTOFF:
6298 	    return TARGET_64BIT;
6299 	  case UNSPEC_TPOFF:
6300 	    x = XVECEXP (inner, 0, 0);
6301 	    return (GET_CODE (x) == SYMBOL_REF
6302 		    && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
6303 	  default:
6304 	    return false;
6305 	  }
6306       /* FALLTHRU */
6307 
6308     case SYMBOL_REF:
6309     case LABEL_REF:
6310       return legitimate_pic_address_disp_p (x);
6311 
6312     default:
6313       return true;
6314     }
6315 }
6316 
6317 /* Determine if a given CONST RTX is a valid memory displacement
6318    in PIC mode.  */
6319 
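/* For example, in 32bit PIC code (const (unspec [(symbol_ref "x")]
   UNSPEC_GOTOFF)) is a valid displacement for a local symbol x, while a
   bare (symbol_ref "x") is not.  */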
6320 int
6321 legitimate_pic_address_disp_p (rtx disp)
6322 {
6323   bool saw_plus;
6324 
6325   /* In 64bit mode we can allow direct addresses of symbols and labels
6326      when they are not dynamic symbols.  */
6327   if (TARGET_64BIT)
6328     {
6329       rtx op0 = disp, op1;
6330 
6331       switch (GET_CODE (disp))
6332 	{
6333 	case LABEL_REF:
6334 	  return true;
6335 
6336 	case CONST:
6337 	  if (GET_CODE (XEXP (disp, 0)) != PLUS)
6338 	    break;
6339 	  op0 = XEXP (XEXP (disp, 0), 0);
6340 	  op1 = XEXP (XEXP (disp, 0), 1);
6341 	  if (GET_CODE (op1) != CONST_INT
6342 	      || INTVAL (op1) >= 16*1024*1024
6343 	      || INTVAL (op1) < -16*1024*1024)
6344             break;
6345 	  if (GET_CODE (op0) == LABEL_REF)
6346 	    return true;
6347 	  if (GET_CODE (op0) != SYMBOL_REF)
6348 	    break;
6349 	  /* FALLTHRU */
6350 
6351 	case SYMBOL_REF:
6352 	  /* TLS references should always be enclosed in UNSPEC.  */
6353 	  if (SYMBOL_REF_TLS_MODEL (op0))
6354 	    return false;
6355 	  if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0))
6356 	    return true;
6357 	  break;
6358 
6359 	default:
6360 	  break;
6361 	}
6362     }
6363   if (GET_CODE (disp) != CONST)
6364     return 0;
6365   disp = XEXP (disp, 0);
6366 
6367   if (TARGET_64BIT)
6368     {
6369       /* It is unsafe to allow PLUS expressions.  This limits the allowed
6370          distance of GOT tables.  We should not need these anyway.  */
6371       if (GET_CODE (disp) != UNSPEC
6372 	  || (XINT (disp, 1) != UNSPEC_GOTPCREL
6373 	      && XINT (disp, 1) != UNSPEC_GOTOFF))
6374 	return 0;
6375 
6376       if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
6377 	  && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
6378 	return 0;
6379       return 1;
6380     }
6381 
6382   saw_plus = false;
6383   if (GET_CODE (disp) == PLUS)
6384     {
6385       if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
6386 	return 0;
6387       disp = XEXP (disp, 0);
6388       saw_plus = true;
6389     }
6390 
6391   if (TARGET_MACHO && darwin_local_data_pic (disp))
6392     return 1;
6393 
6394   if (GET_CODE (disp) != UNSPEC)
6395     return 0;
6396 
6397   switch (XINT (disp, 1))
6398     {
6399     case UNSPEC_GOT:
6400       if (saw_plus)
6401 	return false;
6402       return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
6403     case UNSPEC_GOTOFF:
6404       /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
6405 	 While the ABI also specifies a 32bit relocation, we don't produce it
6406 	 in the small PIC model at all.  */
6407       if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
6408 	   || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
6409 	  && !TARGET_64BIT)
6410         return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
6411       return false;
6412     case UNSPEC_GOTTPOFF:
6413     case UNSPEC_GOTNTPOFF:
6414     case UNSPEC_INDNTPOFF:
6415       if (saw_plus)
6416 	return false;
6417       disp = XVECEXP (disp, 0, 0);
6418       return (GET_CODE (disp) == SYMBOL_REF
6419 	      && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
6420     case UNSPEC_NTPOFF:
6421       disp = XVECEXP (disp, 0, 0);
6422       return (GET_CODE (disp) == SYMBOL_REF
6423 	      && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
6424     case UNSPEC_DTPOFF:
6425       disp = XVECEXP (disp, 0, 0);
6426       return (GET_CODE (disp) == SYMBOL_REF
6427 	      && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
6428     }
6429 
6430   return 0;
6431 }
6432 
6433 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
6434    memory address for an instruction.  The MODE argument is the machine mode
6435    for the MEM expression that wants to use this address.
6436 
6437    It only recognizes addresses in canonical form.  LEGITIMIZE_ADDRESS should
6438    convert common non-canonical forms to canonical form so that they will
6439    be recognized.  */
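/* For example, (plus (reg) (mult (reg) (const_int 3))) is rejected below
   with "scale is not a valid multiplier", while scales of 1, 2, 4 and 8
   are accepted.  */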
6440 
6441 int
6442 legitimate_address_p (enum machine_mode mode, rtx addr, int strict)
6443 {
6444   struct ix86_address parts;
6445   rtx base, index, disp;
6446   HOST_WIDE_INT scale;
6447   const char *reason = NULL;
6448   rtx reason_rtx = NULL_RTX;
6449 
6450   if (TARGET_DEBUG_ADDR)
6451     {
6452       fprintf (stderr,
6453 	       "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
6454 	       GET_MODE_NAME (mode), strict);
6455       debug_rtx (addr);
6456     }
6457 
6458   if (ix86_decompose_address (addr, &parts) <= 0)
6459     {
6460       reason = "decomposition failed";
6461       goto report_error;
6462     }
6463 
6464   base = parts.base;
6465   index = parts.index;
6466   disp = parts.disp;
6467   scale = parts.scale;
6468 
6469   /* Validate base register.
6470 
6471      Don't allow SUBREGs that span more than a word here.  It can lead to spill
6472      failures when the base is one word out of a two word structure, which is
6473      represented internally as a DImode int.  */
6474 
6475   if (base)
6476     {
6477       rtx reg;
6478       reason_rtx = base;
6479 
6480       if (REG_P (base))
6481   	reg = base;
6482       else if (GET_CODE (base) == SUBREG
6483 	       && REG_P (SUBREG_REG (base))
6484 	       && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
6485 		  <= UNITS_PER_WORD)
6486   	reg = SUBREG_REG (base);
6487       else
6488 	{
6489 	  reason = "base is not a register";
6490 	  goto report_error;
6491 	}
6492 
6493       if (GET_MODE (base) != Pmode)
6494 	{
6495 	  reason = "base is not in Pmode";
6496 	  goto report_error;
6497 	}
6498 
6499       if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
6500 	  || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
6501 	{
6502 	  reason = "base is not valid";
6503 	  goto report_error;
6504 	}
6505     }
6506 
6507   /* Validate index register.
6508 
6509      Don't allow SUBREG's that span more than a word here -- same as above.  */
6510      Don't allow SUBREGs that span more than a word here -- same as above.  */
6511   if (index)
6512     {
6513       rtx reg;
6514       reason_rtx = index;
6515 
6516       if (REG_P (index))
6517   	reg = index;
6518       else if (GET_CODE (index) == SUBREG
6519 	       && REG_P (SUBREG_REG (index))
6520 	       && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
6521 		  <= UNITS_PER_WORD)
6522   	reg = SUBREG_REG (index);
6523       else
6524 	{
6525 	  reason = "index is not a register";
6526 	  goto report_error;
6527 	}
6528 
6529       if (GET_MODE (index) != Pmode)
6530 	{
6531 	  reason = "index is not in Pmode";
6532 	  goto report_error;
6533 	}
6534 
6535       if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
6536 	  || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
6537 	{
6538 	  reason = "index is not valid";
6539 	  goto report_error;
6540 	}
6541     }
6542 
6543   /* Validate scale factor.  */
6544   if (scale != 1)
6545     {
6546       reason_rtx = GEN_INT (scale);
6547       if (!index)
6548 	{
6549 	  reason = "scale without index";
6550 	  goto report_error;
6551 	}
6552 
6553       if (scale != 2 && scale != 4 && scale != 8)
6554 	{
6555 	  reason = "scale is not a valid multiplier";
6556 	  goto report_error;
6557 	}
6558     }
6559 
6560   /* Validate displacement.  */
6561   if (disp)
6562     {
6563       reason_rtx = disp;
6564 
6565       if (GET_CODE (disp) == CONST
6566 	  && GET_CODE (XEXP (disp, 0)) == UNSPEC)
6567 	switch (XINT (XEXP (disp, 0), 1))
6568 	  {
6569 	  /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
6570 	     used.  While the ABI also specifies 32bit relocations, we don't
6571 	     produce them at all and use IP-relative addressing instead.  */
6572 	  case UNSPEC_GOT:
6573 	  case UNSPEC_GOTOFF:
6574 	    gcc_assert (flag_pic);
6575 	    if (!TARGET_64BIT)
6576 	      goto is_legitimate_pic;
6577 	    reason = "64bit address unspec";
6578 	    goto report_error;
6579 
6580 	  case UNSPEC_GOTPCREL:
6581 	    gcc_assert (flag_pic);
6582 	    goto is_legitimate_pic;
6583 
6584 	  case UNSPEC_GOTTPOFF:
6585 	  case UNSPEC_GOTNTPOFF:
6586 	  case UNSPEC_INDNTPOFF:
6587 	  case UNSPEC_NTPOFF:
6588 	  case UNSPEC_DTPOFF:
6589 	    break;
6590 
6591 	  default:
6592 	    reason = "invalid address unspec";
6593 	    goto report_error;
6594 	  }
6595 
6596       else if (SYMBOLIC_CONST (disp)
6597 	       && (flag_pic
6598 		   || (TARGET_MACHO
6599 #if TARGET_MACHO
6600 		       && MACHOPIC_INDIRECT
6601 		       && !machopic_operand_p (disp)
6602 #endif
6603 	       )))
6604 	{
6605 
6606 	is_legitimate_pic:
6607 	  if (TARGET_64BIT && (index || base))
6608 	    {
6609 	      /* foo@dtpoff(%rX) is ok.  */
6610 	      if (GET_CODE (disp) != CONST
6611 		  || GET_CODE (XEXP (disp, 0)) != PLUS
6612 		  || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
6613 		  || GET_CODE (XEXP (XEXP (disp, 0), 1)) != CONST_INT
6614 		  || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
6615 		      && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
6616 		{
6617 		  reason = "non-constant pic memory reference";
6618 		  goto report_error;
6619 		}
6620 	    }
6621 	  else if (! legitimate_pic_address_disp_p (disp))
6622 	    {
6623 	      reason = "displacement is an invalid pic construct";
6624 	      goto report_error;
6625 	    }
6626 
6627           /* This code used to verify that a symbolic pic displacement
6628 	     includes the pic_offset_table_rtx register.
6629 
6630 	     While this is a good idea, unfortunately these constructs may
6631 	     be created by the "adds using lea" optimization for incorrect
6632 	     code like:
6633 
6634 	     int a;
6635 	     int foo(int i)
6636 	       {
6637 	         return *(&a+i);
6638 	       }
6639 
6640 	     This code is nonsensical, but it results in addressing the
6641 	     GOT table with a pic_offset_table_rtx base.  We can't
6642 	     easily refuse it, since it gets matched by the
6643 	     "addsi3" pattern, which later gets split to lea when the
6644 	     output register differs from the input.  While this
6645 	     could be handled by a separate addsi pattern for this case
6646 	     that never results in lea, disabling this test seems to be
6647 	     the easier and correct fix for the crash.  */
6648 	}
6649       else if (GET_CODE (disp) != LABEL_REF
6650 	       && GET_CODE (disp) != CONST_INT
6651 	       && (GET_CODE (disp) != CONST
6652 		   || !legitimate_constant_p (disp))
6653 	       && (GET_CODE (disp) != SYMBOL_REF
6654 		   || !legitimate_constant_p (disp)))
6655 	{
6656 	  reason = "displacement is not constant";
6657 	  goto report_error;
6658 	}
6659       else if (TARGET_64BIT
6660 	       && !x86_64_immediate_operand (disp, VOIDmode))
6661 	{
6662 	  reason = "displacement is out of range";
6663 	  goto report_error;
6664 	}
6665     }
6666 
6667   /* Everything looks valid.  */
6668   if (TARGET_DEBUG_ADDR)
6669     fprintf (stderr, "Success.\n");
6670   return TRUE;
6671 
6672  report_error:
6673   if (TARGET_DEBUG_ADDR)
6674     {
6675       fprintf (stderr, "Error: %s\n", reason);
6676       debug_rtx (reason_rtx);
6677     }
6678   return FALSE;
6679 }
6680 
6681 /* Return a unique alias set for the GOT.  */
6682 
6683 static HOST_WIDE_INT
6684 ix86_GOT_alias_set (void)
6685 {
6686   static HOST_WIDE_INT set = -1;
6687   if (set == -1)
6688     set = new_alias_set ();
6689   return set;
6690 }
6691 
6692 /* Return a legitimate reference for ORIG (an address) using the
6693    register REG.  If REG is 0, a new pseudo is generated.
6694 
6695    There are two types of references that must be handled:
6696 
6697    1. Global data references must load the address from the GOT, via
6698       the PIC reg.  An insn is emitted to do this load, and the reg is
6699       returned.
6700 
6701    2. Static data references, constant pool addresses, and code labels
6702       compute the address as an offset from the GOT, whose base is in
6703       the PIC reg.  Static data objects have SYMBOL_FLAG_LOCAL set to
6704       differentiate them from global data objects.  The returned
6705       address is the PIC reg + an unspec constant.
6706 
6707    GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
6708    reg also appears in the address.  */
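/* For example, a locally bound symbol is typically rewritten as
   (plus pic_offset_table_rtx (const (unspec [sym] UNSPEC_GOTOFF))),
   while a global symbol becomes a load such as
   (mem (plus pic_offset_table_rtx (const (unspec [sym] UNSPEC_GOT)))).  */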
6709 
6710 static rtx
6711 legitimize_pic_address (rtx orig, rtx reg)
6712 {
6713   rtx addr = orig;
6714   rtx new = orig;
6715   rtx base;
6716 
6717 #if TARGET_MACHO
6718   if (TARGET_MACHO && !TARGET_64BIT)
6719     {
6720       if (reg == 0)
6721 	reg = gen_reg_rtx (Pmode);
6722       /* Use the generic Mach-O PIC machinery.  */
6723       return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
6724     }
6725 #endif
6726 
6727   if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
6728     new = addr;
6729   else if (TARGET_64BIT
6730 	   && ix86_cmodel != CM_SMALL_PIC
6731 	   && local_symbolic_operand (addr, Pmode))
6732     {
6733       rtx tmpreg;
6734       /* This symbol may be referenced via a displacement from the PIC
6735 	 base address (@GOTOFF).  */
6736 
6737       if (reload_in_progress)
6738 	regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6739       if (GET_CODE (addr) == CONST)
6740 	addr = XEXP (addr, 0);
6741       if (GET_CODE (addr) == PLUS)
6742 	  {
6743             new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
6744 	    new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
6745 	  }
6746 	else
6747           new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
6748       new = gen_rtx_CONST (Pmode, new);
6749       if (!reg)
6750         tmpreg = gen_reg_rtx (Pmode);
6751       else
6752 	tmpreg = reg;
6753       emit_move_insn (tmpreg, new);
6754 
6755       if (reg != 0)
6756 	{
6757 	  new = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
6758 				     tmpreg, 1, OPTAB_DIRECT);
6759 	  new = reg;
6760 	}
6761       else new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
6762     }
6763   else if (!TARGET_64BIT && local_symbolic_operand (addr, Pmode))
6764     {
6765       /* This symbol may be referenced via a displacement from the PIC
6766 	 base address (@GOTOFF).  */
6767 
6768       if (reload_in_progress)
6769 	regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6770       if (GET_CODE (addr) == CONST)
6771 	addr = XEXP (addr, 0);
6772       if (GET_CODE (addr) == PLUS)
6773 	  {
6774             new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
6775 	    new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
6776 	  }
6777 	else
6778           new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
6779       new = gen_rtx_CONST (Pmode, new);
6780       new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6781 
6782       if (reg != 0)
6783 	{
6784 	  emit_move_insn (reg, new);
6785 	  new = reg;
6786 	}
6787     }
6788   else if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
6789     {
6790       if (TARGET_64BIT)
6791 	{
6792 	  new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
6793 	  new = gen_rtx_CONST (Pmode, new);
6794 	  new = gen_const_mem (Pmode, new);
6795 	  set_mem_alias_set (new, ix86_GOT_alias_set ());
6796 
6797 	  if (reg == 0)
6798 	    reg = gen_reg_rtx (Pmode);
6799 	  /* Use gen_movsi directly; otherwise the address is loaded
6800 	     into a register for CSE.  We don't want to CSE these addresses;
6801 	     instead we CSE addresses from the GOT table, so skip this.  */
6802 	  emit_insn (gen_movsi (reg, new));
6803 	  new = reg;
6804 	}
6805       else
6806 	{
6807 	  /* This symbol must be referenced via a load from the
6808 	     Global Offset Table (@GOT).  */
6809 
6810 	  if (reload_in_progress)
6811 	    regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6812 	  new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
6813 	  new = gen_rtx_CONST (Pmode, new);
6814 	  new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6815 	  new = gen_const_mem (Pmode, new);
6816 	  set_mem_alias_set (new, ix86_GOT_alias_set ());
6817 
6818 	  if (reg == 0)
6819 	    reg = gen_reg_rtx (Pmode);
6820 	  emit_move_insn (reg, new);
6821 	  new = reg;
6822 	}
6823     }
6824   else
6825     {
6826       if (GET_CODE (addr) == CONST_INT
6827 	  && !x86_64_immediate_operand (addr, VOIDmode))
6828 	{
6829 	  if (reg)
6830 	    {
6831 	      emit_move_insn (reg, addr);
6832 	      new = reg;
6833 	    }
6834 	  else
6835 	    new = force_reg (Pmode, addr);
6836 	}
6837       else if (GET_CODE (addr) == CONST)
6838 	{
6839 	  addr = XEXP (addr, 0);
6840 
6841 	  /* We must match stuff we generate before.  Assume the only
6842 	     unspecs that can get here are ours.  Not that we could do
6843 	     anything with them anyway....  */
6844 	  if (GET_CODE (addr) == UNSPEC
6845 	      || (GET_CODE (addr) == PLUS
6846 		  && GET_CODE (XEXP (addr, 0)) == UNSPEC))
6847 	    return orig;
6848 	  gcc_assert (GET_CODE (addr) == PLUS);
6849 	}
6850       if (GET_CODE (addr) == PLUS)
6851 	{
6852 	  rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
6853 
6854 	  /* Check first to see if this is a constant offset from a @GOTOFF
6855 	     symbol reference.  */
6856 	  if (local_symbolic_operand (op0, Pmode)
6857 	      && GET_CODE (op1) == CONST_INT)
6858 	    {
6859 	      if (!TARGET_64BIT)
6860 		{
6861 		  if (reload_in_progress)
6862 		    regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6863 		  new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
6864 					UNSPEC_GOTOFF);
6865 		  new = gen_rtx_PLUS (Pmode, new, op1);
6866 		  new = gen_rtx_CONST (Pmode, new);
6867 		  new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6868 
6869 		  if (reg != 0)
6870 		    {
6871 		      emit_move_insn (reg, new);
6872 		      new = reg;
6873 		    }
6874 		}
6875 	      else
6876 		{
6877 		  if (INTVAL (op1) < -16*1024*1024
6878 		      || INTVAL (op1) >= 16*1024*1024)
6879 		    {
6880 		      if (!x86_64_immediate_operand (op1, Pmode))
6881 			op1 = force_reg (Pmode, op1);
6882 		      new = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
6883 		    }
6884 		}
6885 	    }
6886 	  else
6887 	    {
6888 	      base = legitimize_pic_address (XEXP (addr, 0), reg);
6889 	      new  = legitimize_pic_address (XEXP (addr, 1),
6890 					     base == reg ? NULL_RTX : reg);
6891 
6892 	      if (GET_CODE (new) == CONST_INT)
6893 		new = plus_constant (base, INTVAL (new));
6894 	      else
6895 		{
6896 		  if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
6897 		    {
6898 		      base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
6899 		      new = XEXP (new, 1);
6900 		    }
6901 		  new = gen_rtx_PLUS (Pmode, base, new);
6902 		}
6903 	    }
6904 	}
6905     }
6906   return new;
6907 }
6908 
6909 /* Load the thread pointer.  If TO_REG is true, force it into a register.  */
6910 
6911 static rtx
6912 get_thread_pointer (int to_reg)
6913 {
6914   rtx tp, reg, insn;
6915 
6916   tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
6917   if (!to_reg)
6918     return tp;
6919 
6920   reg = gen_reg_rtx (Pmode);
6921   insn = gen_rtx_SET (VOIDmode, reg, tp);
6922   insn = emit_insn (insn);
6923 
6924   return reg;
6925 }
6926 
6927 /* A subroutine of legitimize_address and ix86_expand_move.  FOR_MOV is
6928    false if we expect this to be used for a memory address and true if
6929    we expect to load the address into a register.  */
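/* For example, under the GNU TLS dialect TLS_MODEL_LOCAL_EXEC yields
   roughly (plus <thread pointer> (const (unspec [x] UNSPEC_NTPOFF))),
   which is printed as x@NTPOFF (x@TPOFF on 64bit).  */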
6930 
6931 static rtx
6932 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
6933 {
6934   rtx dest, base, off, pic, tp;
6935   int type;
6936 
6937   switch (model)
6938     {
6939     case TLS_MODEL_GLOBAL_DYNAMIC:
6940       dest = gen_reg_rtx (Pmode);
6941       tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
6942 
6943       if (TARGET_64BIT && ! TARGET_GNU2_TLS)
6944 	{
6945 	  rtx rax = gen_rtx_REG (Pmode, 0), insns;
6946 
6947 	  start_sequence ();
6948 	  emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
6949 	  insns = get_insns ();
6950 	  end_sequence ();
6951 
6952 	  emit_libcall_block (insns, dest, rax, x);
6953 	}
6954       else if (TARGET_64BIT && TARGET_GNU2_TLS)
6955 	emit_insn (gen_tls_global_dynamic_64 (dest, x));
6956       else
6957 	emit_insn (gen_tls_global_dynamic_32 (dest, x));
6958 
6959       if (TARGET_GNU2_TLS)
6960 	{
6961 	  dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
6962 
6963 	  set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
6964 	}
6965       break;
6966 
6967     case TLS_MODEL_LOCAL_DYNAMIC:
6968       base = gen_reg_rtx (Pmode);
6969       tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
6970 
6971       if (TARGET_64BIT && ! TARGET_GNU2_TLS)
6972 	{
6973 	  rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
6974 
6975 	  start_sequence ();
6976 	  emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
6977 	  insns = get_insns ();
6978 	  end_sequence ();
6979 
6980 	  note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
6981 	  note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
6982 	  emit_libcall_block (insns, base, rax, note);
6983 	}
6984       else if (TARGET_64BIT && TARGET_GNU2_TLS)
6985 	emit_insn (gen_tls_local_dynamic_base_64 (base));
6986       else
6987 	emit_insn (gen_tls_local_dynamic_base_32 (base));
6988 
6989       if (TARGET_GNU2_TLS)
6990 	{
6991 	  rtx x = ix86_tls_module_base ();
6992 
6993 	  set_unique_reg_note (get_last_insn (), REG_EQUIV,
6994 			       gen_rtx_MINUS (Pmode, x, tp));
6995 	}
6996 
6997       off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
6998       off = gen_rtx_CONST (Pmode, off);
6999 
7000       dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
7001 
7002       if (TARGET_GNU2_TLS)
7003 	{
7004 	  dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
7005 
7006 	  set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
7007 	}
7008 
7009       break;
7010 
7011     case TLS_MODEL_INITIAL_EXEC:
7012       if (TARGET_64BIT)
7013 	{
7014 	  pic = NULL;
7015 	  type = UNSPEC_GOTNTPOFF;
7016 	}
7017       else if (flag_pic)
7018 	{
7019 	  if (reload_in_progress)
7020 	    regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
7021 	  pic = pic_offset_table_rtx;
7022 	  type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
7023 	}
7024       else if (!TARGET_ANY_GNU_TLS)
7025 	{
7026 	  pic = gen_reg_rtx (Pmode);
7027 	  emit_insn (gen_set_got (pic));
7028 	  type = UNSPEC_GOTTPOFF;
7029 	}
7030       else
7031 	{
7032 	  pic = NULL;
7033 	  type = UNSPEC_INDNTPOFF;
7034 	}
7035 
7036       off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
7037       off = gen_rtx_CONST (Pmode, off);
7038       if (pic)
7039 	off = gen_rtx_PLUS (Pmode, pic, off);
7040       off = gen_const_mem (Pmode, off);
7041       set_mem_alias_set (off, ix86_GOT_alias_set ());
7042 
7043       if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7044 	{
7045           base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
7046 	  off = force_reg (Pmode, off);
7047 	  return gen_rtx_PLUS (Pmode, base, off);
7048 	}
7049       else
7050 	{
7051 	  base = get_thread_pointer (true);
7052 	  dest = gen_reg_rtx (Pmode);
7053 	  emit_insn (gen_subsi3 (dest, base, off));
7054 	}
7055       break;
7056 
7057     case TLS_MODEL_LOCAL_EXEC:
7058       off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
7059 			    (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7060 			    ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
7061       off = gen_rtx_CONST (Pmode, off);
7062 
7063       if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7064 	{
7065 	  base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
7066 	  return gen_rtx_PLUS (Pmode, base, off);
7067 	}
7068       else
7069 	{
7070 	  base = get_thread_pointer (true);
7071 	  dest = gen_reg_rtx (Pmode);
7072 	  emit_insn (gen_subsi3 (dest, base, off));
7073 	}
7074       break;
7075 
7076     default:
7077       gcc_unreachable ();
7078     }
7079 
7080   return dest;
7081 }
7082 
7083 /* Try machine-dependent ways of modifying an illegitimate address
7084    to be legitimate.  If we find one, return the new, valid address.
7085    This macro is used in only one place: `memory_address' in explow.c.
7086 
7087    OLDX is the address as it was before break_out_memory_refs was called.
7088    In some cases it is useful to look at this to decide what needs to be done.
7089 
7090    MODE and WIN are passed so that this macro can use
7091    GO_IF_LEGITIMATE_ADDRESS.
7092 
7093    It is always safe for this macro to do nothing.  It exists to recognize
7094    opportunities to optimize the output.
7095 
7096    For the 80386, we handle X+REG by loading X into a register R and
7097    using R+REG.  R will go in a general reg and indexing will be used.
7098    However, if REG is a broken-out memory address or multiplication,
7099    nothing needs to be done because REG can certainly go in a general reg.
7100 
7101    When -fpic is used, special handling is needed for symbolic references.
7102    See comments by legitimize_pic_address in i386.c for details.  */
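/* For example, (plus (reg) (ashift (reg) (const_int 2))) is canonicalized
   below into roughly (plus (mult (reg) (const_int 4)) (reg)), the form
   that GO_IF_LEGITIMATE_ADDRESS recognizes as base + index*scale.  */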
7103 
7104 rtx
7105 legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
7106 {
7107   int changed = 0;
7108   unsigned log;
7109 
7110   if (TARGET_DEBUG_ADDR)
7111     {
7112       fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
7113 	       GET_MODE_NAME (mode));
7114       debug_rtx (x);
7115     }
7116 
7117   log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
7118   if (log)
7119     return legitimize_tls_address (x, log, false);
7120   if (GET_CODE (x) == CONST
7121       && GET_CODE (XEXP (x, 0)) == PLUS
7122       && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
7123       && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
7124     {
7125       rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0), log, false);
7126       return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
7127     }
7128 
7129   if (flag_pic && SYMBOLIC_CONST (x))
7130     return legitimize_pic_address (x, 0);
7131 
7132   /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
7133   if (GET_CODE (x) == ASHIFT
7134       && GET_CODE (XEXP (x, 1)) == CONST_INT
7135       && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
7136     {
7137       changed = 1;
7138       log = INTVAL (XEXP (x, 1));
7139       x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
7140 			GEN_INT (1 << log));
7141     }
7142 
7143   if (GET_CODE (x) == PLUS)
7144     {
7145       /* Canonicalize shifts by 0, 1, 2, 3 into multiply.  */
7146 
7147       if (GET_CODE (XEXP (x, 0)) == ASHIFT
7148 	  && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
7149 	  && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
7150 	{
7151 	  changed = 1;
7152 	  log = INTVAL (XEXP (XEXP (x, 0), 1));
7153 	  XEXP (x, 0) = gen_rtx_MULT (Pmode,
7154 				      force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
7155 				      GEN_INT (1 << log));
7156 	}
7157 
7158       if (GET_CODE (XEXP (x, 1)) == ASHIFT
7159 	  && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
7160 	  && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
7161 	{
7162 	  changed = 1;
7163 	  log = INTVAL (XEXP (XEXP (x, 1), 1));
7164 	  XEXP (x, 1) = gen_rtx_MULT (Pmode,
7165 				      force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
7166 				      GEN_INT (1 << log));
7167 	}
7168 
7169       /* Put multiply first if it isn't already.  */
7170       if (GET_CODE (XEXP (x, 1)) == MULT)
7171 	{
7172 	  rtx tmp = XEXP (x, 0);
7173 	  XEXP (x, 0) = XEXP (x, 1);
7174 	  XEXP (x, 1) = tmp;
7175 	  changed = 1;
7176 	}
7177 
7178       /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
7179 	 into (plus (plus (mult (reg) (const)) (reg)) (const)).  This can be
7180 	 created by virtual register instantiation, register elimination, and
7181 	 similar optimizations.  */
7182       if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
7183 	{
7184 	  changed = 1;
7185 	  x = gen_rtx_PLUS (Pmode,
7186 			    gen_rtx_PLUS (Pmode, XEXP (x, 0),
7187 					  XEXP (XEXP (x, 1), 0)),
7188 			    XEXP (XEXP (x, 1), 1));
7189 	}
7190 
7191       /* Canonicalize
7192 	 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
7193 	 into (plus (plus (mult (reg) (const)) (reg)) (const)).  */
7194       else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
7195 	       && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
7196 	       && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
7197 	       && CONSTANT_P (XEXP (x, 1)))
7198 	{
7199 	  rtx constant;
7200 	  rtx other = NULL_RTX;
7201 
7202 	  if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7203 	    {
7204 	      constant = XEXP (x, 1);
7205 	      other = XEXP (XEXP (XEXP (x, 0), 1), 1);
7206 	    }
7207 	  else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
7208 	    {
7209 	      constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
7210 	      other = XEXP (x, 1);
7211 	    }
7212 	  else
7213 	    constant = 0;
7214 
7215 	  if (constant)
7216 	    {
7217 	      changed = 1;
7218 	      x = gen_rtx_PLUS (Pmode,
7219 				gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
7220 					      XEXP (XEXP (XEXP (x, 0), 1), 0)),
7221 				plus_constant (other, INTVAL (constant)));
7222 	    }
7223 	}
7224 
7225       if (changed && legitimate_address_p (mode, x, FALSE))
7226 	return x;
7227 
7228       if (GET_CODE (XEXP (x, 0)) == MULT)
7229 	{
7230 	  changed = 1;
7231 	  XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
7232 	}
7233 
7234       if (GET_CODE (XEXP (x, 1)) == MULT)
7235 	{
7236 	  changed = 1;
7237 	  XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
7238 	}
7239 
7240       if (changed
7241 	  && GET_CODE (XEXP (x, 1)) == REG
7242 	  && GET_CODE (XEXP (x, 0)) == REG)
7243 	return x;
7244 
7245       if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
7246 	{
7247 	  changed = 1;
7248 	  x = legitimize_pic_address (x, 0);
7249 	}
7250 
7251       if (changed && legitimate_address_p (mode, x, FALSE))
7252 	return x;
7253 
7254       if (GET_CODE (XEXP (x, 0)) == REG)
7255 	{
7256 	  rtx temp = gen_reg_rtx (Pmode);
7257 	  rtx val  = force_operand (XEXP (x, 1), temp);
7258 	  if (val != temp)
7259 	    emit_move_insn (temp, val);
7260 
7261 	  XEXP (x, 1) = temp;
7262 	  return x;
7263 	}
7264 
7265       else if (GET_CODE (XEXP (x, 1)) == REG)
7266 	{
7267 	  rtx temp = gen_reg_rtx (Pmode);
7268 	  rtx val  = force_operand (XEXP (x, 0), temp);
7269 	  if (val != temp)
7270 	    emit_move_insn (temp, val);
7271 
7272 	  XEXP (x, 0) = temp;
7273 	  return x;
7274 	}
7275     }
7276 
7277   return x;
7278 }
7279 
7280 /* Print an integer constant expression in assembler syntax.  Addition
7281    and subtraction are the only arithmetic that may appear in these
7282    expressions.  FILE is the stdio stream to write to, X is the rtx, and
7283    CODE is the operand print code from the output string.  */
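/* For example, (const (unspec [(symbol_ref "x")] UNSPEC_GOTOFF)) is
   printed as x@GOTOFF, and UNSPEC_GOTPCREL as x@GOTPCREL(%rip).  */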
7284 
7285 static void
7286 output_pic_addr_const (FILE *file, rtx x, int code)
7287 {
7288   char buf[256];
7289 
7290   switch (GET_CODE (x))
7291     {
7292     case PC:
7293       gcc_assert (flag_pic);
7294       putc ('.', file);
7295       break;
7296 
7297     case SYMBOL_REF:
7298       if (! TARGET_MACHO || TARGET_64BIT)
7299 	output_addr_const (file, x);
7300       else
7301 	{
7302 	  const char *name = XSTR (x, 0);
7303 
7304 	  /* Mark the decl as referenced so that cgraph will output the function.  */
7305 	  if (SYMBOL_REF_DECL (x))
7306 	    mark_decl_referenced (SYMBOL_REF_DECL (x));
7307 
7308 #if TARGET_MACHO
7309 	  if (MACHOPIC_INDIRECT
7310 	      && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
7311 	    name = machopic_indirection_name (x, /*stub_p=*/true);
7312 #endif
7313 	  assemble_name (file, name);
7314 	}
7315       if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
7316 	fputs ("@PLT", file);
7317       break;
7318 
7319     case LABEL_REF:
7320       x = XEXP (x, 0);
7321       /* FALLTHRU */
7322     case CODE_LABEL:
7323       ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
7324       assemble_name (asm_out_file, buf);
7325       break;
7326 
7327     case CONST_INT:
7328       fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
7329       break;
7330 
7331     case CONST:
7332       /* This used to output parentheses around the expression,
7333 	 but that does not work on the 386 (either ATT or BSD assembler).  */
7334       output_pic_addr_const (file, XEXP (x, 0), code);
7335       break;
7336 
7337     case CONST_DOUBLE:
7338       if (GET_MODE (x) == VOIDmode)
7339 	{
7340 	  /* We can use %d if the number is <32 bits and positive.  */
7341 	  if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
7342 	    fprintf (file, "0x%lx%08lx",
7343 		     (unsigned long) CONST_DOUBLE_HIGH (x),
7344 		     (unsigned long) CONST_DOUBLE_LOW (x));
7345 	  else
7346 	    fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
7347 	}
7348       else
7349 	/* We can't handle floating point constants;
7350 	   PRINT_OPERAND must handle them.  */
7351 	output_operand_lossage ("floating constant misused");
7352       break;
7353 
7354     case PLUS:
7355       /* Some assemblers need integer constants to appear first.  */
7356       if (GET_CODE (XEXP (x, 0)) == CONST_INT)
7357 	{
7358 	  output_pic_addr_const (file, XEXP (x, 0), code);
7359 	  putc ('+', file);
7360 	  output_pic_addr_const (file, XEXP (x, 1), code);
7361 	}
7362       else
7363 	{
7364 	  gcc_assert (GET_CODE (XEXP (x, 1)) == CONST_INT);
7365 	  output_pic_addr_const (file, XEXP (x, 1), code);
7366 	  putc ('+', file);
7367 	  output_pic_addr_const (file, XEXP (x, 0), code);
7368 	}
7369       break;
7370 
7371     case MINUS:
7372       if (!TARGET_MACHO)
7373 	putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
7374       output_pic_addr_const (file, XEXP (x, 0), code);
7375       putc ('-', file);
7376       output_pic_addr_const (file, XEXP (x, 1), code);
7377       if (!TARGET_MACHO)
7378 	putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
7379       break;
7380 
7381      case UNSPEC:
7382        gcc_assert (XVECLEN (x, 0) == 1);
7383        output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
7384        switch (XINT (x, 1))
7385 	{
7386 	case UNSPEC_GOT:
7387 	  fputs ("@GOT", file);
7388 	  break;
7389 	case UNSPEC_GOTOFF:
7390 	  fputs ("@GOTOFF", file);
7391 	  break;
7392 	case UNSPEC_GOTPCREL:
7393 	  fputs ("@GOTPCREL(%rip)", file);
7394 	  break;
7395 	case UNSPEC_GOTTPOFF:
7396 	  /* FIXME: This might be @TPOFF in Sun ld too.  */
7397 	  fputs ("@GOTTPOFF", file);
7398 	  break;
7399 	case UNSPEC_TPOFF:
7400 	  fputs ("@TPOFF", file);
7401 	  break;
7402 	case UNSPEC_NTPOFF:
7403 	  if (TARGET_64BIT)
7404 	    fputs ("@TPOFF", file);
7405 	  else
7406 	    fputs ("@NTPOFF", file);
7407 	  break;
7408 	case UNSPEC_DTPOFF:
7409 	  fputs ("@DTPOFF", file);
7410 	  break;
7411 	case UNSPEC_GOTNTPOFF:
7412 	  if (TARGET_64BIT)
7413 	    fputs ("@GOTTPOFF(%rip)", file);
7414 	  else
7415 	    fputs ("@GOTNTPOFF", file);
7416 	  break;
7417 	case UNSPEC_INDNTPOFF:
7418 	  fputs ("@INDNTPOFF", file);
7419 	  break;
7420 	default:
7421 	  output_operand_lossage ("invalid UNSPEC as operand");
7422 	  break;
7423 	}
7424        break;
7425 
7426     default:
7427       output_operand_lossage ("invalid expression as operand");
7428     }
7429 }
7430 
7431 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
7432    We need to emit DTP-relative relocations.  */
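/* For example, for SIZE 4 this emits roughly ".long x@DTPOFF"; for SIZE 8
   a ", 0" upper half is appended.  */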
7433 
7434 static void
7435 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
7436 {
7437   fputs (ASM_LONG, file);
7438   output_addr_const (file, x);
7439   fputs ("@DTPOFF", file);
7440   switch (size)
7441     {
7442     case 4:
7443       break;
7444     case 8:
7445       fputs (", 0", file);
7446       break;
7447     default:
7448       gcc_unreachable ();
7449    }
7450 }
7451 
7452 /* In the name of slightly smaller debug output, and to cater to
7453    general assembler lossage, recognize PIC+GOTOFF and turn it back
7454    into a direct symbol reference.
7455 
7456    On Darwin, this is necessary to avoid a crash, because Darwin
7457    has a different PIC label for each routine but the DWARF debugging
7458    information is not associated with any particular routine, so it's
7459    necessary to remove references to the PIC label from RTL stored by
7460    the DWARF output code.  */
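/* For example, (plus pic_offset_table_rtx
   (const (unspec [(symbol_ref "x")] UNSPEC_GOTOFF))) is typically turned
   back into the plain (symbol_ref "x") here.  */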
7461 
7462 static rtx
7463 ix86_delegitimize_address (rtx orig_x)
7464 {
7465   rtx x = orig_x;
7466   /* reg_addend is NULL or a multiple of some register.  */
7467   rtx reg_addend = NULL_RTX;
7468   /* const_addend is NULL or a const_int.  */
7469   rtx const_addend = NULL_RTX;
7470   /* This is the result, or NULL.  */
7471   rtx result = NULL_RTX;
7472 
7473   if (GET_CODE (x) == MEM)
7474     x = XEXP (x, 0);
7475 
7476   if (TARGET_64BIT)
7477     {
7478       if (GET_CODE (x) != CONST
7479 	  || GET_CODE (XEXP (x, 0)) != UNSPEC
7480 	  || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
7481 	  || GET_CODE (orig_x) != MEM)
7482 	return orig_x;
7483       return XVECEXP (XEXP (x, 0), 0, 0);
7484     }
7485 
7486   if (GET_CODE (x) != PLUS
7487       || GET_CODE (XEXP (x, 1)) != CONST)
7488     return orig_x;
7489 
7490   if (GET_CODE (XEXP (x, 0)) == REG
7491       && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
7492     /* %ebx + GOT/GOTOFF */
7493     ;
7494   else if (GET_CODE (XEXP (x, 0)) == PLUS)
7495     {
7496       /* %ebx + %reg * scale + GOT/GOTOFF */
7497       reg_addend = XEXP (x, 0);
7498       if (GET_CODE (XEXP (reg_addend, 0)) == REG
7499 	  && REGNO (XEXP (reg_addend, 0)) == PIC_OFFSET_TABLE_REGNUM)
7500 	reg_addend = XEXP (reg_addend, 1);
7501       else if (GET_CODE (XEXP (reg_addend, 1)) == REG
7502 	       && REGNO (XEXP (reg_addend, 1)) == PIC_OFFSET_TABLE_REGNUM)
7503 	reg_addend = XEXP (reg_addend, 0);
7504       else
7505 	return orig_x;
7506       if (GET_CODE (reg_addend) != REG
7507 	  && GET_CODE (reg_addend) != MULT
7508 	  && GET_CODE (reg_addend) != ASHIFT)
7509 	return orig_x;
7510     }
7511   else
7512     return orig_x;
7513 
7514   x = XEXP (XEXP (x, 1), 0);
7515   if (GET_CODE (x) == PLUS
7516       && GET_CODE (XEXP (x, 1)) == CONST_INT)
7517     {
7518       const_addend = XEXP (x, 1);
7519       x = XEXP (x, 0);
7520     }
7521 
7522   if (GET_CODE (x) == UNSPEC
7523       && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
7524 	  || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
7525     result = XVECEXP (x, 0, 0);
7526 
7527   if (TARGET_MACHO && darwin_local_data_pic (x)
7528       && GET_CODE (orig_x) != MEM)
7529     result = XEXP (x, 0);
7530 
7531   if (! result)
7532     return orig_x;
7533 
7534   if (const_addend)
7535     result = gen_rtx_PLUS (Pmode, result, const_addend);
7536   if (reg_addend)
7537     result = gen_rtx_PLUS (Pmode, reg_addend, result);
7538   return result;
7539 }
7540 
7541 static void
7542 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
7543 		    int fp, FILE *file)
7544 {
7545   const char *suffix;
7546 
7547   if (mode == CCFPmode || mode == CCFPUmode)
7548     {
7549       enum rtx_code second_code, bypass_code;
7550       ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
7551       gcc_assert (bypass_code == UNKNOWN && second_code == UNKNOWN);
7552       code = ix86_fp_compare_code_to_integer (code);
7553       mode = CCmode;
7554     }
7555   if (reverse)
7556     code = reverse_condition (code);
7557 
7558   switch (code)
7559     {
7560     case EQ:
7561       suffix = "e";
7562       break;
7563     case NE:
7564       suffix = "ne";
7565       break;
7566     case GT:
7567       gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
7568       suffix = "g";
7569       break;
7570     case GTU:
7571       /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
7572 	 Those same assemblers have the same but opposite lossage on cmov.  */
7573       gcc_assert (mode == CCmode);
7574       suffix = fp ? "nbe" : "a";
7575       break;
7576     case LT:
7577       switch (mode)
7578 	{
7579 	case CCNOmode:
7580 	case CCGOCmode:
7581 	  suffix = "s";
7582 	  break;
7583 
7584 	case CCmode:
7585 	case CCGCmode:
7586 	  suffix = "l";
7587 	  break;
7588 
7589 	default:
7590 	  gcc_unreachable ();
7591 	}
7592       break;
7593     case LTU:
7594       gcc_assert (mode == CCmode);
7595       suffix = "b";
7596       break;
7597     case GE:
7598       switch (mode)
7599 	{
7600 	case CCNOmode:
7601 	case CCGOCmode:
7602 	  suffix = "ns";
7603 	  break;
7604 
7605 	case CCmode:
7606 	case CCGCmode:
7607 	  suffix = "ge";
7608 	  break;
7609 
7610 	default:
7611 	  gcc_unreachable ();
7612 	}
7613       break;
7614     case GEU:
7615       /* ??? As above.  */
7616       gcc_assert (mode == CCmode);
7617       suffix = fp ? "nb" : "ae";
7618       break;
7619     case LE:
7620       gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
7621       suffix = "le";
7622       break;
7623     case LEU:
7624       gcc_assert (mode == CCmode);
7625       suffix = "be";
7626       break;
7627     case UNORDERED:
7628       suffix = fp ? "u" : "p";
7629       break;
7630     case ORDERED:
7631       suffix = fp ? "nu" : "np";
7632       break;
7633     default:
7634       gcc_unreachable ();
7635     }
7636   fputs (suffix, file);
7637 }
7638 
7639 /* Print the name of register X to FILE based on its machine mode and number.
7640    If CODE is 'w', pretend the mode is HImode.
7641    If CODE is 'b', pretend the mode is QImode.
7642    If CODE is 'k', pretend the mode is SImode.
7643    If CODE is 'q', pretend the mode is DImode.
7644    If CODE is 'h', pretend the reg is the 'high' byte register.
7645    If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.  */
7646 
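/* For example, for the register normally printed as %eax, CODE 'w' prints
   %ax, 'b' prints %al, 'h' prints %ah and 'q' prints %rax; extended
   registers print as %r8d, %r8w, %r8b and so on.  */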
7647 void
7648 print_reg (rtx x, int code, FILE *file)
7649 {
7650   gcc_assert (REGNO (x) != ARG_POINTER_REGNUM
7651 	      && REGNO (x) != FRAME_POINTER_REGNUM
7652 	      && REGNO (x) != FLAGS_REG
7653 	      && REGNO (x) != FPSR_REG);
7654 
7655   if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
7656     putc ('%', file);
7657 
7658   if (code == 'w' || MMX_REG_P (x))
7659     code = 2;
7660   else if (code == 'b')
7661     code = 1;
7662   else if (code == 'k')
7663     code = 4;
7664   else if (code == 'q')
7665     code = 8;
7666   else if (code == 'y')
7667     code = 3;
7668   else if (code == 'h')
7669     code = 0;
7670   else
7671     code = GET_MODE_SIZE (GET_MODE (x));
7672 
7673   /* Irritatingly, the AMD extended registers use a different naming
7674      convention from the normal registers.  */
7675   if (REX_INT_REG_P (x))
7676     {
7677       gcc_assert (TARGET_64BIT);
7678       switch (code)
7679 	{
7680 	  case 0:
7681 	    error ("extended registers have no high halves");
7682 	    break;
7683 	  case 1:
7684 	    fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
7685 	    break;
7686 	  case 2:
7687 	    fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
7688 	    break;
7689 	  case 4:
7690 	    fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
7691 	    break;
7692 	  case 8:
7693 	    fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
7694 	    break;
7695 	  default:
7696 	    error ("unsupported operand size for extended register");
7697 	    break;
7698 	}
7699       return;
7700     }
7701   switch (code)
7702     {
7703     case 3:
7704       if (STACK_TOP_P (x))
7705 	{
7706 	  fputs ("st(0)", file);
7707 	  break;
7708 	}
7709       /* FALLTHRU */
7710     case 8:
7711     case 4:
7712     case 12:
7713       if (! ANY_FP_REG_P (x))
7714 	putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
7715       /* FALLTHRU */
7716     case 16:
7717     case 2:
7718     normal:
7719       fputs (hi_reg_name[REGNO (x)], file);
7720       break;
7721     case 1:
7722       if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
7723 	goto normal;
7724       fputs (qi_reg_name[REGNO (x)], file);
7725       break;
7726     case 0:
7727       if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
7728 	goto normal;
7729       fputs (qi_high_reg_name[REGNO (x)], file);
7730       break;
7731     default:
7732       gcc_unreachable ();
7733     }
7734 }
7735 
7736 /* Locate some local-dynamic symbol still in use by this function
7737    so that we can print its name in some tls_local_dynamic_base
7738    pattern.  */
7739 
7740 static const char *
7741 get_some_local_dynamic_name (void)
7742 {
7743   rtx insn;
7744 
7745   if (cfun->machine->some_ld_name)
7746     return cfun->machine->some_ld_name;
7747 
7748   for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
7749     if (INSN_P (insn)
7750 	&& for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
7751       return cfun->machine->some_ld_name;
7752 
7753   gcc_unreachable ();
7754 }
7755 
7756 static int
7757 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
7758 {
7759   rtx x = *px;
7760 
7761   if (GET_CODE (x) == SYMBOL_REF
7762       && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
7763     {
7764       cfun->machine->some_ld_name = XSTR (x, 0);
7765       return 1;
7766     }
7767 
7768   return 0;
7769 }
7770 
7771 /* Meaning of CODE:
7772    L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
7773    C -- print opcode suffix for set/cmov insn.
7774    c -- like C, but print reversed condition
7775    F,f -- likewise, but for floating-point.
7776    O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
7777         otherwise nothing
7778    R -- print the prefix for register names.
7779    z -- print the opcode suffix for the size of the current operand.
7780    * -- print a star (in certain assembler syntax)
7781    A -- print an absolute memory reference.
7782    w -- print the operand as if it's a "word" (HImode) even if it isn't.
7783    s -- print a shift double count, followed by the assembler's argument
7784 	delimiter.
7785    b -- print the QImode name of the register for the indicated operand.
7786 	%b0 would print %al if operands[0] is reg 0.
7787    w --  likewise, print the HImode name of the register.
7788    k --  likewise, print the SImode name of the register.
7789    q --  likewise, print the DImode name of the register.
7790    h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
7791    y -- print "st(0)" instead of "st" as a register.
7792    D -- print condition for SSE cmp instruction.
7793    P -- if PIC, print an @PLT suffix.
7794    X -- don't print any sort of PIC '@' suffix for a symbol.
7795    & -- print some in-use local-dynamic symbol name.
7796    H -- print a memory address offset by 8; used for sse high-parts
7797  */
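/* For example, "%k1" prints the SImode name of operands[1] (e.g. %ecx),
   and "%z0" emits the 387 size suffix (e.g. 'l' for SImode) derived from
   the mode of operands[0].  */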
7798 
7799 void
7800 print_operand (FILE *file, rtx x, int code)
7801 {
7802   if (code)
7803     {
7804       switch (code)
7805 	{
7806 	case '*':
7807 	  if (ASSEMBLER_DIALECT == ASM_ATT)
7808 	    putc ('*', file);
7809 	  return;
7810 
7811 	case '&':
7812 	  assemble_name (file, get_some_local_dynamic_name ());
7813 	  return;
7814 
7815 	case 'A':
7816 	  switch (ASSEMBLER_DIALECT)
7817 	    {
7818 	    case ASM_ATT:
7819 	      putc ('*', file);
7820 	      break;
7821 
7822 	    case ASM_INTEL:
7823 	      /* Intel syntax. For absolute addresses, registers should not
7824 		 be surrounded by brackets.  */
7825 	      if (GET_CODE (x) != REG)
7826 		{
7827 		  putc ('[', file);
7828 		  PRINT_OPERAND (file, x, 0);
7829 		  putc (']', file);
7830 		  return;
7831 		}
7832 	      break;
7833 
7834 	    default:
7835 	      gcc_unreachable ();
7836 	    }
7837 
7838 	  PRINT_OPERAND (file, x, 0);
7839 	  return;
7840 
7841 
7842 	case 'L':
7843 	  if (ASSEMBLER_DIALECT == ASM_ATT)
7844 	    putc ('l', file);
7845 	  return;
7846 
7847 	case 'W':
7848 	  if (ASSEMBLER_DIALECT == ASM_ATT)
7849 	    putc ('w', file);
7850 	  return;
7851 
7852 	case 'B':
7853 	  if (ASSEMBLER_DIALECT == ASM_ATT)
7854 	    putc ('b', file);
7855 	  return;
7856 
7857 	case 'Q':
7858 	  if (ASSEMBLER_DIALECT == ASM_ATT)
7859 	    putc ('l', file);
7860 	  return;
7861 
7862 	case 'S':
7863 	  if (ASSEMBLER_DIALECT == ASM_ATT)
7864 	    putc ('s', file);
7865 	  return;
7866 
7867 	case 'T':
7868 	  if (ASSEMBLER_DIALECT == ASM_ATT)
7869 	    putc ('t', file);
7870 	  return;
7871 
7872 	case 'z':
7873 	  /* 387 opcodes don't get size suffixes if the operands are
7874 	     registers.  */
7875 	  if (STACK_REG_P (x))
7876 	    return;
7877 
7878 	  /* Likewise if using Intel opcodes.  */
7879 	  if (ASSEMBLER_DIALECT == ASM_INTEL)
7880 	    return;
7881 
7882 	  /* This is the size of op from size of operand.  */
7883 	  switch (GET_MODE_SIZE (GET_MODE (x)))
7884 	    {
7885 	    case 2:
7886 #ifdef HAVE_GAS_FILDS_FISTS
7887 	      putc ('s', file);
7888 #endif
7889 	      return;
7890 
7891 	    case 4:
7892 	      if (GET_MODE (x) == SFmode)
7893 		{
7894 		  putc ('s', file);
7895 		  return;
7896 		}
7897 	      else
7898 		putc ('l', file);
7899 	      return;
7900 
7901 	    case 12:
7902 	    case 16:
7903 	      putc ('t', file);
7904 	      return;
7905 
7906 	    case 8:
7907 	      if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
7908 		{
7909 #ifdef GAS_MNEMONICS
7910 		  putc ('q', file);
7911 #else
7912 		  putc ('l', file);
7913 		  putc ('l', file);
7914 #endif
7915 		}
7916 	      else
7917 	        putc ('l', file);
7918 	      return;
7919 
7920 	    default:
7921 	      gcc_unreachable ();
7922 	    }
7923 
7924 	case 'b':
7925 	case 'w':
7926 	case 'k':
7927 	case 'q':
7928 	case 'h':
7929 	case 'y':
7930 	case 'X':
7931 	case 'P':
7932 	  break;
7933 
7934 	case 's':
7935 	  if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
7936 	    {
7937 	      PRINT_OPERAND (file, x, 0);
7938 	      putc (',', file);
7939 	    }
7940 	  return;
7941 
7942 	case 'D':
7943 	  /* Little bit of braindamage here.  The SSE compare instructions
7944 	     use completely different names for the comparisons than the
7945 	     fp conditional moves do.  */
7946 	  switch (GET_CODE (x))
7947 	    {
7948 	    case EQ:
7949 	    case UNEQ:
7950 	      fputs ("eq", file);
7951 	      break;
7952 	    case LT:
7953 	    case UNLT:
7954 	      fputs ("lt", file);
7955 	      break;
7956 	    case LE:
7957 	    case UNLE:
7958 	      fputs ("le", file);
7959 	      break;
7960 	    case UNORDERED:
7961 	      fputs ("unord", file);
7962 	      break;
7963 	    case NE:
7964 	    case LTGT:
7965 	      fputs ("neq", file);
7966 	      break;
7967 	    case UNGE:
7968 	    case GE:
7969 	      fputs ("nlt", file);
7970 	      break;
7971 	    case UNGT:
7972 	    case GT:
7973 	      fputs ("nle", file);
7974 	      break;
7975 	    case ORDERED:
7976 	      fputs ("ord", file);
7977 	      break;
7978 	    default:
7979 	      gcc_unreachable ();
7980 	    }
7981 	  return;
7982 	case 'O':
7983 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7984 	  if (ASSEMBLER_DIALECT == ASM_ATT)
7985 	    {
7986 	      switch (GET_MODE (x))
7987 		{
7988 		case HImode: putc ('w', file); break;
7989 		case SImode:
7990 		case SFmode: putc ('l', file); break;
7991 		case DImode:
7992 		case DFmode: putc ('q', file); break;
7993 		default: gcc_unreachable ();
7994 		}
7995 	      putc ('.', file);
7996 	    }
7997 #endif
7998 	  return;
7999 	case 'C':
8000 	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
8001 	  return;
8002 	case 'F':
8003 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8004 	  if (ASSEMBLER_DIALECT == ASM_ATT)
8005 	    putc ('.', file);
8006 #endif
8007 	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
8008 	  return;
8009 
8010 	  /* Like above, but reverse condition */
8011 	case 'c':
8012 	  /* Check to see if argument to %c is really a constant
8013 	     and not a condition code which needs to be reversed.  */
8014 	  if (!COMPARISON_P (x))
8015 	  {
8016 	    output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
8017 	     return;
8018 	  }
8019 	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
8020 	  return;
8021 	case 'f':
8022 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8023 	  if (ASSEMBLER_DIALECT == ASM_ATT)
8024 	    putc ('.', file);
8025 #endif
8026 	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
8027 	  return;
8028 
8029 	case 'H':
8030 	  /* It doesn't actually matter what mode we use here, as we're
8031 	     only going to use this for printing.  */
8032 	  x = adjust_address_nv (x, DImode, 8);
8033 	  break;
8034 
8035 	case '+':
8036 	  {
8037 	    rtx x;
8038 
8039 	    if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
8040 	      return;
8041 
8042 	    x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
8043 	    if (x)
8044 	      {
8045 		int pred_val = INTVAL (XEXP (x, 0));
8046 
8047 		if (pred_val < REG_BR_PROB_BASE * 45 / 100
8048 		    || pred_val > REG_BR_PROB_BASE * 55 / 100)
8049 		  {
8050 		    int taken = pred_val > REG_BR_PROB_BASE / 2;
8051 		    int cputaken = final_forward_branch_p (current_output_insn) == 0;
8052 
8053 		    /* Emit hints only in the case where the default branch
8054 		       prediction heuristics would fail.  */
8055 		    if (taken != cputaken)
8056 		      {
8057 			/* We use the 3e (DS) prefix for taken branches and
8058 			   the 2e (CS) prefix for not-taken branches.  */
8059 			if (taken)
8060 			  fputs ("ds ; ", file);
8061 			else
8062 			  fputs ("cs ; ", file);
8063 		      }
8064 		  }
8065 	      }
8066 	    return;
8067 	  }
8068 	default:
8069 	    output_operand_lossage ("invalid operand code '%c'", code);
8070 	}
8071     }
8072 
8073   if (GET_CODE (x) == REG)
8074     print_reg (x, code, file);
8075 
8076   else if (GET_CODE (x) == MEM)
8077     {
8078       /* No `byte ptr' prefix for call instructions.  */
8079       if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
8080 	{
8081 	  const char * size;
8082 	  switch (GET_MODE_SIZE (GET_MODE (x)))
8083 	    {
8084 	    case 1: size = "BYTE"; break;
8085 	    case 2: size = "WORD"; break;
8086 	    case 4: size = "DWORD"; break;
8087 	    case 8: size = "QWORD"; break;
8088 	    case 12: size = "XWORD"; break;
8089 	    case 16: size = "XMMWORD"; break;
8090 	    default:
8091 	      gcc_unreachable ();
8092 	    }
8093 
8094 	  /* Check for explicit size override (codes 'b', 'w' and 'k')  */
8095 	  if (code == 'b')
8096 	    size = "BYTE";
8097 	  else if (code == 'w')
8098 	    size = "WORD";
8099 	  else if (code == 'k')
8100 	    size = "DWORD";
8101 
8102 	  fputs (size, file);
8103 	  fputs (" PTR ", file);
8104 	}
8105 
8106       x = XEXP (x, 0);
8107       /* Avoid (%rip) for call operands.  */
8108       if (CONSTANT_ADDRESS_P (x) && code == 'P'
8109 	       && GET_CODE (x) != CONST_INT)
8110 	output_addr_const (file, x);
8111       else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
8112 	output_operand_lossage ("invalid constraints for operand");
8113       else
8114 	output_address (x);
8115     }
8116 
8117   else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
8118     {
8119       REAL_VALUE_TYPE r;
8120       long l;
8121 
8122       REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8123       REAL_VALUE_TO_TARGET_SINGLE (r, l);
8124 
8125       if (ASSEMBLER_DIALECT == ASM_ATT)
8126 	putc ('$', file);
8127       fprintf (file, "0x%08lx", l);
8128     }
8129 
8130   /* These float cases don't actually occur as immediate operands.  */
8131   else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
8132     {
8133       char dstr[30];
8134 
8135       real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
8136       fprintf (file, "%s", dstr);
8137     }
8138 
8139   else if (GET_CODE (x) == CONST_DOUBLE
8140 	   && GET_MODE (x) == XFmode)
8141     {
8142       char dstr[30];
8143 
8144       real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
8145       fprintf (file, "%s", dstr);
8146     }
8147 
8148   else
8149     {
8150       /* We have patterns that allow zero sets of memory, for instance.
8151 	 In 64-bit mode, we should probably support all 8-byte vectors,
8152 	 since we can in fact encode that into an immediate.  */
8153       if (GET_CODE (x) == CONST_VECTOR)
8154 	{
8155 	  gcc_assert (x == CONST0_RTX (GET_MODE (x)));
8156 	  x = const0_rtx;
8157 	}
8158 
8159       if (code != 'P')
8160 	{
8161 	  if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
8162 	    {
8163 	      if (ASSEMBLER_DIALECT == ASM_ATT)
8164 		putc ('$', file);
8165 	    }
8166 	  else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
8167 		   || GET_CODE (x) == LABEL_REF)
8168 	    {
8169 	      if (ASSEMBLER_DIALECT == ASM_ATT)
8170 		putc ('$', file);
8171 	      else
8172 		fputs ("OFFSET FLAT:", file);
8173 	    }
8174 	}
8175       if (GET_CODE (x) == CONST_INT)
8176 	fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
8177       else if (flag_pic)
8178 	output_pic_addr_const (file, x, code);
8179       else
8180 	output_addr_const (file, x);
8181     }
8182 }
8183 
8184 /* Print a memory operand whose address is ADDR.  */
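/* As a rough example, an address with base %ebp, index %eax, scale 4 and
   displacement 16 is printed as "16(%ebp,%eax,4)" in AT&T syntax and as
   "[ebp+16+eax*4]" in Intel syntax by the two branches below.  */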
8185 
8186 void
8187 print_operand_address (FILE *file, rtx addr)
8188 {
8189   struct ix86_address parts;
8190   rtx base, index, disp;
8191   int scale;
8192   int ok = ix86_decompose_address (addr, &parts);
8193 
8194   gcc_assert (ok);
8195 
8196   base = parts.base;
8197   index = parts.index;
8198   disp = parts.disp;
8199   scale = parts.scale;
8200 
8201   switch (parts.seg)
8202     {
8203     case SEG_DEFAULT:
8204       break;
8205     case SEG_FS:
8206     case SEG_GS:
8207       if (USER_LABEL_PREFIX[0] == 0)
8208 	putc ('%', file);
8209       fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
8210       break;
8211     default:
8212       gcc_unreachable ();
8213     }
8214 
8215   if (!base && !index)
8216     {
8217       /* A displacement-only address requires special attention.  */
8218 
8219       if (GET_CODE (disp) == CONST_INT)
8220 	{
8221 	  if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
8222 	    {
8223 	      if (USER_LABEL_PREFIX[0] == 0)
8224 		putc ('%', file);
8225 	      fputs ("ds:", file);
8226 	    }
8227 	  fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
8228 	}
8229       else if (flag_pic)
8230 	output_pic_addr_const (file, disp, 0);
8231       else
8232 	output_addr_const (file, disp);
8233 
8234       /* Use the one-byte-shorter RIP-relative addressing for 64-bit mode.  */
8235       if (TARGET_64BIT)
8236 	{
8237 	  if (GET_CODE (disp) == CONST
8238 	      && GET_CODE (XEXP (disp, 0)) == PLUS
8239 	      && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
8240 	    disp = XEXP (XEXP (disp, 0), 0);
8241 	  if (GET_CODE (disp) == LABEL_REF
8242 	      || (GET_CODE (disp) == SYMBOL_REF
8243 		  && SYMBOL_REF_TLS_MODEL (disp) == 0))
8244 	    fputs ("(%rip)", file);
8245 	}
8246     }
8247   else
8248     {
8249       if (ASSEMBLER_DIALECT == ASM_ATT)
8250 	{
8251 	  if (disp)
8252 	    {
8253 	      if (flag_pic)
8254 		output_pic_addr_const (file, disp, 0);
8255 	      else if (GET_CODE (disp) == LABEL_REF)
8256 		output_asm_label (disp);
8257 	      else
8258 		output_addr_const (file, disp);
8259 	    }
8260 
8261 	  putc ('(', file);
8262 	  if (base)
8263 	    print_reg (base, 0, file);
8264 	  if (index)
8265 	    {
8266 	      putc (',', file);
8267 	      print_reg (index, 0, file);
8268 	      if (scale != 1)
8269 		fprintf (file, ",%d", scale);
8270 	    }
8271 	  putc (')', file);
8272 	}
8273       else
8274 	{
8275 	  rtx offset = NULL_RTX;
8276 
8277 	  if (disp)
8278 	    {
8279 	      /* Pull out the offset of a symbol; print any symbol itself.  */
8280 	      if (GET_CODE (disp) == CONST
8281 		  && GET_CODE (XEXP (disp, 0)) == PLUS
8282 		  && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
8283 		{
8284 		  offset = XEXP (XEXP (disp, 0), 1);
8285 		  disp = gen_rtx_CONST (VOIDmode,
8286 					XEXP (XEXP (disp, 0), 0));
8287 		}
8288 
8289 	      if (flag_pic)
8290 		output_pic_addr_const (file, disp, 0);
8291 	      else if (GET_CODE (disp) == LABEL_REF)
8292 		output_asm_label (disp);
8293 	      else if (GET_CODE (disp) == CONST_INT)
8294 		offset = disp;
8295 	      else
8296 		output_addr_const (file, disp);
8297 	    }
8298 
8299 	  putc ('[', file);
8300 	  if (base)
8301 	    {
8302 	      print_reg (base, 0, file);
8303 	      if (offset)
8304 		{
8305 		  if (INTVAL (offset) >= 0)
8306 		    putc ('+', file);
8307 		  fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
8308 		}
8309 	    }
8310 	  else if (offset)
8311 	    fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
8312 	  else
8313 	    putc ('0', file);
8314 
8315 	  if (index)
8316 	    {
8317 	      putc ('+', file);
8318 	      print_reg (index, 0, file);
8319 	      if (scale != 1)
8320 		fprintf (file, "*%d", scale);
8321 	    }
8322 	  putc (']', file);
8323 	}
8324     }
8325 }
8326 
8327 bool
8328 output_addr_const_extra (FILE *file, rtx x)
8329 {
8330   rtx op;
8331 
8332   if (GET_CODE (x) != UNSPEC)
8333     return false;
8334 
8335   op = XVECEXP (x, 0, 0);
8336   switch (XINT (x, 1))
8337     {
8338     case UNSPEC_GOTTPOFF:
8339       output_addr_const (file, op);
8340       /* FIXME: This might be @TPOFF in Sun ld.  */
8341       fputs ("@GOTTPOFF", file);
8342       break;
8343     case UNSPEC_TPOFF:
8344       output_addr_const (file, op);
8345       fputs ("@TPOFF", file);
8346       break;
8347     case UNSPEC_NTPOFF:
8348       output_addr_const (file, op);
8349       if (TARGET_64BIT)
8350 	fputs ("@TPOFF", file);
8351       else
8352 	fputs ("@NTPOFF", file);
8353       break;
8354     case UNSPEC_DTPOFF:
8355       output_addr_const (file, op);
8356       fputs ("@DTPOFF", file);
8357       break;
8358     case UNSPEC_GOTNTPOFF:
8359       output_addr_const (file, op);
8360       if (TARGET_64BIT)
8361 	fputs ("@GOTTPOFF(%rip)", file);
8362       else
8363 	fputs ("@GOTNTPOFF", file);
8364       break;
8365     case UNSPEC_INDNTPOFF:
8366       output_addr_const (file, op);
8367       fputs ("@INDNTPOFF", file);
8368       break;
8369 
8370     default:
8371       return false;
8372     }
8373 
8374   return true;
8375 }
8376 
8377 /* Split one or more DImode RTL references into pairs of SImode
8378    references.  The RTL can be REG, offsettable MEM, integer constant, or
8379    CONST_DOUBLE.  "operands" is a pointer to an array of DImode RTL to
8380    split and "num" is its length.  lo_half and hi_half are output arrays
8381    that parallel "operands".  */
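/* For instance, a DImode register is rewritten as two SImode subregs at
   byte offsets 0 and 4, while an offsettable MEM is split with
   adjust_address into its low and high words.  */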
8382 
8383 void
8384 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
8385 {
8386   while (num--)
8387     {
8388       rtx op = operands[num];
8389 
8390       /* simplify_subreg refuses to split volatile memory addresses,
8391          but we still have to handle them.  */
8392       if (GET_CODE (op) == MEM)
8393 	{
8394 	  lo_half[num] = adjust_address (op, SImode, 0);
8395 	  hi_half[num] = adjust_address (op, SImode, 4);
8396 	}
8397       else
8398 	{
8399 	  lo_half[num] = simplify_gen_subreg (SImode, op,
8400 					      GET_MODE (op) == VOIDmode
8401 					      ? DImode : GET_MODE (op), 0);
8402 	  hi_half[num] = simplify_gen_subreg (SImode, op,
8403 					      GET_MODE (op) == VOIDmode
8404 					      ? DImode : GET_MODE (op), 4);
8405 	}
8406     }
8407 }
8408 /* Split one or more TImode RTL references into pairs of DImode
8409    references.  The RTL can be REG, offsettable MEM, integer constant, or
8410    CONST_DOUBLE.  "operands" is a pointer to an array of TImode RTL to
8411    split and "num" is its length.  lo_half and hi_half are output arrays
8412    that parallel "operands".  */
8413 
8414 void
8415 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
8416 {
8417   while (num--)
8418     {
8419       rtx op = operands[num];
8420 
8421       /* simplify_subreg refuses to split volatile memory addresses, but we
8422          still have to handle them.  */
8423       if (GET_CODE (op) == MEM)
8424 	{
8425 	  lo_half[num] = adjust_address (op, DImode, 0);
8426 	  hi_half[num] = adjust_address (op, DImode, 8);
8427 	}
8428       else
8429 	{
8430 	  lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
8431 	  hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
8432 	}
8433     }
8434 }
8435 
8436 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
8437    MINUS, MULT or DIV.  OPERANDS are the insn operands, where operands[3]
8438    is the expression of the binary operation.  The output may either be
8439    emitted here, or returned to the caller, like all output_* functions.
8440 
8441    There is no guarantee that the operands are the same mode, as they
8442    might be within FLOAT or FLOAT_EXTEND expressions.  */
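/* Illustrative results: an SSE SFmode PLUS yields "addss\t{%2, %0|%0, %2}",
   while a register-register x87 PLUS with the result in %st(0) and no
   dying operands yields "fadd\t{%y2, %0|%0, %y2}".  */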
8443 
8444 #ifndef SYSV386_COMPAT
8445 /* Set to 1 for compatibility with brain-damaged assemblers.  No-one
8446    wants to fix the assemblers because that causes incompatibility
8447    with gcc.  No-one wants to fix gcc because that causes
8448    incompatibility with assemblers...  You can use the option of
8449    -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way.  */
8450 #define SYSV386_COMPAT 1
8451 #endif
8452 
8453 const char *
8454 output_387_binary_op (rtx insn, rtx *operands)
8455 {
8456   static char buf[30];
8457   const char *p;
8458   const char *ssep;
8459   int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
8460 
8461 #ifdef ENABLE_CHECKING
8462   /* Even if we do not want to check the inputs, this documents the input
8463      constraints, which helps in understanding the following code.  */
8464   if (STACK_REG_P (operands[0])
8465       && ((REG_P (operands[1])
8466 	   && REGNO (operands[0]) == REGNO (operands[1])
8467 	   && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
8468 	  || (REG_P (operands[2])
8469 	      && REGNO (operands[0]) == REGNO (operands[2])
8470 	      && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
8471       && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
8472     ; /* ok */
8473   else
8474     gcc_assert (is_sse);
8475 #endif
8476 
8477   switch (GET_CODE (operands[3]))
8478     {
8479     case PLUS:
8480       if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8481 	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8482 	p = "fiadd";
8483       else
8484 	p = "fadd";
8485       ssep = "add";
8486       break;
8487 
8488     case MINUS:
8489       if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8490 	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8491 	p = "fisub";
8492       else
8493 	p = "fsub";
8494       ssep = "sub";
8495       break;
8496 
8497     case MULT:
8498       if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8499 	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8500 	p = "fimul";
8501       else
8502 	p = "fmul";
8503       ssep = "mul";
8504       break;
8505 
8506     case DIV:
8507       if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8508 	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8509 	p = "fidiv";
8510       else
8511 	p = "fdiv";
8512       ssep = "div";
8513       break;
8514 
8515     default:
8516       gcc_unreachable ();
8517     }
8518 
8519   if (is_sse)
8520    {
8521       strcpy (buf, ssep);
8522       if (GET_MODE (operands[0]) == SFmode)
8523 	strcat (buf, "ss\t{%2, %0|%0, %2}");
8524       else
8525 	strcat (buf, "sd\t{%2, %0|%0, %2}");
8526       return buf;
8527    }
8528   strcpy (buf, p);
8529 
8530   switch (GET_CODE (operands[3]))
8531     {
8532     case MULT:
8533     case PLUS:
8534       if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
8535 	{
8536 	  rtx temp = operands[2];
8537 	  operands[2] = operands[1];
8538 	  operands[1] = temp;
8539 	}
8540 
8541       /* We now know operands[0] == operands[1].  */
8542 
8543       if (GET_CODE (operands[2]) == MEM)
8544 	{
8545 	  p = "%z2\t%2";
8546 	  break;
8547 	}
8548 
8549       if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
8550 	{
8551 	  if (STACK_TOP_P (operands[0]))
8552 	    /* How is it that we are storing to a dead operand[2]?
8553 	       Well, presumably operands[1] is dead too.  We can't
8554 	       store the result to st(0) as st(0) gets popped on this
8555 	       instruction.  Instead store to operands[2] (which I
8556 	       think has to be st(1)).  st(1) will be popped later.
8557 	       gcc <= 2.8.1 didn't have this check and generated
8558 	       assembly code that the Unixware assembler rejected.  */
8559 	    p = "p\t{%0, %2|%2, %0}";	/* st(1) = st(0) op st(1); pop */
8560 	  else
8561 	    p = "p\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0); pop */
8562 	  break;
8563 	}
8564 
8565       if (STACK_TOP_P (operands[0]))
8566 	p = "\t{%y2, %0|%0, %y2}";	/* st(0) = st(0) op st(r2) */
8567       else
8568 	p = "\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0) */
8569       break;
8570 
8571     case MINUS:
8572     case DIV:
8573       if (GET_CODE (operands[1]) == MEM)
8574 	{
8575 	  p = "r%z1\t%1";
8576 	  break;
8577 	}
8578 
8579       if (GET_CODE (operands[2]) == MEM)
8580 	{
8581 	  p = "%z2\t%2";
8582 	  break;
8583 	}
8584 
8585       if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
8586 	{
8587 #if SYSV386_COMPAT
8588 	  /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
8589 	     derived assemblers, confusingly reverse the direction of
8590 	     the operation for fsub{r} and fdiv{r} when the
8591 	     destination register is not st(0).  The Intel assembler
8592 	     doesn't have this brain damage.  Read !SYSV386_COMPAT to
8593 	     figure out what the hardware really does.  */
8594 	  if (STACK_TOP_P (operands[0]))
8595 	    p = "{p\t%0, %2|rp\t%2, %0}";
8596 	  else
8597 	    p = "{rp\t%2, %0|p\t%0, %2}";
8598 #else
8599 	  if (STACK_TOP_P (operands[0]))
8600 	    /* As above for fmul/fadd, we can't store to st(0).  */
8601 	    p = "rp\t{%0, %2|%2, %0}";	/* st(1) = st(0) op st(1); pop */
8602 	  else
8603 	    p = "p\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0); pop */
8604 #endif
8605 	  break;
8606 	}
8607 
8608       if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
8609 	{
8610 #if SYSV386_COMPAT
8611 	  if (STACK_TOP_P (operands[0]))
8612 	    p = "{rp\t%0, %1|p\t%1, %0}";
8613 	  else
8614 	    p = "{p\t%1, %0|rp\t%0, %1}";
8615 #else
8616 	  if (STACK_TOP_P (operands[0]))
8617 	    p = "p\t{%0, %1|%1, %0}";	/* st(1) = st(1) op st(0); pop */
8618 	  else
8619 	    p = "rp\t{%1, %0|%0, %1}";	/* st(r2) = st(0) op st(r2); pop */
8620 #endif
8621 	  break;
8622 	}
8623 
8624       if (STACK_TOP_P (operands[0]))
8625 	{
8626 	  if (STACK_TOP_P (operands[1]))
8627 	    p = "\t{%y2, %0|%0, %y2}";	/* st(0) = st(0) op st(r2) */
8628 	  else
8629 	    p = "r\t{%y1, %0|%0, %y1}";	/* st(0) = st(r1) op st(0) */
8630 	  break;
8631 	}
8632       else if (STACK_TOP_P (operands[1]))
8633 	{
8634 #if SYSV386_COMPAT
8635 	  p = "{\t%1, %0|r\t%0, %1}";
8636 #else
8637 	  p = "r\t{%1, %0|%0, %1}";	/* st(r2) = st(0) op st(r2) */
8638 #endif
8639 	}
8640       else
8641 	{
8642 #if SYSV386_COMPAT
8643 	  p = "{r\t%2, %0|\t%0, %2}";
8644 #else
8645 	  p = "\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0) */
8646 #endif
8647 	}
8648       break;
8649 
8650     default:
8651       gcc_unreachable ();
8652     }
8653 
8654   strcat (buf, p);
8655   return buf;
8656 }
8657 
8658 /* Return needed mode for entity in optimize_mode_switching pass.  */
8659 
8660 int
8661 ix86_mode_needed (int entity, rtx insn)
8662 {
8663   enum attr_i387_cw mode;
8664 
8665   /* The mode UNINITIALIZED is used to store the control word after a
8666      function call or ASM pattern.  The mode ANY specifies that the function
8667      has no requirements on the control word and makes no changes in the
8668      bits we are interested in.  */
8669 
8670   if (CALL_P (insn)
8671       || (NONJUMP_INSN_P (insn)
8672 	  && (asm_noperands (PATTERN (insn)) >= 0
8673 	      || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
8674     return I387_CW_UNINITIALIZED;
8675 
8676   if (recog_memoized (insn) < 0)
8677     return I387_CW_ANY;
8678 
8679   mode = get_attr_i387_cw (insn);
8680 
8681   switch (entity)
8682     {
8683     case I387_TRUNC:
8684       if (mode == I387_CW_TRUNC)
8685 	return mode;
8686       break;
8687 
8688     case I387_FLOOR:
8689       if (mode == I387_CW_FLOOR)
8690 	return mode;
8691       break;
8692 
8693     case I387_CEIL:
8694       if (mode == I387_CW_CEIL)
8695 	return mode;
8696       break;
8697 
8698     case I387_MASK_PM:
8699       if (mode == I387_CW_MASK_PM)
8700 	return mode;
8701       break;
8702 
8703     default:
8704       gcc_unreachable ();
8705     }
8706 
8707   return I387_CW_ANY;
8708 }
8709 
8710 /* Output code to initialize control word copies used by trunc?f?i and
8711    rounding patterns.  The current control word is saved in SLOT_CW_STORED,
8712    while a copy adjusted for MODE is stored in the corresponding slot.  */
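/* For example, the truncation copy sets the rounding-control field (bits
   10-11 of the control word) to 11b by OR-ing in 0x0c00; floor and ceil
   first clear those bits and then OR in 0x0400 or 0x0800 respectively
   (shown here for the straightforward code path below).  */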
8713 
8714 void
8715 emit_i387_cw_initialization (int mode)
8716 {
8717   rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
8718   rtx new_mode;
8719 
8720   int slot;
8721 
8722   rtx reg = gen_reg_rtx (HImode);
8723 
8724   emit_insn (gen_x86_fnstcw_1 (stored_mode));
8725   emit_move_insn (reg, stored_mode);
8726 
8727   if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL || optimize_size)
8728     {
8729       switch (mode)
8730 	{
8731 	case I387_CW_TRUNC:
8732 	  /* round toward zero (truncate) */
8733 	  emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
8734 	  slot = SLOT_CW_TRUNC;
8735 	  break;
8736 
8737 	case I387_CW_FLOOR:
8738 	  /* round down toward -oo */
8739 	  emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
8740 	  emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
8741 	  slot = SLOT_CW_FLOOR;
8742 	  break;
8743 
8744 	case I387_CW_CEIL:
8745 	  /* round up toward +oo */
8746 	  emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
8747 	  emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
8748 	  slot = SLOT_CW_CEIL;
8749 	  break;
8750 
8751 	case I387_CW_MASK_PM:
8752 	  /* mask precision exception for nearbyint() */
8753 	  emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
8754 	  slot = SLOT_CW_MASK_PM;
8755 	  break;
8756 
8757 	default:
8758 	  gcc_unreachable ();
8759 	}
8760     }
8761   else
8762     {
8763       switch (mode)
8764 	{
8765 	case I387_CW_TRUNC:
8766 	  /* round toward zero (truncate) */
8767 	  emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
8768 	  slot = SLOT_CW_TRUNC;
8769 	  break;
8770 
8771 	case I387_CW_FLOOR:
8772 	  /* round down toward -oo */
8773 	  emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
8774 	  slot = SLOT_CW_FLOOR;
8775 	  break;
8776 
8777 	case I387_CW_CEIL:
8778 	  /* round up toward +oo */
8779 	  emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
8780 	  slot = SLOT_CW_CEIL;
8781 	  break;
8782 
8783 	case I387_CW_MASK_PM:
8784 	  /* mask precision exception for nearbyint() */
8785 	  emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
8786 	  slot = SLOT_CW_MASK_PM;
8787 	  break;
8788 
8789 	default:
8790 	  gcc_unreachable ();
8791 	}
8792     }
8793 
8794   gcc_assert (slot < MAX_386_STACK_LOCALS);
8795 
8796   new_mode = assign_386_stack_local (HImode, slot);
8797   emit_move_insn (new_mode, reg);
8798 }
8799 
8800 /* Output code for INSN to convert a float to a signed int.  OPERANDS
8801    are the insn operands.  The output may be [HSD]Imode and the input
8802    operand may be [SDX]Fmode.  */
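/* Roughly, for a non-fisttp DImode store where %st(0) does not die, the
   sequence is: "fld" to duplicate the value, "fldcw" the truncating
   control word (operand 3), "fistp" the result, then "fldcw" the saved
   control word (operand 2).  */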
8803 
8804 const char *
8805 output_fix_trunc (rtx insn, rtx *operands, int fisttp)
8806 {
8807   int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
8808   int dimode_p = GET_MODE (operands[0]) == DImode;
8809   int round_mode = get_attr_i387_cw (insn);
8810 
8811   /* Jump through a hoop or two for DImode, since the hardware has no
8812      non-popping instruction.  We used to do this a different way, but
8813      that was somewhat fragile and broke with post-reload splitters.  */
8814   if ((dimode_p || fisttp) && !stack_top_dies)
8815     output_asm_insn ("fld\t%y1", operands);
8816 
8817   gcc_assert (STACK_TOP_P (operands[1]));
8818   gcc_assert (GET_CODE (operands[0]) == MEM);
8819 
8820   if (fisttp)
8821       output_asm_insn ("fisttp%z0\t%0", operands);
8822   else
8823     {
8824       if (round_mode != I387_CW_ANY)
8825 	output_asm_insn ("fldcw\t%3", operands);
8826       if (stack_top_dies || dimode_p)
8827 	output_asm_insn ("fistp%z0\t%0", operands);
8828       else
8829 	output_asm_insn ("fist%z0\t%0", operands);
8830       if (round_mode != I387_CW_ANY)
8831 	output_asm_insn ("fldcw\t%2", operands);
8832     }
8833 
8834   return "";
8835 }
8836 
8837 /* Output code for x87 ffreep insn.  The OPNO argument, which may only
8838    have the values zero or one, indicates the ffreep insn's operand
8839    from the OPERANDS array.  */
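/* When the assembler lacks the ffreep mnemonic, the raw encoding is
   emitted instead: "ffreep %st(N)" encodes as 0xdf 0xc0+N, which the
   little-endian .word constants below spell as 0xc0df through 0xc7df.  */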
8840 
8841 static const char *
8842 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
8843 {
8844   if (TARGET_USE_FFREEP)
8845 #if HAVE_AS_IX86_FFREEP
8846     return opno ? "ffreep\t%y1" : "ffreep\t%y0";
8847 #else
8848     switch (REGNO (operands[opno]))
8849       {
8850       case FIRST_STACK_REG + 0: return ".word\t0xc0df";
8851       case FIRST_STACK_REG + 1: return ".word\t0xc1df";
8852       case FIRST_STACK_REG + 2: return ".word\t0xc2df";
8853       case FIRST_STACK_REG + 3: return ".word\t0xc3df";
8854       case FIRST_STACK_REG + 4: return ".word\t0xc4df";
8855       case FIRST_STACK_REG + 5: return ".word\t0xc5df";
8856       case FIRST_STACK_REG + 6: return ".word\t0xc6df";
8857       case FIRST_STACK_REG + 7: return ".word\t0xc7df";
8858       }
8859 #endif
8860 
8861   return opno ? "fstp\t%y1" : "fstp\t%y0";
8862 }
8863 
8864 
8865 /* Output code for INSN to compare OPERANDS.  EFLAGS_P is 1 when fcomi
8866    should be used.  UNORDERED_P is true when fucom should be used.  */
8867 
8868 const char *
8869 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
8870 {
8871   int stack_top_dies;
8872   rtx cmp_op0, cmp_op1;
8873   int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
8874 
8875   if (eflags_p)
8876     {
8877       cmp_op0 = operands[0];
8878       cmp_op1 = operands[1];
8879     }
8880   else
8881     {
8882       cmp_op0 = operands[1];
8883       cmp_op1 = operands[2];
8884     }
8885 
8886   if (is_sse)
8887     {
8888       if (GET_MODE (operands[0]) == SFmode)
8889 	if (unordered_p)
8890 	  return "ucomiss\t{%1, %0|%0, %1}";
8891 	else
8892 	  return "comiss\t{%1, %0|%0, %1}";
8893       else
8894 	if (unordered_p)
8895 	  return "ucomisd\t{%1, %0|%0, %1}";
8896 	else
8897 	  return "comisd\t{%1, %0|%0, %1}";
8898     }
8899 
8900   gcc_assert (STACK_TOP_P (cmp_op0));
8901 
8902   stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
8903 
8904   if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
8905     {
8906       if (stack_top_dies)
8907 	{
8908 	  output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
8909 	  return output_387_ffreep (operands, 1);
8910 	}
8911       else
8912 	return "ftst\n\tfnstsw\t%0";
8913     }
8914 
8915   if (STACK_REG_P (cmp_op1)
8916       && stack_top_dies
8917       && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
8918       && REGNO (cmp_op1) != FIRST_STACK_REG)
8919     {
8920       /* If the top of the 387 stack dies, and the other operand is also
8921 	 a stack register that dies, then this must be a `fcompp' float
8922 	 compare.  */
8923 
8924       if (eflags_p)
8925 	{
8926 	  /* There is no double popping fcomi variant.  Fortunately,
8927 	     eflags is immune from the fstp's cc clobbering.  */
8928 	  if (unordered_p)
8929 	    output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
8930 	  else
8931 	    output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
8932 	  return output_387_ffreep (operands, 0);
8933 	}
8934       else
8935 	{
8936 	  if (unordered_p)
8937 	    return "fucompp\n\tfnstsw\t%0";
8938 	  else
8939 	    return "fcompp\n\tfnstsw\t%0";
8940 	}
8941     }
8942   else
8943     {
8944       /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies.  */
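	      /* For instance, eflags_p = 1 with a floating-point operand,
		 unordered_p = 1 and a live stack top gives mask 10, hence
		 "fucomi\t{%y1, %0|%0, %y1}".  */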
8945 
8946       static const char * const alt[16] =
8947       {
8948 	"fcom%z2\t%y2\n\tfnstsw\t%0",
8949 	"fcomp%z2\t%y2\n\tfnstsw\t%0",
8950 	"fucom%z2\t%y2\n\tfnstsw\t%0",
8951 	"fucomp%z2\t%y2\n\tfnstsw\t%0",
8952 
8953 	"ficom%z2\t%y2\n\tfnstsw\t%0",
8954 	"ficomp%z2\t%y2\n\tfnstsw\t%0",
8955 	NULL,
8956 	NULL,
8957 
8958 	"fcomi\t{%y1, %0|%0, %y1}",
8959 	"fcomip\t{%y1, %0|%0, %y1}",
8960 	"fucomi\t{%y1, %0|%0, %y1}",
8961 	"fucomip\t{%y1, %0|%0, %y1}",
8962 
8963 	NULL,
8964 	NULL,
8965 	NULL,
8966 	NULL
8967       };
8968 
8969       int mask;
8970       const char *ret;
8971 
8972       mask  = eflags_p << 3;
8973       mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
8974       mask |= unordered_p << 1;
8975       mask |= stack_top_dies;
8976 
8977       gcc_assert (mask < 16);
8978       ret = alt[mask];
8979       gcc_assert (ret);
8980 
8981       return ret;
8982     }
8983 }
8984 
8985 void
8986 ix86_output_addr_vec_elt (FILE *file, int value)
8987 {
8988   const char *directive = ASM_LONG;
8989 
8990 #ifdef ASM_QUAD
8991   if (TARGET_64BIT)
8992     directive = ASM_QUAD;
8993 #else
8994   gcc_assert (!TARGET_64BIT);
8995 #endif
8996 
8997   fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
8998 }
8999 
9000 void
9001 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
9002 {
9003   if (TARGET_64BIT)
9004     fprintf (file, "%s%s%d-%s%d\n",
9005 	     ASM_LONG, LPREFIX, value, LPREFIX, rel);
9006   else if (HAVE_AS_GOTOFF_IN_DATA)
9007     fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
9008 #if TARGET_MACHO
9009   else if (TARGET_MACHO)
9010     {
9011       fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
9012       machopic_output_function_base_name (file);
9013       fprintf(file, "\n");
9014     }
9015 #endif
9016   else
9017     asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
9018 		 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
9019 }
9020 
9021 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
9022    for the target.  */
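/* Sketch of the choice made below: unless TARGET_USE_MOV0 is in effect and
   we are not optimizing for size, the clear is emitted as an xor pattern,
   which clobbers the flags, so the SET is wrapped in a PARALLEL with a
   clobber of the CC register.  */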
9023 
9024 void
9025 ix86_expand_clear (rtx dest)
9026 {
9027   rtx tmp;
9028 
9029   /* We play register width games, which are only valid after reload.  */
9030   gcc_assert (reload_completed);
9031 
9032   /* Avoid HImode and its attendant prefix byte.  */
9033   if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
9034     dest = gen_rtx_REG (SImode, REGNO (dest));
9035 
9036   tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
9037 
9038   /* This predicate should match that for movsi_xor and movdi_xor_rex64.  */
9039   if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
9040     {
9041       rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
9042       tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
9043     }
9044 
9045   emit_insn (tmp);
9046 }
9047 
9048 /* X is an unchanging MEM.  If it is a constant pool reference, return
9049    the constant pool rtx, else NULL.  */
9050 
9051 rtx
9052 maybe_get_pool_constant (rtx x)
9053 {
9054   x = ix86_delegitimize_address (XEXP (x, 0));
9055 
9056   if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
9057     return get_pool_constant (x);
9058 
9059   return NULL_RTX;
9060 }
9061 
9062 void
9063 ix86_expand_move (enum machine_mode mode, rtx operands[])
9064 {
9065   int strict = (reload_in_progress || reload_completed);
9066   rtx op0, op1;
9067   enum tls_model model;
9068 
9069   op0 = operands[0];
9070   op1 = operands[1];
9071 
9072   if (GET_CODE (op1) == SYMBOL_REF)
9073     {
9074       model = SYMBOL_REF_TLS_MODEL (op1);
9075       if (model)
9076 	{
9077 	  op1 = legitimize_tls_address (op1, model, true);
9078 	  op1 = force_operand (op1, op0);
9079 	  if (op1 == op0)
9080 	    return;
9081 	}
9082     }
9083   else if (GET_CODE (op1) == CONST
9084 	   && GET_CODE (XEXP (op1, 0)) == PLUS
9085 	   && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
9086     {
9087       model = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (op1, 0), 0));
9088       if (model)
9089 	{
9090 	  rtx addend = XEXP (XEXP (op1, 0), 1);
9091 	  op1 = legitimize_tls_address (XEXP (XEXP (op1, 0), 0), model, true);
9092 	  op1 = force_operand (op1, NULL);
9093 	  op1 = expand_simple_binop (Pmode, PLUS, op1, addend,
9094 				     op0, 1, OPTAB_DIRECT);
9095 	  if (op1 == op0)
9096 	    return;
9097 	}
9098     }
9099 
9100   if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
9101     {
9102       if (TARGET_MACHO && !TARGET_64BIT)
9103 	{
9104 #if TARGET_MACHO
9105 	  if (MACHOPIC_PURE)
9106 	    {
9107 	      rtx temp = ((reload_in_progress
9108 			   || ((op0 && GET_CODE (op0) == REG)
9109 			       && mode == Pmode))
9110 			  ? op0 : gen_reg_rtx (Pmode));
9111 	      op1 = machopic_indirect_data_reference (op1, temp);
9112 	      op1 = machopic_legitimize_pic_address (op1, mode,
9113 						     temp == op1 ? 0 : temp);
9114 	    }
9115 	  else if (MACHOPIC_INDIRECT)
9116 	    op1 = machopic_indirect_data_reference (op1, 0);
9117 	  if (op0 == op1)
9118 	    return;
9119 #endif
9120 	}
9121       else
9122 	{
9123 	  if (GET_CODE (op0) == MEM)
9124 	    op1 = force_reg (Pmode, op1);
9125 	  else
9126 	    op1 = legitimize_address (op1, op1, Pmode);
9127 	}
9128     }
9129   else
9130     {
9131       if (GET_CODE (op0) == MEM
9132 	  && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
9133 	      || !push_operand (op0, mode))
9134 	  && GET_CODE (op1) == MEM)
9135 	op1 = force_reg (mode, op1);
9136 
9137       if (push_operand (op0, mode)
9138 	  && ! general_no_elim_operand (op1, mode))
9139 	op1 = copy_to_mode_reg (mode, op1);
9140 
9141       /* Force large constants in 64-bit compilation into a register
9142 	 to get them CSEed.  */
9143       if (TARGET_64BIT && mode == DImode
9144 	  && immediate_operand (op1, mode)
9145 	  && !x86_64_zext_immediate_operand (op1, VOIDmode)
9146 	  && !register_operand (op0, mode)
9147 	  && optimize && !reload_completed && !reload_in_progress)
9148 	op1 = copy_to_mode_reg (mode, op1);
9149 
9150       if (FLOAT_MODE_P (mode))
9151 	{
9152 	  /* If we are loading a floating point constant to a register,
9153 	     force the value to memory now, since we'll get better code
9154 	     out of the back end.  */
9155 
9156 	  if (strict)
9157 	    ;
9158 	  else if (GET_CODE (op1) == CONST_DOUBLE)
9159 	    {
9160 	      op1 = validize_mem (force_const_mem (mode, op1));
9161 	      if (!register_operand (op0, mode))
9162 		{
9163 		  rtx temp = gen_reg_rtx (mode);
9164 		  emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
9165 		  emit_move_insn (op0, temp);
9166 		  return;
9167 		}
9168 	    }
9169 	}
9170     }
9171 
9172   emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
9173 }
9174 
9175 void
9176 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
9177 {
9178   rtx op0 = operands[0], op1 = operands[1];
9179 
9180   /* Force constants other than zero into memory.  We do not know how
9181      the instructions used to build constants modify the upper 64 bits
9182      of the register; once we have that information we may be able
9183      to handle some of them more efficiently.  */
9184   if ((reload_in_progress | reload_completed) == 0
9185       && register_operand (op0, mode)
9186       && CONSTANT_P (op1)
9187       && standard_sse_constant_p (op1) <= 0)
9188     op1 = validize_mem (force_const_mem (mode, op1));
9189 
9190   /* If neither operand is a register yet, force operand 1 into one.  */
9191   if (!no_new_pseudos
9192       && !register_operand (op0, mode)
9193       && !register_operand (op1, mode))
9194     {
9195       emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
9196       return;
9197     }
9198 
9199   emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
9200 }
9201 
9202 /* Implement the movmisalign patterns for SSE.  Non-SSE modes go
9203    straight to ix86_expand_vector_move.  */
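/* Rough sketch for an unaligned V2DFmode load when not optimizing for
   size: the low half is loaded with sse2_loadlpd (merged with zero, or
   with the clobbered destination on TARGET_SSE_SPLIT_REGS) and the high
   half with sse2_loadhpd; with -Os a single movups is used instead.  */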
9204 
9205 void
9206 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
9207 {
9208   rtx op0, op1, m;
9209 
9210   op0 = operands[0];
9211   op1 = operands[1];
9212 
9213   if (MEM_P (op1))
9214     {
9215       /* If we're optimizing for size, movups is the smallest.  */
9216       if (optimize_size)
9217 	{
9218 	  op0 = gen_lowpart (V4SFmode, op0);
9219 	  op1 = gen_lowpart (V4SFmode, op1);
9220 	  emit_insn (gen_sse_movups (op0, op1));
9221 	  return;
9222 	}
9223 
9224       /* ??? If we have typed data, then it would appear that using
9225 	 movdqu is the only way to get unaligned data loaded with
9226 	 integer type.  */
9227       if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
9228 	{
9229 	  op0 = gen_lowpart (V16QImode, op0);
9230 	  op1 = gen_lowpart (V16QImode, op1);
9231 	  emit_insn (gen_sse2_movdqu (op0, op1));
9232 	  return;
9233 	}
9234 
9235       if (TARGET_SSE2 && mode == V2DFmode)
9236 	{
9237 	  rtx zero;
9238 
9239 	  /* When SSE registers are split into halves, we can avoid
9240 	     writing to the top half twice.  */
9241 	  if (TARGET_SSE_SPLIT_REGS)
9242 	    {
9243 	      emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
9244 	      zero = op0;
9245 	    }
9246 	  else
9247 	    {
9248 	      /* ??? Not sure about the best option for the Intel chips.
9249 		 The following would seem to satisfy; the register is
9250 		 entirely cleared, breaking the dependency chain.  We
9251 		 then store to the upper half, with a dependency depth
9252 		 of one.  A rumor has it that Intel recommends two movsd
9253 		 followed by an unpacklpd, but this is unconfirmed.  And
9254 		 given that the dependency depth of the unpacklpd would
9255 		 still be one, I'm not sure why this would be better.  */
9256 	      zero = CONST0_RTX (V2DFmode);
9257 	    }
9258 
9259 	  m = adjust_address (op1, DFmode, 0);
9260 	  emit_insn (gen_sse2_loadlpd (op0, zero, m));
9261 	  m = adjust_address (op1, DFmode, 8);
9262 	  emit_insn (gen_sse2_loadhpd (op0, op0, m));
9263 	}
9264       else
9265 	{
9266 	  if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
9267 	    emit_move_insn (op0, CONST0_RTX (mode));
9268 	  else
9269 	    emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
9270 
9271 	  if (mode != V4SFmode)
9272 	    op0 = gen_lowpart (V4SFmode, op0);
9273 	  m = adjust_address (op1, V2SFmode, 0);
9274 	  emit_insn (gen_sse_loadlps (op0, op0, m));
9275 	  m = adjust_address (op1, V2SFmode, 8);
9276 	  emit_insn (gen_sse_loadhps (op0, op0, m));
9277 	}
9278     }
9279   else if (MEM_P (op0))
9280     {
9281       /* If we're optimizing for size, movups is the smallest.  */
9282       if (optimize_size)
9283 	{
9284 	  op0 = gen_lowpart (V4SFmode, op0);
9285 	  op1 = gen_lowpart (V4SFmode, op1);
9286 	  emit_insn (gen_sse_movups (op0, op1));
9287 	  return;
9288 	}
9289 
9290       /* ??? Similar to above, only less clear because of quote
9291 	 typeless stores unquote.  */
9292       if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
9293 	  && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
9294         {
9295 	  op0 = gen_lowpart (V16QImode, op0);
9296 	  op1 = gen_lowpart (V16QImode, op1);
9297 	  emit_insn (gen_sse2_movdqu (op0, op1));
9298 	  return;
9299 	}
9300 
9301       if (TARGET_SSE2 && mode == V2DFmode)
9302 	{
9303 	  m = adjust_address (op0, DFmode, 0);
9304 	  emit_insn (gen_sse2_storelpd (m, op1));
9305 	  m = adjust_address (op0, DFmode, 8);
9306 	  emit_insn (gen_sse2_storehpd (m, op1));
9307 	}
9308       else
9309 	{
9310 	  if (mode != V4SFmode)
9311 	    op1 = gen_lowpart (V4SFmode, op1);
9312 	  m = adjust_address (op0, V2SFmode, 0);
9313 	  emit_insn (gen_sse_storelps (m, op1));
9314 	  m = adjust_address (op0, V2SFmode, 8);
9315 	  emit_insn (gen_sse_storehps (m, op1));
9316 	}
9317     }
9318   else
9319     gcc_unreachable ();
9320 }
9321 
9322 /* Expand a push in MODE.  This is some mode for which we do not support
9323    proper push instructions, at least from the registers that we expect
9324    the value to live in.  */
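/* In effect: the stack pointer is decremented by GET_MODE_SIZE (mode) and
   X is then stored at the new top of stack, e.g. a 16-byte adjustment
   followed by a move for a TImode push.  */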
9325 
9326 void
9327 ix86_expand_push (enum machine_mode mode, rtx x)
9328 {
9329   rtx tmp;
9330 
9331   tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
9332 			     GEN_INT (-GET_MODE_SIZE (mode)),
9333 			     stack_pointer_rtx, 1, OPTAB_DIRECT);
9334   if (tmp != stack_pointer_rtx)
9335     emit_move_insn (stack_pointer_rtx, tmp);
9336 
9337   tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
9338   emit_move_insn (tmp, x);
9339 }
9340 
9341 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok.  Return the
9342    destination to use for the operation.  If different from the true
9343    destination in operands[0], a copy operation will be required.  */
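/* For example, given a commutative PLUS where operands[0] and operands[2]
   are the same register, the two sources are swapped below so the
   two-address constraint (destination matching source 1) can be met
   without an extra copy.  */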
9344 
9345 rtx
9346 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
9347 			    rtx operands[])
9348 {
9349   int matching_memory;
9350   rtx src1, src2, dst;
9351 
9352   dst = operands[0];
9353   src1 = operands[1];
9354   src2 = operands[2];
9355 
9356   /* Recognize <var1> = <value> <op> <var1> for commutative operators */
9357   if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
9358       && (rtx_equal_p (dst, src2)
9359 	  || immediate_operand (src1, mode)))
9360     {
9361       rtx temp = src1;
9362       src1 = src2;
9363       src2 = temp;
9364     }
9365 
9366   /* If the destination is memory, and we do not have matching source
9367      operands, do things in registers.  */
9368   matching_memory = 0;
9369   if (GET_CODE (dst) == MEM)
9370     {
9371       if (rtx_equal_p (dst, src1))
9372 	matching_memory = 1;
9373       else if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
9374 	       && rtx_equal_p (dst, src2))
9375 	matching_memory = 2;
9376       else
9377 	dst = gen_reg_rtx (mode);
9378     }
9379 
9380   /* Both source operands cannot be in memory.  */
9381   if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
9382     {
9383       if (matching_memory != 2)
9384 	src2 = force_reg (mode, src2);
9385       else
9386 	src1 = force_reg (mode, src1);
9387     }
9388 
9389   /* If the operation is not commutative, source 1 cannot be a constant
9390      or non-matching memory.  */
9391   if ((CONSTANT_P (src1)
9392        || (!matching_memory && GET_CODE (src1) == MEM))
9393       && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
9394     src1 = force_reg (mode, src1);
9395 
9396   src1 = operands[1] = src1;
9397   src2 = operands[2] = src2;
9398   return dst;
9399 }
9400 
9401 /* Similarly, but assume that the destination has already been
9402    set up properly.  */
9403 
9404 void
9405 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
9406 				    enum machine_mode mode, rtx operands[])
9407 {
9408   rtx dst = ix86_fixup_binary_operands (code, mode, operands);
9409   gcc_assert (dst == operands[0]);
9410 }
9411 
9412 /* Attempt to expand a binary operator.  Make the expansion closer to the
9413    actual machine than just general_operand, which would allow 3 separate
9414    memory references (one output, two inputs) in a single insn.  */
9415 
9416 void
9417 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
9418 			     rtx operands[])
9419 {
9420   rtx src1, src2, dst, op, clob;
9421 
9422   dst = ix86_fixup_binary_operands (code, mode, operands);
9423   src1 = operands[1];
9424   src2 = operands[2];
9425 
9426  /* Emit the instruction.  */
9427 
9428   op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
9429   if (reload_in_progress)
9430     {
9431       /* Reload doesn't know about the flags register, and doesn't know that
9432          it doesn't want to clobber it.  We can only do this with PLUS.  */
9433       gcc_assert (code == PLUS);
9434       emit_insn (op);
9435     }
9436   else
9437     {
9438       clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
9439       emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
9440     }
9441 
9442   /* Fix up the destination if needed.  */
9443   if (dst != operands[0])
9444     emit_move_insn (operands[0], dst);
9445 }
9446 
9447 /* Return TRUE or FALSE depending on whether the binary operator meets the
9448    appropriate constraints.  */
9449 
9450 int
9451 ix86_binary_operator_ok (enum rtx_code code,
9452 			 enum machine_mode mode ATTRIBUTE_UNUSED,
9453 			 rtx operands[3])
9454 {
9455   /* Both source operands cannot be in memory.  */
9456   if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
9457     return 0;
9458   /* If the operation is not commutative, source 1 cannot be a constant.  */
9459   if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
9460     return 0;
9461   /* If the destination is memory, we must have a matching source operand.  */
9462   if (GET_CODE (operands[0]) == MEM
9463       && ! (rtx_equal_p (operands[0], operands[1])
9464 	    || (GET_RTX_CLASS (code) == RTX_COMM_ARITH
9465 		&& rtx_equal_p (operands[0], operands[2]))))
9466     return 0;
9467   /* If the operation is not commutative and source 1 is memory, we must
9468      have a matching destination.  */
9469   if (GET_CODE (operands[1]) == MEM
9470       && GET_RTX_CLASS (code) != RTX_COMM_ARITH
9471       && ! rtx_equal_p (operands[0], operands[1]))
9472     return 0;
9473   return 1;
9474 }
9475 
9476 /* Attempt to expand a unary operator.  Make the expansion closer to the
9477    actual machine than just general_operand, which would allow 2 separate
9478    memory references (one output, one input) in a single insn.  */
9479 
9480 void
9481 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
9482 			    rtx operands[])
9483 {
9484   int matching_memory;
9485   rtx src, dst, op, clob;
9486 
9487   dst = operands[0];
9488   src = operands[1];
9489 
9490   /* If the destination is memory, and we do not have matching source
9491      operands, do things in registers.  */
9492   matching_memory = 0;
9493   if (MEM_P (dst))
9494     {
9495       if (rtx_equal_p (dst, src))
9496 	matching_memory = 1;
9497       else
9498 	dst = gen_reg_rtx (mode);
9499     }
9500 
9501   /* When source operand is memory, destination must match.  */
9502   if (MEM_P (src) && !matching_memory)
9503     src = force_reg (mode, src);
9504 
9505   /* Emit the instruction.  */
9506 
9507   op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
9508   if (reload_in_progress || code == NOT)
9509     {
9510       /* Reload doesn't know about the flags register, and doesn't know that
9511          it doesn't want to clobber it.  */
9512       gcc_assert (code == NOT);
9513       emit_insn (op);
9514     }
9515   else
9516     {
9517       clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
9518       emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
9519     }
9520 
9521   /* Fix up the destination if needed.  */
9522   if (dst != operands[0])
9523     emit_move_insn (operands[0], dst);
9524 }
9525 
9526 /* Return TRUE or FALSE depending on whether the unary operator meets the
9527    appropriate constraints.  */
9528 
9529 int
9530 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
9531 			enum machine_mode mode ATTRIBUTE_UNUSED,
9532 			rtx operands[2] ATTRIBUTE_UNUSED)
9533 {
9534   /* If one of operands is memory, source and destination must match.  */
9535   if ((GET_CODE (operands[0]) == MEM
9536        || GET_CODE (operands[1]) == MEM)
9537       && ! rtx_equal_p (operands[0], operands[1]))
9538     return FALSE;
9539   return TRUE;
9540 }
9541 
9542 /* A subroutine of ix86_expand_fp_absneg_operator and copysign expanders.
9543    Create a mask for the sign bit in MODE for an SSE register.  If VECT is
9544    true, then replicate the mask for all elements of the vector register.
9545    If INVERT is true, then create a mask excluding the sign bit.  */
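/* Sketch of the DFmode result: the low vector element is a double whose
   bit pattern is 0x8000000000000000 (only the sign bit set), or its
   complement when INVERT; with VECT the element is replicated across the
   V2DFmode register, otherwise the remaining element is zero.  */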
9546 
9547 rtx
9548 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
9549 {
9550   enum machine_mode vec_mode;
9551   HOST_WIDE_INT hi, lo;
9552   int shift = 63;
9553   rtvec v;
9554   rtx mask;
9555 
9556   /* Find the sign bit, sign extended to 2*HWI.  */
9557   if (mode == SFmode)
9558     lo = 0x80000000, hi = lo < 0;
9559   else if (HOST_BITS_PER_WIDE_INT >= 64)
9560     lo = (HOST_WIDE_INT)1 << shift, hi = -1;
9561   else
9562     lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
9563 
9564   if (invert)
9565     lo = ~lo, hi = ~hi;
9566 
9567   /* Force this value into the low part of a fp vector constant.  */
9568   mask = immed_double_const (lo, hi, mode == SFmode ? SImode : DImode);
9569   mask = gen_lowpart (mode, mask);
9570 
9571   if (mode == SFmode)
9572     {
9573       if (vect)
9574 	v = gen_rtvec (4, mask, mask, mask, mask);
9575       else
9576 	v = gen_rtvec (4, mask, CONST0_RTX (SFmode),
9577 		       CONST0_RTX (SFmode), CONST0_RTX (SFmode));
9578       vec_mode = V4SFmode;
9579     }
9580   else
9581     {
9582       if (vect)
9583 	v = gen_rtvec (2, mask, mask);
9584       else
9585 	v = gen_rtvec (2, mask, CONST0_RTX (DFmode));
9586       vec_mode = V2DFmode;
9587     }
9588 
9589   return force_reg (vec_mode, gen_rtx_CONST_VECTOR (vec_mode, v));
9590 }
9591 
9592 /* Generate code for floating point ABS or NEG.  */
9593 
9594 void
9595 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
9596 				rtx operands[])
9597 {
9598   rtx mask, set, use, clob, dst, src;
9599   bool matching_memory;
9600   bool use_sse = false;
9601   bool vector_mode = VECTOR_MODE_P (mode);
9602   enum machine_mode elt_mode = mode;
9603 
9604   if (vector_mode)
9605     {
9606       elt_mode = GET_MODE_INNER (mode);
9607       use_sse = true;
9608     }
9609   else if (TARGET_SSE_MATH)
9610     use_sse = SSE_FLOAT_MODE_P (mode);
9611 
9612   /* NEG and ABS performed with SSE use bitwise mask operations.
9613      Create the appropriate mask now.  */
9614   if (use_sse)
9615     mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS);
9616   else
9617     mask = NULL_RTX;
9618 
9619   dst = operands[0];
9620   src = operands[1];
9621 
9622   /* If the destination is memory, and we don't have matching source
9623      operands or we're using the x87, do things in registers.  */
9624   matching_memory = false;
9625   if (MEM_P (dst))
9626     {
9627       if (use_sse && rtx_equal_p (dst, src))
9628 	matching_memory = true;
9629       else
9630 	dst = gen_reg_rtx (mode);
9631     }
9632   if (MEM_P (src) && !matching_memory)
9633     src = force_reg (mode, src);
9634 
9635   if (vector_mode)
9636     {
9637       set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
9638       set = gen_rtx_SET (VOIDmode, dst, set);
9639       emit_insn (set);
9640     }
9641   else
9642     {
9643       set = gen_rtx_fmt_e (code, mode, src);
9644       set = gen_rtx_SET (VOIDmode, dst, set);
9645       if (mask)
9646         {
9647           use = gen_rtx_USE (VOIDmode, mask);
9648           clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
9649           emit_insn (gen_rtx_PARALLEL (VOIDmode,
9650 				       gen_rtvec (3, set, use, clob)));
9651         }
9652       else
9653 	emit_insn (set);
9654     }
9655 
9656   if (dst != operands[0])
9657     emit_move_insn (operands[0], dst);
9658 }
9659 
9660 /* Expand a copysign operation.  Special case operand 0 being a constant.  */
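/* The expansion follows the usual bit-mask identity, roughly
   result = (op0 & ~signmask) | (op1 & signmask); when op0 is a constant
   its absolute value is folded in directly, so only the sign mask is
   needed.  */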
9661 
9662 void
9663 ix86_expand_copysign (rtx operands[])
9664 {
9665   enum machine_mode mode, vmode;
9666   rtx dest, op0, op1, mask, nmask;
9667 
9668   dest = operands[0];
9669   op0 = operands[1];
9670   op1 = operands[2];
9671 
9672   mode = GET_MODE (dest);
9673   vmode = mode == SFmode ? V4SFmode : V2DFmode;
9674 
9675   if (GET_CODE (op0) == CONST_DOUBLE)
9676     {
9677       rtvec v;
9678 
9679       if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
9680 	op0 = simplify_unary_operation (ABS, mode, op0, mode);
9681 
9682       if (op0 == CONST0_RTX (mode))
9683 	op0 = CONST0_RTX (vmode);
9684       else
9685         {
9686 	  if (mode == SFmode)
9687 	    v = gen_rtvec (4, op0, CONST0_RTX (SFmode),
9688                            CONST0_RTX (SFmode), CONST0_RTX (SFmode));
9689 	  else
9690 	    v = gen_rtvec (2, op0, CONST0_RTX (DFmode));
9691           op0 = force_reg (vmode, gen_rtx_CONST_VECTOR (vmode, v));
9692 	}
9693 
9694       mask = ix86_build_signbit_mask (mode, 0, 0);
9695 
9696       if (mode == SFmode)
9697 	emit_insn (gen_copysignsf3_const (dest, op0, op1, mask));
9698       else
9699 	emit_insn (gen_copysigndf3_const (dest, op0, op1, mask));
9700     }
9701   else
9702     {
9703       nmask = ix86_build_signbit_mask (mode, 0, 1);
9704       mask = ix86_build_signbit_mask (mode, 0, 0);
9705 
9706       if (mode == SFmode)
9707 	emit_insn (gen_copysignsf3_var (dest, NULL, op0, op1, nmask, mask));
9708       else
9709 	emit_insn (gen_copysigndf3_var (dest, NULL, op0, op1, nmask, mask));
9710     }
9711 }
9712 
9713 /* Deconstruct a copysign operation into bit masks.  Operand 0 is known to
9714    be a constant, and so has already been expanded into a vector constant.  */
9715 
9716 void
9717 ix86_split_copysign_const (rtx operands[])
9718 {
9719   enum machine_mode mode, vmode;
9720   rtx dest, op0, op1, mask, x;
9721 
9722   dest = operands[0];
9723   op0 = operands[1];
9724   op1 = operands[2];
9725   mask = operands[3];
9726 
9727   mode = GET_MODE (dest);
9728   vmode = GET_MODE (mask);
9729 
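  /* DEST is expected to already contain OP1, the operand supplying the
     sign (presumably tied to it by a matching constraint in the
     copysign<mode>3_const patterns); keep only its sign bit and then OR
     in the constant magnitude OP0.  */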
9730   dest = simplify_gen_subreg (vmode, dest, mode, 0);
9731   x = gen_rtx_AND (vmode, dest, mask);
9732   emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9733 
9734   if (op0 != CONST0_RTX (vmode))
9735     {
9736       x = gen_rtx_IOR (vmode, dest, op0);
9737       emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9738     }
9739 }
9740 
9741 /* Deconstruct a copysign operation into bit masks.  Operand 0 is variable,
9742    so we have to do two masks.  */
9743 
9744 void
9745 ix86_split_copysign_var (rtx operands[])
9746 {
9747   enum machine_mode mode, vmode;
9748   rtx dest, scratch, op0, op1, mask, nmask, x;
9749 
9750   dest = operands[0];
9751   scratch = operands[1];
9752   op0 = operands[2];
9753   op1 = operands[3];
9754   nmask = operands[4];
9755   mask = operands[5];
9756 
9757   mode = GET_MODE (dest);
9758   vmode = GET_MODE (mask);
9759 
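  /* The result is computed as (op1 & mask) | (op0 & nmask), where MASK
     has only the sign bit set and NMASK is its complement; the
     alternatives below differ only in how the scratch and destination
     registers overlap the inputs.  */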
9760   if (rtx_equal_p (op0, op1))
9761     {
9762       /* Shouldn't happen often (it's useless, obviously), but when it does
9763 	 we'd generate incorrect code if we continue below.  */
9764       emit_move_insn (dest, op0);
9765       return;
9766     }
9767 
9768   if (REG_P (mask) && REGNO (dest) == REGNO (mask))	/* alternative 0 */
9769     {
9770       gcc_assert (REGNO (op1) == REGNO (scratch));
9771 
9772       x = gen_rtx_AND (vmode, scratch, mask);
9773       emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
9774 
9775       dest = mask;
9776       op0 = simplify_gen_subreg (vmode, op0, mode, 0);
9777       x = gen_rtx_NOT (vmode, dest);
9778       x = gen_rtx_AND (vmode, x, op0);
9779       emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9780     }
9781   else
9782     {
9783       if (REGNO (op1) == REGNO (scratch))		/* alternative 1,3 */
9784 	{
9785 	  x = gen_rtx_AND (vmode, scratch, mask);
9786 	}
9787       else						/* alternative 2,4 */
9788 	{
9789           gcc_assert (REGNO (mask) == REGNO (scratch));
9790           op1 = simplify_gen_subreg (vmode, op1, mode, 0);
9791 	  x = gen_rtx_AND (vmode, scratch, op1);
9792 	}
9793       emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
9794 
9795       if (REGNO (op0) == REGNO (dest))			/* alternative 1,2 */
9796 	{
9797 	  dest = simplify_gen_subreg (vmode, op0, mode, 0);
9798 	  x = gen_rtx_AND (vmode, dest, nmask);
9799 	}
9800       else						/* alternative 3,4 */
9801 	{
9802           gcc_assert (REGNO (nmask) == REGNO (dest));
9803 	  dest = nmask;
9804 	  op0 = simplify_gen_subreg (vmode, op0, mode, 0);
9805 	  x = gen_rtx_AND (vmode, dest, op0);
9806 	}
9807       emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9808     }
9809 
9810   x = gen_rtx_IOR (vmode, dest, scratch);
9811   emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9812 }
9813 
9814 /* Return TRUE or FALSE depending on whether the first SET in INSN
9815    has source and destination with matching CC modes and whether the
9816    CC mode is at least as constrained as REQ_MODE.  */
9817 
9818 int
9819 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
9820 {
9821   rtx set;
9822   enum machine_mode set_mode;
9823 
9824   set = PATTERN (insn);
9825   if (GET_CODE (set) == PARALLEL)
9826     set = XVECEXP (set, 0, 0);
9827   gcc_assert (GET_CODE (set) == SET);
9828   gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
9829 
9830   set_mode = GET_MODE (SET_DEST (set));
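  /* The CCmode, CCGCmode and CCGOCmode cases fall through to the less
     specific modes below them; SET_MODE is accepted unless REQ_MODE asks
     for something stricter than SET_MODE provides.  */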
9831   switch (set_mode)
9832     {
9833     case CCNOmode:
9834       if (req_mode != CCNOmode
9835 	  && (req_mode != CCmode
9836 	      || XEXP (SET_SRC (set), 1) != const0_rtx))
9837 	return 0;
9838       break;
9839     case CCmode:
9840       if (req_mode == CCGCmode)
9841 	return 0;
9842       /* FALLTHRU */
9843     case CCGCmode:
9844       if (req_mode == CCGOCmode || req_mode == CCNOmode)
9845 	return 0;
9846       /* FALLTHRU */
9847     case CCGOCmode:
9848       if (req_mode == CCZmode)
9849 	return 0;
9850       /* FALLTHRU */
9851     case CCZmode:
9852       break;
9853 
9854     default:
9855       gcc_unreachable ();
9856     }
9857 
9858   return (GET_MODE (SET_SRC (set)) == set_mode);
9859 }
9860 
9861 /* Generate insn patterns to do an integer compare of OPERANDS.  */
9862 
9863 static rtx
9864 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
9865 {
9866   enum machine_mode cmpmode;
9867   rtx tmp, flags;
9868 
9869   cmpmode = SELECT_CC_MODE (code, op0, op1);
9870   flags = gen_rtx_REG (cmpmode, FLAGS_REG);
9871 
9872   /* This is very simple, but making the interface the same as in the
9873      FP case makes the rest of the code easier.  */
9874   tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
9875   emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
9876 
9877   /* Return the test that should be put into the flags user, i.e.
9878      the bcc, scc, or cmov instruction.  */
9879   return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
9880 }
9881 
9882 /* Figure out whether to use ordered or unordered fp comparisons.
9883    Return the appropriate mode to use.  */
9884 
9885 enum machine_mode
9886 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
9887 {
9888   /* ??? In order to make all comparisons reversible, we do all comparisons
9889      non-trapping when compiling for IEEE.  Once gcc is able to distinguish
9890      all forms of trapping and nontrapping comparisons, we can make inequality
9891      comparisons trapping again, since it results in better code when using
9892      FCOM based compares.  */
9893   return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
9894 }
9895 
9896 enum machine_mode
9897 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
9898 {
9899   if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
9900     return ix86_fp_compare_mode (code);
9901   switch (code)
9902     {
9903       /* Only zero flag is needed.  */
9904     case EQ:			/* ZF=0 */
9905     case NE:			/* ZF!=0 */
9906       return CCZmode;
9907       /* Codes needing carry flag.  */
9908     case GEU:			/* CF=0 */
9909     case GTU:			/* CF=0 & ZF=0 */
9910     case LTU:			/* CF=1 */
9911     case LEU:			/* CF=1 | ZF=1 */
9912       return CCmode;
9913       /* Codes possibly doable only with sign flag when
9914          comparing against zero.  */
9915     case GE:			/* SF=OF   or   SF=0 */
9916     case LT:			/* SF<>OF  or   SF=1 */
9917       if (op1 == const0_rtx)
9918 	return CCGOCmode;
9919       else
9920 	/* For other cases Carry flag is not required.  */
9921 	return CCGCmode;
9922       /* Codes doable only with the sign flag when comparing
9923          against zero, but we lack a jump instruction for them,
9924          so we need to use relational tests against the overflow
9925          flag, which thus needs to be zero.  */
9926     case GT:			/* ZF=0 & SF=OF */
9927     case LE:			/* ZF=1 | SF<>OF */
9928       if (op1 == const0_rtx)
9929 	return CCNOmode;
9930       else
9931 	return CCGCmode;
9932       /* The strcmp pattern does (use flags) and combine may ask us for
9933 	 the proper mode.  */
9934     case USE:
9935       return CCmode;
9936     default:
9937       gcc_unreachable ();
9938     }
9939 }
9940 
9941 /* Return the fixed registers used for condition codes.  */
9942 
9943 static bool
9944 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
9945 {
9946   *p1 = FLAGS_REG;
9947   *p2 = FPSR_REG;
9948   return true;
9949 }
9950 
9951 /* If two condition code modes are compatible, return a condition code
9952    mode which is compatible with both.  Otherwise, return
9953    VOIDmode.  */
9954 
9955 static enum machine_mode
9956 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
9957 {
9958   if (m1 == m2)
9959     return m1;
9960 
9961   if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
9962     return VOIDmode;
9963 
9964   if ((m1 == CCGCmode && m2 == CCGOCmode)
9965       || (m1 == CCGOCmode && m2 == CCGCmode))
9966     return CCGCmode;
9967 
9968   switch (m1)
9969     {
9970     default:
9971       gcc_unreachable ();
9972 
9973     case CCmode:
9974     case CCGCmode:
9975     case CCGOCmode:
9976     case CCNOmode:
9977     case CCZmode:
9978       switch (m2)
9979 	{
9980 	default:
9981 	  return VOIDmode;
9982 
9983 	case CCmode:
9984 	case CCGCmode:
9985 	case CCGOCmode:
9986 	case CCNOmode:
9987 	case CCZmode:
9988 	  return CCmode;
9989 	}
9990 
9991     case CCFPmode:
9992     case CCFPUmode:
9993       /* These are only compatible with themselves, which we already
9994 	 checked above.  */
9995       return VOIDmode;
9996     }
9997 }
9998 
9999 /* Return true if we should use an FCOMI instruction for this fp comparison.  */
10000 
10001 int
10002 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
10003 {
10004   enum rtx_code swapped_code = swap_condition (code);
10005   return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
10006 	  || (ix86_fp_comparison_cost (swapped_code)
10007 	      == ix86_fp_comparison_fcomi_cost (swapped_code)));
10008 }
10009 
10010 /* Swap, force into registers, or otherwise massage the two operands
10011    to a fp comparison.  The operands are updated in place; the new
10012    comparison code is returned.  */
10013 
10014 static enum rtx_code
10015 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
10016 {
10017   enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
10018   rtx op0 = *pop0, op1 = *pop1;
10019   enum machine_mode op_mode = GET_MODE (op0);
10020   int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
10021 
10022   /* All of the unordered compare instructions only work on registers.
10023      The same is true of the fcomi compare instructions.  The XFmode
10024      compare instructions require registers except when comparing
10025      against zero or when converting operand 1 from fixed point to
10026      floating point.  */
10027 
10028   if (!is_sse
10029       && (fpcmp_mode == CCFPUmode
10030 	  || (op_mode == XFmode
10031 	      && ! (standard_80387_constant_p (op0) == 1
10032 		    || standard_80387_constant_p (op1) == 1)
10033 	      && GET_CODE (op1) != FLOAT)
10034 	  || ix86_use_fcomi_compare (code)))
10035     {
10036       op0 = force_reg (op_mode, op0);
10037       op1 = force_reg (op_mode, op1);
10038     }
10039   else
10040     {
10041       /* %%% We only allow op1 in memory; op0 must be st(0).  So swap
10042 	 things around if they appear profitable, otherwise force op0
10043 	 into a register.  */
10044 
10045       if (standard_80387_constant_p (op0) == 0
10046 	  || (GET_CODE (op0) == MEM
10047 	      && ! (standard_80387_constant_p (op1) == 0
10048 		    || GET_CODE (op1) == MEM)))
10049 	{
10050 	  rtx tmp;
10051 	  tmp = op0, op0 = op1, op1 = tmp;
10052 	  code = swap_condition (code);
10053 	}
10054 
10055       if (GET_CODE (op0) != REG)
10056 	op0 = force_reg (op_mode, op0);
10057 
10058       if (CONSTANT_P (op1))
10059 	{
10060 	  int tmp = standard_80387_constant_p (op1);
10061 	  if (tmp == 0)
10062 	    op1 = validize_mem (force_const_mem (op_mode, op1));
10063 	  else if (tmp == 1)
10064 	    {
10065 	      if (TARGET_CMOVE)
10066 		op1 = force_reg (op_mode, op1);
10067 	    }
10068 	  else
10069 	    op1 = force_reg (op_mode, op1);
10070 	}
10071     }
10072 
10073   /* Try to rearrange the comparison to make it cheaper.  */
10074   if (ix86_fp_comparison_cost (code)
10075       > ix86_fp_comparison_cost (swap_condition (code))
10076       && (GET_CODE (op1) == REG || !no_new_pseudos))
10077     {
10078       rtx tmp;
10079       tmp = op0, op0 = op1, op1 = tmp;
10080       code = swap_condition (code);
10081       if (GET_CODE (op0) != REG)
10082 	op0 = force_reg (op_mode, op0);
10083     }
10084 
10085   *pop0 = op0;
10086   *pop1 = op1;
10087   return code;
10088 }
10089 
10090 /* Convert comparison codes we use to represent FP comparison to integer
10091    code that will result in proper branch.  Return UNKNOWN if no such code
10092    is available.  */
10093 
10094 enum rtx_code
10095 ix86_fp_compare_code_to_integer (enum rtx_code code)
10096 {
10097   switch (code)
10098     {
10099     case GT:
10100       return GTU;
10101     case GE:
10102       return GEU;
10103     case ORDERED:
10104     case UNORDERED:
10105       return code;
10106       break;
10107     case UNEQ:
10108       return EQ;
10109       break;
10110     case UNLT:
10111       return LTU;
10112       break;
10113     case UNLE:
10114       return LEU;
10115       break;
10116     case LTGT:
10117       return NE;
10118       break;
10119     default:
10120       return UNKNOWN;
10121     }
10122 }
10123 
10124 /* Split comparison code CODE into comparisons we can do using branch
10125    instructions.  BYPASS_CODE is comparison code for branch that will
10126    instructions.  BYPASS_CODE is the comparison code for a branch that
10127    will branch around FIRST_CODE and SECOND_CODE.  If one of the
10128    branches is not required, its code is set to UNKNOWN.
10129 
10130 void
10131 ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
10132 			  enum rtx_code *first_code,
10133 			  enum rtx_code *second_code)
10134 {
10135   *first_code = code;
10136   *bypass_code = UNKNOWN;
10137   *second_code = UNKNOWN;
10138 
10139   /* The fcomi comparison sets flags as follows:
10140 
10141      cmp    ZF PF CF
10142      >      0  0  0
10143      <      0  0  1
10144      =      1  0  0
10145      un     1  1  1 */
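  /* For example, on an IEEE target LT cannot be tested with a single
     branch because an unordered result also sets CF, so it is split into
     an UNORDERED bypass branch around an UNLT branch.  */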
10146 
10147   switch (code)
10148     {
10149     case GT:			/* GTU - CF=0 & ZF=0 */
10150     case GE:			/* GEU - CF=0 */
10151     case ORDERED:		/* PF=0 */
10152     case UNORDERED:		/* PF=1 */
10153     case UNEQ:			/* EQ - ZF=1 */
10154     case UNLT:			/* LTU - CF=1 */
10155     case UNLE:			/* LEU - CF=1 | ZF=1 */
10156     case LTGT:			/* EQ - ZF=0 */
10157       break;
10158     case LT:			/* LTU - CF=1 - fails on unordered */
10159       *first_code = UNLT;
10160       *bypass_code = UNORDERED;
10161       break;
10162     case LE:			/* LEU - CF=1 | ZF=1 - fails on unordered */
10163       *first_code = UNLE;
10164       *bypass_code = UNORDERED;
10165       break;
10166     case EQ:			/* EQ - ZF=1 - fails on unordered */
10167       *first_code = UNEQ;
10168       *bypass_code = UNORDERED;
10169       break;
10170     case NE:			/* NE - ZF=0 - fails on unordered */
10171       *first_code = LTGT;
10172       *second_code = UNORDERED;
10173       break;
10174     case UNGE:			/* GEU - CF=0 - fails on unordered */
10175       *first_code = GE;
10176       *second_code = UNORDERED;
10177       break;
10178     case UNGT:			/* GTU - CF=0 & ZF=0 - fails on unordered */
10179       *first_code = GT;
10180       *second_code = UNORDERED;
10181       break;
10182     default:
10183       gcc_unreachable ();
10184     }
10185   if (!TARGET_IEEE_FP)
10186     {
10187       *second_code = UNKNOWN;
10188       *bypass_code = UNKNOWN;
10189     }
10190 }
10191 
10192 /* Return the cost of a comparison done with fcom + arithmetic operations on AX.
10193    All of the following functions use the number of instructions as the cost metric.
10194    In the future this should be tweaked to compute bytes for optimize_size and
10195    take into account the performance of various instructions on various CPUs.  */
10196 static int
10197 ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
10198 {
10199   if (!TARGET_IEEE_FP)
10200     return 4;
10201   /* The cost of code output by ix86_expand_fp_compare.  */
10202   switch (code)
10203     {
10204     case UNLE:
10205     case UNLT:
10206     case LTGT:
10207     case GT:
10208     case GE:
10209     case UNORDERED:
10210     case ORDERED:
10211     case UNEQ:
10212       return 4;
10213       break;
10214     case LT:
10215     case NE:
10216     case EQ:
10217     case UNGE:
10218       return 5;
10219       break;
10220     case LE:
10221     case UNGT:
10222       return 6;
10223       break;
10224     default:
10225       gcc_unreachable ();
10226     }
10227 }
10228 
10229 /* Return cost of comparison done using fcomi operation.
10230    See ix86_fp_comparison_arithmetics_cost for the metrics.  */
10231 static int
10232 ix86_fp_comparison_fcomi_cost (enum rtx_code code)
10233 {
10234   enum rtx_code bypass_code, first_code, second_code;
10235   /* Return arbitrarily high cost when instruction is not supported - this
10236      prevents gcc from using it.  */
10237   if (!TARGET_CMOVE)
10238     return 1024;
10239   ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
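  /* fcomi plus one conditional jump, plus one more jump whenever a
     bypass or second branch is needed.  */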
10240   return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 2;
10241 }
10242 
10243 /* Return cost of comparison done using sahf operation.
10244    See ix86_fp_comparison_arithmetics_cost for the metrics.  */
10245 static int
10246 ix86_fp_comparison_sahf_cost (enum rtx_code code)
10247 {
10248   enum rtx_code bypass_code, first_code, second_code;
10249   /* Return arbitrarily high cost when instruction is not preferred - this
10250      prevents gcc from using it.  */
10251   if (!TARGET_USE_SAHF && !optimize_size)
10252     return 1024;
10253   ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10254   return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 3;
10255 }
10256 
10257 /* Compute cost of the comparison done using any method.
10258    See ix86_fp_comparison_arithmetics_cost for the metrics.  */
10259 static int
10260 ix86_fp_comparison_cost (enum rtx_code code)
10261 {
10262   int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
10263   int min;
10264 
10265   fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
10266   sahf_cost = ix86_fp_comparison_sahf_cost (code);
10267 
10268   min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
10269   if (min > sahf_cost)
10270     min = sahf_cost;
10271   if (min > fcomi_cost)
10272     min = fcomi_cost;
10273   return min;
10274 }
10275 
10276 /* Generate insn patterns to do a floating point compare of OPERANDS.  */
10277 
10278 static rtx
10279 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
10280 			rtx *second_test, rtx *bypass_test)
10281 {
10282   enum machine_mode fpcmp_mode, intcmp_mode;
10283   rtx tmp, tmp2;
10284   int cost = ix86_fp_comparison_cost (code);
10285   enum rtx_code bypass_code, first_code, second_code;
10286 
10287   fpcmp_mode = ix86_fp_compare_mode (code);
10288   code = ix86_prepare_fp_compare_args (code, &op0, &op1);
10289 
10290   if (second_test)
10291     *second_test = NULL_RTX;
10292   if (bypass_test)
10293     *bypass_test = NULL_RTX;
10294 
10295   ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10296 
10297   /* Do fcomi/sahf based test when profitable.  */
10298   if ((bypass_code == UNKNOWN || bypass_test)
10299       && (second_code == UNKNOWN || second_test)
10300       && ix86_fp_comparison_arithmetics_cost (code) > cost)
10301     {
10302       if (TARGET_CMOVE)
10303 	{
10304 	  tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
10305 	  tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
10306 			     tmp);
10307 	  emit_insn (tmp);
10308 	}
10309       else
10310 	{
10311 	  tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
10312 	  tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
10313 	  if (!scratch)
10314 	    scratch = gen_reg_rtx (HImode);
10315 	  emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
10316 	  emit_insn (gen_x86_sahf_1 (scratch));
10317 	}
10318 
10319       /* The FP codes work out to act like unsigned.  */
10320       intcmp_mode = fpcmp_mode;
10321       code = first_code;
10322       if (bypass_code != UNKNOWN)
10323 	*bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
10324 				       gen_rtx_REG (intcmp_mode, FLAGS_REG),
10325 				       const0_rtx);
10326       if (second_code != UNKNOWN)
10327 	*second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
10328 				       gen_rtx_REG (intcmp_mode, FLAGS_REG),
10329 				       const0_rtx);
10330     }
10331   else
10332     {
10333       /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first.  */
10334       tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
10335       tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
10336       if (!scratch)
10337 	scratch = gen_reg_rtx (HImode);
10338       emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
10339 
10340       /* In the unordered case, we have to check C2 for NaN's, which
10341 	 doesn't happen to work out to anything nice combination-wise.
10342 	 So do some bit twiddling on the value we've got in AH to come
10343 	 up with an appropriate set of condition codes.  */
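      /* After fnstsw, AH holds C0 in bit 0 (0x01), C2 in bit 2 (0x04)
	 and C3 in bit 6 (0x40), so the 0x45 masks below test C3, C2 and
	 C0 together.  */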
10344 
10345       intcmp_mode = CCNOmode;
10346       switch (code)
10347 	{
10348 	case GT:
10349 	case UNGT:
10350 	  if (code == GT || !TARGET_IEEE_FP)
10351 	    {
10352 	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
10353 	      code = EQ;
10354 	    }
10355 	  else
10356 	    {
10357 	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10358 	      emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
10359 	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
10360 	      intcmp_mode = CCmode;
10361 	      code = GEU;
10362 	    }
10363 	  break;
10364 	case LT:
10365 	case UNLT:
10366 	  if (code == LT && TARGET_IEEE_FP)
10367 	    {
10368 	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10369 	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
10370 	      intcmp_mode = CCmode;
10371 	      code = EQ;
10372 	    }
10373 	  else
10374 	    {
10375 	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
10376 	      code = NE;
10377 	    }
10378 	  break;
10379 	case GE:
10380 	case UNGE:
10381 	  if (code == GE || !TARGET_IEEE_FP)
10382 	    {
10383 	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
10384 	      code = EQ;
10385 	    }
10386 	  else
10387 	    {
10388 	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10389 	      emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
10390 					     GEN_INT (0x01)));
10391 	      code = NE;
10392 	    }
10393 	  break;
10394 	case LE:
10395 	case UNLE:
10396 	  if (code == LE && TARGET_IEEE_FP)
10397 	    {
10398 	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10399 	      emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
10400 	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
10401 	      intcmp_mode = CCmode;
10402 	      code = LTU;
10403 	    }
10404 	  else
10405 	    {
10406 	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
10407 	      code = NE;
10408 	    }
10409 	  break;
10410 	case EQ:
10411 	case UNEQ:
10412 	  if (code == EQ && TARGET_IEEE_FP)
10413 	    {
10414 	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10415 	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
10416 	      intcmp_mode = CCmode;
10417 	      code = EQ;
10418 	    }
10419 	  else
10420 	    {
10421 	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
10422 	      code = NE;
10423 	      break;
10424 	    }
10425 	  break;
10426 	case NE:
10427 	case LTGT:
10428 	  if (code == NE && TARGET_IEEE_FP)
10429 	    {
10430 	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10431 	      emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
10432 					     GEN_INT (0x40)));
10433 	      code = NE;
10434 	    }
10435 	  else
10436 	    {
10437 	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
10438 	      code = EQ;
10439 	    }
10440 	  break;
10441 
10442 	case UNORDERED:
10443 	  emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
10444 	  code = NE;
10445 	  break;
10446 	case ORDERED:
10447 	  emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
10448 	  code = EQ;
10449 	  break;
10450 
10451 	default:
10452 	  gcc_unreachable ();
10453 	}
10454     }
10455 
10456   /* Return the test that should be put into the flags user, i.e.
10457      the bcc, scc, or cmov instruction.  */
10458   return gen_rtx_fmt_ee (code, VOIDmode,
10459 			 gen_rtx_REG (intcmp_mode, FLAGS_REG),
10460 			 const0_rtx);
10461 }
10462 
10463 rtx
10464 ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
10465 {
10466   rtx op0, op1, ret;
10467   op0 = ix86_compare_op0;
10468   op1 = ix86_compare_op1;
10469 
10470   if (second_test)
10471     *second_test = NULL_RTX;
10472   if (bypass_test)
10473     *bypass_test = NULL_RTX;
10474 
10475   if (ix86_compare_emitted)
10476     {
10477       ret = gen_rtx_fmt_ee (code, VOIDmode, ix86_compare_emitted, const0_rtx);
10478       ix86_compare_emitted = NULL_RTX;
10479     }
10480   else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
10481     ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
10482 				  second_test, bypass_test);
10483   else
10484     ret = ix86_expand_int_compare (code, op0, op1);
10485 
10486   return ret;
10487 }
10488 
10489 /* Return true if CODE will result in a nontrivial jump sequence.  */
10490 bool
10491 ix86_fp_jump_nontrivial_p (enum rtx_code code)
10492 {
10493   enum rtx_code bypass_code, first_code, second_code;
10494   if (!TARGET_CMOVE)
10495     return true;
10496   ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10497   return bypass_code != UNKNOWN || second_code != UNKNOWN;
10498 }
10499 
10500 void
10501 ix86_expand_branch (enum rtx_code code, rtx label)
10502 {
10503   rtx tmp;
10504 
10505   /* If we have emitted a compare insn, go straight to simple.
10506      ix86_expand_compare won't emit anything if ix86_compare_emitted
10507      is non-NULL.  */
10508   if (ix86_compare_emitted)
10509     goto simple;
10510 
10511   switch (GET_MODE (ix86_compare_op0))
10512     {
10513     case QImode:
10514     case HImode:
10515     case SImode:
10516       simple:
10517       tmp = ix86_expand_compare (code, NULL, NULL);
10518       tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
10519 				  gen_rtx_LABEL_REF (VOIDmode, label),
10520 				  pc_rtx);
10521       emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
10522       return;
10523 
10524     case SFmode:
10525     case DFmode:
10526     case XFmode:
10527       {
10528 	rtvec vec;
10529 	int use_fcomi;
10530 	enum rtx_code bypass_code, first_code, second_code;
10531 
10532 	code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
10533 					     &ix86_compare_op1);
10534 
10535 	ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10536 
10537 	/* Check whether we will use the natural sequence with one jump.  If
10538 	   so, we can expand the jump early.  Otherwise delay expansion by
10539 	   creating a compound insn so as not to confuse the optimizers.  */
10540 	if (bypass_code == UNKNOWN && second_code == UNKNOWN
10541 	    && TARGET_CMOVE)
10542 	  {
10543 	    ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
10544 				  gen_rtx_LABEL_REF (VOIDmode, label),
10545 				  pc_rtx, NULL_RTX, NULL_RTX);
10546 	  }
10547 	else
10548 	  {
10549 	    tmp = gen_rtx_fmt_ee (code, VOIDmode,
10550 				  ix86_compare_op0, ix86_compare_op1);
10551 	    tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
10552 					gen_rtx_LABEL_REF (VOIDmode, label),
10553 					pc_rtx);
10554 	    tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
10555 
10556 	    use_fcomi = ix86_use_fcomi_compare (code);
10557 	    vec = rtvec_alloc (3 + !use_fcomi);
10558 	    RTVEC_ELT (vec, 0) = tmp;
10559 	    RTVEC_ELT (vec, 1)
10560 	      = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
10561 	    RTVEC_ELT (vec, 2)
10562 	      = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
10563 	    if (! use_fcomi)
10564 	      RTVEC_ELT (vec, 3)
10565 		= gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
10566 
10567 	    emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
10568 	  }
10569 	return;
10570       }
10571 
10572     case DImode:
10573       if (TARGET_64BIT)
10574 	goto simple;
10575     case TImode:
10576       /* Expand DImode branch into multiple compare+branch.  */
10577       {
10578 	rtx lo[2], hi[2], label2;
10579 	enum rtx_code code1, code2, code3;
10580 	enum machine_mode submode;
10581 
10582 	if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
10583 	  {
10584 	    tmp = ix86_compare_op0;
10585 	    ix86_compare_op0 = ix86_compare_op1;
10586 	    ix86_compare_op1 = tmp;
10587 	    code = swap_condition (code);
10588 	  }
10589 	if (GET_MODE (ix86_compare_op0) == DImode)
10590 	  {
10591 	    split_di (&ix86_compare_op0, 1, lo+0, hi+0);
10592 	    split_di (&ix86_compare_op1, 1, lo+1, hi+1);
10593 	    submode = SImode;
10594 	  }
10595 	else
10596 	  {
10597 	    split_ti (&ix86_compare_op0, 1, lo+0, hi+0);
10598 	    split_ti (&ix86_compare_op1, 1, lo+1, hi+1);
10599 	    submode = DImode;
10600 	  }
10601 
10602 	/* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
10603 	   avoid two branches.  This costs one extra insn, so disable when
10604 	   optimizing for size.  */
10605 
10606 	if ((code == EQ || code == NE)
10607 	    && (!optimize_size
10608 	        || hi[1] == const0_rtx || lo[1] == const0_rtx))
10609 	  {
10610 	    rtx xor0, xor1;
10611 
10612 	    xor1 = hi[0];
10613 	    if (hi[1] != const0_rtx)
10614 	      xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
10615 				   NULL_RTX, 0, OPTAB_WIDEN);
10616 
10617 	    xor0 = lo[0];
10618 	    if (lo[1] != const0_rtx)
10619 	      xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
10620 				   NULL_RTX, 0, OPTAB_WIDEN);
10621 
10622 	    tmp = expand_binop (submode, ior_optab, xor1, xor0,
10623 				NULL_RTX, 0, OPTAB_WIDEN);
10624 
10625 	    ix86_compare_op0 = tmp;
10626 	    ix86_compare_op1 = const0_rtx;
10627 	    ix86_expand_branch (code, label);
10628 	    return;
10629 	  }
10630 
10631 	/* Otherwise, if we are doing less-than or greater-or-equal-than,
10632 	   op1 is a constant and the low word is zero, then we can just
10633 	   examine the high word.  */
10634 
10635 	if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
10636 	  switch (code)
10637 	    {
10638 	    case LT: case LTU: case GE: case GEU:
10639 	      ix86_compare_op0 = hi[0];
10640 	      ix86_compare_op1 = hi[1];
10641 	      ix86_expand_branch (code, label);
10642 	      return;
10643 	    default:
10644 	      break;
10645 	    }
10646 
10647 	/* Otherwise, we need two or three jumps.  */
10648 
10649 	label2 = gen_label_rtx ();
10650 
10651 	code1 = code;
10652 	code2 = swap_condition (code);
10653 	code3 = unsigned_condition (code);
10654 
10655 	switch (code)
10656 	  {
10657 	  case LT: case GT: case LTU: case GTU:
10658 	    break;
10659 
10660 	  case LE:   code1 = LT;  code2 = GT;  break;
10661 	  case GE:   code1 = GT;  code2 = LT;  break;
10662 	  case LEU:  code1 = LTU; code2 = GTU; break;
10663 	  case GEU:  code1 = GTU; code2 = LTU; break;
10664 
10665 	  case EQ:   code1 = UNKNOWN; code2 = NE;  break;
10666 	  case NE:   code2 = UNKNOWN; break;
10667 
10668 	  default:
10669 	    gcc_unreachable ();
10670 	  }
10671 
10672 	/*
10673 	 * a < b =>
10674 	 *    if (hi(a) < hi(b)) goto true;
10675 	 *    if (hi(a) > hi(b)) goto false;
10676 	 *    if (lo(a) < lo(b)) goto true;
10677 	 *  false:
10678 	 */
10679 
10680 	ix86_compare_op0 = hi[0];
10681 	ix86_compare_op1 = hi[1];
10682 
10683 	if (code1 != UNKNOWN)
10684 	  ix86_expand_branch (code1, label);
10685 	if (code2 != UNKNOWN)
10686 	  ix86_expand_branch (code2, label2);
10687 
10688 	ix86_compare_op0 = lo[0];
10689 	ix86_compare_op1 = lo[1];
10690 	ix86_expand_branch (code3, label);
10691 
10692 	if (code2 != UNKNOWN)
10693 	  emit_label (label2);
10694 	return;
10695       }
10696 
10697     default:
10698       gcc_unreachable ();
10699     }
10700 }
10701 
10702 /* Split branch based on floating point condition.  */
10703 void
10704 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
10705 		      rtx target1, rtx target2, rtx tmp, rtx pushed)
10706 {
10707   rtx second, bypass;
10708   rtx label = NULL_RTX;
10709   rtx condition;
10710   int bypass_probability = -1, second_probability = -1, probability = -1;
10711   rtx i;
10712 
10713   if (target2 != pc_rtx)
10714     {
10715       rtx tmp = target2;
10716       code = reverse_condition_maybe_unordered (code);
10717       target2 = target1;
10718       target1 = tmp;
10719     }
10720 
10721   condition = ix86_expand_fp_compare (code, op1, op2,
10722 				      tmp, &second, &bypass);
10723 
10724   /* Remove pushed operand from stack.  */
10725   if (pushed)
10726     ix86_free_from_memory (GET_MODE (pushed));
10727 
10728   if (split_branch_probability >= 0)
10729     {
10730       /* Distribute the probabilities across the jumps.
10731 	 Assume the BYPASS and SECOND to be always test
10732 	 for UNORDERED.  */
10733       probability = split_branch_probability;
10734 
10735       /* A value of 1 is low enough that the probability does not need
10736 	 to be updated.  Later we may run some experiments and see
10737 	 if unordered values are more frequent in practice.  */
10738       if (bypass)
10739 	bypass_probability = 1;
10740       if (second)
10741 	second_probability = 1;
10742     }
10743   if (bypass != NULL_RTX)
10744     {
10745       label = gen_label_rtx ();
10746       i = emit_jump_insn (gen_rtx_SET
10747 			  (VOIDmode, pc_rtx,
10748 			   gen_rtx_IF_THEN_ELSE (VOIDmode,
10749 						 bypass,
10750 						 gen_rtx_LABEL_REF (VOIDmode,
10751 								    label),
10752 						 pc_rtx)));
10753       if (bypass_probability >= 0)
10754 	REG_NOTES (i)
10755 	  = gen_rtx_EXPR_LIST (REG_BR_PROB,
10756 			       GEN_INT (bypass_probability),
10757 			       REG_NOTES (i));
10758     }
10759   i = emit_jump_insn (gen_rtx_SET
10760 		      (VOIDmode, pc_rtx,
10761 		       gen_rtx_IF_THEN_ELSE (VOIDmode,
10762 					     condition, target1, target2)));
10763   if (probability >= 0)
10764     REG_NOTES (i)
10765       = gen_rtx_EXPR_LIST (REG_BR_PROB,
10766 			   GEN_INT (probability),
10767 			   REG_NOTES (i));
10768   if (second != NULL_RTX)
10769     {
10770       i = emit_jump_insn (gen_rtx_SET
10771 			  (VOIDmode, pc_rtx,
10772 			   gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
10773 						 target2)));
10774       if (second_probability >= 0)
10775 	REG_NOTES (i)
10776 	  = gen_rtx_EXPR_LIST (REG_BR_PROB,
10777 			       GEN_INT (second_probability),
10778 			       REG_NOTES (i));
10779     }
10780   if (label != NULL_RTX)
10781     emit_label (label);
10782 }
10783 
10784 int
10785 ix86_expand_setcc (enum rtx_code code, rtx dest)
10786 {
10787   rtx ret, tmp, tmpreg, equiv;
10788   rtx second_test, bypass_test;
10789 
10790   if (GET_MODE (ix86_compare_op0) == (TARGET_64BIT ? TImode : DImode))
10791     return 0; /* FAIL */
10792 
10793   gcc_assert (GET_MODE (dest) == QImode);
10794 
10795   ret = ix86_expand_compare (code, &second_test, &bypass_test);
10796   PUT_MODE (ret, QImode);
10797 
10798   tmp = dest;
10799   tmpreg = dest;
10800 
10801   emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
10802   if (bypass_test || second_test)
10803     {
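      /* Combine the extra test with the main one: a bypass test checks
	 for the unordered case and is reversed and ANDed in, while a
	 second test is simply ORed in.  */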
10804       rtx test = second_test;
10805       int bypass = 0;
10806       rtx tmp2 = gen_reg_rtx (QImode);
10807       if (bypass_test)
10808 	{
10809 	  gcc_assert (!second_test);
10810 	  test = bypass_test;
10811 	  bypass = 1;
10812 	  PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
10813 	}
10814       PUT_MODE (test, QImode);
10815       emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
10816 
10817       if (bypass)
10818 	emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
10819       else
10820 	emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
10821     }
10822 
10823   /* Attach a REG_EQUAL note describing the comparison result.  */
10824   if (ix86_compare_op0 && ix86_compare_op1)
10825     {
10826       equiv = simplify_gen_relational (code, QImode,
10827 				       GET_MODE (ix86_compare_op0),
10828 				       ix86_compare_op0, ix86_compare_op1);
10829       set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
10830     }
10831 
10832   return 1; /* DONE */
10833 }
10834 
10835 /* Expand comparison setting or clearing carry flag.  Return true when
10836    successful and set pop for the operation.  */
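/* A carry flag compare allows the conditional move to be synthesized
   branch-free with sbb (see the x86_mov*cc_0_m1 patterns used from
   ix86_expand_int_movcc below).  */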
10837 static bool
10838 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
10839 {
10840   enum machine_mode mode =
10841     GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
10842 
10843   /* Do not handle DImode compares that go through the special path.  Also we
10844      can't deal with FP compares yet.  It would be possible to add this.  */
10845   if (mode == (TARGET_64BIT ? TImode : DImode))
10846     return false;
10847   if (FLOAT_MODE_P (mode))
10848     {
10849       rtx second_test = NULL, bypass_test = NULL;
10850       rtx compare_op, compare_seq;
10851 
10852       /* Shortcut: the following common codes never translate into carry flag compares.  */
10853       if (code == EQ || code == NE || code == UNEQ || code == LTGT
10854 	  || code == ORDERED || code == UNORDERED)
10855 	return false;
10856 
10857       /* These comparisons require the zero flag; swap the operands so they won't.  */
10858       if ((code == GT || code == UNLE || code == LE || code == UNGT)
10859 	  && !TARGET_IEEE_FP)
10860 	{
10861 	  rtx tmp = op0;
10862 	  op0 = op1;
10863 	  op1 = tmp;
10864 	  code = swap_condition (code);
10865 	}
10866 
10867       /* Try to expand the comparison and verify that we end up with a carry
10868 	 flag based comparison.  This fails only when we decide to expand the
10869 	 comparison using arithmetic, which is not a common scenario.  */
10870       start_sequence ();
10871       compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
10872 					   &second_test, &bypass_test);
10873       compare_seq = get_insns ();
10874       end_sequence ();
10875 
10876       if (second_test || bypass_test)
10877 	return false;
10878       if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
10879 	  || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
10880         code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
10881       else
10882 	code = GET_CODE (compare_op);
10883       if (code != LTU && code != GEU)
10884 	return false;
10885       emit_insn (compare_seq);
10886       *pop = compare_op;
10887       return true;
10888     }
10889   if (!INTEGRAL_MODE_P (mode))
10890     return false;
10891   switch (code)
10892     {
10893     case LTU:
10894     case GEU:
10895       break;
10896 
10897     /* Convert a==0 into (unsigned)a<1.  */
10898     case EQ:
10899     case NE:
10900       if (op1 != const0_rtx)
10901 	return false;
10902       op1 = const1_rtx;
10903       code = (code == EQ ? LTU : GEU);
10904       break;
10905 
10906     /* Convert a>b into b<a or a>=b-1.  */
10907     case GTU:
10908     case LEU:
10909       if (GET_CODE (op1) == CONST_INT)
10910 	{
10911 	  op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
10912 	  /* Bail out on overflow.  We still can swap operands but that
10913 	     would force loading of the constant into register.  */
10914 	  if (op1 == const0_rtx
10915 	      || !x86_64_immediate_operand (op1, GET_MODE (op1)))
10916 	    return false;
10917 	  code = (code == GTU ? GEU : LTU);
10918 	}
10919       else
10920 	{
10921 	  rtx tmp = op1;
10922 	  op1 = op0;
10923 	  op0 = tmp;
10924 	  code = (code == GTU ? LTU : GEU);
10925 	}
10926       break;
10927 
10928     /* Convert a>=0 into (unsigned)a<0x80000000.  */
10929     case LT:
10930     case GE:
10931       if (mode == DImode || op1 != const0_rtx)
10932 	return false;
10933       op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
10934       code = (code == LT ? GEU : LTU);
10935       break;
10936     case LE:
10937     case GT:
10938       if (mode == DImode || op1 != constm1_rtx)
10939 	return false;
10940       op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
10941       code = (code == LE ? GEU : LTU);
10942       break;
10943 
10944     default:
10945       return false;
10946     }
10947   /* Swapping operands may cause constant to appear as first operand.  */
10948   if (!nonimmediate_operand (op0, VOIDmode))
10949     {
10950       if (no_new_pseudos)
10951 	return false;
10952       op0 = force_reg (mode, op0);
10953     }
10954   ix86_compare_op0 = op0;
10955   ix86_compare_op1 = op1;
10956   *pop = ix86_expand_compare (code, NULL, NULL);
10957   gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
10958   return true;
10959 }
10960 
10961 int
10962 ix86_expand_int_movcc (rtx operands[])
10963 {
10964   enum rtx_code code = GET_CODE (operands[1]), compare_code;
10965   rtx compare_seq, compare_op;
10966   rtx second_test, bypass_test;
10967   enum machine_mode mode = GET_MODE (operands[0]);
10968   bool sign_bit_compare_p = false;
10969 
10970   start_sequence ();
10971   compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
10972   compare_seq = get_insns ();
10973   end_sequence ();
10974 
10975   compare_code = GET_CODE (compare_op);
10976 
10977   if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
10978       || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
10979     sign_bit_compare_p = true;
10980 
10981   /* Don't attempt mode expansion here -- if we had to expand 5 or 6
10982      HImode insns, we'd be swallowed in word prefix ops.  */
10983 
10984   if ((mode != HImode || TARGET_FAST_PREFIX)
10985       && (mode != (TARGET_64BIT ? TImode : DImode))
10986       && GET_CODE (operands[2]) == CONST_INT
10987       && GET_CODE (operands[3]) == CONST_INT)
10988     {
10989       rtx out = operands[0];
10990       HOST_WIDE_INT ct = INTVAL (operands[2]);
10991       HOST_WIDE_INT cf = INTVAL (operands[3]);
10992       HOST_WIDE_INT diff;
10993 
10994       diff = ct - cf;
10995       /* Sign bit compares are better done using shifts than by using
10996 	 sbb.  */
10997       if (sign_bit_compare_p
10998 	  || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
10999 					     ix86_compare_op1, &compare_op))
11000 	{
11001 	  /* Detect overlap between destination and compare sources.  */
11002 	  rtx tmp = out;
11003 
11004           if (!sign_bit_compare_p)
11005 	    {
11006 	      bool fpcmp = false;
11007 
11008 	      compare_code = GET_CODE (compare_op);
11009 
11010 	      if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
11011 		  || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
11012 		{
11013 		  fpcmp = true;
11014 		  compare_code = ix86_fp_compare_code_to_integer (compare_code);
11015 		}
11016 
11017 	      /* To simplify the rest of the code, restrict to the GEU case.  */
11018 	      if (compare_code == LTU)
11019 		{
11020 		  HOST_WIDE_INT tmp = ct;
11021 		  ct = cf;
11022 		  cf = tmp;
11023 		  compare_code = reverse_condition (compare_code);
11024 		  code = reverse_condition (code);
11025 		}
11026 	      else
11027 		{
11028 		  if (fpcmp)
11029 		    PUT_CODE (compare_op,
11030 			      reverse_condition_maybe_unordered
11031 			        (GET_CODE (compare_op)));
11032 		  else
11033 		    PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
11034 		}
11035 	      diff = ct - cf;
11036 
11037 	      if (reg_overlap_mentioned_p (out, ix86_compare_op0)
11038 		  || reg_overlap_mentioned_p (out, ix86_compare_op1))
11039 		tmp = gen_reg_rtx (mode);
11040 
11041 	      if (mode == DImode)
11042 		emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
11043 	      else
11044 		emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
11045 	    }
11046 	  else
11047 	    {
11048 	      if (code == GT || code == GE)
11049 		code = reverse_condition (code);
11050 	      else
11051 		{
11052 		  HOST_WIDE_INT tmp = ct;
11053 		  ct = cf;
11054 		  cf = tmp;
11055 		  diff = ct - cf;
11056 		}
11057 	      tmp = emit_store_flag (tmp, code, ix86_compare_op0,
11058 				     ix86_compare_op1, VOIDmode, 0, -1);
11059 	    }
11060 
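	  /* Both paths above leave TMP equal to all ones or all zeros
	     (via sbb or a store-flag with "normal" value -1); the cases
	     below turn that into CT or CF with a little arithmetic.  */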
11061 	  if (diff == 1)
11062 	    {
11063 	      /*
11064 	       * cmpl op0,op1
11065 	       * sbbl dest,dest
11066 	       * [addl dest, ct]
11067 	       *
11068 	       * Size 5 - 8.
11069 	       */
11070 	      if (ct)
11071 		tmp = expand_simple_binop (mode, PLUS,
11072 					   tmp, GEN_INT (ct),
11073 					   copy_rtx (tmp), 1, OPTAB_DIRECT);
11074 	    }
11075 	  else if (cf == -1)
11076 	    {
11077 	      /*
11078 	       * cmpl op0,op1
11079 	       * sbbl dest,dest
11080 	       * orl $ct, dest
11081 	       *
11082 	       * Size 8.
11083 	       */
11084 	      tmp = expand_simple_binop (mode, IOR,
11085 					 tmp, GEN_INT (ct),
11086 					 copy_rtx (tmp), 1, OPTAB_DIRECT);
11087 	    }
11088 	  else if (diff == -1 && ct)
11089 	    {
11090 	      /*
11091 	       * cmpl op0,op1
11092 	       * sbbl dest,dest
11093 	       * notl dest
11094 	       * [addl dest, cf]
11095 	       *
11096 	       * Size 8 - 11.
11097 	       */
11098 	      tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
11099 	      if (cf)
11100 		tmp = expand_simple_binop (mode, PLUS,
11101 					   copy_rtx (tmp), GEN_INT (cf),
11102 					   copy_rtx (tmp), 1, OPTAB_DIRECT);
11103 	    }
11104 	  else
11105 	    {
11106 	      /*
11107 	       * cmpl op0,op1
11108 	       * sbbl dest,dest
11109 	       * [notl dest]
11110 	       * andl cf - ct, dest
11111 	       * [addl dest, ct]
11112 	       *
11113 	       * Size 8 - 11.
11114 	       */
11115 
11116 	      if (cf == 0)
11117 		{
11118 		  cf = ct;
11119 		  ct = 0;
11120 		  tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
11121 		}
11122 
11123 	      tmp = expand_simple_binop (mode, AND,
11124 					 copy_rtx (tmp),
11125 					 gen_int_mode (cf - ct, mode),
11126 					 copy_rtx (tmp), 1, OPTAB_DIRECT);
11127 	      if (ct)
11128 		tmp = expand_simple_binop (mode, PLUS,
11129 					   copy_rtx (tmp), GEN_INT (ct),
11130 					   copy_rtx (tmp), 1, OPTAB_DIRECT);
11131 	    }
11132 
11133 	  if (!rtx_equal_p (tmp, out))
11134 	    emit_move_insn (copy_rtx (out), copy_rtx (tmp));
11135 
11136 	  return 1; /* DONE */
11137 	}
11138 
11139       if (diff < 0)
11140 	{
11141 	  HOST_WIDE_INT tmp;
11142 	  tmp = ct, ct = cf, cf = tmp;
11143 	  diff = -diff;
11144 	  if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
11145 	    {
11146 	      /* We may be reversing an unordered compare to a normal compare,
11147 		 which is not valid in general (we may convert a non-trapping
11148 		 condition into a trapping one), but on i386 we currently emit
11149 		 all comparisons unordered.  */
11150 	      compare_code = reverse_condition_maybe_unordered (compare_code);
11151 	      code = reverse_condition_maybe_unordered (code);
11152 	    }
11153 	  else
11154 	    {
11155 	      compare_code = reverse_condition (compare_code);
11156 	      code = reverse_condition (code);
11157 	    }
11158 	}
11159 
11160       compare_code = UNKNOWN;
11161       if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
11162 	  && GET_CODE (ix86_compare_op1) == CONST_INT)
11163 	{
11164 	  if (ix86_compare_op1 == const0_rtx
11165 	      && (code == LT || code == GE))
11166 	    compare_code = code;
11167 	  else if (ix86_compare_op1 == constm1_rtx)
11168 	    {
11169 	      if (code == LE)
11170 		compare_code = LT;
11171 	      else if (code == GT)
11172 		compare_code = GE;
11173 	    }
11174 	}
11175 
11176       /* Optimize dest = (op0 < 0) ? -1 : cf.  */
11177       if (compare_code != UNKNOWN
11178 	  && GET_MODE (ix86_compare_op0) == GET_MODE (out)
11179 	  && (cf == -1 || ct == -1))
11180 	{
11181 	  /* If lea code below could be used, only optimize
11182 	     if it results in a 2 insn sequence.  */
11183 
11184 	  if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
11185 		 || diff == 3 || diff == 5 || diff == 9)
11186 	      || (compare_code == LT && ct == -1)
11187 	      || (compare_code == GE && cf == -1))
11188 	    {
11189 	      /*
11190 	       * notl op1	(if necessary)
11191 	       * sarl $31, op1
11192 	       * orl cf, op1
11193 	       */
11194 	      if (ct != -1)
11195 		{
11196 		  cf = ct;
11197 		  ct = -1;
11198 		  code = reverse_condition (code);
11199 		}
11200 
11201 	      out = emit_store_flag (out, code, ix86_compare_op0,
11202 				     ix86_compare_op1, VOIDmode, 0, -1);
11203 
11204 	      out = expand_simple_binop (mode, IOR,
11205 					 out, GEN_INT (cf),
11206 					 out, 1, OPTAB_DIRECT);
11207 	      if (out != operands[0])
11208 		emit_move_insn (operands[0], out);
11209 
11210 	      return 1; /* DONE */
11211 	    }
11212 	}
11213 
11214 
11215       if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
11216 	   || diff == 3 || diff == 5 || diff == 9)
11217 	  && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
11218 	  && (mode != DImode
11219 	      || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
11220 	{
11221 	  /*
11222 	   * xorl dest,dest
11223 	   * cmpl op1,op2
11224 	   * setcc dest
11225 	   * lea cf(dest*(ct-cf)),dest
11226 	   *
11227 	   * Size 14.
11228 	   *
11229 	   * This also catches the degenerate setcc-only case.
11230 	   */
11231 
11232 	  rtx tmp;
11233 	  int nops;
11234 
11235 	  out = emit_store_flag (out, code, ix86_compare_op0,
11236 				 ix86_compare_op1, VOIDmode, 0, 1);
11237 
11238 	  nops = 0;
11239 	  /* On x86_64 the lea instruction operates on Pmode, so we need
11240 	     to get the arithmetic done in the proper mode to match.  */
11241 	  if (diff == 1)
11242 	    tmp = copy_rtx (out);
11243 	  else
11244 	    {
11245 	      rtx out1;
11246 	      out1 = copy_rtx (out);
11247 	      tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
11248 	      nops++;
11249 	      if (diff & 1)
11250 		{
11251 		  tmp = gen_rtx_PLUS (mode, tmp, out1);
11252 		  nops++;
11253 		}
11254 	    }
11255 	  if (cf != 0)
11256 	    {
11257 	      tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
11258 	      nops++;
11259 	    }
11260 	  if (!rtx_equal_p (tmp, out))
11261 	    {
11262 	      if (nops == 1)
11263 		out = force_operand (tmp, copy_rtx (out));
11264 	      else
11265 		emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
11266 	    }
11267 	  if (!rtx_equal_p (out, operands[0]))
11268 	    emit_move_insn (operands[0], copy_rtx (out));
11269 
11270 	  return 1; /* DONE */
11271 	}
11272 
11273       /*
11274        * General case:			Jumpful:
11275        *   xorl dest,dest		cmpl op1, op2
11276        *   cmpl op1, op2		movl ct, dest
11277        *   setcc dest			jcc 1f
11278        *   decl dest			movl cf, dest
11279        *   andl (cf-ct),dest		1:
11280        *   addl ct,dest
11281        *
11282        * Size 20.			Size 14.
11283        *
11284        * This is reasonably steep, but branch mispredict costs are
11285        * high on modern cpus, so consider failing only if optimizing
11286        * for space.
11287        */
11288 
11289       if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
11290 	  && BRANCH_COST >= 2)
11291 	{
11292 	  if (cf == 0)
11293 	    {
11294 	      cf = ct;
11295 	      ct = 0;
11296 	      if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
11297 		/* We may be reversing an unordered compare to a normal compare,
11298 		   which is not valid in general (we may convert a non-trapping
11299 		   condition into a trapping one), but on i386 we currently
11300 		   emit all comparisons unordered.  */
11301 		code = reverse_condition_maybe_unordered (code);
11302 	      else
11303 		{
11304 		  code = reverse_condition (code);
11305 		  if (compare_code != UNKNOWN)
11306 		    compare_code = reverse_condition (compare_code);
11307 		}
11308 	    }
11309 
11310 	  if (compare_code != UNKNOWN)
11311 	    {
11312 	      /* notl op1	(if needed)
11313 		 sarl $31, op1
11314 		 andl (cf-ct), op1
11315 		 addl ct, op1
11316 
11317 		 For x < 0 (resp. x <= -1) there will be no notl,
11318 		 so if possible swap the constants to get rid of the
11319 		 complement.
11320 		 True/false will be -1/0 while code below (store flag
11321 		 followed by decrement) is 0/-1, so the constants need
11322 		 to be exchanged once more.  */
11323 
11324 	      if (compare_code == GE || !cf)
11325 		{
11326 		  code = reverse_condition (code);
11327 		  compare_code = LT;
11328 		}
11329 	      else
11330 		{
11331 		  HOST_WIDE_INT tmp = cf;
11332 		  cf = ct;
11333 		  ct = tmp;
11334 		}
11335 
11336 	      out = emit_store_flag (out, code, ix86_compare_op0,
11337 				     ix86_compare_op1, VOIDmode, 0, -1);
11338 	    }
11339 	  else
11340 	    {
11341 	      out = emit_store_flag (out, code, ix86_compare_op0,
11342 				     ix86_compare_op1, VOIDmode, 0, 1);
11343 
11344 	      out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
11345 					 copy_rtx (out), 1, OPTAB_DIRECT);
11346 	    }
11347 
11348 	  out = expand_simple_binop (mode, AND, copy_rtx (out),
11349 				     gen_int_mode (cf - ct, mode),
11350 				     copy_rtx (out), 1, OPTAB_DIRECT);
11351 	  if (ct)
11352 	    out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
11353 				       copy_rtx (out), 1, OPTAB_DIRECT);
11354 	  if (!rtx_equal_p (out, operands[0]))
11355 	    emit_move_insn (operands[0], copy_rtx (out));
11356 
11357 	  return 1; /* DONE */
11358 	}
11359     }
11360 
11361   if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
11362     {
11363       /* Try a few things more with specific constants and a variable.  */
11364 
11365       optab op;
11366       rtx var, orig_out, out, tmp;
11367 
11368       if (BRANCH_COST <= 2)
11369 	return 0; /* FAIL */
11370 
11371       /* If one of the two operands is an interesting constant, load a
11372 	 constant with the above and mask it in with a logical operation.  */
11373 
11374       if (GET_CODE (operands[2]) == CONST_INT)
11375 	{
11376 	  var = operands[3];
11377 	  if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
11378 	    operands[3] = constm1_rtx, op = and_optab;
11379 	  else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
11380 	    operands[3] = const0_rtx, op = ior_optab;
11381 	  else
11382 	    return 0; /* FAIL */
11383 	}
11384       else if (GET_CODE (operands[3]) == CONST_INT)
11385 	{
11386 	  var = operands[2];
11387 	  if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
11388 	    operands[2] = constm1_rtx, op = and_optab;
11389 	  else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx)
11390 	    operands[2] = const0_rtx, op = ior_optab;
11391 	  else
11392 	    return 0; /* FAIL */
11393 	}
11394       else
11395         return 0; /* FAIL */
11396 
11397       orig_out = operands[0];
11398       tmp = gen_reg_rtx (mode);
11399       operands[0] = tmp;
11400 
11401       /* Recurse to get the constant loaded.  */
11402       if (ix86_expand_int_movcc (operands) == 0)
11403         return 0; /* FAIL */
11404 
11405       /* Mask in the interesting variable.  */
11406       out = expand_binop (mode, op, var, tmp, orig_out, 0,
11407 			  OPTAB_WIDEN);
11408       if (!rtx_equal_p (out, orig_out))
11409 	emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
11410 
11411       return 1; /* DONE */
11412     }
11413 
11414   /*
11415    * For comparison with above,
11416    *
11417    * movl cf,dest
11418    * movl ct,tmp
11419    * cmpl op1,op2
11420    * cmovcc tmp,dest
11421    *
11422    * Size 15.
11423    */
11424 
11425   if (! nonimmediate_operand (operands[2], mode))
11426     operands[2] = force_reg (mode, operands[2]);
11427   if (! nonimmediate_operand (operands[3], mode))
11428     operands[3] = force_reg (mode, operands[3]);
11429 
11430   if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
11431     {
11432       rtx tmp = gen_reg_rtx (mode);
11433       emit_move_insn (tmp, operands[3]);
11434       operands[3] = tmp;
11435     }
11436   if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
11437     {
11438       rtx tmp = gen_reg_rtx (mode);
11439       emit_move_insn (tmp, operands[2]);
11440       operands[2] = tmp;
11441     }
11442 
11443   if (! register_operand (operands[2], VOIDmode)
11444       && (mode == QImode
11445           || ! register_operand (operands[3], VOIDmode)))
11446     operands[2] = force_reg (mode, operands[2]);
11447 
11448   if (mode == QImode
11449       && ! register_operand (operands[3], VOIDmode))
11450     operands[3] = force_reg (mode, operands[3]);
11451 
11452   emit_insn (compare_seq);
11453   emit_insn (gen_rtx_SET (VOIDmode, operands[0],
11454 			  gen_rtx_IF_THEN_ELSE (mode,
11455 						compare_op, operands[2],
11456 						operands[3])));
11457   if (bypass_test)
11458     emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
11459 			    gen_rtx_IF_THEN_ELSE (mode,
11460 				  bypass_test,
11461 				  copy_rtx (operands[3]),
11462 				  copy_rtx (operands[0]))));
11463   if (second_test)
11464     emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
11465 			    gen_rtx_IF_THEN_ELSE (mode,
11466 				  second_test,
11467 				  copy_rtx (operands[2]),
11468 				  copy_rtx (operands[0]))));
11469 
11470   return 1; /* DONE */
11471 }
11472 
11473 /* Swap, force into registers, or otherwise massage the two operands
11474    to an sse comparison with a mask result.  Thus we differ a bit from
11475    ix86_prepare_fp_compare_args which expects to produce a flags result.
11476 
11477    The DEST operand exists to help determine whether to commute commutative
11478    operators.  The POP0/POP1 operands are updated in place.  The new
11479    comparison code is returned, or UNKNOWN if not implementable.  */
11480 
11481 static enum rtx_code
11482 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
11483 				  rtx *pop0, rtx *pop1)
11484 {
11485   rtx tmp;
11486 
11487   switch (code)
11488     {
11489     case LTGT:
11490     case UNEQ:
11491       /* We have no LTGT as an operator.  We could implement it with
11492 	 NE & ORDERED, but this requires an extra temporary.  It's
11493 	 not clear that it's worth it.  */
11494       return UNKNOWN;
11495 
11496     case LT:
11497     case LE:
11498     case UNGT:
11499     case UNGE:
11500       /* These are supported directly.  */
11501       break;
11502 
11503     case EQ:
11504     case NE:
11505     case UNORDERED:
11506     case ORDERED:
11507       /* For commutative operators, try to canonicalize the destination
11508 	 operand to be first in the comparison - this helps reload to
11509 	 avoid extra moves.  */
11510       if (!dest || !rtx_equal_p (dest, *pop1))
11511 	break;
11512       /* FALLTHRU */
11513 
11514     case GE:
11515     case GT:
11516     case UNLE:
11517     case UNLT:
11518       /* These are not supported directly.  Swap the comparison operands
11519 	 to transform into something that is supported.  */
11520       tmp = *pop0;
11521       *pop0 = *pop1;
11522       *pop1 = tmp;
11523       code = swap_condition (code);
11524       break;
11525 
11526     default:
11527       gcc_unreachable ();
11528     }
11529 
11530   return code;
11531 }
11532 
11533 /* Detect conditional moves that exactly match min/max operational
11534    semantics.  Note that this is IEEE safe, as long as we don't
11535    interchange the operands.
11536 
11537    Returns FALSE if this conditional move doesn't match a MIN/MAX,
11538    and TRUE if the operation is successful and instructions are emitted.  */
11539 
11540 static bool
11541 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
11542 			   rtx cmp_op1, rtx if_true, rtx if_false)
11543 {
11544   enum machine_mode mode;
11545   bool is_min;
11546   rtx tmp;
11547 
11548   if (code == LT)
11549     ;
11550   else if (code == UNGE)
11551     {
11552       tmp = if_true;
11553       if_true = if_false;
11554       if_false = tmp;
11555     }
11556   else
11557     return false;
11558 
11559   if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
11560     is_min = true;
11561   else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
11562     is_min = false;
11563   else
11564     return false;
11565 
11566   mode = GET_MODE (dest);
11567 
11568   /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
11569      but MODE may be a vector mode and thus not appropriate.  */
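  /* When NaNs or signed zeros may matter, use the UNSPEC_IEEE_MIN/MAX
     form below; being an UNSPEC, it keeps the operands in this fixed
     order, so the result for NaN and -0.0/+0.0 inputs follows the
     original comparison.  Otherwise a plain SMIN/SMAX is sufficient.  */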
11570   if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
11571     {
11572       int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
11573       rtvec v;
11574 
11575       if_true = force_reg (mode, if_true);
11576       v = gen_rtvec (2, if_true, if_false);
11577       tmp = gen_rtx_UNSPEC (mode, v, u);
11578     }
11579   else
11580     {
11581       code = is_min ? SMIN : SMAX;
11582       tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
11583     }
11584 
11585   emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
11586   return true;
11587 }
11588 
11589 /* Expand an sse vector comparison.  Return the register with the result.  */
11590 
11591 static rtx
11592 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
11593 		     rtx op_true, rtx op_false)
11594 {
11595   enum machine_mode mode = GET_MODE (dest);
11596   rtx x;
11597 
11598   cmp_op0 = force_reg (mode, cmp_op0);
11599   if (!nonimmediate_operand (cmp_op1, mode))
11600     cmp_op1 = force_reg (mode, cmp_op1);
11601 
11602   if (optimize
11603       || reg_overlap_mentioned_p (dest, op_true)
11604       || reg_overlap_mentioned_p (dest, op_false))
11605     dest = gen_reg_rtx (mode);
11606 
11607   x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
11608   emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11609 
11610   return dest;
11611 }
11612 
11613 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
11614    operations.  This is used for both scalar and vector conditional moves.  */
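/* In the general case this computes
   DEST = (OP_TRUE & CMP) | (OP_FALSE & ~CMP), where CMP is the
   all-ones/all-zeros mask produced by the comparison; when one arm is
   already zero, a single AND (or AND-with-complement) suffices.  */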
11615 
11616 static void
11617 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
11618 {
11619   enum machine_mode mode = GET_MODE (dest);
11620   rtx t2, t3, x;
11621 
11622   if (op_false == CONST0_RTX (mode))
11623     {
11624       op_true = force_reg (mode, op_true);
11625       x = gen_rtx_AND (mode, cmp, op_true);
11626       emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11627     }
11628   else if (op_true == CONST0_RTX (mode))
11629     {
11630       op_false = force_reg (mode, op_false);
11631       x = gen_rtx_NOT (mode, cmp);
11632       x = gen_rtx_AND (mode, x, op_false);
11633       emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11634     }
11635   else
11636     {
11637       op_true = force_reg (mode, op_true);
11638       op_false = force_reg (mode, op_false);
11639 
11640       t2 = gen_reg_rtx (mode);
11641       if (optimize)
11642 	t3 = gen_reg_rtx (mode);
11643       else
11644 	t3 = dest;
11645 
11646       x = gen_rtx_AND (mode, op_true, cmp);
11647       emit_insn (gen_rtx_SET (VOIDmode, t2, x));
11648 
11649       x = gen_rtx_NOT (mode, cmp);
11650       x = gen_rtx_AND (mode, x, op_false);
11651       emit_insn (gen_rtx_SET (VOIDmode, t3, x));
11652 
11653       x = gen_rtx_IOR (mode, t3, t2);
11654       emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11655     }
11656 }
11657 
11658 /* Expand a floating-point conditional move.  Return true if successful.  */
11659 
11660 int
11661 ix86_expand_fp_movcc (rtx operands[])
11662 {
11663   enum machine_mode mode = GET_MODE (operands[0]);
11664   enum rtx_code code = GET_CODE (operands[1]);
11665   rtx tmp, compare_op, second_test, bypass_test;
11666 
11667   if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
11668     {
11669       enum machine_mode cmode;
11670 
11671       /* Since we've no cmove for sse registers, don't force bad register
11672 	 allocation just to gain access to it.  Deny movcc when the
11673 	 comparison mode doesn't match the move mode.  */
11674       cmode = GET_MODE (ix86_compare_op0);
11675       if (cmode == VOIDmode)
11676 	cmode = GET_MODE (ix86_compare_op1);
11677       if (cmode != mode)
11678 	return 0;
11679 
11680       code = ix86_prepare_sse_fp_compare_args (operands[0], code,
11681 					       &ix86_compare_op0,
11682 					       &ix86_compare_op1);
11683       if (code == UNKNOWN)
11684 	return 0;
11685 
11686       if (ix86_expand_sse_fp_minmax (operands[0], code, ix86_compare_op0,
11687 				     ix86_compare_op1, operands[2],
11688 				     operands[3]))
11689 	return 1;
11690 
11691       tmp = ix86_expand_sse_cmp (operands[0], code, ix86_compare_op0,
11692 				 ix86_compare_op1, operands[2], operands[3]);
11693       ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
11694       return 1;
11695     }
11696 
11697   /* The floating point conditional move instructions don't directly
11698      support conditions resulting from a signed integer comparison.  */
11699 
11700   compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
11701 
11702   /* The floating point conditional move instructions don't directly
11706     {
11707       gcc_assert (!second_test && !bypass_test);
11708       tmp = gen_reg_rtx (QImode);
11709       ix86_expand_setcc (code, tmp);
11710       code = NE;
11711       ix86_compare_op0 = tmp;
11712       ix86_compare_op1 = const0_rtx;
11713       compare_op = ix86_expand_compare (code,  &second_test, &bypass_test);
11714     }
11715   if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
11716     {
11717       tmp = gen_reg_rtx (mode);
11718       emit_move_insn (tmp, operands[3]);
11719       operands[3] = tmp;
11720     }
11721   if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
11722     {
11723       tmp = gen_reg_rtx (mode);
11724       emit_move_insn (tmp, operands[2]);
11725       operands[2] = tmp;
11726     }
11727 
11728   emit_insn (gen_rtx_SET (VOIDmode, operands[0],
11729 			  gen_rtx_IF_THEN_ELSE (mode, compare_op,
11730 						operands[2], operands[3])));
11731   if (bypass_test)
11732     emit_insn (gen_rtx_SET (VOIDmode, operands[0],
11733 			    gen_rtx_IF_THEN_ELSE (mode, bypass_test,
11734 						  operands[3], operands[0])));
11735   if (second_test)
11736     emit_insn (gen_rtx_SET (VOIDmode, operands[0],
11737 			    gen_rtx_IF_THEN_ELSE (mode, second_test,
11738 						  operands[2], operands[0])));
11739 
11740   return 1;
11741 }
11742 
11743 /* Expand a floating-point vector conditional move; a vcond operation
11744    rather than a movcc operation.  */
11745 
11746 bool
11747 ix86_expand_fp_vcond (rtx operands[])
11748 {
11749   enum rtx_code code = GET_CODE (operands[3]);
11750   rtx cmp;
11751 
11752   code = ix86_prepare_sse_fp_compare_args (operands[0], code,
11753 					   &operands[4], &operands[5]);
11754   if (code == UNKNOWN)
11755     return false;
11756 
11757   if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
11758 				 operands[5], operands[1], operands[2]))
11759     return true;
11760 
11761   cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
11762 			     operands[1], operands[2]);
11763   ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
11764   return true;
11765 }
11766 
11767 /* Expand a signed integral vector conditional move.  */
11768 
11769 bool
11770 ix86_expand_int_vcond (rtx operands[])
11771 {
11772   enum machine_mode mode = GET_MODE (operands[0]);
11773   enum rtx_code code = GET_CODE (operands[3]);
11774   bool negate = false;
11775   rtx x, cop0, cop1;
11776 
11777   cop0 = operands[4];
11778   cop1 = operands[5];
11779 
11780   /* Canonicalize the comparison to EQ, GT, GTU.  */
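  /* For example, LE becomes GT with the two select arms exchanged,
     LT becomes GT with the compare operands swapped, and GE uses
     both transformations.  */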
11781   switch (code)
11782     {
11783     case EQ:
11784     case GT:
11785     case GTU:
11786       break;
11787 
11788     case NE:
11789     case LE:
11790     case LEU:
11791       code = reverse_condition (code);
11792       negate = true;
11793       break;
11794 
11795     case GE:
11796     case GEU:
11797       code = reverse_condition (code);
11798       negate = true;
11799       /* FALLTHRU */
11800 
11801     case LT:
11802     case LTU:
11803       code = swap_condition (code);
11804       x = cop0, cop0 = cop1, cop1 = x;
11805       break;
11806 
11807     default:
11808       gcc_unreachable ();
11809     }
11810 
11811   /* Unsigned parallel compare is not supported by the hardware.  Play some
11812      tricks to turn this into a signed comparison against 0.  */
11813   if (code == GTU)
11814     {
11815       cop0 = force_reg (mode, cop0);
11816 
11817       switch (mode)
11818 	{
11819 	case V4SImode:
11820 	  {
11821 	    rtx t1, t2, mask;
11822 
11823 	    /* Perform a parallel modulo subtraction.  */
11824 	    t1 = gen_reg_rtx (mode);
11825 	    emit_insn (gen_subv4si3 (t1, cop0, cop1));
11826 
11827 	    /* Extract the original sign bit of op0.  */
11828 	    mask = GEN_INT (-0x80000000);
11829 	    mask = gen_rtx_CONST_VECTOR (mode,
11830 			gen_rtvec (4, mask, mask, mask, mask));
11831 	    mask = force_reg (mode, mask);
11832 	    t2 = gen_reg_rtx (mode);
11833 	    emit_insn (gen_andv4si3 (t2, cop0, mask));
11834 
11835 	    /* XOR it back into the result of the subtraction.  This results
11836 	       in the sign bit set iff we saw unsigned underflow.  */
11837 	    x = gen_reg_rtx (mode);
11838 	    emit_insn (gen_xorv4si3 (x, t1, t2));
11839 
11840 	    code = GT;
11841 	  }
11842 	  break;
11843 
11844 	case V16QImode:
11845 	case V8HImode:
11846 	  /* Perform a parallel unsigned saturating subtraction.  */
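	  /* cop0 >u cop1 exactly when the unsigned saturating
	     difference cop0 - cop1 is nonzero, so compare that
	     difference against zero and invert the sense of the
	     selection.  */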
11847 	  x = gen_reg_rtx (mode);
11848 	  emit_insn (gen_rtx_SET (VOIDmode, x,
11849 				  gen_rtx_US_MINUS (mode, cop0, cop1)));
11850 
11851 	  code = EQ;
11852 	  negate = !negate;
11853 	  break;
11854 
11855 	default:
11856 	  gcc_unreachable ();
11857 	}
11858 
11859       cop0 = x;
11860       cop1 = CONST0_RTX (mode);
11861     }
11862 
11863   x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
11864 			   operands[1+negate], operands[2-negate]);
11865 
11866   ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
11867 			 operands[2-negate]);
11868   return true;
11869 }
11870 
11871 /* Expand conditional increment or decrement using adc/sbb instructions.
11872    The default case using setcc followed by the conditional move can be
11873    done by generic code.  */
11874 int
11875 ix86_expand_int_addcc (rtx operands[])
11876 {
11877   enum rtx_code code = GET_CODE (operands[1]);
11878   rtx compare_op;
11879   rtx val = const0_rtx;
11880   bool fpcmp = false;
11881   enum machine_mode mode = GET_MODE (operands[0]);
11882 
11883   if (operands[3] != const1_rtx
11884       && operands[3] != constm1_rtx)
11885     return 0;
11886   if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
11887 				       ix86_compare_op1, &compare_op))
11888      return 0;
11889   code = GET_CODE (compare_op);
11890 
11891   if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
11892       || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
11893     {
11894       fpcmp = true;
11895       code = ix86_fp_compare_code_to_integer (code);
11896     }
11897 
11898   if (code != LTU)
11899     {
11900       val = constm1_rtx;
11901       if (fpcmp)
11902 	PUT_CODE (compare_op,
11903 		  reverse_condition_maybe_unordered
11904 		    (GET_CODE (compare_op)));
11905       else
11906 	PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
11907     }
11908   PUT_MODE (compare_op, mode);
11909 
11910   /* Construct either adc or sbb insn.  */
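  /* For example, an increment by (a <u b) is emitted as a compare
     followed by "adc $0, dest", and a decrement by (a <u b) as a
     compare followed by "sbb $0, dest"; the remaining comparison codes
     are handled by reversing the condition and using -1 instead of 0
     as the constant operand.  */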
11911   if ((code == LTU) == (operands[3] == constm1_rtx))
11912     {
11913       switch (GET_MODE (operands[0]))
11914 	{
11915 	  case QImode:
11916             emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
11917 	    break;
11918 	  case HImode:
11919             emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
11920 	    break;
11921 	  case SImode:
11922             emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
11923 	    break;
11924 	  case DImode:
11925             emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
11926 	    break;
11927 	  default:
11928 	    gcc_unreachable ();
11929 	}
11930     }
11931   else
11932     {
11933       switch (GET_MODE (operands[0]))
11934 	{
11935 	  case QImode:
11936             emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
11937 	    break;
11938 	  case HImode:
11939             emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
11940 	    break;
11941 	  case SImode:
11942             emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
11943 	    break;
11944 	  case DImode:
11945             emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
11946 	    break;
11947 	  default:
11948 	    gcc_unreachable ();
11949 	}
11950     }
11951   return 1; /* DONE */
11952 }
11953 
11954 
11955 /* Split operands 0 and 1 into SImode parts.  Similar to split_di, but
11956    works for floating point operands and non-offsettable memories.
11957    For pushes, it returns just stack offsets; the values will be saved
11958    in the right order.  At most three parts are generated.  */
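/* For example, on a 32-bit target a DFmode operand yields two SImode
   parts and an XFmode operand three; on a 64-bit target an XFmode or
   TFmode operand yields two parts.  */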
11959 
11960 static int
11961 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
11962 {
11963   int size;
11964 
11965   if (!TARGET_64BIT)
11966     size = mode == XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
11967   else
11968     size = (GET_MODE_SIZE (mode) + 4) / 8;
11969 
11970   gcc_assert (GET_CODE (operand) != REG || !MMX_REGNO_P (REGNO (operand)));
11971   gcc_assert (size >= 2 && size <= 3);
11972 
11973   /* Optimize constant pool references to immediates.  This is used by fp
11974      moves, which force all constants to memory to allow combining.  */
11975   if (GET_CODE (operand) == MEM && MEM_READONLY_P (operand))
11976     {
11977       rtx tmp = maybe_get_pool_constant (operand);
11978       if (tmp)
11979 	operand = tmp;
11980     }
11981 
11982   if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
11983     {
11984       /* The only non-offsettable memories we handle are pushes.  */
11985       int ok = push_operand (operand, VOIDmode);
11986 
11987       gcc_assert (ok);
11988 
11989       operand = copy_rtx (operand);
11990       PUT_MODE (operand, Pmode);
11991       parts[0] = parts[1] = parts[2] = operand;
11992       return size;
11993     }
11994 
11995   if (GET_CODE (operand) == CONST_VECTOR)
11996     {
11997       enum machine_mode imode = int_mode_for_mode (mode);
11998       /* Caution: if we looked through a constant pool memory above,
11999 	 the operand may actually have a different mode now.  That's
12000 	 ok, since we want to pun this all the way back to an integer.  */
12001       operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
12002       gcc_assert (operand != NULL);
12003       mode = imode;
12004     }
12005 
12006   if (!TARGET_64BIT)
12007     {
12008       if (mode == DImode)
12009 	split_di (&operand, 1, &parts[0], &parts[1]);
12010       else
12011 	{
12012 	  if (REG_P (operand))
12013 	    {
12014 	      gcc_assert (reload_completed);
12015 	      parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
12016 	      parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
12017 	      if (size == 3)
12018 		parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
12019 	    }
12020 	  else if (offsettable_memref_p (operand))
12021 	    {
12022 	      operand = adjust_address (operand, SImode, 0);
12023 	      parts[0] = operand;
12024 	      parts[1] = adjust_address (operand, SImode, 4);
12025 	      if (size == 3)
12026 		parts[2] = adjust_address (operand, SImode, 8);
12027 	    }
12028 	  else if (GET_CODE (operand) == CONST_DOUBLE)
12029 	    {
12030 	      REAL_VALUE_TYPE r;
12031 	      long l[4];
12032 
12033 	      REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
12034 	      switch (mode)
12035 		{
12036 		case XFmode:
12037 		  REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
12038 		  parts[2] = gen_int_mode (l[2], SImode);
12039 		  break;
12040 		case DFmode:
12041 		  REAL_VALUE_TO_TARGET_DOUBLE (r, l);
12042 		  break;
12043 		default:
12044 		  gcc_unreachable ();
12045 		}
12046 	      parts[1] = gen_int_mode (l[1], SImode);
12047 	      parts[0] = gen_int_mode (l[0], SImode);
12048 	    }
12049 	  else
12050 	    gcc_unreachable ();
12051 	}
12052     }
12053   else
12054     {
12055       if (mode == TImode)
12056 	split_ti (&operand, 1, &parts[0], &parts[1]);
12057       if (mode == XFmode || mode == TFmode)
12058 	{
12059 	  enum machine_mode upper_mode = mode == XFmode ? SImode : DImode;
12060 	  if (REG_P (operand))
12061 	    {
12062 	      gcc_assert (reload_completed);
12063 	      parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
12064 	      parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
12065 	    }
12066 	  else if (offsettable_memref_p (operand))
12067 	    {
12068 	      operand = adjust_address (operand, DImode, 0);
12069 	      parts[0] = operand;
12070 	      parts[1] = adjust_address (operand, upper_mode, 8);
12071 	    }
12072 	  else if (GET_CODE (operand) == CONST_DOUBLE)
12073 	    {
12074 	      REAL_VALUE_TYPE r;
12075 	      long l[4];
12076 
12077 	      REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
12078 	      real_to_target (l, &r, mode);
12079 
12080 	      /* Do not use shift by 32 to avoid warning on 32bit systems.  */
12081 	      if (HOST_BITS_PER_WIDE_INT >= 64)
12082 	        parts[0]
12083 		  = gen_int_mode
12084 		      ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
12085 		       + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
12086 		       DImode);
12087 	      else
12088 	        parts[0] = immed_double_const (l[0], l[1], DImode);
12089 
12090 	      if (upper_mode == SImode)
12091 	        parts[1] = gen_int_mode (l[2], SImode);
12092 	      else if (HOST_BITS_PER_WIDE_INT >= 64)
12093 	        parts[1]
12094 		  = gen_int_mode
12095 		      ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
12096 		       + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
12097 		       DImode);
12098 	      else
12099 	        parts[1] = immed_double_const (l[2], l[3], DImode);
12100 	    }
12101 	  else
12102 	    gcc_unreachable ();
12103 	}
12104     }
12105 
12106   return size;
12107 }
12108 
12109 /* Emit insns to perform a move or push of DI, DF, and XF values.
12110    All required insns are emitted before this function returns.
12111    Operands 2-4 contain the input values in the correct order;
12112    operands 5-7 contain the output values.  */
12113 
12114 void
12115 ix86_split_long_move (rtx operands[])
12116 {
12117   rtx part[2][3];
12118   int nparts;
12119   int push = 0;
12120   int collisions = 0;
12121   enum machine_mode mode = GET_MODE (operands[0]);
12122 
12123   /* The DFmode expanders may ask us to move a double.
12124      For a 64bit target this is a single move.  By hiding that fact
12125      here we simplify the i386.md splitters.  */
12126   if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
12127     {
12128       /* Optimize constant pool references to immediates.  This is used by
12129 	 fp moves, which force all constants to memory to allow combining.  */
12130 
12131       if (GET_CODE (operands[1]) == MEM
12132 	  && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
12133 	  && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
12134 	operands[1] = get_pool_constant (XEXP (operands[1], 0));
12135       if (push_operand (operands[0], VOIDmode))
12136 	{
12137 	  operands[0] = copy_rtx (operands[0]);
12138 	  PUT_MODE (operands[0], Pmode);
12139 	}
12140       else
12141         operands[0] = gen_lowpart (DImode, operands[0]);
12142       operands[1] = gen_lowpart (DImode, operands[1]);
12143       emit_move_insn (operands[0], operands[1]);
12144       return;
12145     }
12146 
12147   /* The only non-offsettable memory we handle is push.  */
12148   if (push_operand (operands[0], VOIDmode))
12149     push = 1;
12150   else
12151     gcc_assert (GET_CODE (operands[0]) != MEM
12152 		|| offsettable_memref_p (operands[0]));
12153 
12154   nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
12155   ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
12156 
12157   /* When emitting a push, take care with source operands on the stack.  */
12158   if (push && GET_CODE (operands[1]) == MEM
12159       && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
12160     {
12161       if (nparts == 3)
12162 	part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
12163 				     XEXP (part[1][2], 0));
12164       part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
12165 				   XEXP (part[1][1], 0));
12166     }
12167 
12168   /* We need to do the copy in the right order in case an address register
12169      of the source overlaps the destination.  */
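  /* For example, if the low destination register is also the base
     register of the source memory address, loading it first would
     clobber the address needed for the remaining loads, so the parts
     are copied in the other order or an lea is emitted first.  */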
12170   if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
12171     {
12172       if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
12173 	collisions++;
12174       if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
12175 	collisions++;
12176       if (nparts == 3
12177 	  && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
12178 	collisions++;
12179 
12180       /* Collision in the middle part can be handled by reordering.  */
12181       if (collisions == 1 && nparts == 3
12182 	  && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
12183 	{
12184 	  rtx tmp;
12185 	  tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
12186 	  tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
12187 	}
12188 
12189       /* If there are more collisions, we can't handle it by reordering.
12190 	 Do an lea to the last part and use only one colliding move.  */
12191       else if (collisions > 1)
12192 	{
12193 	  rtx base;
12194 
12195 	  collisions = 1;
12196 
12197 	  base = part[0][nparts - 1];
12198 
12199 	  /* Handle the case when the last part isn't valid for lea.
12200 	     Happens in 64-bit mode storing the 12-byte XFmode.  */
12201 	  if (GET_MODE (base) != Pmode)
12202 	    base = gen_rtx_REG (Pmode, REGNO (base));
12203 
12204 	  emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
12205 	  part[1][0] = replace_equiv_address (part[1][0], base);
12206 	  part[1][1] = replace_equiv_address (part[1][1],
12207 				      plus_constant (base, UNITS_PER_WORD));
12208 	  if (nparts == 3)
12209 	    part[1][2] = replace_equiv_address (part[1][2],
12210 				      plus_constant (base, 8));
12211 	}
12212     }
12213 
12214   if (push)
12215     {
12216       if (!TARGET_64BIT)
12217 	{
12218 	  if (nparts == 3)
12219 	    {
12220 	      if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
12221                 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
12222 	      emit_move_insn (part[0][2], part[1][2]);
12223 	    }
12224 	}
12225       else
12226 	{
12227 	  /* In 64bit mode we don't have a 32bit push available.  If this is
12228 	     a register, it is OK - we will just use the larger counterpart.
12229 	     We also retype memory - this comes from an attempt to avoid a REX
12230 	     prefix when moving the second half of a TFmode value.  */
12231 	  if (GET_MODE (part[1][1]) == SImode)
12232 	    {
12233 	      switch (GET_CODE (part[1][1]))
12234 		{
12235 		case MEM:
12236 		  part[1][1] = adjust_address (part[1][1], DImode, 0);
12237 		  break;
12238 
12239 		case REG:
12240 		  part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
12241 		  break;
12242 
12243 		default:
12244 		  gcc_unreachable ();
12245 		}
12246 
12247 	      if (GET_MODE (part[1][0]) == SImode)
12248 		part[1][0] = part[1][1];
12249 	    }
12250 	}
12251       emit_move_insn (part[0][1], part[1][1]);
12252       emit_move_insn (part[0][0], part[1][0]);
12253       return;
12254     }
12255 
12256   /* Choose correct order to not overwrite the source before it is copied.  */
12257   if ((REG_P (part[0][0])
12258        && REG_P (part[1][1])
12259        && (REGNO (part[0][0]) == REGNO (part[1][1])
12260 	   || (nparts == 3
12261 	       && REGNO (part[0][0]) == REGNO (part[1][2]))))
12262       || (collisions > 0
12263 	  && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
12264     {
12265       if (nparts == 3)
12266 	{
12267 	  operands[2] = part[0][2];
12268 	  operands[3] = part[0][1];
12269 	  operands[4] = part[0][0];
12270 	  operands[5] = part[1][2];
12271 	  operands[6] = part[1][1];
12272 	  operands[7] = part[1][0];
12273 	}
12274       else
12275 	{
12276 	  operands[2] = part[0][1];
12277 	  operands[3] = part[0][0];
12278 	  operands[5] = part[1][1];
12279 	  operands[6] = part[1][0];
12280 	}
12281     }
12282   else
12283     {
12284       if (nparts == 3)
12285 	{
12286 	  operands[2] = part[0][0];
12287 	  operands[3] = part[0][1];
12288 	  operands[4] = part[0][2];
12289 	  operands[5] = part[1][0];
12290 	  operands[6] = part[1][1];
12291 	  operands[7] = part[1][2];
12292 	}
12293       else
12294 	{
12295 	  operands[2] = part[0][0];
12296 	  operands[3] = part[0][1];
12297 	  operands[5] = part[1][0];
12298 	  operands[6] = part[1][1];
12299 	}
12300     }
12301 
12302   /* If optimizing for size, attempt to locally unCSE nonzero constants.  */
12303   if (optimize_size)
12304     {
12305       if (GET_CODE (operands[5]) == CONST_INT
12306 	  && operands[5] != const0_rtx
12307 	  && REG_P (operands[2]))
12308 	{
12309 	  if (GET_CODE (operands[6]) == CONST_INT
12310 	      && INTVAL (operands[6]) == INTVAL (operands[5]))
12311 	    operands[6] = operands[2];
12312 
12313 	  if (nparts == 3
12314 	      && GET_CODE (operands[7]) == CONST_INT
12315 	      && INTVAL (operands[7]) == INTVAL (operands[5]))
12316 	    operands[7] = operands[2];
12317 	}
12318 
12319       if (nparts == 3
12320 	  && GET_CODE (operands[6]) == CONST_INT
12321 	  && operands[6] != const0_rtx
12322 	  && REG_P (operands[3])
12323 	  && GET_CODE (operands[7]) == CONST_INT
12324 	  && INTVAL (operands[7]) == INTVAL (operands[6]))
12325 	operands[7] = operands[3];
12326     }
12327 
12328   emit_move_insn (operands[2], operands[5]);
12329   emit_move_insn (operands[3], operands[6]);
12330   if (nparts == 3)
12331     emit_move_insn (operands[4], operands[7]);
12332 
12333   return;
12334 }
12335 
12336 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
12337    left shift by a constant, either using a single shift or
12338    a sequence of add instructions.  */
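/* For example, a left shift by 2 may be emitted as two additions of the
   operand to itself when two adds are no more expensive than a single
   shift by a constant on the target.  */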
12339 
12340 static void
12341 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
12342 {
12343   if (count == 1)
12344     {
12345       emit_insn ((mode == DImode
12346 		  ? gen_addsi3
12347 		  : gen_adddi3) (operand, operand, operand));
12348     }
12349   else if (!optimize_size
12350 	   && count * ix86_cost->add <= ix86_cost->shift_const)
12351     {
12352       int i;
12353       for (i = 0; i < count; i++)
12354 	{
12355 	  emit_insn ((mode == DImode
12356 		      ? gen_addsi3
12357 		      : gen_adddi3) (operand, operand, operand));
12358 	}
12359     }
12360   else
12361     emit_insn ((mode == DImode
12362 		? gen_ashlsi3
12363 		: gen_ashldi3) (operand, operand, GEN_INT (count)));
12364 }
12365 
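/* Split a double-word left shift (DImode on 32-bit targets, TImode on
   64-bit targets) into operations on the word-sized halves.  OPERANDS[2]
   is the shift count.  SCRATCH, when present and cmove is available, is
   used to fix up the result without a branch when the count is greater
   than or equal to the width of one half.  */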
12366 void
12367 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
12368 {
12369   rtx low[2], high[2];
12370   int count;
12371   const int single_width = mode == DImode ? 32 : 64;
12372 
12373   if (GET_CODE (operands[2]) == CONST_INT)
12374     {
12375       (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
12376       count = INTVAL (operands[2]) & (single_width * 2 - 1);
12377 
12378       if (count >= single_width)
12379 	{
12380 	  emit_move_insn (high[0], low[1]);
12381 	  emit_move_insn (low[0], const0_rtx);
12382 
12383 	  if (count > single_width)
12384 	    ix86_expand_ashl_const (high[0], count - single_width, mode);
12385 	}
12386       else
12387 	{
12388 	  if (!rtx_equal_p (operands[0], operands[1]))
12389 	    emit_move_insn (operands[0], operands[1]);
12390 	  emit_insn ((mode == DImode
12391 		     ? gen_x86_shld_1
12392 		     : gen_x86_64_shld) (high[0], low[0], GEN_INT (count)));
12393 	  ix86_expand_ashl_const (low[0], count, mode);
12394 	}
12395       return;
12396     }
12397 
12398   (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
12399 
12400   if (operands[1] == const1_rtx)
12401     {
12402       /* Assuming we've chosen QImode-capable registers, 1 << N
12403 	 can be done with two 32/64-bit shifts, no branches, no cmoves.  */
12404       if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
12405 	{
12406 	  rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
12407 
12408 	  ix86_expand_clear (low[0]);
12409 	  ix86_expand_clear (high[0]);
12410 	  emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (single_width)));
12411 
12412 	  d = gen_lowpart (QImode, low[0]);
12413 	  d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
12414 	  s = gen_rtx_EQ (QImode, flags, const0_rtx);
12415 	  emit_insn (gen_rtx_SET (VOIDmode, d, s));
12416 
12417 	  d = gen_lowpart (QImode, high[0]);
12418 	  d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
12419 	  s = gen_rtx_NE (QImode, flags, const0_rtx);
12420 	  emit_insn (gen_rtx_SET (VOIDmode, d, s));
12421 	}
12422 
12423       /* Otherwise, we can get the same results by manually performing
12424 	 a bit extract operation on bit 5/6, and then performing the two
12425 	 shifts.  The two methods of getting 0/1 into low/high are exactly
12426 	 the same size.  Avoiding the shift in the bit extract case helps
12427 	 pentium4 a bit; no one else seems to care much either way.  */
12428       else
12429 	{
12430 	  rtx x;
12431 
12432 	  if (TARGET_PARTIAL_REG_STALL && !optimize_size)
12433 	    x = gen_rtx_ZERO_EXTEND (mode == DImode ? SImode : DImode, operands[2]);
12434 	  else
12435 	    x = gen_lowpart (mode == DImode ? SImode : DImode, operands[2]);
12436 	  emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
12437 
12438 	  emit_insn ((mode == DImode
12439 		      ? gen_lshrsi3
12440 		      : gen_lshrdi3) (high[0], high[0], GEN_INT (mode == DImode ? 5 : 6)));
12441 	  emit_insn ((mode == DImode
12442 		      ? gen_andsi3
12443 		      : gen_anddi3) (high[0], high[0], GEN_INT (1)));
12444 	  emit_move_insn (low[0], high[0]);
12445 	  emit_insn ((mode == DImode
12446 		      ? gen_xorsi3
12447 		      : gen_xordi3) (low[0], low[0], GEN_INT (1)));
12448 	}
12449 
12450       emit_insn ((mode == DImode
12451 		    ? gen_ashlsi3
12452 		    : gen_ashldi3) (low[0], low[0], operands[2]));
12453       emit_insn ((mode == DImode
12454 		    ? gen_ashlsi3
12455 		    : gen_ashldi3) (high[0], high[0], operands[2]));
12456       return;
12457     }
12458 
12459   if (operands[1] == constm1_rtx)
12460     {
12461       /* For -1 << N, we can avoid the shld instruction, because we
12462 	 know that we're shifting 0...31/63 ones into a -1.  */
12463       emit_move_insn (low[0], constm1_rtx);
12464       if (optimize_size)
12465 	emit_move_insn (high[0], low[0]);
12466       else
12467 	emit_move_insn (high[0], constm1_rtx);
12468     }
12469   else
12470     {
12471       if (!rtx_equal_p (operands[0], operands[1]))
12472 	emit_move_insn (operands[0], operands[1]);
12473 
12474       (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
12475       emit_insn ((mode == DImode
12476 		  ? gen_x86_shld_1
12477 		  : gen_x86_64_shld) (high[0], low[0], operands[2]));
12478     }
12479 
12480   emit_insn ((mode == DImode ? gen_ashlsi3 : gen_ashldi3) (low[0], low[0], operands[2]));
12481 
12482   if (TARGET_CMOVE && scratch)
12483     {
12484       ix86_expand_clear (scratch);
12485       emit_insn ((mode == DImode
12486 		  ? gen_x86_shift_adj_1
12487 		  : gen_x86_64_shift_adj) (high[0], low[0], operands[2], scratch));
12488     }
12489   else
12490     emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
12491 }
12492 
12493 void
12494 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
12495 {
12496   rtx low[2], high[2];
12497   int count;
12498   const int single_width = mode == DImode ? 32 : 64;
12499 
12500   if (GET_CODE (operands[2]) == CONST_INT)
12501     {
12502       (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
12503       count = INTVAL (operands[2]) & (single_width * 2 - 1);
12504 
12505       if (count == single_width * 2 - 1)
12506 	{
12507 	  emit_move_insn (high[0], high[1]);
12508 	  emit_insn ((mode == DImode
12509 		      ? gen_ashrsi3
12510 		      : gen_ashrdi3) (high[0], high[0],
12511 				      GEN_INT (single_width - 1)));
12512 	  emit_move_insn (low[0], high[0]);
12513 
12514 	}
12515       else if (count >= single_width)
12516 	{
12517 	  emit_move_insn (low[0], high[1]);
12518 	  emit_move_insn (high[0], low[0]);
12519 	  emit_insn ((mode == DImode
12520 		      ? gen_ashrsi3
12521 		      : gen_ashrdi3) (high[0], high[0],
12522 				      GEN_INT (single_width - 1)));
12523 	  if (count > single_width)
12524 	    emit_insn ((mode == DImode
12525 			? gen_ashrsi3
12526 			: gen_ashrdi3) (low[0], low[0],
12527 					GEN_INT (count - single_width)));
12528 	}
12529       else
12530 	{
12531 	  if (!rtx_equal_p (operands[0], operands[1]))
12532 	    emit_move_insn (operands[0], operands[1]);
12533 	  emit_insn ((mode == DImode
12534 		      ? gen_x86_shrd_1
12535 		      : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
12536 	  emit_insn ((mode == DImode
12537 		      ? gen_ashrsi3
12538 		      : gen_ashrdi3) (high[0], high[0], GEN_INT (count)));
12539 	}
12540     }
12541   else
12542     {
12543       if (!rtx_equal_p (operands[0], operands[1]))
12544 	emit_move_insn (operands[0], operands[1]);
12545 
12546       (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
12547 
12548       emit_insn ((mode == DImode
12549 		  ? gen_x86_shrd_1
12550 		  : gen_x86_64_shrd) (low[0], high[0], operands[2]));
12551       emit_insn ((mode == DImode
12552 		  ? gen_ashrsi3
12553 		  : gen_ashrdi3)  (high[0], high[0], operands[2]));
12554 
12555       if (TARGET_CMOVE && scratch)
12556 	{
12557 	  emit_move_insn (scratch, high[0]);
12558 	  emit_insn ((mode == DImode
12559 		      ? gen_ashrsi3
12560 		      : gen_ashrdi3) (scratch, scratch,
12561 				      GEN_INT (single_width - 1)));
12562 	  emit_insn ((mode == DImode
12563 		      ? gen_x86_shift_adj_1
12564 		      : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
12565 					 scratch));
12566 	}
12567       else
12568 	emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
12569     }
12570 }
12571 
12572 void
12573 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
12574 {
12575   rtx low[2], high[2];
12576   int count;
12577   const int single_width = mode == DImode ? 32 : 64;
12578 
12579   if (GET_CODE (operands[2]) == CONST_INT)
12580     {
12581       (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
12582       count = INTVAL (operands[2]) & (single_width * 2 - 1);
12583 
12584       if (count >= single_width)
12585 	{
12586 	  emit_move_insn (low[0], high[1]);
12587 	  ix86_expand_clear (high[0]);
12588 
12589 	  if (count > single_width)
12590 	    emit_insn ((mode == DImode
12591 			? gen_lshrsi3
12592 			: gen_lshrdi3) (low[0], low[0],
12593 					GEN_INT (count - single_width)));
12594 	}
12595       else
12596 	{
12597 	  if (!rtx_equal_p (operands[0], operands[1]))
12598 	    emit_move_insn (operands[0], operands[1]);
12599 	  emit_insn ((mode == DImode
12600 		      ? gen_x86_shrd_1
12601 		      : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
12602 	  emit_insn ((mode == DImode
12603 		      ? gen_lshrsi3
12604 		      : gen_lshrdi3) (high[0], high[0], GEN_INT (count)));
12605 	}
12606     }
12607   else
12608     {
12609       if (!rtx_equal_p (operands[0], operands[1]))
12610 	emit_move_insn (operands[0], operands[1]);
12611 
12612       (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
12613 
12614       emit_insn ((mode == DImode
12615 		  ? gen_x86_shrd_1
12616 		  : gen_x86_64_shrd) (low[0], high[0], operands[2]));
12617       emit_insn ((mode == DImode
12618 		  ? gen_lshrsi3
12619 		  : gen_lshrdi3) (high[0], high[0], operands[2]));
12620 
12621       /* Heh.  By reversing the arguments, we can reuse this pattern.  */
12622       if (TARGET_CMOVE && scratch)
12623 	{
12624 	  ix86_expand_clear (scratch);
12625 	  emit_insn ((mode == DImode
12626 		      ? gen_x86_shift_adj_1
12627 		      : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
12628 					       scratch));
12629 	}
12630       else
12631 	emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
12632     }
12633 }
12634 
12635 /* Helper function for the string operations below.  Test whether VARIABLE
12636    is aligned to VALUE bytes; if so, jump to the returned label.  */
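/* For example, ix86_expand_aligntest (destreg, 1) tests the low bit of
   the destination address; the emitted code jumps to the returned label
   when that bit is clear, so a single-byte copy placed before the label
   is skipped for already even addresses.  */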
12637 static rtx
12638 ix86_expand_aligntest (rtx variable, int value)
12639 {
12640   rtx label = gen_label_rtx ();
12641   rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
12642   if (GET_MODE (variable) == DImode)
12643     emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
12644   else
12645     emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
12646   emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
12647 			   1, label);
12648   return label;
12649 }
12650 
12651 /* Decrease COUNTREG by VALUE.  */
12652 static void
12653 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
12654 {
12655   if (GET_MODE (countreg) == DImode)
12656     emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
12657   else
12658     emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
12659 }
12660 
12661 /* Zero extend possibly SImode EXP to Pmode register.  */
12662 rtx
12663 ix86_zero_extend_to_Pmode (rtx exp)
12664 {
12665   rtx r;
12666   if (GET_MODE (exp) == VOIDmode)
12667     return force_reg (Pmode, exp);
12668   if (GET_MODE (exp) == Pmode)
12669     return copy_to_mode_reg (Pmode, exp);
12670   r = gen_reg_rtx (Pmode);
12671   emit_insn (gen_zero_extendsidi2 (r, exp));
12672   return r;
12673 }
12674 
12675 /* Expand string move (memcpy) operation.  Use i386 string operations when
12676    profitable.  ix86_expand_clrmem contains similar code.  */
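/* Three strategies are used: a bare rep ; movsb when not optimizing or
   optimizing for size, rep ; movsl (or movsq) plus an unrolled tail when
   the count and alignment are known to be favorable, and, for the
   generic case, explicit alignment code followed by rep ; movsl (movsq
   on 64-bit) and a tail that copies the remaining bytes.  */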
12677 int
12678 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp)
12679 {
12680   rtx srcreg, destreg, countreg, srcexp, destexp;
12681   enum machine_mode counter_mode;
12682   HOST_WIDE_INT align = 0;
12683   unsigned HOST_WIDE_INT count = 0;
12684 
12685   if (GET_CODE (align_exp) == CONST_INT)
12686     align = INTVAL (align_exp);
12687 
12688   /* Can't use any of this if the user has appropriated esi or edi.  */
12689   if (global_regs[4] || global_regs[5])
12690     return 0;
12691 
12692   /* This simple hack avoids all inlining code and simplifies code below.  */
12693   if (!TARGET_ALIGN_STRINGOPS)
12694     align = 64;
12695 
12696   if (GET_CODE (count_exp) == CONST_INT)
12697     {
12698       count = INTVAL (count_exp);
12699       if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
12700 	return 0;
12701     }
12702 
12703   /* Figure out the proper mode for the counter.  For 32 bits it is always
12704      SImode; for 64 bits use SImode when possible, otherwise DImode.
12705      Set count to the number of bytes copied when known at compile time.  */
12706   if (!TARGET_64BIT
12707       || GET_MODE (count_exp) == SImode
12708       || x86_64_zext_immediate_operand (count_exp, VOIDmode))
12709     counter_mode = SImode;
12710   else
12711     counter_mode = DImode;
12712 
12713   gcc_assert (counter_mode == SImode || counter_mode == DImode);
12714 
12715   destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
12716   if (destreg != XEXP (dst, 0))
12717     dst = replace_equiv_address_nv (dst, destreg);
12718   srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
12719   if (srcreg != XEXP (src, 0))
12720     src = replace_equiv_address_nv (src, srcreg);
12721 
12722   /* When optimizing for size, emit a simple rep ; movsb instruction for
12723      counts not divisible by 4, except when the (movsl;)*(movsw;)?(movsb;)?
12724      sequence is shorter than mov{b,l} $count, %{ecx,cl}; rep; movsb.
12725      The size of the (movsl;)*(movsw;)?(movsb;)? sequence is
12726      count / 4 + (count & 3); the other sequence is either 4 or 7 bytes,
12727      but we don't know whether the upper 24 (resp. 56) bits of %ecx will be
12728      known to be zero or not.  The rep; movsb sequence causes higher
12729      register pressure though, so take that into account.  */
12730 
12731   if ((!optimize || optimize_size)
12732       && (count == 0
12733 	  || ((count & 0x03)
12734 	      && (!optimize_size
12735 		  || count > 5 * 4
12736 		  || (count & 3) + count / 4 > 6))))
12737     {
12738       emit_insn (gen_cld ());
12739       countreg = ix86_zero_extend_to_Pmode (count_exp);
12740       destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
12741       srcexp = gen_rtx_PLUS (Pmode, srcreg, countreg);
12742       emit_insn (gen_rep_mov (destreg, dst, srcreg, src, countreg,
12743 			      destexp, srcexp));
12744     }
12745 
12746   /* For constant aligned (or small unaligned) copies use rep movsl
12747      followed by code copying the rest.  For PentiumPro ensure 8 byte
12748      alignment to allow rep movsl acceleration.  */
12749 
12750   else if (count != 0
12751 	   && (align >= 8
12752 	       || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
12753 	       || optimize_size || count < (unsigned int) 64))
12754     {
12755       unsigned HOST_WIDE_INT offset = 0;
12756       int size = TARGET_64BIT && !optimize_size ? 8 : 4;
12757       rtx srcmem, dstmem;
12758 
12759       emit_insn (gen_cld ());
12760       if (count & ~(size - 1))
12761 	{
12762 	  if ((TARGET_SINGLE_STRINGOP || optimize_size) && count < 5 * 4)
12763 	    {
12764 	      enum machine_mode movs_mode = size == 4 ? SImode : DImode;
12765 
12766 	      while (offset < (count & ~(size - 1)))
12767 		{
12768 		  srcmem = adjust_automodify_address_nv (src, movs_mode,
12769 							 srcreg, offset);
12770 		  dstmem = adjust_automodify_address_nv (dst, movs_mode,
12771 							 destreg, offset);
12772 		  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12773 		  offset += size;
12774 		}
12775 	    }
12776 	  else
12777 	    {
12778 	      countreg = GEN_INT ((count >> (size == 4 ? 2 : 3))
12779 				  & (TARGET_64BIT ? -1 : 0x3fffffff));
12780 	      countreg = copy_to_mode_reg (counter_mode, countreg);
12781 	      countreg = ix86_zero_extend_to_Pmode (countreg);
12782 
12783 	      destexp = gen_rtx_ASHIFT (Pmode, countreg,
12784 					GEN_INT (size == 4 ? 2 : 3));
12785 	      srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
12786 	      destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
12787 
12788 	      emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
12789 				      countreg, destexp, srcexp));
12790 	      offset = count & ~(size - 1);
12791 	    }
12792 	}
12793       if (size == 8 && (count & 0x04))
12794 	{
12795 	  srcmem = adjust_automodify_address_nv (src, SImode, srcreg,
12796 						 offset);
12797 	  dstmem = adjust_automodify_address_nv (dst, SImode, destreg,
12798 						 offset);
12799 	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12800 	  offset += 4;
12801 	}
12802       if (count & 0x02)
12803 	{
12804 	  srcmem = adjust_automodify_address_nv (src, HImode, srcreg,
12805 						 offset);
12806 	  dstmem = adjust_automodify_address_nv (dst, HImode, destreg,
12807 						 offset);
12808 	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12809 	  offset += 2;
12810 	}
12811       if (count & 0x01)
12812 	{
12813 	  srcmem = adjust_automodify_address_nv (src, QImode, srcreg,
12814 						 offset);
12815 	  dstmem = adjust_automodify_address_nv (dst, QImode, destreg,
12816 						 offset);
12817 	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12818 	}
12819     }
12820   /* The generic code based on the glibc implementation:
12821      - align destination to 4 bytes (8 byte alignment is used for PentiumPro
12822      allowing accelerated copying there)
12823      - copy the data using rep movsl
12824      - copy the rest.  */
12825   else
12826     {
12827       rtx countreg2;
12828       rtx label = NULL;
12829       rtx srcmem, dstmem;
12830       int desired_alignment = (TARGET_PENTIUMPRO
12831 			       && (count == 0 || count >= (unsigned int) 260)
12832 			       ? 8 : UNITS_PER_WORD);
12833       /* Get rid of MEM_OFFSETs, they won't be accurate.  */
12834       dst = change_address (dst, BLKmode, destreg);
12835       src = change_address (src, BLKmode, srcreg);
12836 
12837       /* In case we don't know anything about the alignment, default to
12838          the library version, since it is usually equally fast and results
12839          in shorter code.
12840 
12841 	 Also emit a call when we know that the count is large and the call
12842 	 overhead will not be important.  */
12843       if (!TARGET_INLINE_ALL_STRINGOPS
12844 	  && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
12845 	return 0;
12846 
12847       if (TARGET_SINGLE_STRINGOP)
12848 	emit_insn (gen_cld ());
12849 
12850       countreg2 = gen_reg_rtx (Pmode);
12851       countreg = copy_to_mode_reg (counter_mode, count_exp);
12852 
12853       /* We don't use loops to align destination and to copy parts smaller
12854          than 4 bytes, because gcc is able to optimize such code better (in
12855          the case the destination or the count really is aligned, gcc is often
12856          able to predict the branches) and also it is friendlier to the
12857          hardware branch prediction.
12858 
12859          Using loops is beneficial for the generic case, because we can
12860          handle small counts using the loops.  Many CPUs (such as Athlon)
12861          have large REP prefix setup costs.
12862 
12863          This is quite costly.  Maybe we can revisit this decision later or
12864          add some customizability to this code.  */
12865 
12866       if (count == 0 && align < desired_alignment)
12867 	{
12868 	  label = gen_label_rtx ();
12869 	  emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
12870 				   LEU, 0, counter_mode, 1, label);
12871 	}
12872       if (align <= 1)
12873 	{
12874 	  rtx label = ix86_expand_aligntest (destreg, 1);
12875 	  srcmem = change_address (src, QImode, srcreg);
12876 	  dstmem = change_address (dst, QImode, destreg);
12877 	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12878 	  ix86_adjust_counter (countreg, 1);
12879 	  emit_label (label);
12880 	  LABEL_NUSES (label) = 1;
12881 	}
12882       if (align <= 2)
12883 	{
12884 	  rtx label = ix86_expand_aligntest (destreg, 2);
12885 	  srcmem = change_address (src, HImode, srcreg);
12886 	  dstmem = change_address (dst, HImode, destreg);
12887 	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12888 	  ix86_adjust_counter (countreg, 2);
12889 	  emit_label (label);
12890 	  LABEL_NUSES (label) = 1;
12891 	}
12892       if (align <= 4 && desired_alignment > 4)
12893 	{
12894 	  rtx label = ix86_expand_aligntest (destreg, 4);
12895 	  srcmem = change_address (src, SImode, srcreg);
12896 	  dstmem = change_address (dst, SImode, destreg);
12897 	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12898 	  ix86_adjust_counter (countreg, 4);
12899 	  emit_label (label);
12900 	  LABEL_NUSES (label) = 1;
12901 	}
12902 
12903       if (label && desired_alignment > 4 && !TARGET_64BIT)
12904 	{
12905 	  emit_label (label);
12906 	  LABEL_NUSES (label) = 1;
12907 	  label = NULL_RTX;
12908 	}
12909       if (!TARGET_SINGLE_STRINGOP)
12910 	emit_insn (gen_cld ());
12911       if (TARGET_64BIT)
12912 	{
12913 	  emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
12914 				  GEN_INT (3)));
12915 	  destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
12916 	}
12917       else
12918 	{
12919 	  emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
12920 	  destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
12921 	}
12922       srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
12923       destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
12924       emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
12925 			      countreg2, destexp, srcexp));
12926 
12927       if (label)
12928 	{
12929 	  emit_label (label);
12930 	  LABEL_NUSES (label) = 1;
12931 	}
12932       if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
12933 	{
12934 	  srcmem = change_address (src, SImode, srcreg);
12935 	  dstmem = change_address (dst, SImode, destreg);
12936 	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12937 	}
12938       if ((align <= 4 || count == 0) && TARGET_64BIT)
12939 	{
12940 	  rtx label = ix86_expand_aligntest (countreg, 4);
12941 	  srcmem = change_address (src, SImode, srcreg);
12942 	  dstmem = change_address (dst, SImode, destreg);
12943 	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12944 	  emit_label (label);
12945 	  LABEL_NUSES (label) = 1;
12946 	}
12947       if (align > 2 && count != 0 && (count & 2))
12948 	{
12949 	  srcmem = change_address (src, HImode, srcreg);
12950 	  dstmem = change_address (dst, HImode, destreg);
12951 	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12952 	}
12953       if (align <= 2 || count == 0)
12954 	{
12955 	  rtx label = ix86_expand_aligntest (countreg, 2);
12956 	  srcmem = change_address (src, HImode, srcreg);
12957 	  dstmem = change_address (dst, HImode, destreg);
12958 	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12959 	  emit_label (label);
12960 	  LABEL_NUSES (label) = 1;
12961 	}
12962       if (align > 1 && count != 0 && (count & 1))
12963 	{
12964 	  srcmem = change_address (src, QImode, srcreg);
12965 	  dstmem = change_address (dst, QImode, destreg);
12966 	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12967 	}
12968       if (align <= 1 || count == 0)
12969 	{
12970 	  rtx label = ix86_expand_aligntest (countreg, 1);
12971 	  srcmem = change_address (src, QImode, srcreg);
12972 	  dstmem = change_address (dst, QImode, destreg);
12973 	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12974 	  emit_label (label);
12975 	  LABEL_NUSES (label) = 1;
12976 	}
12977     }
12978 
12979   return 1;
12980 }
12981 
12982 /* Expand string clear operation (bzero).  Use i386 string operations when
12983    profitable.  ix86_expand_movmem contains similar code.  */
12984 int
12985 ix86_expand_clrmem (rtx dst, rtx count_exp, rtx align_exp)
12986 {
12987   rtx destreg, zeroreg, countreg, destexp;
12988   enum machine_mode counter_mode;
12989   HOST_WIDE_INT align = 0;
12990   unsigned HOST_WIDE_INT count = 0;
12991 
12992   if (GET_CODE (align_exp) == CONST_INT)
12993     align = INTVAL (align_exp);
12994 
12995   /* Can't use any of this if the user has appropriated esi.  */
12996   if (global_regs[4])
12997     return 0;
12998 
12999   /* This simple hack avoids all inlining code and simplifies code below.  */
13000   if (!TARGET_ALIGN_STRINGOPS)
13001     align = 32;
13002 
13003   if (GET_CODE (count_exp) == CONST_INT)
13004     {
13005       count = INTVAL (count_exp);
13006       if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
13007 	return 0;
13008     }
13009   /* Figure out the proper mode for the counter.  For 32 bits it is always
13010      SImode; for 64 bits use SImode when possible, otherwise DImode.
13011      Set count to the number of bytes cleared when known at compile time.  */
13012   if (!TARGET_64BIT
13013       || GET_MODE (count_exp) == SImode
13014       || x86_64_zext_immediate_operand (count_exp, VOIDmode))
13015     counter_mode = SImode;
13016   else
13017     counter_mode = DImode;
13018 
13019   destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
13020   if (destreg != XEXP (dst, 0))
13021     dst = replace_equiv_address_nv (dst, destreg);
13022 
13023 
13024   /* When optimizing for size, emit a simple rep ; stosb instruction for
13025      counts not divisible by 4.  The movl $N, %ecx; rep; stosb
13026      sequence is 7 bytes long, so if optimizing for size and the count is
13027      small enough that some stosl, stosw and stosb instructions without
13028      rep are shorter, fall back into the next if.  */
13029 
13030   if ((!optimize || optimize_size)
13031       && (count == 0
13032 	  || ((count & 0x03)
13033 	      && (!optimize_size || (count & 0x03) + (count >> 2) > 7))))
13034     {
13035       emit_insn (gen_cld ());
13036 
13037       countreg = ix86_zero_extend_to_Pmode (count_exp);
13038       zeroreg = copy_to_mode_reg (QImode, const0_rtx);
13039       destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
13040       emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg, destexp));
13041     }
13042   else if (count != 0
13043 	   && (align >= 8
13044 	       || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
13045 	       || optimize_size || count < (unsigned int) 64))
13046     {
13047       int size = TARGET_64BIT && !optimize_size ? 8 : 4;
13048       unsigned HOST_WIDE_INT offset = 0;
13049 
13050       emit_insn (gen_cld ());
13051 
13052       zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
13053       if (count & ~(size - 1))
13054 	{
13055 	  unsigned HOST_WIDE_INT repcount;
13056 	  unsigned int max_nonrep;
13057 
13058 	  repcount = count >> (size == 4 ? 2 : 3);
13059 	  if (!TARGET_64BIT)
13060 	    repcount &= 0x3fffffff;
13061 
13062 	  /* movl $N, %ecx; rep; stosl is 7 bytes, while N x stosl is N bytes.
13063 	     movl $N, %ecx; rep; stosq is 8 bytes, while N x stosq is 2xN
13064 	     bytes.  In both cases the latter seems to be faster for small
13065 	     values of N.  */
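	  /* For example, clearing 16 bytes with size == 4 gives
	     repcount == 4: four stosl instructions take 4 bytes versus
	     7 bytes for "movl $4, %ecx ; rep ; stosl", so the unrolled
	     form below is used whenever repcount <= max_nonrep.  */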
13066 	  max_nonrep = size == 4 ? 7 : 4;
13067 	  if (!optimize_size)
13068 	    switch (ix86_tune)
13069 	      {
13070 	      case PROCESSOR_PENTIUM4:
13071 	      case PROCESSOR_NOCONA:
13072 	        max_nonrep = 3;
13073 	        break;
13074 	      default:
13075 	        break;
13076 	      }
13077 
13078 	  if (repcount <= max_nonrep)
13079 	    while (repcount-- > 0)
13080 	      {
13081 		rtx mem = adjust_automodify_address_nv (dst,
13082 							GET_MODE (zeroreg),
13083 							destreg, offset);
13084 		emit_insn (gen_strset (destreg, mem, zeroreg));
13085 		offset += size;
13086 	      }
13087 	  else
13088 	    {
13089 	      countreg = copy_to_mode_reg (counter_mode, GEN_INT (repcount));
13090 	      countreg = ix86_zero_extend_to_Pmode (countreg);
13091 	      destexp = gen_rtx_ASHIFT (Pmode, countreg,
13092 					GEN_INT (size == 4 ? 2 : 3));
13093 	      destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
13094 	      emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg,
13095 				       destexp));
13096 	      offset = count & ~(size - 1);
13097 	    }
13098 	}
13099       if (size == 8 && (count & 0x04))
13100 	{
13101 	  rtx mem = adjust_automodify_address_nv (dst, SImode, destreg,
13102 						  offset);
13103 	  emit_insn (gen_strset (destreg, mem,
13104 				 gen_rtx_SUBREG (SImode, zeroreg, 0)));
13105 	  offset += 4;
13106 	}
13107       if (count & 0x02)
13108 	{
13109 	  rtx mem = adjust_automodify_address_nv (dst, HImode, destreg,
13110 						  offset);
13111 	  emit_insn (gen_strset (destreg, mem,
13112 				 gen_rtx_SUBREG (HImode, zeroreg, 0)));
13113 	  offset += 2;
13114 	}
13115       if (count & 0x01)
13116 	{
13117 	  rtx mem = adjust_automodify_address_nv (dst, QImode, destreg,
13118 						  offset);
13119 	  emit_insn (gen_strset (destreg, mem,
13120 				 gen_rtx_SUBREG (QImode, zeroreg, 0)));
13121 	}
13122     }
13123   else
13124     {
13125       rtx countreg2;
13126       rtx label = NULL;
13127       /* Compute desired alignment of the string operation.  */
13128       int desired_alignment = (TARGET_PENTIUMPRO
13129 			       && (count == 0 || count >= (unsigned int) 260)
13130 			       ? 8 : UNITS_PER_WORD);
13131 
13132       /* In case we don't know anything about the alignment, default to
13133          the library version, since it is usually equally fast and results
13134          in shorter code.
13135 
13136 	 Also emit a call when we know that the count is large and the call
13137 	 overhead will not be important.  */
13138       if (!TARGET_INLINE_ALL_STRINGOPS
13139 	  && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
13140 	return 0;
13141 
13142       if (TARGET_SINGLE_STRINGOP)
13143 	emit_insn (gen_cld ());
13144 
13145       countreg2 = gen_reg_rtx (Pmode);
13146       countreg = copy_to_mode_reg (counter_mode, count_exp);
13147       zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
13148       /* Get rid of MEM_OFFSET, it won't be accurate.  */
13149       dst = change_address (dst, BLKmode, destreg);
13150 
13151       if (count == 0 && align < desired_alignment)
13152 	{
13153 	  label = gen_label_rtx ();
13154 	  emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
13155 				   LEU, 0, counter_mode, 1, label);
13156 	}
13157       if (align <= 1)
13158 	{
13159 	  rtx label = ix86_expand_aligntest (destreg, 1);
13160 	  emit_insn (gen_strset (destreg, dst,
13161 				 gen_rtx_SUBREG (QImode, zeroreg, 0)));
13162 	  ix86_adjust_counter (countreg, 1);
13163 	  emit_label (label);
13164 	  LABEL_NUSES (label) = 1;
13165 	}
13166       if (align <= 2)
13167 	{
13168 	  rtx label = ix86_expand_aligntest (destreg, 2);
13169 	  emit_insn (gen_strset (destreg, dst,
13170 				 gen_rtx_SUBREG (HImode, zeroreg, 0)));
13171 	  ix86_adjust_counter (countreg, 2);
13172 	  emit_label (label);
13173 	  LABEL_NUSES (label) = 1;
13174 	}
13175       if (align <= 4 && desired_alignment > 4)
13176 	{
13177 	  rtx label = ix86_expand_aligntest (destreg, 4);
13178 	  emit_insn (gen_strset (destreg, dst,
13179 				 (TARGET_64BIT
13180 				  ? gen_rtx_SUBREG (SImode, zeroreg, 0)
13181 				  : zeroreg)));
13182 	  ix86_adjust_counter (countreg, 4);
13183 	  emit_label (label);
13184 	  LABEL_NUSES (label) = 1;
13185 	}
13186 
13187       if (label && desired_alignment > 4 && !TARGET_64BIT)
13188 	{
13189 	  emit_label (label);
13190 	  LABEL_NUSES (label) = 1;
13191 	  label = NULL_RTX;
13192 	}
13193 
13194       if (!TARGET_SINGLE_STRINGOP)
13195 	emit_insn (gen_cld ());
13196       if (TARGET_64BIT)
13197 	{
13198 	  emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
13199 				  GEN_INT (3)));
13200 	  destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
13201 	}
13202       else
13203 	{
13204 	  emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
13205 	  destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
13206 	}
13207       destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
13208       emit_insn (gen_rep_stos (destreg, countreg2, dst, zeroreg, destexp));
13209 
13210       if (label)
13211 	{
13212 	  emit_label (label);
13213 	  LABEL_NUSES (label) = 1;
13214 	}
13215 
13216       if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
13217 	emit_insn (gen_strset (destreg, dst,
13218 			       gen_rtx_SUBREG (SImode, zeroreg, 0)));
13219       if (TARGET_64BIT && (align <= 4 || count == 0))
13220 	{
13221 	  rtx label = ix86_expand_aligntest (countreg, 4);
13222 	  emit_insn (gen_strset (destreg, dst,
13223 				 gen_rtx_SUBREG (SImode, zeroreg, 0)));
13224 	  emit_label (label);
13225 	  LABEL_NUSES (label) = 1;
13226 	}
13227       if (align > 2 && count != 0 && (count & 2))
13228 	emit_insn (gen_strset (destreg, dst,
13229 			       gen_rtx_SUBREG (HImode, zeroreg, 0)));
13230       if (align <= 2 || count == 0)
13231 	{
13232 	  rtx label = ix86_expand_aligntest (countreg, 2);
13233 	  emit_insn (gen_strset (destreg, dst,
13234 				 gen_rtx_SUBREG (HImode, zeroreg, 0)));
13235 	  emit_label (label);
13236 	  LABEL_NUSES (label) = 1;
13237 	}
13238       if (align > 1 && count != 0 && (count & 1))
13239 	emit_insn (gen_strset (destreg, dst,
13240 			       gen_rtx_SUBREG (QImode, zeroreg, 0)));
13241       if (align <= 1 || count == 0)
13242 	{
13243 	  rtx label = ix86_expand_aligntest (countreg, 1);
13244 	  emit_insn (gen_strset (destreg, dst,
13245 				 gen_rtx_SUBREG (QImode, zeroreg, 0)));
13246 	  emit_label (label);
13247 	  LABEL_NUSES (label) = 1;
13248 	}
13249     }
13250   return 1;
13251 }
13252 
13253 /* Expand strlen.  */
13254 int
13255 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
13256 {
13257   rtx addr, scratch1, scratch2, scratch3, scratch4;
13258 
13259   /* The generic case of the strlen expander is long.  Avoid its
13260      expansion unless TARGET_INLINE_ALL_STRINGOPS.  */
13261 
13262   if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
13263       && !TARGET_INLINE_ALL_STRINGOPS
13264       && !optimize_size
13265       && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
13266     return 0;
13267 
13268   addr = force_reg (Pmode, XEXP (src, 0));
13269   scratch1 = gen_reg_rtx (Pmode);
13270 
13271   if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
13272       && !optimize_size)
13273     {
13274       /* Well it seems that some optimizer does not combine a call like
13275          foo(strlen(bar), strlen(bar));
13276          when the move and the subtraction are done here.  It does calculate
13277          the length just once when these instructions are done inside of
13278          output_strlen_unroll().  But I think since &bar[strlen(bar)] is
13279          often used and I use one fewer register for the lifetime of
13280          output_strlen_unroll() this is better.  */
13281 
13282       emit_move_insn (out, addr);
13283 
13284       ix86_expand_strlensi_unroll_1 (out, src, align);
13285 
13286       /* strlensi_unroll_1 returns the address of the zero at the end of
13287          the string, like memchr(), so compute the length by subtracting
13288          the start address.  */
13289       if (TARGET_64BIT)
13290 	emit_insn (gen_subdi3 (out, out, addr));
13291       else
13292 	emit_insn (gen_subsi3 (out, out, addr));
13293     }
13294   else
13295     {
13296       rtx unspec;
13297       scratch2 = gen_reg_rtx (Pmode);
13298       scratch3 = gen_reg_rtx (Pmode);
13299       scratch4 = force_reg (Pmode, constm1_rtx);
13300 
13301       emit_move_insn (scratch3, addr);
13302       eoschar = force_reg (QImode, eoschar);
13303 
13304       emit_insn (gen_cld ());
13305       src = replace_equiv_address_nv (src, scratch3);
13306 
13307       /* If .md starts supporting :P, this can be done in .md.  */
13308       unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
13309 						 scratch4), UNSPEC_SCAS);
13310       emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
13311       if (TARGET_64BIT)
13312 	{
13313 	  emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
13314 	  emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
13315 	}
13316       else
13317 	{
13318 	  emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
13319 	  emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
13320 	}
13321     }
13322   return 1;
13323 }
13324 
13325 /* Expand the appropriate insns for doing strlen if not just doing
13326    repnz; scasb
13327 
13328    out = result, initialized with the start address
13329    align_rtx = alignment of the address.
13330    scratch = scratch register, initialized with the start address when
13331 	not aligned, otherwise undefined
13332 
13333    This is just the body. It needs the initializations mentioned above and
13334    some address computing at the end.  These things are done in i386.md.  */
13335 
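/* Sketch of the code emitted by ix86_expand_strlensi_unroll_1 (a summary of
   the expansion below, not a literal listing):

     - If the pointer alignment is below 4, compare up to three leading
       bytes against zero one at a time, bumping OUT, until OUT is
       4-byte aligned.
     - Main loop (align_4_label): load 4 bytes, advance OUT by 4, and use
       the (x - 0x01010101) & ~x & 0x80808080 test to detect a zero byte;
       loop while no zero byte is found.
     - Epilogue: locate the zero byte within the final word (with cmov when
       available, otherwise with a test and conditional jump), then the
       carry-based subtraction leaves OUT pointing at the terminating
       zero byte.  */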
13336 static void
13337 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
13338 {
13339   int align;
13340   rtx tmp;
13341   rtx align_2_label = NULL_RTX;
13342   rtx align_3_label = NULL_RTX;
13343   rtx align_4_label = gen_label_rtx ();
13344   rtx end_0_label = gen_label_rtx ();
13345   rtx mem;
13346   rtx tmpreg = gen_reg_rtx (SImode);
13347   rtx scratch = gen_reg_rtx (SImode);
13348   rtx cmp;
13349 
13350   align = 0;
13351   if (GET_CODE (align_rtx) == CONST_INT)
13352     align = INTVAL (align_rtx);
13353 
13354   /* Loop to check 1..3 bytes for null to get an aligned pointer.  */
13355 
13356   /* Is there a known alignment and is it less than 4?  */
13357   if (align < 4)
13358     {
13359       rtx scratch1 = gen_reg_rtx (Pmode);
13360       emit_move_insn (scratch1, out);
13361       /* Is there a known alignment and is it not 2? */
13362       if (align != 2)
13363 	{
13364 	  align_3_label = gen_label_rtx (); /* Label for address % 4 == 3.  */
13365 	  align_2_label = gen_label_rtx (); /* Label for address % 4 == 2.  */
13366 
13367 	  /* Leave just the 2 lower bits.  */
13368 	  align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
13369 				    NULL_RTX, 0, OPTAB_WIDEN);
13370 
13371 	  emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
13372 				   Pmode, 1, align_4_label);
13373 	  emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
13374 				   Pmode, 1, align_2_label);
13375 	  emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
13376 				   Pmode, 1, align_3_label);
13377 	}
13378       else
13379         {
13380 	  /* Since the alignment is 2, we have to check 2 or 0 bytes;
13381 	     check whether it is aligned to a 4-byte boundary.  */
13382 
13383 	  align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
13384 				    NULL_RTX, 0, OPTAB_WIDEN);
13385 
13386 	  emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
13387 				   Pmode, 1, align_4_label);
13388         }
13389 
13390       mem = change_address (src, QImode, out);
13391 
13392       /* Now compare the bytes.  */
13393 
13394       /* Compare the first n unaligned bytes on a byte-by-byte basis.  */
13395       emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
13396 			       QImode, 1, end_0_label);
13397 
13398       /* Increment the address.  */
13399       if (TARGET_64BIT)
13400 	emit_insn (gen_adddi3 (out, out, const1_rtx));
13401       else
13402 	emit_insn (gen_addsi3 (out, out, const1_rtx));
13403 
13404       /* Not needed with an alignment of 2 */
13405       if (align != 2)
13406 	{
13407 	  emit_label (align_2_label);
13408 
13409 	  emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
13410 				   end_0_label);
13411 
13412 	  if (TARGET_64BIT)
13413 	    emit_insn (gen_adddi3 (out, out, const1_rtx));
13414 	  else
13415 	    emit_insn (gen_addsi3 (out, out, const1_rtx));
13416 
13417 	  emit_label (align_3_label);
13418 	}
13419 
13420       emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
13421 			       end_0_label);
13422 
13423       if (TARGET_64BIT)
13424 	emit_insn (gen_adddi3 (out, out, const1_rtx));
13425       else
13426 	emit_insn (gen_addsi3 (out, out, const1_rtx));
13427     }
13428 
13429   /* Generate a loop to check 4 bytes at a time.  It is not a good idea to
13430      align this loop; it only makes the program larger and does not help
13431      to speed it up.  */
13432   emit_label (align_4_label);
13433 
13434   mem = change_address (src, SImode, out);
13435   emit_move_insn (scratch, mem);
13436   if (TARGET_64BIT)
13437     emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
13438   else
13439     emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
13440 
13441   /* This formula yields a nonzero result iff one of the bytes is zero.
13442      This saves three branches inside the loop and many cycles.  */
13443 
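  /* A worked example of the test: for x = 0x12005634 (which contains a
     zero byte), (x - 0x01010101) = 0x10ff5533 and ~x = 0xedffa9cb; their
     AND is 0x00ff0103, and masking with 0x80808080 leaves 0x00800000,
     which is nonzero.  For x = 0x11223344 (no zero byte) the same steps
     give 0x00010003, and the 0x80808080 mask clears it to zero.  */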
13444   emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
13445   emit_insn (gen_one_cmplsi2 (scratch, scratch));
13446   emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
13447   emit_insn (gen_andsi3 (tmpreg, tmpreg,
13448 			 gen_int_mode (0x80808080, SImode)));
13449   emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
13450 			   align_4_label);
13451 
13452   if (TARGET_CMOVE)
13453     {
13454        rtx reg = gen_reg_rtx (SImode);
13455        rtx reg2 = gen_reg_rtx (Pmode);
13456        emit_move_insn (reg, tmpreg);
13457        emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
13458 
13459        /* If zero is not in the first two bytes, move two bytes forward.  */
13460        emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
13461        tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
13462        tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
13463        emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
13464 			       gen_rtx_IF_THEN_ELSE (SImode, tmp,
13465 						     reg,
13466 						     tmpreg)));
13467        /* Emit lea manually to avoid clobbering of flags.  */
13468        emit_insn (gen_rtx_SET (SImode, reg2,
13469 			       gen_rtx_PLUS (Pmode, out, const2_rtx)));
13470 
13471        tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
13472        tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
13473        emit_insn (gen_rtx_SET (VOIDmode, out,
13474 			       gen_rtx_IF_THEN_ELSE (Pmode, tmp,
13475 						     reg2,
13476 						     out)));
13477 
13478     }
13479   else
13480     {
13481        rtx end_2_label = gen_label_rtx ();
13482        /* Is zero in the first two bytes? */
13483 
13484        emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
13485        tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
13486        tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
13487        tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
13488                             gen_rtx_LABEL_REF (VOIDmode, end_2_label),
13489                             pc_rtx);
13490        tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
13491        JUMP_LABEL (tmp) = end_2_label;
13492 
13493        /* Not in the first two.  Move two bytes forward.  */
13494        emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
13495        if (TARGET_64BIT)
13496 	 emit_insn (gen_adddi3 (out, out, const2_rtx));
13497        else
13498 	 emit_insn (gen_addsi3 (out, out, const2_rtx));
13499 
13500        emit_label (end_2_label);
13501 
13502     }
13503 
13504   /* Avoid branch in fixing the byte.  */
13505   tmpreg = gen_lowpart (QImode, tmpreg);
13506   emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
13507   cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, FLAGS_REG), const0_rtx);
13508   if (TARGET_64BIT)
13509     emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
13510   else
13511     emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));
13512 
13513   emit_label (end_0_label);
13514 }
13515 
13516 void
13517 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
13518 		  rtx callarg2 ATTRIBUTE_UNUSED,
13519 		  rtx pop, int sibcall)
13520 {
13521   rtx use = NULL, call;
13522 
13523   if (pop == const0_rtx)
13524     pop = NULL;
13525   gcc_assert (!TARGET_64BIT || !pop);
13526 
13527   if (TARGET_MACHO && !TARGET_64BIT)
13528     {
13529 #if TARGET_MACHO
13530       if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
13531 	fnaddr = machopic_indirect_call_target (fnaddr);
13532 #endif
13533     }
13534   else
13535     {
13536       /* Static functions and indirect calls don't need the pic register.  */
13537       if (! TARGET_64BIT && flag_pic
13538 	  && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
13539 	  && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
13540 	use_reg (&use, pic_offset_table_rtx);
13541     }
13542 
13543   if (TARGET_64BIT && INTVAL (callarg2) >= 0)
13544     {
13545       rtx al = gen_rtx_REG (QImode, 0);
13546       emit_move_insn (al, callarg2);
13547       use_reg (&use, al);
13548     }
13549 
13550   if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
13551     {
13552       fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
13553       fnaddr = gen_rtx_MEM (QImode, fnaddr);
13554     }
13555   if (sibcall && TARGET_64BIT
13556       && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
13557     {
13558       rtx addr;
13559       addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
13560       fnaddr = gen_rtx_REG (Pmode, FIRST_REX_INT_REG + 3 /* R11 */);
13561       emit_move_insn (fnaddr, addr);
13562       fnaddr = gen_rtx_MEM (QImode, fnaddr);
13563     }
13564 
13565   call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
13566   if (retval)
13567     call = gen_rtx_SET (VOIDmode, retval, call);
13568   if (pop)
13569     {
13570       pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
13571       pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
13572       call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
13573     }
13574 
13575   call = emit_call_insn (call);
13576   if (use)
13577     CALL_INSN_FUNCTION_USAGE (call) = use;
13578 }
13579 
13580 
13581 /* Clear stack slot assignments remembered from previous functions.
13582    This is called from INIT_EXPANDERS once before RTL is emitted for each
13583    function.  */
13584 
13585 static struct machine_function *
13586 ix86_init_machine_status (void)
13587 {
13588   struct machine_function *f;
13589 
13590   f = ggc_alloc_cleared (sizeof (struct machine_function));
13591   f->use_fast_prologue_epilogue_nregs = -1;
13592   f->tls_descriptor_call_expanded_p = 0;
13593 
13594   return f;
13595 }
13596 
13597 /* Return a MEM corresponding to a stack slot with mode MODE.
13598    Allocate a new slot if necessary.
13599 
13600    The RTL for a function can have several slots available: N is
13601    which slot to use.  */
13602 
13603 rtx
13604 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
13605 {
13606   struct stack_local_entry *s;
13607 
13608   gcc_assert (n < MAX_386_STACK_LOCALS);
13609 
13610   /* Virtual slot is valid only before vregs are instantiated.  */
13611   gcc_assert ((n == SLOT_VIRTUAL) == !virtuals_instantiated);
13612 
13613   for (s = ix86_stack_locals; s; s = s->next)
13614     if (s->mode == mode && s->n == n)
13615       return s->rtl;
13616 
13617   s = (struct stack_local_entry *)
13618     ggc_alloc (sizeof (struct stack_local_entry));
13619   s->n = n;
13620   s->mode = mode;
13621   s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
13622 
13623   s->next = ix86_stack_locals;
13624   ix86_stack_locals = s;
13625   return s->rtl;
13626 }
13627 
13628 /* Construct the SYMBOL_REF for the tls_get_addr function.  */
13629 
13630 static GTY(()) rtx ix86_tls_symbol;
13631 rtx
13632 ix86_tls_get_addr (void)
13633 {
13634 
13635   if (!ix86_tls_symbol)
13636     {
13637       ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
13638 					    (TARGET_ANY_GNU_TLS
13639 					     && !TARGET_64BIT)
13640 					    ? "___tls_get_addr"
13641 					    : "__tls_get_addr");
13642     }
13643 
13644   return ix86_tls_symbol;
13645 }
13646 
13647 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol.  */
13648 
13649 static GTY(()) rtx ix86_tls_module_base_symbol;
13650 rtx
13651 ix86_tls_module_base (void)
13652 {
13653 
13654   if (!ix86_tls_module_base_symbol)
13655     {
13656       ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode,
13657 							"_TLS_MODULE_BASE_");
13658       SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
13659 	|= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
13660     }
13661 
13662   return ix86_tls_module_base_symbol;
13663 }
13664 
13665 /* Calculate the length of the memory address in the instruction
13666    encoding.  Does not include the one-byte modrm, opcode, or prefix.  */
13667 
13668 int
13669 memory_address_length (rtx addr)
13670 {
13671   struct ix86_address parts;
13672   rtx base, index, disp;
13673   int len;
13674   int ok;
13675 
13676   if (GET_CODE (addr) == PRE_DEC
13677       || GET_CODE (addr) == POST_INC
13678       || GET_CODE (addr) == PRE_MODIFY
13679       || GET_CODE (addr) == POST_MODIFY)
13680     return 0;
13681 
13682   ok = ix86_decompose_address (addr, &parts);
13683   gcc_assert (ok);
13684 
13685   if (parts.base && GET_CODE (parts.base) == SUBREG)
13686     parts.base = SUBREG_REG (parts.base);
13687   if (parts.index && GET_CODE (parts.index) == SUBREG)
13688     parts.index = SUBREG_REG (parts.index);
13689 
13690   base = parts.base;
13691   index = parts.index;
13692   disp = parts.disp;
13693   len = 0;
13694 
13695   /* Rule of thumb:
13696        - esp as the base always wants an index,
13697        - ebp as the base always wants a displacement.  */
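  /* Some examples of the extra length computed below (beyond the modrm
     byte itself): (%ebx) needs nothing extra (0); (%esp) needs a SIB
     byte (1); 8(%ebp) needs a disp8 (1); foo(,%eax,4) needs a SIB byte
     plus a disp32 (5); 0x12345678(%ebx) needs a disp32 (4).  */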
13698 
13699   /* Register Indirect.  */
13700   if (base && !index && !disp)
13701     {
13702       /* esp (for its index) and ebp (for its displacement) need
13703 	 the two-byte modrm form.  */
13704       if (addr == stack_pointer_rtx
13705 	  || addr == arg_pointer_rtx
13706 	  || addr == frame_pointer_rtx
13707 	  || addr == hard_frame_pointer_rtx)
13708 	len = 1;
13709     }
13710 
13711   /* Direct Addressing.  */
13712   else if (disp && !base && !index)
13713     len = 4;
13714 
13715   else
13716     {
13717       /* Find the length of the displacement constant.  */
13718       if (disp)
13719 	{
13720 	  if (base && satisfies_constraint_K (disp))
13721 	    len = 1;
13722 	  else
13723 	    len = 4;
13724 	}
13725       /* ebp always wants a displacement.  */
13726       else if (base == hard_frame_pointer_rtx)
13727         len = 1;
13728 
13729       /* An index requires the two-byte modrm form....  */
13730       if (index
13731 	  /* ...like esp, which always wants an index.  */
13732 	  || base == stack_pointer_rtx
13733 	  || base == arg_pointer_rtx
13734 	  || base == frame_pointer_rtx)
13735 	len += 1;
13736     }
13737 
13738   return len;
13739 }
13740 
13741 /* Compute default value for "length_immediate" attribute.  When SHORTFORM
13742    is set, expect that the insn has an 8-bit immediate alternative.  */
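/* For example, "addl $12345678, %eax" carries a 4-byte immediate in
   MODE_SI, whereas with SHORTFORM an insn such as "addl $8, %eax" can use
   the sign-extended 8-bit immediate form and contributes only 1 byte.  */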
13743 int
13744 ix86_attr_length_immediate_default (rtx insn, int shortform)
13745 {
13746   int len = 0;
13747   int i;
13748   extract_insn_cached (insn);
13749   for (i = recog_data.n_operands - 1; i >= 0; --i)
13750     if (CONSTANT_P (recog_data.operand[i]))
13751       {
13752 	gcc_assert (!len);
13753 	if (shortform && satisfies_constraint_K (recog_data.operand[i]))
13754 	  len = 1;
13755 	else
13756 	  {
13757 	    switch (get_attr_mode (insn))
13758 	      {
13759 		case MODE_QI:
13760 		  len+=1;
13761 		  break;
13762 		case MODE_HI:
13763 		  len+=2;
13764 		  break;
13765 		case MODE_SI:
13766 		  len+=4;
13767 		  break;
13768 		/* Immediates for DImode instructions are encoded as 32-bit sign-extended values.  */
13769 		case MODE_DI:
13770 		  len+=4;
13771 		  break;
13772 		default:
13773 		  fatal_insn ("unknown insn mode", insn);
13774 	      }
13775 	  }
13776       }
13777   return len;
13778 }
13779 /* Compute default value for "length_address" attribute.  */
13780 int
13781 ix86_attr_length_address_default (rtx insn)
13782 {
13783   int i;
13784 
13785   if (get_attr_type (insn) == TYPE_LEA)
13786     {
13787       rtx set = PATTERN (insn);
13788 
13789       if (GET_CODE (set) == PARALLEL)
13790 	set = XVECEXP (set, 0, 0);
13791 
13792       gcc_assert (GET_CODE (set) == SET);
13793 
13794       return memory_address_length (SET_SRC (set));
13795     }
13796 
13797   extract_insn_cached (insn);
13798   for (i = recog_data.n_operands - 1; i >= 0; --i)
13799     if (GET_CODE (recog_data.operand[i]) == MEM)
13800       {
13801 	return memory_address_length (XEXP (recog_data.operand[i], 0));
13802 	break;
13803       }
13804   return 0;
13805 }
13806 
13807 /* Return the maximum number of instructions a cpu can issue.  */
13808 
13809 static int
13810 ix86_issue_rate (void)
13811 {
13812   switch (ix86_tune)
13813     {
13814     case PROCESSOR_PENTIUM:
13815     case PROCESSOR_K6:
13816       return 2;
13817 
13818     case PROCESSOR_PENTIUMPRO:
13819     case PROCESSOR_PENTIUM4:
13820     case PROCESSOR_ATHLON:
13821     case PROCESSOR_K8:
13822     case PROCESSOR_NOCONA:
13823     case PROCESSOR_GENERIC32:
13824     case PROCESSOR_GENERIC64:
13825       return 3;
13826 
13827     default:
13828       return 1;
13829     }
13830 }
13831 
13832 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
13833    by DEP_INSN and nothing else set by DEP_INSN.  */
13834 
13835 static int
13836 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
13837 {
13838   rtx set, set2;
13839 
13840   /* Simplify the test for uninteresting insns.  */
13841   if (insn_type != TYPE_SETCC
13842       && insn_type != TYPE_ICMOV
13843       && insn_type != TYPE_FCMOV
13844       && insn_type != TYPE_IBR)
13845     return 0;
13846 
13847   if ((set = single_set (dep_insn)) != 0)
13848     {
13849       set = SET_DEST (set);
13850       set2 = NULL_RTX;
13851     }
13852   else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
13853 	   && XVECLEN (PATTERN (dep_insn), 0) == 2
13854 	   && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
13855 	   && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
13856     {
13857       set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
13858       set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
13859     }
13860   else
13861     return 0;
13862 
13863   if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
13864     return 0;
13865 
13866   /* This test is true if the dependent insn reads the flags but
13867      not any other potentially set register.  */
13868   if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
13869     return 0;
13870 
13871   if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
13872     return 0;
13873 
13874   return 1;
13875 }
13876 
13877 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
13878    address with operands set by DEP_INSN.  */
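/* For example, on the original Pentium the sequence
       movl %ebx, %eax
       movl 4(%eax), %ecx
   suffers an address generation interlock because the address of the
   second insn uses %eax, which is produced by the first; this function
   detects that situation so ix86_adjust_cost can add the penalty.  */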
13879 
13880 static int
13881 ix86_agi_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
13882 {
13883   rtx addr;
13884 
13885   if (insn_type == TYPE_LEA
13886       && TARGET_PENTIUM)
13887     {
13888       addr = PATTERN (insn);
13889 
13890       if (GET_CODE (addr) == PARALLEL)
13891 	addr = XVECEXP (addr, 0, 0);
13892 
13893       gcc_assert (GET_CODE (addr) == SET);
13894 
13895       addr = SET_SRC (addr);
13896     }
13897   else
13898     {
13899       int i;
13900       extract_insn_cached (insn);
13901       for (i = recog_data.n_operands - 1; i >= 0; --i)
13902 	if (GET_CODE (recog_data.operand[i]) == MEM)
13903 	  {
13904 	    addr = XEXP (recog_data.operand[i], 0);
13905 	    goto found;
13906 	  }
13907       return 0;
13908     found:;
13909     }
13910 
13911   return modified_in_p (addr, dep_insn);
13912 }
13913 
13914 static int
13915 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
13916 {
13917   enum attr_type insn_type, dep_insn_type;
13918   enum attr_memory memory;
13919   rtx set, set2;
13920   int dep_insn_code_number;
13921 
13922   /* Anti and output dependencies have zero cost on all CPUs.  */
13923   if (REG_NOTE_KIND (link) != 0)
13924     return 0;
13925 
13926   dep_insn_code_number = recog_memoized (dep_insn);
13927 
13928   /* If we can't recognize the insns, we can't really do anything.  */
13929   if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
13930     return cost;
13931 
13932   insn_type = get_attr_type (insn);
13933   dep_insn_type = get_attr_type (dep_insn);
13934 
13935   switch (ix86_tune)
13936     {
13937     case PROCESSOR_PENTIUM:
13938       /* Address Generation Interlock adds a cycle of latency.  */
13939       if (ix86_agi_dependent (insn, dep_insn, insn_type))
13940 	cost += 1;
13941 
13942       /* ??? Compares pair with jump/setcc.  */
13943       if (ix86_flags_dependent (insn, dep_insn, insn_type))
13944 	cost = 0;
13945 
13946       /* Floating point stores require value to be ready one cycle earlier.  */
13947       if (insn_type == TYPE_FMOV
13948 	  && get_attr_memory (insn) == MEMORY_STORE
13949 	  && !ix86_agi_dependent (insn, dep_insn, insn_type))
13950 	cost += 1;
13951       break;
13952 
13953     case PROCESSOR_PENTIUMPRO:
13954       memory = get_attr_memory (insn);
13955 
13956       /* INT->FP conversion is expensive.  */
13957       if (get_attr_fp_int_src (dep_insn))
13958 	cost += 5;
13959 
13960       /* There is one cycle extra latency between an FP op and a store.  */
13961       if (insn_type == TYPE_FMOV
13962 	  && (set = single_set (dep_insn)) != NULL_RTX
13963 	  && (set2 = single_set (insn)) != NULL_RTX
13964 	  && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
13965 	  && GET_CODE (SET_DEST (set2)) == MEM)
13966 	cost += 1;
13967 
13968       /* Model the ability of the reorder buffer to hide the latency of a
13969 	 load by executing it in parallel with the previous instruction, when
13970 	 the previous instruction is not needed to compute the address.  */
13971       if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
13972 	  && !ix86_agi_dependent (insn, dep_insn, insn_type))
13973 	{
13974 	  /* Claim that moves take one cycle, as the core can issue one load
13975 	     at a time and the next load can start a cycle later.  */
13976 	  if (dep_insn_type == TYPE_IMOV
13977 	      || dep_insn_type == TYPE_FMOV)
13978 	    cost = 1;
13979 	  else if (cost > 1)
13980 	    cost--;
13981 	}
13982       break;
13983 
13984     case PROCESSOR_K6:
13985       memory = get_attr_memory (insn);
13986 
13987       /* The esp dependency is resolved before the instruction is really
13988          finished.  */
13989       if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
13990 	  && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
13991 	return 1;
13992 
13993       /* INT->FP conversion is expensive.  */
13994       if (get_attr_fp_int_src (dep_insn))
13995 	cost += 5;
13996 
13997       /* Model the ability of the reorder buffer to hide the latency of a
13998 	 load by executing it in parallel with the previous instruction, when
13999 	 the previous instruction is not needed to compute the address.  */
14000       if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
14001 	  && !ix86_agi_dependent (insn, dep_insn, insn_type))
14002 	{
14003 	  /* Claim that moves take one cycle, as the core can issue one load
14004 	     at a time and the next load can start a cycle later.  */
14005 	  if (dep_insn_type == TYPE_IMOV
14006 	      || dep_insn_type == TYPE_FMOV)
14007 	    cost = 1;
14008 	  else if (cost > 2)
14009 	    cost -= 2;
14010 	  else
14011 	    cost = 1;
14012 	}
14013       break;
14014 
14015     case PROCESSOR_ATHLON:
14016     case PROCESSOR_K8:
14017     case PROCESSOR_GENERIC32:
14018     case PROCESSOR_GENERIC64:
14019       memory = get_attr_memory (insn);
14020 
14021       /* Model the ability of the reorder buffer to hide the latency of a
14022 	 load by executing it in parallel with the previous instruction, when
14023 	 the previous instruction is not needed to compute the address.  */
14024       if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
14025 	  && !ix86_agi_dependent (insn, dep_insn, insn_type))
14026 	{
14027 	  enum attr_unit unit = get_attr_unit (insn);
14028 	  int loadcost = 3;
14029 
14030 	  /* Because of the difference between the length of integer and
14031 	     floating unit pipeline preparation stages, the memory operands
14032 	     for floating point are cheaper.
14033 
14034 	     ??? For Athlon the difference is most probably 2.  */
14035 	  if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
14036 	    loadcost = 3;
14037 	  else
14038 	    loadcost = TARGET_ATHLON ? 2 : 0;
14039 
14040 	  if (cost >= loadcost)
14041 	    cost -= loadcost;
14042 	  else
14043 	    cost = 0;
14044 	}
14045 
14046     default:
14047       break;
14048     }
14049 
14050   return cost;
14051 }
14052 
14053 /* How many alternative schedules to try.  This should be as wide as the
14054    scheduling freedom in the DFA, but no wider.  Making this value too
14055    large results in extra work for the scheduler.  */
14056 
14057 static int
14058 ia32_multipass_dfa_lookahead (void)
14059 {
14060   if (ix86_tune == PROCESSOR_PENTIUM)
14061     return 2;
14062 
14063   if (ix86_tune == PROCESSOR_PENTIUMPRO
14064       || ix86_tune == PROCESSOR_K6)
14065     return 1;
14066 
14067   else
14068     return 0;
14069 }
14070 
14071 
14072 /* Compute the alignment given to a constant that is being placed in memory.
14073    EXP is the constant and ALIGN is the alignment that the object would
14074    ordinarily have.
14075    The value of this function is used instead of that alignment to align
14076    the object.  */
14077 
14078 int
14079 ix86_constant_alignment (tree exp, int align)
14080 {
14081   if (TREE_CODE (exp) == REAL_CST)
14082     {
14083       if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
14084 	return 64;
14085       else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
14086 	return 128;
14087     }
14088   else if (!optimize_size && TREE_CODE (exp) == STRING_CST
14089 	   && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
14090     return BITS_PER_WORD;
14091 
14092   return align;
14093 }
14094 
14095 /* Compute the alignment for a static variable.
14096    TYPE is the data type, and ALIGN is the alignment that
14097    the object would ordinarily have.  The value of this function is used
14098    instead of that alignment to align the object.  */
14099 
14100 int
14101 ix86_data_alignment (tree type, int align)
14102 {
14103   int max_align = optimize_size ? BITS_PER_WORD : 256;
14104 
14105   if (AGGREGATE_TYPE_P (type)
14106       && TYPE_SIZE (type)
14107       && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
14108       && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
14109 	  || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
14110       && align < max_align)
14111     align = max_align;
14112 
14113   /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
14114      to a 16-byte boundary.  */
14115   if (TARGET_64BIT)
14116     {
14117       if (AGGREGATE_TYPE_P (type)
14118 	   && TYPE_SIZE (type)
14119 	   && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
14120 	   && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
14121 	       || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
14122 	return 128;
14123     }
14124 
14125   if (TREE_CODE (type) == ARRAY_TYPE)
14126     {
14127       if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
14128 	return 64;
14129       if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
14130 	return 128;
14131     }
14132   else if (TREE_CODE (type) == COMPLEX_TYPE)
14133     {
14134 
14135       if (TYPE_MODE (type) == DCmode && align < 64)
14136 	return 64;
14137       if (TYPE_MODE (type) == XCmode && align < 128)
14138 	return 128;
14139     }
14140   else if ((TREE_CODE (type) == RECORD_TYPE
14141 	    || TREE_CODE (type) == UNION_TYPE
14142 	    || TREE_CODE (type) == QUAL_UNION_TYPE)
14143 	   && TYPE_FIELDS (type))
14144     {
14145       if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
14146 	return 64;
14147       if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
14148 	return 128;
14149     }
14150   else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
14151 	   || TREE_CODE (type) == INTEGER_TYPE)
14152     {
14153       if (TYPE_MODE (type) == DFmode && align < 64)
14154 	return 64;
14155       if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
14156 	return 128;
14157     }
14158 
14159   return align;
14160 }
14161 
14162 /* Compute the alignment for a local variable.
14163    TYPE is the data type, and ALIGN is the alignment that
14164    the object would ordinarily have.  The value of this macro is used
14165    instead of that alignment to align the object.  */
14166 
14167 int
14168 ix86_local_alignment (tree type, int align)
14169 {
14170   /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
14171      to a 16-byte boundary.  */
14172   if (TARGET_64BIT)
14173     {
14174       if (AGGREGATE_TYPE_P (type)
14175 	   && TYPE_SIZE (type)
14176 	   && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
14177 	   && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
14178 	       || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
14179 	return 128;
14180     }
14181   if (TREE_CODE (type) == ARRAY_TYPE)
14182     {
14183       if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
14184 	return 64;
14185       if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
14186 	return 128;
14187     }
14188   else if (TREE_CODE (type) == COMPLEX_TYPE)
14189     {
14190       if (TYPE_MODE (type) == DCmode && align < 64)
14191 	return 64;
14192       if (TYPE_MODE (type) == XCmode && align < 128)
14193 	return 128;
14194     }
14195   else if ((TREE_CODE (type) == RECORD_TYPE
14196 	    || TREE_CODE (type) == UNION_TYPE
14197 	    || TREE_CODE (type) == QUAL_UNION_TYPE)
14198 	   && TYPE_FIELDS (type))
14199     {
14200       if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
14201 	return 64;
14202       if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
14203 	return 128;
14204     }
14205   else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
14206 	   || TREE_CODE (type) == INTEGER_TYPE)
14207     {
14208 
14209       if (TYPE_MODE (type) == DFmode && align < 64)
14210 	return 64;
14211       if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
14212 	return 128;
14213     }
14214   return align;
14215 }
14216 
14217 /* Emit RTL insns to initialize the variable parts of a trampoline.
14218    FNADDR is an RTX for the address of the function's pure code.
14219    CXT is an RTX for the static chain value for the function.  */
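/* The generated trampoline looks like this (byte values as emitted below):

     32-bit:   b9 <cxt:4>          movl   $cxt, %ecx
               e9 <rel32:4>        jmp    fnaddr
     64-bit:   41 bb <imm32:4>     movl   $fnaddr, %r11d   (short form)
        or     49 bb <imm64:8>     movabs $fnaddr, %r11
               49 ba <imm64:8>     movabs $cxt, %r10
               49 ff e3            jmp    *%r11  */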
14220 void
14221 x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
14222 {
14223   if (!TARGET_64BIT)
14224     {
14225       /* Compute offset from the end of the jmp to the target function.  */
14226       rtx disp = expand_binop (SImode, sub_optab, fnaddr,
14227 			       plus_constant (tramp, 10),
14228 			       NULL_RTX, 1, OPTAB_DIRECT);
14229       emit_move_insn (gen_rtx_MEM (QImode, tramp),
14230 		      gen_int_mode (0xb9, QImode));
14231       emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
14232       emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
14233 		      gen_int_mode (0xe9, QImode));
14234       emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
14235     }
14236   else
14237     {
14238       int offset = 0;
14239       /* Try to load the address using a shorter movl instead of movabs.
14240          We may want to support movq for kernel mode, but the kernel does
14241          not use trampolines at the moment.  */
14242       if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
14243 	{
14244 	  fnaddr = copy_to_mode_reg (DImode, fnaddr);
14245 	  emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
14246 			  gen_int_mode (0xbb41, HImode));
14247 	  emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
14248 			  gen_lowpart (SImode, fnaddr));
14249 	  offset += 6;
14250 	}
14251       else
14252 	{
14253 	  emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
14254 			  gen_int_mode (0xbb49, HImode));
14255 	  emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
14256 			  fnaddr);
14257 	  offset += 10;
14258 	}
14259       /* Load static chain using movabs to r10.  */
14260       emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
14261 		      gen_int_mode (0xba49, HImode));
14262       emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
14263 		      cxt);
14264       offset += 10;
14265       /* Jump to r11.  */
14266       emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
14267 		      gen_int_mode (0xff49, HImode));
14268       emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
14269 		      gen_int_mode (0xe3, QImode));
14270       offset += 3;
14271       gcc_assert (offset <= TRAMPOLINE_SIZE);
14272     }
14273 
14274 #ifdef ENABLE_EXECUTE_STACK
14275   emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
14276 		     LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
14277 #endif
14278 }
14279 
14280 /* Codes for all the SSE/MMX builtins.  */
14281 enum ix86_builtins
14282 {
14283   IX86_BUILTIN_ADDPS,
14284   IX86_BUILTIN_ADDSS,
14285   IX86_BUILTIN_DIVPS,
14286   IX86_BUILTIN_DIVSS,
14287   IX86_BUILTIN_MULPS,
14288   IX86_BUILTIN_MULSS,
14289   IX86_BUILTIN_SUBPS,
14290   IX86_BUILTIN_SUBSS,
14291 
14292   IX86_BUILTIN_CMPEQPS,
14293   IX86_BUILTIN_CMPLTPS,
14294   IX86_BUILTIN_CMPLEPS,
14295   IX86_BUILTIN_CMPGTPS,
14296   IX86_BUILTIN_CMPGEPS,
14297   IX86_BUILTIN_CMPNEQPS,
14298   IX86_BUILTIN_CMPNLTPS,
14299   IX86_BUILTIN_CMPNLEPS,
14300   IX86_BUILTIN_CMPNGTPS,
14301   IX86_BUILTIN_CMPNGEPS,
14302   IX86_BUILTIN_CMPORDPS,
14303   IX86_BUILTIN_CMPUNORDPS,
14304   IX86_BUILTIN_CMPEQSS,
14305   IX86_BUILTIN_CMPLTSS,
14306   IX86_BUILTIN_CMPLESS,
14307   IX86_BUILTIN_CMPNEQSS,
14308   IX86_BUILTIN_CMPNLTSS,
14309   IX86_BUILTIN_CMPNLESS,
14310   IX86_BUILTIN_CMPNGTSS,
14311   IX86_BUILTIN_CMPNGESS,
14312   IX86_BUILTIN_CMPORDSS,
14313   IX86_BUILTIN_CMPUNORDSS,
14314 
14315   IX86_BUILTIN_COMIEQSS,
14316   IX86_BUILTIN_COMILTSS,
14317   IX86_BUILTIN_COMILESS,
14318   IX86_BUILTIN_COMIGTSS,
14319   IX86_BUILTIN_COMIGESS,
14320   IX86_BUILTIN_COMINEQSS,
14321   IX86_BUILTIN_UCOMIEQSS,
14322   IX86_BUILTIN_UCOMILTSS,
14323   IX86_BUILTIN_UCOMILESS,
14324   IX86_BUILTIN_UCOMIGTSS,
14325   IX86_BUILTIN_UCOMIGESS,
14326   IX86_BUILTIN_UCOMINEQSS,
14327 
14328   IX86_BUILTIN_CVTPI2PS,
14329   IX86_BUILTIN_CVTPS2PI,
14330   IX86_BUILTIN_CVTSI2SS,
14331   IX86_BUILTIN_CVTSI642SS,
14332   IX86_BUILTIN_CVTSS2SI,
14333   IX86_BUILTIN_CVTSS2SI64,
14334   IX86_BUILTIN_CVTTPS2PI,
14335   IX86_BUILTIN_CVTTSS2SI,
14336   IX86_BUILTIN_CVTTSS2SI64,
14337 
14338   IX86_BUILTIN_MAXPS,
14339   IX86_BUILTIN_MAXSS,
14340   IX86_BUILTIN_MINPS,
14341   IX86_BUILTIN_MINSS,
14342 
14343   IX86_BUILTIN_LOADUPS,
14344   IX86_BUILTIN_STOREUPS,
14345   IX86_BUILTIN_MOVSS,
14346 
14347   IX86_BUILTIN_MOVHLPS,
14348   IX86_BUILTIN_MOVLHPS,
14349   IX86_BUILTIN_LOADHPS,
14350   IX86_BUILTIN_LOADLPS,
14351   IX86_BUILTIN_STOREHPS,
14352   IX86_BUILTIN_STORELPS,
14353 
14354   IX86_BUILTIN_MASKMOVQ,
14355   IX86_BUILTIN_MOVMSKPS,
14356   IX86_BUILTIN_PMOVMSKB,
14357 
14358   IX86_BUILTIN_MOVNTPS,
14359   IX86_BUILTIN_MOVNTQ,
14360 
14361   IX86_BUILTIN_LOADDQU,
14362   IX86_BUILTIN_STOREDQU,
14363 
14364   IX86_BUILTIN_PACKSSWB,
14365   IX86_BUILTIN_PACKSSDW,
14366   IX86_BUILTIN_PACKUSWB,
14367 
14368   IX86_BUILTIN_PADDB,
14369   IX86_BUILTIN_PADDW,
14370   IX86_BUILTIN_PADDD,
14371   IX86_BUILTIN_PADDQ,
14372   IX86_BUILTIN_PADDSB,
14373   IX86_BUILTIN_PADDSW,
14374   IX86_BUILTIN_PADDUSB,
14375   IX86_BUILTIN_PADDUSW,
14376   IX86_BUILTIN_PSUBB,
14377   IX86_BUILTIN_PSUBW,
14378   IX86_BUILTIN_PSUBD,
14379   IX86_BUILTIN_PSUBQ,
14380   IX86_BUILTIN_PSUBSB,
14381   IX86_BUILTIN_PSUBSW,
14382   IX86_BUILTIN_PSUBUSB,
14383   IX86_BUILTIN_PSUBUSW,
14384 
14385   IX86_BUILTIN_PAND,
14386   IX86_BUILTIN_PANDN,
14387   IX86_BUILTIN_POR,
14388   IX86_BUILTIN_PXOR,
14389 
14390   IX86_BUILTIN_PAVGB,
14391   IX86_BUILTIN_PAVGW,
14392 
14393   IX86_BUILTIN_PCMPEQB,
14394   IX86_BUILTIN_PCMPEQW,
14395   IX86_BUILTIN_PCMPEQD,
14396   IX86_BUILTIN_PCMPGTB,
14397   IX86_BUILTIN_PCMPGTW,
14398   IX86_BUILTIN_PCMPGTD,
14399 
14400   IX86_BUILTIN_PMADDWD,
14401 
14402   IX86_BUILTIN_PMAXSW,
14403   IX86_BUILTIN_PMAXUB,
14404   IX86_BUILTIN_PMINSW,
14405   IX86_BUILTIN_PMINUB,
14406 
14407   IX86_BUILTIN_PMULHUW,
14408   IX86_BUILTIN_PMULHW,
14409   IX86_BUILTIN_PMULLW,
14410 
14411   IX86_BUILTIN_PSADBW,
14412   IX86_BUILTIN_PSHUFW,
14413 
14414   IX86_BUILTIN_PSLLW,
14415   IX86_BUILTIN_PSLLD,
14416   IX86_BUILTIN_PSLLQ,
14417   IX86_BUILTIN_PSRAW,
14418   IX86_BUILTIN_PSRAD,
14419   IX86_BUILTIN_PSRLW,
14420   IX86_BUILTIN_PSRLD,
14421   IX86_BUILTIN_PSRLQ,
14422   IX86_BUILTIN_PSLLWI,
14423   IX86_BUILTIN_PSLLDI,
14424   IX86_BUILTIN_PSLLQI,
14425   IX86_BUILTIN_PSRAWI,
14426   IX86_BUILTIN_PSRADI,
14427   IX86_BUILTIN_PSRLWI,
14428   IX86_BUILTIN_PSRLDI,
14429   IX86_BUILTIN_PSRLQI,
14430 
14431   IX86_BUILTIN_PUNPCKHBW,
14432   IX86_BUILTIN_PUNPCKHWD,
14433   IX86_BUILTIN_PUNPCKHDQ,
14434   IX86_BUILTIN_PUNPCKLBW,
14435   IX86_BUILTIN_PUNPCKLWD,
14436   IX86_BUILTIN_PUNPCKLDQ,
14437 
14438   IX86_BUILTIN_SHUFPS,
14439 
14440   IX86_BUILTIN_RCPPS,
14441   IX86_BUILTIN_RCPSS,
14442   IX86_BUILTIN_RSQRTPS,
14443   IX86_BUILTIN_RSQRTSS,
14444   IX86_BUILTIN_SQRTPS,
14445   IX86_BUILTIN_SQRTSS,
14446 
14447   IX86_BUILTIN_UNPCKHPS,
14448   IX86_BUILTIN_UNPCKLPS,
14449 
14450   IX86_BUILTIN_ANDPS,
14451   IX86_BUILTIN_ANDNPS,
14452   IX86_BUILTIN_ORPS,
14453   IX86_BUILTIN_XORPS,
14454 
14455   IX86_BUILTIN_EMMS,
14456   IX86_BUILTIN_LDMXCSR,
14457   IX86_BUILTIN_STMXCSR,
14458   IX86_BUILTIN_SFENCE,
14459 
14460   /* 3DNow! Original */
14461   IX86_BUILTIN_FEMMS,
14462   IX86_BUILTIN_PAVGUSB,
14463   IX86_BUILTIN_PF2ID,
14464   IX86_BUILTIN_PFACC,
14465   IX86_BUILTIN_PFADD,
14466   IX86_BUILTIN_PFCMPEQ,
14467   IX86_BUILTIN_PFCMPGE,
14468   IX86_BUILTIN_PFCMPGT,
14469   IX86_BUILTIN_PFMAX,
14470   IX86_BUILTIN_PFMIN,
14471   IX86_BUILTIN_PFMUL,
14472   IX86_BUILTIN_PFRCP,
14473   IX86_BUILTIN_PFRCPIT1,
14474   IX86_BUILTIN_PFRCPIT2,
14475   IX86_BUILTIN_PFRSQIT1,
14476   IX86_BUILTIN_PFRSQRT,
14477   IX86_BUILTIN_PFSUB,
14478   IX86_BUILTIN_PFSUBR,
14479   IX86_BUILTIN_PI2FD,
14480   IX86_BUILTIN_PMULHRW,
14481 
14482   /* 3DNow! Athlon Extensions */
14483   IX86_BUILTIN_PF2IW,
14484   IX86_BUILTIN_PFNACC,
14485   IX86_BUILTIN_PFPNACC,
14486   IX86_BUILTIN_PI2FW,
14487   IX86_BUILTIN_PSWAPDSI,
14488   IX86_BUILTIN_PSWAPDSF,
14489 
14490   /* SSE2 */
14491   IX86_BUILTIN_ADDPD,
14492   IX86_BUILTIN_ADDSD,
14493   IX86_BUILTIN_DIVPD,
14494   IX86_BUILTIN_DIVSD,
14495   IX86_BUILTIN_MULPD,
14496   IX86_BUILTIN_MULSD,
14497   IX86_BUILTIN_SUBPD,
14498   IX86_BUILTIN_SUBSD,
14499 
14500   IX86_BUILTIN_CMPEQPD,
14501   IX86_BUILTIN_CMPLTPD,
14502   IX86_BUILTIN_CMPLEPD,
14503   IX86_BUILTIN_CMPGTPD,
14504   IX86_BUILTIN_CMPGEPD,
14505   IX86_BUILTIN_CMPNEQPD,
14506   IX86_BUILTIN_CMPNLTPD,
14507   IX86_BUILTIN_CMPNLEPD,
14508   IX86_BUILTIN_CMPNGTPD,
14509   IX86_BUILTIN_CMPNGEPD,
14510   IX86_BUILTIN_CMPORDPD,
14511   IX86_BUILTIN_CMPUNORDPD,
14512   IX86_BUILTIN_CMPNEPD,
14513   IX86_BUILTIN_CMPEQSD,
14514   IX86_BUILTIN_CMPLTSD,
14515   IX86_BUILTIN_CMPLESD,
14516   IX86_BUILTIN_CMPNEQSD,
14517   IX86_BUILTIN_CMPNLTSD,
14518   IX86_BUILTIN_CMPNLESD,
14519   IX86_BUILTIN_CMPORDSD,
14520   IX86_BUILTIN_CMPUNORDSD,
14521   IX86_BUILTIN_CMPNESD,
14522 
14523   IX86_BUILTIN_COMIEQSD,
14524   IX86_BUILTIN_COMILTSD,
14525   IX86_BUILTIN_COMILESD,
14526   IX86_BUILTIN_COMIGTSD,
14527   IX86_BUILTIN_COMIGESD,
14528   IX86_BUILTIN_COMINEQSD,
14529   IX86_BUILTIN_UCOMIEQSD,
14530   IX86_BUILTIN_UCOMILTSD,
14531   IX86_BUILTIN_UCOMILESD,
14532   IX86_BUILTIN_UCOMIGTSD,
14533   IX86_BUILTIN_UCOMIGESD,
14534   IX86_BUILTIN_UCOMINEQSD,
14535 
14536   IX86_BUILTIN_MAXPD,
14537   IX86_BUILTIN_MAXSD,
14538   IX86_BUILTIN_MINPD,
14539   IX86_BUILTIN_MINSD,
14540 
14541   IX86_BUILTIN_ANDPD,
14542   IX86_BUILTIN_ANDNPD,
14543   IX86_BUILTIN_ORPD,
14544   IX86_BUILTIN_XORPD,
14545 
14546   IX86_BUILTIN_SQRTPD,
14547   IX86_BUILTIN_SQRTSD,
14548 
14549   IX86_BUILTIN_UNPCKHPD,
14550   IX86_BUILTIN_UNPCKLPD,
14551 
14552   IX86_BUILTIN_SHUFPD,
14553 
14554   IX86_BUILTIN_LOADUPD,
14555   IX86_BUILTIN_STOREUPD,
14556   IX86_BUILTIN_MOVSD,
14557 
14558   IX86_BUILTIN_LOADHPD,
14559   IX86_BUILTIN_LOADLPD,
14560 
14561   IX86_BUILTIN_CVTDQ2PD,
14562   IX86_BUILTIN_CVTDQ2PS,
14563 
14564   IX86_BUILTIN_CVTPD2DQ,
14565   IX86_BUILTIN_CVTPD2PI,
14566   IX86_BUILTIN_CVTPD2PS,
14567   IX86_BUILTIN_CVTTPD2DQ,
14568   IX86_BUILTIN_CVTTPD2PI,
14569 
14570   IX86_BUILTIN_CVTPI2PD,
14571   IX86_BUILTIN_CVTSI2SD,
14572   IX86_BUILTIN_CVTSI642SD,
14573 
14574   IX86_BUILTIN_CVTSD2SI,
14575   IX86_BUILTIN_CVTSD2SI64,
14576   IX86_BUILTIN_CVTSD2SS,
14577   IX86_BUILTIN_CVTSS2SD,
14578   IX86_BUILTIN_CVTTSD2SI,
14579   IX86_BUILTIN_CVTTSD2SI64,
14580 
14581   IX86_BUILTIN_CVTPS2DQ,
14582   IX86_BUILTIN_CVTPS2PD,
14583   IX86_BUILTIN_CVTTPS2DQ,
14584 
14585   IX86_BUILTIN_MOVNTI,
14586   IX86_BUILTIN_MOVNTPD,
14587   IX86_BUILTIN_MOVNTDQ,
14588 
14589   /* SSE2 MMX */
14590   IX86_BUILTIN_MASKMOVDQU,
14591   IX86_BUILTIN_MOVMSKPD,
14592   IX86_BUILTIN_PMOVMSKB128,
14593 
14594   IX86_BUILTIN_PACKSSWB128,
14595   IX86_BUILTIN_PACKSSDW128,
14596   IX86_BUILTIN_PACKUSWB128,
14597 
14598   IX86_BUILTIN_PADDB128,
14599   IX86_BUILTIN_PADDW128,
14600   IX86_BUILTIN_PADDD128,
14601   IX86_BUILTIN_PADDQ128,
14602   IX86_BUILTIN_PADDSB128,
14603   IX86_BUILTIN_PADDSW128,
14604   IX86_BUILTIN_PADDUSB128,
14605   IX86_BUILTIN_PADDUSW128,
14606   IX86_BUILTIN_PSUBB128,
14607   IX86_BUILTIN_PSUBW128,
14608   IX86_BUILTIN_PSUBD128,
14609   IX86_BUILTIN_PSUBQ128,
14610   IX86_BUILTIN_PSUBSB128,
14611   IX86_BUILTIN_PSUBSW128,
14612   IX86_BUILTIN_PSUBUSB128,
14613   IX86_BUILTIN_PSUBUSW128,
14614 
14615   IX86_BUILTIN_PAND128,
14616   IX86_BUILTIN_PANDN128,
14617   IX86_BUILTIN_POR128,
14618   IX86_BUILTIN_PXOR128,
14619 
14620   IX86_BUILTIN_PAVGB128,
14621   IX86_BUILTIN_PAVGW128,
14622 
14623   IX86_BUILTIN_PCMPEQB128,
14624   IX86_BUILTIN_PCMPEQW128,
14625   IX86_BUILTIN_PCMPEQD128,
14626   IX86_BUILTIN_PCMPGTB128,
14627   IX86_BUILTIN_PCMPGTW128,
14628   IX86_BUILTIN_PCMPGTD128,
14629 
14630   IX86_BUILTIN_PMADDWD128,
14631 
14632   IX86_BUILTIN_PMAXSW128,
14633   IX86_BUILTIN_PMAXUB128,
14634   IX86_BUILTIN_PMINSW128,
14635   IX86_BUILTIN_PMINUB128,
14636 
14637   IX86_BUILTIN_PMULUDQ,
14638   IX86_BUILTIN_PMULUDQ128,
14639   IX86_BUILTIN_PMULHUW128,
14640   IX86_BUILTIN_PMULHW128,
14641   IX86_BUILTIN_PMULLW128,
14642 
14643   IX86_BUILTIN_PSADBW128,
14644   IX86_BUILTIN_PSHUFHW,
14645   IX86_BUILTIN_PSHUFLW,
14646   IX86_BUILTIN_PSHUFD,
14647 
14648   IX86_BUILTIN_PSLLW128,
14649   IX86_BUILTIN_PSLLD128,
14650   IX86_BUILTIN_PSLLQ128,
14651   IX86_BUILTIN_PSRAW128,
14652   IX86_BUILTIN_PSRAD128,
14653   IX86_BUILTIN_PSRLW128,
14654   IX86_BUILTIN_PSRLD128,
14655   IX86_BUILTIN_PSRLQ128,
14656   IX86_BUILTIN_PSLLDQI128,
14657   IX86_BUILTIN_PSLLWI128,
14658   IX86_BUILTIN_PSLLDI128,
14659   IX86_BUILTIN_PSLLQI128,
14660   IX86_BUILTIN_PSRAWI128,
14661   IX86_BUILTIN_PSRADI128,
14662   IX86_BUILTIN_PSRLDQI128,
14663   IX86_BUILTIN_PSRLWI128,
14664   IX86_BUILTIN_PSRLDI128,
14665   IX86_BUILTIN_PSRLQI128,
14666 
14667   IX86_BUILTIN_PUNPCKHBW128,
14668   IX86_BUILTIN_PUNPCKHWD128,
14669   IX86_BUILTIN_PUNPCKHDQ128,
14670   IX86_BUILTIN_PUNPCKHQDQ128,
14671   IX86_BUILTIN_PUNPCKLBW128,
14672   IX86_BUILTIN_PUNPCKLWD128,
14673   IX86_BUILTIN_PUNPCKLDQ128,
14674   IX86_BUILTIN_PUNPCKLQDQ128,
14675 
14676   IX86_BUILTIN_CLFLUSH,
14677   IX86_BUILTIN_MFENCE,
14678   IX86_BUILTIN_LFENCE,
14679 
14680   /* Prescott New Instructions.  */
14681   IX86_BUILTIN_ADDSUBPS,
14682   IX86_BUILTIN_HADDPS,
14683   IX86_BUILTIN_HSUBPS,
14684   IX86_BUILTIN_MOVSHDUP,
14685   IX86_BUILTIN_MOVSLDUP,
14686   IX86_BUILTIN_ADDSUBPD,
14687   IX86_BUILTIN_HADDPD,
14688   IX86_BUILTIN_HSUBPD,
14689   IX86_BUILTIN_LDDQU,
14690 
14691   IX86_BUILTIN_MONITOR,
14692   IX86_BUILTIN_MWAIT,
14693 
14694   IX86_BUILTIN_VEC_INIT_V2SI,
14695   IX86_BUILTIN_VEC_INIT_V4HI,
14696   IX86_BUILTIN_VEC_INIT_V8QI,
14697   IX86_BUILTIN_VEC_EXT_V2DF,
14698   IX86_BUILTIN_VEC_EXT_V2DI,
14699   IX86_BUILTIN_VEC_EXT_V4SF,
14700   IX86_BUILTIN_VEC_EXT_V4SI,
14701   IX86_BUILTIN_VEC_EXT_V8HI,
14702   IX86_BUILTIN_VEC_EXT_V16QI,
14703   IX86_BUILTIN_VEC_EXT_V2SI,
14704   IX86_BUILTIN_VEC_EXT_V4HI,
14705   IX86_BUILTIN_VEC_SET_V8HI,
14706   IX86_BUILTIN_VEC_SET_V4HI,
14707 
14708   IX86_BUILTIN_MAX
14709 };
14710 
14711 #define def_builtin(MASK, NAME, TYPE, CODE)				\
14712 do {									\
14713   if ((MASK) & target_flags						\
14714       && (!((MASK) & MASK_64BIT) || TARGET_64BIT))			\
14715     lang_hooks.builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD,	\
14716 				 NULL, NULL_TREE);			\
14717 } while (0)
14718 
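/* A typical use later in this file looks roughly like

     def_builtin (MASK_SSE, "__builtin_ia32_addps",
		  v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ADDPS);

   where the function-type node (written here as v4sf_ftype_v4sf_v4sf only
   for illustration) is built up beforehand.  The builtin is registered only
   when the required MASK_* bits are present in target_flags and, for
   MASK_64BIT entries, only when compiling for a 64-bit target.  */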
14719 /* Bits for builtin_description.flag.  */
14720 
14721 /* Set when we don't support the comparison natively, and should
14722    swap_comparison in order to support it.  */
14723 #define BUILTIN_DESC_SWAP_OPERANDS	1
14724 
14725 struct builtin_description
14726 {
14727   const unsigned int mask;
14728   const enum insn_code icode;
14729   const char *const name;
14730   const enum ix86_builtins code;
14731   const enum rtx_code comparison;
14732   const unsigned int flag;
14733 };
14734 
14735 static const struct builtin_description bdesc_comi[] =
14736 {
14737   { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
14738   { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
14739   { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
14740   { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
14741   { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
14742   { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
14743   { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
14744   { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
14745   { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
14746   { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
14747   { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
14748   { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
14749   { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
14750   { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
14751   { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
14752   { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
14753   { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
14754   { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
14755   { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
14756   { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
14757   { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
14758   { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
14759   { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
14760   { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
14761 };
14762 
14763 static const struct builtin_description bdesc_2arg[] =
14764 {
14765   /* SSE */
14766   { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
14767   { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
14768   { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
14769   { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
14770   { MASK_SSE, CODE_FOR_sse_vmaddv4sf3,  "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
14771   { MASK_SSE, CODE_FOR_sse_vmsubv4sf3,  "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
14772   { MASK_SSE, CODE_FOR_sse_vmmulv4sf3,  "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
14773   { MASK_SSE, CODE_FOR_sse_vmdivv4sf3,  "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
14774 
14775   { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
14776   { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
14777   { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
14778   { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT,
14779     BUILTIN_DESC_SWAP_OPERANDS },
14780   { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE,
14781     BUILTIN_DESC_SWAP_OPERANDS },
14782   { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
14783   { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, 0 },
14784   { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, 0 },
14785   { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, 0 },
14786   { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE,
14787     BUILTIN_DESC_SWAP_OPERANDS },
14788   { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT,
14789     BUILTIN_DESC_SWAP_OPERANDS },
14790   { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, 0 },
14791   { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
14792   { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
14793   { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
14794   { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
14795   { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, 0 },
14796   { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, 0 },
14797   { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, 0 },
14798   { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE,
14799     BUILTIN_DESC_SWAP_OPERANDS },
14800   { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT,
14801     BUILTIN_DESC_SWAP_OPERANDS },
14802   { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, 0 },
14803 
14804   { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
14805   { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
14806   { MASK_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
14807   { MASK_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
14808 
14809   { MASK_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
14810   { MASK_SSE, CODE_FOR_sse_nandv4sf3,  "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
14811   { MASK_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
14812   { MASK_SSE, CODE_FOR_xorv4sf3,  "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
14813 
14814   { MASK_SSE, CODE_FOR_sse_movss,  "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
14815   { MASK_SSE, CODE_FOR_sse_movhlps,  "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
14816   { MASK_SSE, CODE_FOR_sse_movlhps,  "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
14817   { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
14818   { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
14819 
14820   /* MMX */
14821   { MASK_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
14822   { MASK_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
14823   { MASK_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
14824   { MASK_SSE2, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, 0, 0 },
14825   { MASK_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
14826   { MASK_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
14827   { MASK_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
14828   { MASK_SSE2, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, 0, 0 },
14829 
14830   { MASK_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
14831   { MASK_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
14832   { MASK_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
14833   { MASK_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
14834   { MASK_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
14835   { MASK_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
14836   { MASK_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
14837   { MASK_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
14838 
14839   { MASK_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
14840   { MASK_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
14841   { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
14842 
14843   { MASK_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
14844   { MASK_MMX, CODE_FOR_mmx_nandv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
14845   { MASK_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
14846   { MASK_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
14847 
14848   { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
14849   { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
14850 
14851   { MASK_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
14852   { MASK_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
14853   { MASK_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
14854   { MASK_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
14855   { MASK_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
14856   { MASK_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
14857 
14858   { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
14859   { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
14860   { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
14861   { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
14862 
14863   { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
14864   { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
14865   { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
14866   { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
14867   { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
14868   { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
14869 
14870   /* Special.  */
14871   { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
14872   { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
14873   { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
14874 
14875   { MASK_SSE, CODE_FOR_sse_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
14876   { MASK_SSE, CODE_FOR_sse_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
14877   { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 },
14878 
14879   { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
14880   { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
14881   { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
14882   { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
14883   { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
14884   { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
14885 
14886   { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
14887   { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
14888   { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
14889   { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
14890   { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
14891   { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
14892 
14893   { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
14894   { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
14895   { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
14896   { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
14897 
14898   { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
14899   { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
14900 
14901   /* SSE2 */
14902   { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
14903   { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
14904   { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
14905   { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
14906   { MASK_SSE2, CODE_FOR_sse2_vmaddv2df3,  "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
14907   { MASK_SSE2, CODE_FOR_sse2_vmsubv2df3,  "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
14908   { MASK_SSE2, CODE_FOR_sse2_vmmulv2df3,  "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
14909   { MASK_SSE2, CODE_FOR_sse2_vmdivv2df3,  "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
14910 
14911   { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
14912   { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
14913   { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
14914   { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT,
14915     BUILTIN_DESC_SWAP_OPERANDS },
14916   { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE,
14917     BUILTIN_DESC_SWAP_OPERANDS },
14918   { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
14919   { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, 0 },
14920   { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, 0 },
14921   { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, 0 },
14922   { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE,
14923     BUILTIN_DESC_SWAP_OPERANDS },
14924   { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT,
14925     BUILTIN_DESC_SWAP_OPERANDS },
14926   { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, 0 },
14927   { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
14928   { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
14929   { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
14930   { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
14931   { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, 0 },
14932   { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, 0 },
14933   { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, 0 },
14934   { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, 0 },
14935 
14936   { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
14937   { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
14938   { MASK_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
14939   { MASK_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
14940 
14941   { MASK_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
14942   { MASK_SSE2, CODE_FOR_sse2_nandv2df3,  "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
14943   { MASK_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
14944   { MASK_SSE2, CODE_FOR_xorv2df3,  "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
14945 
14946   { MASK_SSE2, CODE_FOR_sse2_movsd,  "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
14947   { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
14948   { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
14949 
14950   /* SSE2 MMX */
14951   { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
14952   { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
14953   { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
14954   { MASK_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
14955   { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
14956   { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
14957   { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
14958   { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
14959 
14960   { MASK_MMX, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
14961   { MASK_MMX, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
14962   { MASK_MMX, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
14963   { MASK_MMX, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
14964   { MASK_MMX, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
14965   { MASK_MMX, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
14966   { MASK_MMX, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
14967   { MASK_MMX, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
14968 
14969   { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
14970   { MASK_SSE2, CODE_FOR_sse2_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
14971 
14972   { MASK_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
14973   { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
14974   { MASK_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
14975   { MASK_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
14976 
14977   { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
14978   { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
14979 
14980   { MASK_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
14981   { MASK_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
14982   { MASK_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
14983   { MASK_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
14984   { MASK_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
14985   { MASK_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
14986 
14987   { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
14988   { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
14989   { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
14990   { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
14991 
14992   { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
14993   { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
14994   { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
14995   { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
14996   { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
14997   { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
14998   { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
14999   { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },
15000 
15001   { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
15002   { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
15003   { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
15004 
15005   { MASK_SSE2, CODE_FOR_sse2_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
15006   { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
15007 
15008   { MASK_SSE2, CODE_FOR_sse2_umulsidi3, 0, IX86_BUILTIN_PMULUDQ, 0, 0 },
15009   { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, 0, IX86_BUILTIN_PMULUDQ128, 0, 0 },
15010 
15011   { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
15012   { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
15013   { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
15014 
15015   { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
15016   { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
15017   { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
15018 
15019   { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
15020   { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
15021 
15022   { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
15023 
15024   { MASK_SSE2, CODE_FOR_sse2_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
15025   { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 },
15026   { MASK_SSE2, CODE_FOR_sse2_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
15027   { MASK_SSE2, CODE_FOR_sse2_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 },
15028 
15029   /* SSE3 */
15030   { MASK_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, 0, 0 },
15031   { MASK_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, 0, 0 },
15032   { MASK_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, 0, 0 },
15033   { MASK_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, 0, 0 },
15034   { MASK_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, 0, 0 },
15035   { MASK_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, 0, 0 }
15036 };
15037 
15038 static const struct builtin_description bdesc_1arg[] =
15039 {
15040   { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
15041   { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
15042 
15043   { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
15044   { MASK_SSE, CODE_FOR_sse_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
15045   { MASK_SSE, CODE_FOR_sse_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
15046 
15047   { MASK_SSE, CODE_FOR_sse_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
15048   { MASK_SSE, CODE_FOR_sse_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
15049   { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 },
15050   { MASK_SSE, CODE_FOR_sse_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
15051   { MASK_SSE, CODE_FOR_sse_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
15052   { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 },
15053 
15054   { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
15055   { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
15056 
15057   { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
15058 
15059   { MASK_SSE2, CODE_FOR_sse2_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
15060   { MASK_SSE2, CODE_FOR_sse2_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
15061 
15062   { MASK_SSE2, CODE_FOR_sse2_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
15063   { MASK_SSE2, CODE_FOR_sse2_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
15064   { MASK_SSE2, CODE_FOR_sse2_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
15065   { MASK_SSE2, CODE_FOR_sse2_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
15066   { MASK_SSE2, CODE_FOR_sse2_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
15067 
15068   { MASK_SSE2, CODE_FOR_sse2_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
15069 
15070   { MASK_SSE2, CODE_FOR_sse2_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
15071   { MASK_SSE2, CODE_FOR_sse2_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
15072   { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 },
15073   { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 },
15074 
15075   { MASK_SSE2, CODE_FOR_sse2_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
15076   { MASK_SSE2, CODE_FOR_sse2_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
15077   { MASK_SSE2, CODE_FOR_sse2_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },
15078 
15079   /* SSE3 */
15080   { MASK_SSE3, CODE_FOR_sse3_movshdup, 0, IX86_BUILTIN_MOVSHDUP, 0, 0 },
15081   { MASK_SSE3, CODE_FOR_sse3_movsldup, 0, IX86_BUILTIN_MOVSLDUP, 0, 0 },
15082 };
15083 
15084 static void
15085 ix86_init_builtins (void)
15086 {
15087   if (TARGET_MMX)
15088     ix86_init_mmx_sse_builtins ();
15089 }
15090 
15091 /* Set up all the MMX/SSE builtins.  This is not called if TARGET_MMX
15092    is zero.  Otherwise, if TARGET_SSE is not set, only the MMX
15093    builtins are defined.  */
15094 static void
15095 ix86_init_mmx_sse_builtins (void)
15096 {
15097   const struct builtin_description * d;
15098   size_t i;
15099 
15100   tree V16QI_type_node = build_vector_type_for_mode (intQI_type_node, V16QImode);
15101   tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
15102   tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
15103   tree V2DI_type_node
15104     = build_vector_type_for_mode (long_long_integer_type_node, V2DImode);
15105   tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
15106   tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
15107   tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
15108   tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
15109   tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
15110   tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);
15111 
15112   tree pchar_type_node = build_pointer_type (char_type_node);
15113   tree pcchar_type_node = build_pointer_type (
15114 			     build_type_variant (char_type_node, 1, 0));
15115   tree pfloat_type_node = build_pointer_type (float_type_node);
15116   tree pcfloat_type_node = build_pointer_type (
15117 			     build_type_variant (float_type_node, 1, 0));
15118   tree pv2si_type_node = build_pointer_type (V2SI_type_node);
15119   tree pv2di_type_node = build_pointer_type (V2DI_type_node);
15120   tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
15121 
15122   /* Comparisons.  */
15123   tree int_ftype_v4sf_v4sf
15124     = build_function_type_list (integer_type_node,
15125 				V4SF_type_node, V4SF_type_node, NULL_TREE);
15126   tree v4si_ftype_v4sf_v4sf
15127     = build_function_type_list (V4SI_type_node,
15128 				V4SF_type_node, V4SF_type_node, NULL_TREE);
15129   /* MMX/SSE/integer conversions.  */
15130   tree int_ftype_v4sf
15131     = build_function_type_list (integer_type_node,
15132 				V4SF_type_node, NULL_TREE);
15133   tree int64_ftype_v4sf
15134     = build_function_type_list (long_long_integer_type_node,
15135 				V4SF_type_node, NULL_TREE);
15136   tree int_ftype_v8qi
15137     = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
15138   tree v4sf_ftype_v4sf_int
15139     = build_function_type_list (V4SF_type_node,
15140 				V4SF_type_node, integer_type_node, NULL_TREE);
15141   tree v4sf_ftype_v4sf_int64
15142     = build_function_type_list (V4SF_type_node,
15143 				V4SF_type_node, long_long_integer_type_node,
15144 				NULL_TREE);
15145   tree v4sf_ftype_v4sf_v2si
15146     = build_function_type_list (V4SF_type_node,
15147 				V4SF_type_node, V2SI_type_node, NULL_TREE);
15148 
15149   /* Miscellaneous.  */
15150   tree v8qi_ftype_v4hi_v4hi
15151     = build_function_type_list (V8QI_type_node,
15152 				V4HI_type_node, V4HI_type_node, NULL_TREE);
15153   tree v4hi_ftype_v2si_v2si
15154     = build_function_type_list (V4HI_type_node,
15155 				V2SI_type_node, V2SI_type_node, NULL_TREE);
15156   tree v4sf_ftype_v4sf_v4sf_int
15157     = build_function_type_list (V4SF_type_node,
15158 				V4SF_type_node, V4SF_type_node,
15159 				integer_type_node, NULL_TREE);
15160   tree v2si_ftype_v4hi_v4hi
15161     = build_function_type_list (V2SI_type_node,
15162 				V4HI_type_node, V4HI_type_node, NULL_TREE);
15163   tree v4hi_ftype_v4hi_int
15164     = build_function_type_list (V4HI_type_node,
15165 				V4HI_type_node, integer_type_node, NULL_TREE);
15166   tree v4hi_ftype_v4hi_di
15167     = build_function_type_list (V4HI_type_node,
15168 				V4HI_type_node, long_long_unsigned_type_node,
15169 				NULL_TREE);
15170   tree v2si_ftype_v2si_di
15171     = build_function_type_list (V2SI_type_node,
15172 				V2SI_type_node, long_long_unsigned_type_node,
15173 				NULL_TREE);
15174   tree void_ftype_void
15175     = build_function_type (void_type_node, void_list_node);
15176   tree void_ftype_unsigned
15177     = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
15178   tree void_ftype_unsigned_unsigned
15179     = build_function_type_list (void_type_node, unsigned_type_node,
15180 				unsigned_type_node, NULL_TREE);
15181   tree void_ftype_pcvoid_unsigned_unsigned
15182     = build_function_type_list (void_type_node, const_ptr_type_node,
15183 				unsigned_type_node, unsigned_type_node,
15184 				NULL_TREE);
15185   tree unsigned_ftype_void
15186     = build_function_type (unsigned_type_node, void_list_node);
15187   tree v2si_ftype_v4sf
15188     = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
15189   /* Loads/stores.  */
15190   tree void_ftype_v8qi_v8qi_pchar
15191     = build_function_type_list (void_type_node,
15192 				V8QI_type_node, V8QI_type_node,
15193 				pchar_type_node, NULL_TREE);
15194   tree v4sf_ftype_pcfloat
15195     = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
15196   /* @@@ the type is bogus */
15197   tree v4sf_ftype_v4sf_pv2si
15198     = build_function_type_list (V4SF_type_node,
15199 				V4SF_type_node, pv2si_type_node, NULL_TREE);
15200   tree void_ftype_pv2si_v4sf
15201     = build_function_type_list (void_type_node,
15202 				pv2si_type_node, V4SF_type_node, NULL_TREE);
15203   tree void_ftype_pfloat_v4sf
15204     = build_function_type_list (void_type_node,
15205 				pfloat_type_node, V4SF_type_node, NULL_TREE);
15206   tree void_ftype_pdi_di
15207     = build_function_type_list (void_type_node,
15208 				pdi_type_node, long_long_unsigned_type_node,
15209 				NULL_TREE);
15210   tree void_ftype_pv2di_v2di
15211     = build_function_type_list (void_type_node,
15212 				pv2di_type_node, V2DI_type_node, NULL_TREE);
15213   /* Normal vector unops.  */
15214   tree v4sf_ftype_v4sf
15215     = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
15216 
15217   /* Normal vector binops.  */
15218   tree v4sf_ftype_v4sf_v4sf
15219     = build_function_type_list (V4SF_type_node,
15220 				V4SF_type_node, V4SF_type_node, NULL_TREE);
15221   tree v8qi_ftype_v8qi_v8qi
15222     = build_function_type_list (V8QI_type_node,
15223 				V8QI_type_node, V8QI_type_node, NULL_TREE);
15224   tree v4hi_ftype_v4hi_v4hi
15225     = build_function_type_list (V4HI_type_node,
15226 				V4HI_type_node, V4HI_type_node, NULL_TREE);
15227   tree v2si_ftype_v2si_v2si
15228     = build_function_type_list (V2SI_type_node,
15229 				V2SI_type_node, V2SI_type_node, NULL_TREE);
15230   tree di_ftype_di_di
15231     = build_function_type_list (long_long_unsigned_type_node,
15232 				long_long_unsigned_type_node,
15233 				long_long_unsigned_type_node, NULL_TREE);
15234 
15235   tree v2si_ftype_v2sf
15236     = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
15237   tree v2sf_ftype_v2si
15238     = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
15239   tree v2si_ftype_v2si
15240     = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
15241   tree v2sf_ftype_v2sf
15242     = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
15243   tree v2sf_ftype_v2sf_v2sf
15244     = build_function_type_list (V2SF_type_node,
15245 				V2SF_type_node, V2SF_type_node, NULL_TREE);
15246   tree v2si_ftype_v2sf_v2sf
15247     = build_function_type_list (V2SI_type_node,
15248 				V2SF_type_node, V2SF_type_node, NULL_TREE);
15249   tree pint_type_node    = build_pointer_type (integer_type_node);
15250   tree pdouble_type_node = build_pointer_type (double_type_node);
15251   tree pcdouble_type_node = build_pointer_type (
15252 				build_type_variant (double_type_node, 1, 0));
15253   tree int_ftype_v2df_v2df
15254     = build_function_type_list (integer_type_node,
15255 				V2DF_type_node, V2DF_type_node, NULL_TREE);
15256 
15257   tree void_ftype_pcvoid
15258     = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
15259   tree v4sf_ftype_v4si
15260     = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
15261   tree v4si_ftype_v4sf
15262     = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
15263   tree v2df_ftype_v4si
15264     = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
15265   tree v4si_ftype_v2df
15266     = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
15267   tree v2si_ftype_v2df
15268     = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
15269   tree v4sf_ftype_v2df
15270     = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
15271   tree v2df_ftype_v2si
15272     = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
15273   tree v2df_ftype_v4sf
15274     = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
15275   tree int_ftype_v2df
15276     = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
15277   tree int64_ftype_v2df
15278     = build_function_type_list (long_long_integer_type_node,
15279 				V2DF_type_node, NULL_TREE);
15280   tree v2df_ftype_v2df_int
15281     = build_function_type_list (V2DF_type_node,
15282 				V2DF_type_node, integer_type_node, NULL_TREE);
15283   tree v2df_ftype_v2df_int64
15284     = build_function_type_list (V2DF_type_node,
15285 				V2DF_type_node, long_long_integer_type_node,
15286 				NULL_TREE);
15287   tree v4sf_ftype_v4sf_v2df
15288     = build_function_type_list (V4SF_type_node,
15289 				V4SF_type_node, V2DF_type_node, NULL_TREE);
15290   tree v2df_ftype_v2df_v4sf
15291     = build_function_type_list (V2DF_type_node,
15292 				V2DF_type_node, V4SF_type_node, NULL_TREE);
15293   tree v2df_ftype_v2df_v2df_int
15294     = build_function_type_list (V2DF_type_node,
15295 				V2DF_type_node, V2DF_type_node,
15296 				integer_type_node,
15297 				NULL_TREE);
15298   tree v2df_ftype_v2df_pcdouble
15299     = build_function_type_list (V2DF_type_node,
15300 				V2DF_type_node, pcdouble_type_node, NULL_TREE);
15301   tree void_ftype_pdouble_v2df
15302     = build_function_type_list (void_type_node,
15303 				pdouble_type_node, V2DF_type_node, NULL_TREE);
15304   tree void_ftype_pint_int
15305     = build_function_type_list (void_type_node,
15306 				pint_type_node, integer_type_node, NULL_TREE);
15307   tree void_ftype_v16qi_v16qi_pchar
15308     = build_function_type_list (void_type_node,
15309 				V16QI_type_node, V16QI_type_node,
15310 				pchar_type_node, NULL_TREE);
15311   tree v2df_ftype_pcdouble
15312     = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
15313   tree v2df_ftype_v2df_v2df
15314     = build_function_type_list (V2DF_type_node,
15315 				V2DF_type_node, V2DF_type_node, NULL_TREE);
15316   tree v16qi_ftype_v16qi_v16qi
15317     = build_function_type_list (V16QI_type_node,
15318 				V16QI_type_node, V16QI_type_node, NULL_TREE);
15319   tree v8hi_ftype_v8hi_v8hi
15320     = build_function_type_list (V8HI_type_node,
15321 				V8HI_type_node, V8HI_type_node, NULL_TREE);
15322   tree v4si_ftype_v4si_v4si
15323     = build_function_type_list (V4SI_type_node,
15324 				V4SI_type_node, V4SI_type_node, NULL_TREE);
15325   tree v2di_ftype_v2di_v2di
15326     = build_function_type_list (V2DI_type_node,
15327 				V2DI_type_node, V2DI_type_node, NULL_TREE);
15328   tree v2di_ftype_v2df_v2df
15329     = build_function_type_list (V2DI_type_node,
15330 				V2DF_type_node, V2DF_type_node, NULL_TREE);
15331   tree v2df_ftype_v2df
15332     = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
15333   tree v2di_ftype_v2di_int
15334     = build_function_type_list (V2DI_type_node,
15335 				V2DI_type_node, integer_type_node, NULL_TREE);
15336   tree v4si_ftype_v4si_int
15337     = build_function_type_list (V4SI_type_node,
15338 				V4SI_type_node, integer_type_node, NULL_TREE);
15339   tree v8hi_ftype_v8hi_int
15340     = build_function_type_list (V8HI_type_node,
15341 				V8HI_type_node, integer_type_node, NULL_TREE);
15342   tree v4si_ftype_v8hi_v8hi
15343     = build_function_type_list (V4SI_type_node,
15344 				V8HI_type_node, V8HI_type_node, NULL_TREE);
15345   tree di_ftype_v8qi_v8qi
15346     = build_function_type_list (long_long_unsigned_type_node,
15347 				V8QI_type_node, V8QI_type_node, NULL_TREE);
15348   tree di_ftype_v2si_v2si
15349     = build_function_type_list (long_long_unsigned_type_node,
15350 				V2SI_type_node, V2SI_type_node, NULL_TREE);
15351   tree v2di_ftype_v16qi_v16qi
15352     = build_function_type_list (V2DI_type_node,
15353 				V16QI_type_node, V16QI_type_node, NULL_TREE);
15354   tree v2di_ftype_v4si_v4si
15355     = build_function_type_list (V2DI_type_node,
15356 				V4SI_type_node, V4SI_type_node, NULL_TREE);
15357   tree int_ftype_v16qi
15358     = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
15359   tree v16qi_ftype_pcchar
15360     = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
15361   tree void_ftype_pchar_v16qi
15362     = build_function_type_list (void_type_node,
15363 			        pchar_type_node, V16QI_type_node, NULL_TREE);
15364 
15365   tree float80_type;
15366   tree float128_type;
15367   tree ftype;
15368 
15369   /* The __float80 type.  */
15370   if (TYPE_MODE (long_double_type_node) == XFmode)
15371     (*lang_hooks.types.register_builtin_type) (long_double_type_node,
15372 					       "__float80");
15373   else
15374     {
15375       /* The __float80 type.  */
15376       float80_type = make_node (REAL_TYPE);
15377       TYPE_PRECISION (float80_type) = 80;
15378       layout_type (float80_type);
15379       (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
15380     }
15381 
15382   if (TARGET_64BIT)
15383     {
15384       float128_type = make_node (REAL_TYPE);
15385       TYPE_PRECISION (float128_type) = 128;
15386       layout_type (float128_type);
15387       (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
15388     }
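  /* Illustrative note (not original source): once registered, these names
     act as ordinary type names in user code, e.g.

       __float80 x = 1.0;

     and, under the TARGET_64BIT branch above, __float128 likewise becomes
     available as a type name.  */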
15389 
15390   /* Add all builtins that are more or less simple operations on two
15391      operands.  */
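  /* Illustrative example (not original source): for the
     "__builtin_ia32_addps" entry, insn_data[CODE_FOR_addv4sf3].operand[1].mode
     is V4SFmode, so the switch below picks v4sf_ftype_v4sf_v4sf; the compare
     patterns are then special-cased further down to the mask-returning
     v4si_ftype_v4sf_v4sf and v2di_ftype_v2df_v2df signatures.  */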
15392   for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
15393     {
15394       /* Use one of the operands; the target can have a different mode for
15395 	 mask-generating compares.  */
15396       enum machine_mode mode;
15397       tree type;
15398 
15399       if (d->name == 0)
15400 	continue;
15401       mode = insn_data[d->icode].operand[1].mode;
15402 
15403       switch (mode)
15404 	{
15405 	case V16QImode:
15406 	  type = v16qi_ftype_v16qi_v16qi;
15407 	  break;
15408 	case V8HImode:
15409 	  type = v8hi_ftype_v8hi_v8hi;
15410 	  break;
15411 	case V4SImode:
15412 	  type = v4si_ftype_v4si_v4si;
15413 	  break;
15414 	case V2DImode:
15415 	  type = v2di_ftype_v2di_v2di;
15416 	  break;
15417 	case V2DFmode:
15418 	  type = v2df_ftype_v2df_v2df;
15419 	  break;
15420 	case V4SFmode:
15421 	  type = v4sf_ftype_v4sf_v4sf;
15422 	  break;
15423 	case V8QImode:
15424 	  type = v8qi_ftype_v8qi_v8qi;
15425 	  break;
15426 	case V4HImode:
15427 	  type = v4hi_ftype_v4hi_v4hi;
15428 	  break;
15429 	case V2SImode:
15430 	  type = v2si_ftype_v2si_v2si;
15431 	  break;
15432 	case DImode:
15433 	  type = di_ftype_di_di;
15434 	  break;
15435 
15436 	default:
15437 	  gcc_unreachable ();
15438 	}
15439 
15440       /* Override for comparisons.  */
15441       if (d->icode == CODE_FOR_sse_maskcmpv4sf3
15442 	  || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3)
15443 	type = v4si_ftype_v4sf_v4sf;
15444 
15445       if (d->icode == CODE_FOR_sse2_maskcmpv2df3
15446 	  || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
15447 	type = v2di_ftype_v2df_v2df;
15448 
15449       def_builtin (d->mask, d->name, type, d->code);
15450     }
15451 
15452   /* Add the remaining MMX insns with somewhat more complicated types.  */
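  /* Illustrative note (not original source): "more complicated" here mostly
     means the shift count is a 64-bit scalar rather than a vector, e.g.
     __builtin_ia32_psllw below gets type v4hi_ftype_v4hi_di: a V4HI value
     shifted by a DImode count.  */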
15453   def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
15454   def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
15455   def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
15456   def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
15457 
15458   def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
15459   def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
15460   def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
15461 
15462   def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
15463   def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
15464 
15465   def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
15466   def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
15467 
15468   /* comi/ucomi insns.  */
15469   for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
15470     if (d->mask == MASK_SSE2)
15471       def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
15472     else
15473       def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
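  /* Illustrative note (not original source): each comi/ucomi builtin thus
     receives a scalar-compare prototype, e.g.

       int r = __builtin_ia32_comieq (a, b);

     with a and b of the packed-single vector type for the SSE entries and of
     the packed-double vector type for the SSE2 entries.  */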
15474 
15475   def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
15476   def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
15477   def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
15478 
15479   def_builtin (MASK_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
15480   def_builtin (MASK_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
15481   def_builtin (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
15482   def_builtin (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
15483   def_builtin (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
15484   def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
15485   def_builtin (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
15486   def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
15487   def_builtin (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
15488   def_builtin (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
15489   def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);
15490 
15491   def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
15492 
15493   def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
15494   def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
15495 
15496   def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
15497   def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
15498   def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
15499   def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
15500 
15501   def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
15502   def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
15503   def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
15504   def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
15505 
15506   def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
15507 
15508   def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
15509 
15510   def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
15511   def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
15512   def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
15513   def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
15514   def_builtin (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
15515   def_builtin (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
15516 
15517   def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
15518 
15519   /* Original 3DNow!  */
15520   def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
15521   def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
15522   def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
15523   def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
15524   def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
15525   def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
15526   def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
15527   def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
15528   def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
15529   def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
15530   def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
15531   def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
15532   def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
15533   def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
15534   def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
15535   def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
15536   def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
15537   def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
15538   def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
15539   def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
15540 
15541   /* 3DNow! extension as used in the Athlon CPU.  */
15542   def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
15543   def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
15544   def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
15545   def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
15546   def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
15547   def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
15548 
15549   /* SSE2 */
15550   def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
15551 
15552   def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
15553   def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
15554 
15555   def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADHPD);
15556   def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADLPD);
15557 
15558   def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
15559   def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
15560   def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
15561   def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
15562   def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
15563 
15564   def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
15565   def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
15566   def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
15567   def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
15568 
15569   def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
15570   def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
15571 
15572   def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
15573 
15574   def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
15575   def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
15576 
15577   def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
15578   def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
15579   def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
15580   def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
15581   def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
15582 
15583   def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
15584 
15585   def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
15586   def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
15587   def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
15588   def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);
15589 
15590   def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
15591   def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
15592   def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
15593 
15594   def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
15595   def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
15596   def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
15597   def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
15598 
15599   def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
15600   def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
15601   def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
15602 
15603   def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
15604   def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
15605 
15606   def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq", di_ftype_v2si_v2si, IX86_BUILTIN_PMULUDQ);
15607   def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq128", v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULUDQ128);
15608 
15609   def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSLLW128);
15610   def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSLLD128);
15611   def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
15612 
15613   def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSRLW128);
15614   def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSRLD128);
15615   def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
15616 
15617   def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSRAW128);
15618   def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSRAD128);
15619 
15620   def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
15621   def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
15622   def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
15623   def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
15624 
15625   def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
15626   def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
15627   def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
15628   def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
15629 
15630   def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
15631   def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
15632 
15633   def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
15634 
15635   /* Prescott New Instructions.  */
15636   def_builtin (MASK_SSE3, "__builtin_ia32_monitor",
15637 	       void_ftype_pcvoid_unsigned_unsigned,
15638 	       IX86_BUILTIN_MONITOR);
15639   def_builtin (MASK_SSE3, "__builtin_ia32_mwait",
15640 	       void_ftype_unsigned_unsigned,
15641 	       IX86_BUILTIN_MWAIT);
15642   def_builtin (MASK_SSE3, "__builtin_ia32_movshdup",
15643 	       v4sf_ftype_v4sf,
15644 	       IX86_BUILTIN_MOVSHDUP);
15645   def_builtin (MASK_SSE3, "__builtin_ia32_movsldup",
15646 	       v4sf_ftype_v4sf,
15647 	       IX86_BUILTIN_MOVSLDUP);
15648   def_builtin (MASK_SSE3, "__builtin_ia32_lddqu",
15649 	       v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);
15650 
15651   /* Access to the vec_init patterns.  */
15652   ftype = build_function_type_list (V2SI_type_node, integer_type_node,
15653 				    integer_type_node, NULL_TREE);
15654   def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v2si",
15655 	       ftype, IX86_BUILTIN_VEC_INIT_V2SI);
15656 
15657   ftype = build_function_type_list (V4HI_type_node, short_integer_type_node,
15658 				    short_integer_type_node,
15659 				    short_integer_type_node,
15660 				    short_integer_type_node, NULL_TREE);
15661   def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v4hi",
15662 	       ftype, IX86_BUILTIN_VEC_INIT_V4HI);
15663 
15664   ftype = build_function_type_list (V8QI_type_node, char_type_node,
15665 				    char_type_node, char_type_node,
15666 				    char_type_node, char_type_node,
15667 				    char_type_node, char_type_node,
15668 				    char_type_node, NULL_TREE);
15669   def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v8qi",
15670 	       ftype, IX86_BUILTIN_VEC_INIT_V8QI);
15671 
15672   /* Access to the vec_extract patterns.  */
15673   ftype = build_function_type_list (double_type_node, V2DF_type_node,
15674 				    integer_type_node, NULL_TREE);
15675   def_builtin (MASK_SSE2, "__builtin_ia32_vec_ext_v2df",
15676 	       ftype, IX86_BUILTIN_VEC_EXT_V2DF);
15677 
15678   ftype = build_function_type_list (long_long_integer_type_node,
15679 				    V2DI_type_node, integer_type_node,
15680 				    NULL_TREE);
15681   def_builtin (MASK_SSE2, "__builtin_ia32_vec_ext_v2di",
15682 	       ftype, IX86_BUILTIN_VEC_EXT_V2DI);
15683 
15684   ftype = build_function_type_list (float_type_node, V4SF_type_node,
15685 				    integer_type_node, NULL_TREE);
15686   def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v4sf",
15687 	       ftype, IX86_BUILTIN_VEC_EXT_V4SF);
15688 
15689   ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
15690 				    integer_type_node, NULL_TREE);
15691   def_builtin (MASK_SSE2, "__builtin_ia32_vec_ext_v4si",
15692 	       ftype, IX86_BUILTIN_VEC_EXT_V4SI);
15693 
15694   ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
15695 				    integer_type_node, NULL_TREE);
15696   def_builtin (MASK_SSE2, "__builtin_ia32_vec_ext_v8hi",
15697 	       ftype, IX86_BUILTIN_VEC_EXT_V8HI);
15698 
15699   ftype = build_function_type_list (intHI_type_node, V4HI_type_node,
15700 				    integer_type_node, NULL_TREE);
15701   def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_ext_v4hi",
15702 	       ftype, IX86_BUILTIN_VEC_EXT_V4HI);
15703 
15704   ftype = build_function_type_list (intSI_type_node, V2SI_type_node,
15705 				    integer_type_node, NULL_TREE);
15706   def_builtin (MASK_MMX, "__builtin_ia32_vec_ext_v2si",
15707 	       ftype, IX86_BUILTIN_VEC_EXT_V2SI);
15708 
15709   ftype = build_function_type_list (intQI_type_node, V16QI_type_node,
15710 				    integer_type_node, NULL_TREE);
15711   def_builtin (MASK_SSE2, "__builtin_ia32_vec_ext_v16qi", ftype, IX86_BUILTIN_VEC_EXT_V16QI);
15712 
15713   /* Access to the vec_set patterns.  */
15714   ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
15715 				    intHI_type_node,
15716 				    integer_type_node, NULL_TREE);
15717   def_builtin (MASK_SSE2, "__builtin_ia32_vec_set_v8hi",
15718 	       ftype, IX86_BUILTIN_VEC_SET_V8HI);
15719 
15720   ftype = build_function_type_list (V4HI_type_node, V4HI_type_node,
15721 				    intHI_type_node,
15722 				    integer_type_node, NULL_TREE);
15723   def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_set_v4hi",
15724 	       ftype, IX86_BUILTIN_VEC_SET_V4HI);
15725 }
15726 
15727 /* Errors in the source file can cause expand_expr to return const0_rtx
15728    where we expect a vector.  To avoid crashing, use one of the vector
15729    clear instructions.  */
15730 static rtx
15731 safe_vector_operand (rtx x, enum machine_mode mode)
15732 {
15733   if (x == const0_rtx)
15734     x = CONST0_RTX (mode);
15735   return x;
15736 }
15737 
15738 /* Subroutine of ix86_expand_builtin to take care of binop insns.  */
15739 
15740 static rtx
15741 ix86_expand_binop_builtin (enum insn_code icode, tree arglist, rtx target)
15742 {
15743   rtx pat, xops[3];
15744   tree arg0 = TREE_VALUE (arglist);
15745   tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15746   rtx op0 = expand_normal (arg0);
15747   rtx op1 = expand_normal (arg1);
15748   enum machine_mode tmode = insn_data[icode].operand[0].mode;
15749   enum machine_mode mode0 = insn_data[icode].operand[1].mode;
15750   enum machine_mode mode1 = insn_data[icode].operand[2].mode;
15751 
15752   if (VECTOR_MODE_P (mode0))
15753     op0 = safe_vector_operand (op0, mode0);
15754   if (VECTOR_MODE_P (mode1))
15755     op1 = safe_vector_operand (op1, mode1);
15756 
15757   if (optimize || !target
15758       || GET_MODE (target) != tmode
15759       || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15760     target = gen_reg_rtx (tmode);
15761 
15762   if (GET_MODE (op1) == SImode && mode1 == TImode)
15763     {
15764       rtx x = gen_reg_rtx (V4SImode);
15765       emit_insn (gen_sse2_loadd (x, op1));
15766       op1 = gen_lowpart (TImode, x);
15767     }
15768 
15769   /* The insn must want input operands in the same modes as the
15770      result.  */
15771   gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
15772 	      && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode));
15773 
15774   if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
15775     op0 = copy_to_mode_reg (mode0, op0);
15776   if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
15777     op1 = copy_to_mode_reg (mode1, op1);
15778 
15779   /* ??? Using ix86_fixup_binary_operands is problematic when
15780      we've got mismatched modes.  Fake it.  */
15781 
15782   xops[0] = target;
15783   xops[1] = op0;
15784   xops[2] = op1;
15785 
15786   if (tmode == mode0 && tmode == mode1)
15787     {
15788       target = ix86_fixup_binary_operands (UNKNOWN, tmode, xops);
15789       op0 = xops[1];
15790       op1 = xops[2];
15791     }
15792   else if (optimize || !ix86_binary_operator_ok (UNKNOWN, tmode, xops))
15793     {
15794       op0 = force_reg (mode0, op0);
15795       op1 = force_reg (mode1, op1);
15796       target = gen_reg_rtx (tmode);
15797     }
15798 
15799   pat = GEN_FCN (icode) (target, op0, op1);
15800   if (! pat)
15801     return 0;
15802   emit_insn (pat);
15803   return target;
15804 }
15805 
15806 /* Subroutine of ix86_expand_builtin to take care of stores.  */
15807 
15808 static rtx
15809 ix86_expand_store_builtin (enum insn_code icode, tree arglist)
15810 {
15811   rtx pat;
15812   tree arg0 = TREE_VALUE (arglist);
15813   tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15814   rtx op0 = expand_normal (arg0);
15815   rtx op1 = expand_normal (arg1);
15816   enum machine_mode mode0 = insn_data[icode].operand[0].mode;
15817   enum machine_mode mode1 = insn_data[icode].operand[1].mode;
15818 
15819   if (VECTOR_MODE_P (mode1))
15820     op1 = safe_vector_operand (op1, mode1);
15821 
15822   op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
15823   op1 = copy_to_mode_reg (mode1, op1);
15824 
15825   pat = GEN_FCN (icode) (op0, op1);
15826   if (pat)
15827     emit_insn (pat);
15828   return 0;
15829 }
15830 
15831 /* Subroutine of ix86_expand_builtin to take care of unop insns.  */
15832 
15833 static rtx
15834 ix86_expand_unop_builtin (enum insn_code icode, tree arglist,
15835 			  rtx target, int do_load)
15836 {
15837   rtx pat;
15838   tree arg0 = TREE_VALUE (arglist);
15839   rtx op0 = expand_normal (arg0);
15840   enum machine_mode tmode = insn_data[icode].operand[0].mode;
15841   enum machine_mode mode0 = insn_data[icode].operand[1].mode;
15842 
15843   if (optimize || !target
15844       || GET_MODE (target) != tmode
15845       || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15846     target = gen_reg_rtx (tmode);
15847   if (do_load)
15848     op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
15849   else
15850     {
15851       if (VECTOR_MODE_P (mode0))
15852 	op0 = safe_vector_operand (op0, mode0);
15853 
15854       if ((optimize && !register_operand (op0, mode0))
15855 	  || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
15856 	op0 = copy_to_mode_reg (mode0, op0);
15857     }
15858 
15859   pat = GEN_FCN (icode) (target, op0);
15860   if (! pat)
15861     return 0;
15862   emit_insn (pat);
15863   return target;
15864 }
15865 
15866 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
15867    sqrtss, rsqrtss, rcpss.  */
15868 
15869 static rtx
15870 ix86_expand_unop1_builtin (enum insn_code icode, tree arglist, rtx target)
15871 {
15872   rtx pat;
15873   tree arg0 = TREE_VALUE (arglist);
15874   rtx op1, op0 = expand_normal (arg0);
15875   enum machine_mode tmode = insn_data[icode].operand[0].mode;
15876   enum machine_mode mode0 = insn_data[icode].operand[1].mode;
15877 
15878   if (optimize || !target
15879       || GET_MODE (target) != tmode
15880       || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15881     target = gen_reg_rtx (tmode);
15882 
15883   if (VECTOR_MODE_P (mode0))
15884     op0 = safe_vector_operand (op0, mode0);
15885 
15886   if ((optimize && !register_operand (op0, mode0))
15887       || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
15888     op0 = copy_to_mode_reg (mode0, op0);
15889 
15890   op1 = op0;
15891   if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
15892     op1 = copy_to_mode_reg (mode0, op1);
15893 
15894   pat = GEN_FCN (icode) (target, op0, op1);
15895   if (! pat)
15896     return 0;
15897   emit_insn (pat);
15898   return target;
15899 }
15900 
15901 /* Subroutine of ix86_expand_builtin to take care of comparison insns.  */
15902 
15903 static rtx
15904 ix86_expand_sse_compare (const struct builtin_description *d, tree arglist,
15905 			 rtx target)
15906 {
15907   rtx pat;
15908   tree arg0 = TREE_VALUE (arglist);
15909   tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15910   rtx op0 = expand_normal (arg0);
15911   rtx op1 = expand_normal (arg1);
15912   rtx op2;
15913   enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
15914   enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
15915   enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
15916   enum rtx_code comparison = d->comparison;
15917 
15918   if (VECTOR_MODE_P (mode0))
15919     op0 = safe_vector_operand (op0, mode0);
15920   if (VECTOR_MODE_P (mode1))
15921     op1 = safe_vector_operand (op1, mode1);
15922 
15923   /* Swap operands if we have a comparison that isn't available in
15924      hardware.  */
15925   if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
15926     {
15927       rtx tmp = gen_reg_rtx (mode1);
15928       emit_move_insn (tmp, op1);
15929       op1 = op0;
15930       op0 = tmp;
15931     }
15932 
15933   if (optimize || !target
15934       || GET_MODE (target) != tmode
15935       || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
15936     target = gen_reg_rtx (tmode);
15937 
15938   if ((optimize && !register_operand (op0, mode0))
15939       || ! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
15940     op0 = copy_to_mode_reg (mode0, op0);
15941   if ((optimize && !register_operand (op1, mode1))
15942       || ! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
15943     op1 = copy_to_mode_reg (mode1, op1);
15944 
15945   op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
15946   pat = GEN_FCN (d->icode) (target, op0, op1, op2);
15947   if (! pat)
15948     return 0;
15949   emit_insn (pat);
15950   return target;
15951 }
15952 
15953 /* Subroutine of ix86_expand_builtin to take care of comi insns.  */
15954 
15955 static rtx
15956 ix86_expand_sse_comi (const struct builtin_description *d, tree arglist,
15957 		      rtx target)
15958 {
15959   rtx pat;
15960   tree arg0 = TREE_VALUE (arglist);
15961   tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15962   rtx op0 = expand_normal (arg0);
15963   rtx op1 = expand_normal (arg1);
15964   rtx op2;
15965   enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
15966   enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
15967   enum rtx_code comparison = d->comparison;
15968 
15969   if (VECTOR_MODE_P (mode0))
15970     op0 = safe_vector_operand (op0, mode0);
15971   if (VECTOR_MODE_P (mode1))
15972     op1 = safe_vector_operand (op1, mode1);
15973 
15974   /* Swap operands if we have a comparison that isn't available in
15975      hardware.  */
15976   if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
15977     {
15978       rtx tmp = op1;
15979       op1 = op0;
15980       op0 = tmp;
15981     }
15982 
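  /* The comi/ucomi patterns only set the flags; materialize the boolean
     result by zeroing an SImode register and then storing a QImode setcc
     of the comparison into its low byte.  */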
15983   target = gen_reg_rtx (SImode);
15984   emit_move_insn (target, const0_rtx);
15985   target = gen_rtx_SUBREG (QImode, target, 0);
15986 
15987   if ((optimize && !register_operand (op0, mode0))
15988       || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
15989     op0 = copy_to_mode_reg (mode0, op0);
15990   if ((optimize && !register_operand (op1, mode1))
15991       || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
15992     op1 = copy_to_mode_reg (mode1, op1);
15993 
15994   op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
15995   pat = GEN_FCN (d->icode) (op0, op1);
15996   if (! pat)
15997     return 0;
15998   emit_insn (pat);
15999   emit_insn (gen_rtx_SET (VOIDmode,
16000 			  gen_rtx_STRICT_LOW_PART (VOIDmode, target),
16001 			  gen_rtx_fmt_ee (comparison, QImode,
16002 					  SET_DEST (pat),
16003 					  const0_rtx)));
16004 
16005   return SUBREG_REG (target);
16006 }
16007 
16008 /* Return the integer constant in ARG.  Constrain it to be in the range
16009    of the subparts of VEC_TYPE; issue an error if not.  */
16010 
16011 static int
16012 get_element_number (tree vec_type, tree arg)
16013 {
16014   unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
16015 
16016   if (!host_integerp (arg, 1)
16017       || (elt = tree_low_cst (arg, 1), elt > max))
16018     {
16019       error ("selector must be an integer constant in the range 0..%wi", max);
16020       return 0;
16021     }
16022 
16023   return elt;
16024 }
16025 
16026 /* A subroutine of ix86_expand_builtin.  These builtins are a wrapper around
16027    ix86_expand_vector_init.  We DO have language-level syntax for this, in
16028    the form of  (type){ init-list }.  Except that since we can't place emms
16029    instructions from inside the compiler, we can't allow the use of MMX
16030    registers unless the user explicitly asks for it.  So we do *not* define
16031    vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md.  Instead
16032    we have builtins invoked by mmintrin.h that give us license to emit
16033    these sorts of instructions.  */
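
/* For illustration only (a sketch of how mmintrin.h is assumed to wrap one of
   these builtins; the exact wrapper name and argument order may differ):

     static __inline __m64
     _mm_set_pi32 (int __i1, int __i0)
     {
       return (__m64) __builtin_ia32_vec_init_v2si (__i0, __i1);
     }
*/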
16034 
16035 static rtx
16036 ix86_expand_vec_init_builtin (tree type, tree arglist, rtx target)
16037 {
16038   enum machine_mode tmode = TYPE_MODE (type);
16039   enum machine_mode inner_mode = GET_MODE_INNER (tmode);
16040   int i, n_elt = GET_MODE_NUNITS (tmode);
16041   rtvec v = rtvec_alloc (n_elt);
16042 
16043   gcc_assert (VECTOR_MODE_P (tmode));
16044 
16045   for (i = 0; i < n_elt; ++i, arglist = TREE_CHAIN (arglist))
16046     {
16047       rtx x = expand_normal (TREE_VALUE (arglist));
16048       RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
16049     }
16050 
16051   gcc_assert (arglist == NULL);
16052 
16053   if (!target || !register_operand (target, tmode))
16054     target = gen_reg_rtx (tmode);
16055 
16056   ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
16057   return target;
16058 }
16059 
16060 /* A subroutine of ix86_expand_builtin.  These builtins are a wrapper around
16061    ix86_expand_vector_extract.  They would be redundant (for non-MMX) if we
16062    had a language-level syntax for referencing vector elements.  */
16063 
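
/* For illustration only (roughly how the intrinsic headers are assumed to use
   the vec_ext builtins):

     static __inline int
     _mm_extract_pi16 (__m64 __A, int __N)
     {
       return __builtin_ia32_vec_ext_v4hi ((__v4hi) __A, __N);
     }
*/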
16064 static rtx
16065 ix86_expand_vec_ext_builtin (tree arglist, rtx target)
16066 {
16067   enum machine_mode tmode, mode0;
16068   tree arg0, arg1;
16069   int elt;
16070   rtx op0;
16071 
16072   arg0 = TREE_VALUE (arglist);
16073   arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16074 
16075   op0 = expand_normal (arg0);
16076   elt = get_element_number (TREE_TYPE (arg0), arg1);
16077 
16078   tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
16079   mode0 = TYPE_MODE (TREE_TYPE (arg0));
16080   gcc_assert (VECTOR_MODE_P (mode0));
16081 
16082   op0 = force_reg (mode0, op0);
16083 
16084   if (optimize || !target || !register_operand (target, tmode))
16085     target = gen_reg_rtx (tmode);
16086 
16087   ix86_expand_vector_extract (true, target, op0, elt);
16088 
16089   return target;
16090 }
16091 
16092 /* A subroutine of ix86_expand_builtin.  These builtins are a wrapper around
16093    ix86_expand_vector_set.  They would be redundant (for non-MMX) if we had
16094    a language-level syntax for referencing vector elements.  */
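
/* For illustration only (roughly how the intrinsic headers are assumed to use
   the vec_set builtins):

     static __inline __m64
     _mm_insert_pi16 (__m64 __A, int __D, int __N)
     {
       return (__m64) __builtin_ia32_vec_set_v4hi ((__v4hi) __A, __D, __N);
     }
*/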
16095 
16096 static rtx
16097 ix86_expand_vec_set_builtin (tree arglist)
16098 {
16099   enum machine_mode tmode, mode1;
16100   tree arg0, arg1, arg2;
16101   int elt;
16102   rtx op0, op1, target;
16103 
16104   arg0 = TREE_VALUE (arglist);
16105   arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16106   arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
16107 
16108   tmode = TYPE_MODE (TREE_TYPE (arg0));
16109   mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
16110   gcc_assert (VECTOR_MODE_P (tmode));
16111 
16112   op0 = expand_expr (arg0, NULL_RTX, tmode, 0);
16113   op1 = expand_expr (arg1, NULL_RTX, mode1, 0);
16114   elt = get_element_number (TREE_TYPE (arg0), arg2);
16115 
16116   if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
16117     op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
16118 
16119   op0 = force_reg (tmode, op0);
16120   op1 = force_reg (mode1, op1);
16121 
16122   /* OP0 is the source of these builtin functions and shouldn't be
16123      modified.  Create a copy, use it, and return it as the target.  */
16124   target = gen_reg_rtx (tmode);
16125   emit_move_insn (target, op0);
16126   ix86_expand_vector_set (true, target, op1, elt);
16127 
16128   return target;
16129 }
16130 
16131 /* Expand an expression EXP that calls a built-in function,
16132    with result going to TARGET if that's convenient
16133    (and in mode MODE if that's convenient).
16134    SUBTARGET may be used as the target for computing one of EXP's operands.
16135    IGNORE is nonzero if the value is to be ignored.  */
16136 
16137 static rtx
16138 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
16139 		     enum machine_mode mode ATTRIBUTE_UNUSED,
16140 		     int ignore ATTRIBUTE_UNUSED)
16141 {
16142   const struct builtin_description *d;
16143   size_t i;
16144   enum insn_code icode;
16145   tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
16146   tree arglist = TREE_OPERAND (exp, 1);
16147   tree arg0, arg1, arg2;
16148   rtx op0, op1, op2, pat;
16149   enum machine_mode tmode, mode0, mode1, mode2;
16150   unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
16151 
16152   switch (fcode)
16153     {
16154     case IX86_BUILTIN_EMMS:
16155       emit_insn (gen_mmx_emms ());
16156       return 0;
16157 
16158     case IX86_BUILTIN_SFENCE:
16159       emit_insn (gen_sse_sfence ());
16160       return 0;
16161 
16162     case IX86_BUILTIN_MASKMOVQ:
16163     case IX86_BUILTIN_MASKMOVDQU:
16164       icode = (fcode == IX86_BUILTIN_MASKMOVQ
16165 	       ? CODE_FOR_mmx_maskmovq
16166 	       : CODE_FOR_sse2_maskmovdqu);
16167       /* Note the arg order (data, mask, address) differs from the operand order (address, data, mask).  */
16168       arg1 = TREE_VALUE (arglist);
16169       arg2 = TREE_VALUE (TREE_CHAIN (arglist));
16170       arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
16171       op0 = expand_normal (arg0);
16172       op1 = expand_normal (arg1);
16173       op2 = expand_normal (arg2);
16174       mode0 = insn_data[icode].operand[0].mode;
16175       mode1 = insn_data[icode].operand[1].mode;
16176       mode2 = insn_data[icode].operand[2].mode;
16177 
16178       op0 = force_reg (Pmode, op0);
16179       op0 = gen_rtx_MEM (mode1, op0);
16180 
16181       if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
16182 	op0 = copy_to_mode_reg (mode0, op0);
16183       if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
16184 	op1 = copy_to_mode_reg (mode1, op1);
16185       if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
16186 	op2 = copy_to_mode_reg (mode2, op2);
16187       pat = GEN_FCN (icode) (op0, op1, op2);
16188       if (! pat)
16189 	return 0;
16190       emit_insn (pat);
16191       return 0;
16192 
16193     case IX86_BUILTIN_SQRTSS:
16194       return ix86_expand_unop1_builtin (CODE_FOR_sse_vmsqrtv4sf2, arglist, target);
16195     case IX86_BUILTIN_RSQRTSS:
16196       return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrsqrtv4sf2, arglist, target);
16197     case IX86_BUILTIN_RCPSS:
16198       return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrcpv4sf2, arglist, target);
16199 
16200     case IX86_BUILTIN_LOADUPS:
16201       return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
16202 
16203     case IX86_BUILTIN_STOREUPS:
16204       return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
16205 
16206     case IX86_BUILTIN_LOADHPS:
16207     case IX86_BUILTIN_LOADLPS:
16208     case IX86_BUILTIN_LOADHPD:
16209     case IX86_BUILTIN_LOADLPD:
16210       icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_loadhps
16211 	       : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_loadlps
16212 	       : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_loadhpd
16213 	       : CODE_FOR_sse2_loadlpd);
16214       arg0 = TREE_VALUE (arglist);
16215       arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16216       op0 = expand_normal (arg0);
16217       op1 = expand_normal (arg1);
16218       tmode = insn_data[icode].operand[0].mode;
16219       mode0 = insn_data[icode].operand[1].mode;
16220       mode1 = insn_data[icode].operand[2].mode;
16221 
16222       op0 = force_reg (mode0, op0);
16223       op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
16224       if (optimize || target == 0
16225 	  || GET_MODE (target) != tmode
16226 	  || !register_operand (target, tmode))
16227 	target = gen_reg_rtx (tmode);
16228       pat = GEN_FCN (icode) (target, op0, op1);
16229       if (! pat)
16230 	return 0;
16231       emit_insn (pat);
16232       return target;
16233 
16234     case IX86_BUILTIN_STOREHPS:
16235     case IX86_BUILTIN_STORELPS:
16236       icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_storehps
16237 	       : CODE_FOR_sse_storelps);
16238       arg0 = TREE_VALUE (arglist);
16239       arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16240       op0 = expand_normal (arg0);
16241       op1 = expand_normal (arg1);
16242       mode0 = insn_data[icode].operand[0].mode;
16243       mode1 = insn_data[icode].operand[1].mode;
16244 
16245       op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
16246       op1 = force_reg (mode1, op1);
16247 
16248       pat = GEN_FCN (icode) (op0, op1);
16249       if (! pat)
16250 	return 0;
16251       emit_insn (pat);
16252       return const0_rtx;
16253 
16254     case IX86_BUILTIN_MOVNTPS:
16255       return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
16256     case IX86_BUILTIN_MOVNTQ:
16257       return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);
16258 
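    /* The ldmxcsr and stmxcsr instructions only take a memory operand, so
       bounce the value through a stack temporary.  */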
16259     case IX86_BUILTIN_LDMXCSR:
16260       op0 = expand_normal (TREE_VALUE (arglist));
16261       target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
16262       emit_move_insn (target, op0);
16263       emit_insn (gen_sse_ldmxcsr (target));
16264       return 0;
16265 
16266     case IX86_BUILTIN_STMXCSR:
16267       target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
16268       emit_insn (gen_sse_stmxcsr (target));
16269       return copy_to_mode_reg (SImode, target);
16270 
16271     case IX86_BUILTIN_SHUFPS:
16272     case IX86_BUILTIN_SHUFPD:
16273       icode = (fcode == IX86_BUILTIN_SHUFPS
16274 	       ? CODE_FOR_sse_shufps
16275 	       : CODE_FOR_sse2_shufpd);
16276       arg0 = TREE_VALUE (arglist);
16277       arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16278       arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
16279       op0 = expand_normal (arg0);
16280       op1 = expand_normal (arg1);
16281       op2 = expand_normal (arg2);
16282       tmode = insn_data[icode].operand[0].mode;
16283       mode0 = insn_data[icode].operand[1].mode;
16284       mode1 = insn_data[icode].operand[2].mode;
16285       mode2 = insn_data[icode].operand[3].mode;
16286 
16287       if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
16288 	op0 = copy_to_mode_reg (mode0, op0);
16289       if ((optimize && !register_operand (op1, mode1))
16290 	  || !(*insn_data[icode].operand[2].predicate) (op1, mode1))
16291 	op1 = copy_to_mode_reg (mode1, op1);
16292       if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
16293 	{
16294 	  /* @@@ better error message */
16295 	  error ("mask must be an immediate");
16296 	  return gen_reg_rtx (tmode);
16297 	}
16298       if (optimize || target == 0
16299 	  || GET_MODE (target) != tmode
16300 	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
16301 	target = gen_reg_rtx (tmode);
16302       pat = GEN_FCN (icode) (target, op0, op1, op2);
16303       if (! pat)
16304 	return 0;
16305       emit_insn (pat);
16306       return target;
16307 
16308     case IX86_BUILTIN_PSHUFW:
16309     case IX86_BUILTIN_PSHUFD:
16310     case IX86_BUILTIN_PSHUFHW:
16311     case IX86_BUILTIN_PSHUFLW:
16312       icode = (  fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
16313 	       : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
16314 	       : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
16315 	       : CODE_FOR_mmx_pshufw);
16316       arg0 = TREE_VALUE (arglist);
16317       arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16318       op0 = expand_normal (arg0);
16319       op1 = expand_normal (arg1);
16320       tmode = insn_data[icode].operand[0].mode;
16321       mode1 = insn_data[icode].operand[1].mode;
16322       mode2 = insn_data[icode].operand[2].mode;
16323 
16324       if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
16325 	op0 = copy_to_mode_reg (mode1, op0);
16326       if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
16327 	{
16328 	  /* @@@ better error message */
16329 	  error ("mask must be an immediate");
16330 	  return const0_rtx;
16331 	}
16332       if (target == 0
16333 	  || GET_MODE (target) != tmode
16334 	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
16335 	target = gen_reg_rtx (tmode);
16336       pat = GEN_FCN (icode) (target, op0, op1);
16337       if (! pat)
16338 	return 0;
16339       emit_insn (pat);
16340       return target;
16341 
16342     case IX86_BUILTIN_PSLLWI128:
16343       icode = CODE_FOR_ashlv8hi3;
16344       goto do_pshifti;
16345     case IX86_BUILTIN_PSLLDI128:
16346       icode = CODE_FOR_ashlv4si3;
16347       goto do_pshifti;
16348     case IX86_BUILTIN_PSLLQI128:
16349       icode = CODE_FOR_ashlv2di3;
16350       goto do_pshifti;
16351     case IX86_BUILTIN_PSRAWI128:
16352       icode = CODE_FOR_ashrv8hi3;
16353       goto do_pshifti;
16354     case IX86_BUILTIN_PSRADI128:
16355       icode = CODE_FOR_ashrv4si3;
16356       goto do_pshifti;
16357     case IX86_BUILTIN_PSRLWI128:
16358       icode = CODE_FOR_lshrv8hi3;
16359       goto do_pshifti;
16360     case IX86_BUILTIN_PSRLDI128:
16361       icode = CODE_FOR_lshrv4si3;
16362       goto do_pshifti;
16363     case IX86_BUILTIN_PSRLQI128:
16364       icode = CODE_FOR_lshrv2di3;
16365       goto do_pshifti;
16366     do_pshifti:
16367       arg0 = TREE_VALUE (arglist);
16368       arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16369       op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
16370       op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
16371 
16372       if (GET_CODE (op1) != CONST_INT)
16373 	{
16374 	  error ("shift must be an immediate");
16375 	  return const0_rtx;
16376 	}
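      /* Any count larger than the element width gives the same result as the
	 maximum count (zeros, or sign bits for the arithmetic shifts), so an
	 out-of-range immediate can be canonicalized to 255.  */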
16377       if (INTVAL (op1) < 0 || INTVAL (op1) > 255)
16378 	op1 = GEN_INT (255);
16379 
16380       tmode = insn_data[icode].operand[0].mode;
16381       mode1 = insn_data[icode].operand[1].mode;
16382       if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
16383 	op0 = copy_to_reg (op0);
16384 
16385       target = gen_reg_rtx (tmode);
16386       pat = GEN_FCN (icode) (target, op0, op1);
16387       if (!pat)
16388 	return 0;
16389       emit_insn (pat);
16390       return target;
16391 
16392     case IX86_BUILTIN_PSLLW128:
16393       icode = CODE_FOR_ashlv8hi3;
16394       goto do_pshift;
16395     case IX86_BUILTIN_PSLLD128:
16396       icode = CODE_FOR_ashlv4si3;
16397       goto do_pshift;
16398     case IX86_BUILTIN_PSLLQ128:
16399       icode = CODE_FOR_ashlv2di3;
16400       goto do_pshift;
16401     case IX86_BUILTIN_PSRAW128:
16402       icode = CODE_FOR_ashrv8hi3;
16403       goto do_pshift;
16404     case IX86_BUILTIN_PSRAD128:
16405       icode = CODE_FOR_ashrv4si3;
16406       goto do_pshift;
16407     case IX86_BUILTIN_PSRLW128:
16408       icode = CODE_FOR_lshrv8hi3;
16409       goto do_pshift;
16410     case IX86_BUILTIN_PSRLD128:
16411       icode = CODE_FOR_lshrv4si3;
16412       goto do_pshift;
16413     case IX86_BUILTIN_PSRLQ128:
16414       icode = CODE_FOR_lshrv2di3;
16415       goto do_pshift;
16416     do_pshift:
16417       arg0 = TREE_VALUE (arglist);
16418       arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16419       op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
16420       op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
16421 
16422       tmode = insn_data[icode].operand[0].mode;
16423       mode1 = insn_data[icode].operand[1].mode;
16424 
16425       if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
16426 	op0 = copy_to_reg (op0);
16427 
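      /* These shift patterns take the count operand as a full 128-bit
	 (TImode) value, so view the vector count argument as TImode.  */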
16428       op1 = simplify_gen_subreg (TImode, op1, GET_MODE (op1), 0);
16429       if (! (*insn_data[icode].operand[2].predicate) (op1, TImode))
16430 	op1 = copy_to_reg (op1);
16431 
16432       target = gen_reg_rtx (tmode);
16433       pat = GEN_FCN (icode) (target, op0, op1);
16434       if (!pat)
16435 	return 0;
16436       emit_insn (pat);
16437       return target;
16438 
16439     case IX86_BUILTIN_PSLLDQI128:
16440     case IX86_BUILTIN_PSRLDQI128:
16441       icode = (fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
16442 	       : CODE_FOR_sse2_lshrti3);
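      /* sse2_ashlti3/sse2_lshrti3 shift the whole 128-bit value by a bit
	 count; the emmintrin.h wrappers are assumed to scale the byte count
	 of pslldq/psrldq by 8 before calling these builtins.  */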
16443       arg0 = TREE_VALUE (arglist);
16444       arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16445       op0 = expand_normal (arg0);
16446       op1 = expand_normal (arg1);
16447       tmode = insn_data[icode].operand[0].mode;
16448       mode1 = insn_data[icode].operand[1].mode;
16449       mode2 = insn_data[icode].operand[2].mode;
16450 
16451       if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
16452 	{
16453 	  op0 = copy_to_reg (op0);
16454 	  op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
16455 	}
16456       if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
16457 	{
16458 	  error ("shift must be an immediate");
16459 	  return const0_rtx;
16460 	}
16461       target = gen_reg_rtx (V2DImode);
16462       pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0),
16463 			     op0, op1);
16464       if (! pat)
16465 	return 0;
16466       emit_insn (pat);
16467       return target;
16468 
16469     case IX86_BUILTIN_FEMMS:
16470       emit_insn (gen_mmx_femms ());
16471       return NULL_RTX;
16472 
16473     case IX86_BUILTIN_PAVGUSB:
16474       return ix86_expand_binop_builtin (CODE_FOR_mmx_uavgv8qi3, arglist, target);
16475 
16476     case IX86_BUILTIN_PF2ID:
16477       return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2id, arglist, target, 0);
16478 
16479     case IX86_BUILTIN_PFACC:
16480       return ix86_expand_binop_builtin (CODE_FOR_mmx_haddv2sf3, arglist, target);
16481 
16482     case IX86_BUILTIN_PFADD:
16483      return ix86_expand_binop_builtin (CODE_FOR_mmx_addv2sf3, arglist, target);
16484 
16485     case IX86_BUILTIN_PFCMPEQ:
16486       return ix86_expand_binop_builtin (CODE_FOR_mmx_eqv2sf3, arglist, target);
16487 
16488     case IX86_BUILTIN_PFCMPGE:
16489       return ix86_expand_binop_builtin (CODE_FOR_mmx_gev2sf3, arglist, target);
16490 
16491     case IX86_BUILTIN_PFCMPGT:
16492       return ix86_expand_binop_builtin (CODE_FOR_mmx_gtv2sf3, arglist, target);
16493 
16494     case IX86_BUILTIN_PFMAX:
16495       return ix86_expand_binop_builtin (CODE_FOR_mmx_smaxv2sf3, arglist, target);
16496 
16497     case IX86_BUILTIN_PFMIN:
16498       return ix86_expand_binop_builtin (CODE_FOR_mmx_sminv2sf3, arglist, target);
16499 
16500     case IX86_BUILTIN_PFMUL:
16501       return ix86_expand_binop_builtin (CODE_FOR_mmx_mulv2sf3, arglist, target);
16502 
16503     case IX86_BUILTIN_PFRCP:
16504       return ix86_expand_unop_builtin (CODE_FOR_mmx_rcpv2sf2, arglist, target, 0);
16505 
16506     case IX86_BUILTIN_PFRCPIT1:
16507       return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit1v2sf3, arglist, target);
16508 
16509     case IX86_BUILTIN_PFRCPIT2:
16510       return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit2v2sf3, arglist, target);
16511 
16512     case IX86_BUILTIN_PFRSQIT1:
16513       return ix86_expand_binop_builtin (CODE_FOR_mmx_rsqit1v2sf3, arglist, target);
16514 
16515     case IX86_BUILTIN_PFRSQRT:
16516       return ix86_expand_unop_builtin (CODE_FOR_mmx_rsqrtv2sf2, arglist, target, 0);
16517 
16518     case IX86_BUILTIN_PFSUB:
16519       return ix86_expand_binop_builtin (CODE_FOR_mmx_subv2sf3, arglist, target);
16520 
16521     case IX86_BUILTIN_PFSUBR:
16522       return ix86_expand_binop_builtin (CODE_FOR_mmx_subrv2sf3, arglist, target);
16523 
16524     case IX86_BUILTIN_PI2FD:
16525       return ix86_expand_unop_builtin (CODE_FOR_mmx_floatv2si2, arglist, target, 0);
16526 
16527     case IX86_BUILTIN_PMULHRW:
16528       return ix86_expand_binop_builtin (CODE_FOR_mmx_pmulhrwv4hi3, arglist, target);
16529 
16530     case IX86_BUILTIN_PF2IW:
16531       return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2iw, arglist, target, 0);
16532 
16533     case IX86_BUILTIN_PFNACC:
16534       return ix86_expand_binop_builtin (CODE_FOR_mmx_hsubv2sf3, arglist, target);
16535 
16536     case IX86_BUILTIN_PFPNACC:
16537       return ix86_expand_binop_builtin (CODE_FOR_mmx_addsubv2sf3, arglist, target);
16538 
16539     case IX86_BUILTIN_PI2FW:
16540       return ix86_expand_unop_builtin (CODE_FOR_mmx_pi2fw, arglist, target, 0);
16541 
16542     case IX86_BUILTIN_PSWAPDSI:
16543       return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2si2, arglist, target, 0);
16544 
16545     case IX86_BUILTIN_PSWAPDSF:
16546       return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2sf2, arglist, target, 0);
16547 
16548     case IX86_BUILTIN_SQRTSD:
16549       return ix86_expand_unop1_builtin (CODE_FOR_sse2_vmsqrtv2df2, arglist, target);
16550     case IX86_BUILTIN_LOADUPD:
16551       return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);
16552     case IX86_BUILTIN_STOREUPD:
16553       return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);
16554 
16555     case IX86_BUILTIN_MFENCE:
16556 	emit_insn (gen_sse2_mfence ());
16557 	return 0;
16558     case IX86_BUILTIN_LFENCE:
16559 	emit_insn (gen_sse2_lfence ());
16560 	return 0;
16561 
16562     case IX86_BUILTIN_CLFLUSH:
16563 	arg0 = TREE_VALUE (arglist);
16564 	op0 = expand_normal (arg0);
16565 	icode = CODE_FOR_sse2_clflush;
16566 	if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
16567 	    op0 = copy_to_mode_reg (Pmode, op0);
16568 
16569 	emit_insn (gen_sse2_clflush (op0));
16570 	return 0;
16571 
16572     case IX86_BUILTIN_MOVNTPD:
16573       return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
16574     case IX86_BUILTIN_MOVNTDQ:
16575       return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
16576     case IX86_BUILTIN_MOVNTI:
16577       return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);
16578 
16579     case IX86_BUILTIN_LOADDQU:
16580       return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, arglist, target, 1);
16581     case IX86_BUILTIN_STOREDQU:
16582       return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, arglist);
16583 
16584     case IX86_BUILTIN_MONITOR:
16585       arg0 = TREE_VALUE (arglist);
16586       arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16587       arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
16588       op0 = expand_normal (arg0);
16589       op1 = expand_normal (arg1);
16590       op2 = expand_normal (arg2);
16591       if (!REG_P (op0))
16592 	op0 = copy_to_mode_reg (Pmode, op0);
16593       if (!REG_P (op1))
16594 	op1 = copy_to_mode_reg (SImode, op1);
16595       if (!REG_P (op2))
16596 	op2 = copy_to_mode_reg (SImode, op2);
16597       if (!TARGET_64BIT)
16598 	emit_insn (gen_sse3_monitor (op0, op1, op2));
16599       else
16600 	emit_insn (gen_sse3_monitor64 (op0, op1, op2));
16601       return 0;
16602 
16603     case IX86_BUILTIN_MWAIT:
16604       arg0 = TREE_VALUE (arglist);
16605       arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16606       op0 = expand_normal (arg0);
16607       op1 = expand_normal (arg1);
16608       if (!REG_P (op0))
16609 	op0 = copy_to_mode_reg (SImode, op0);
16610       if (!REG_P (op1))
16611 	op1 = copy_to_mode_reg (SImode, op1);
16612       emit_insn (gen_sse3_mwait (op0, op1));
16613       return 0;
16614 
16615     case IX86_BUILTIN_LDDQU:
16616       return ix86_expand_unop_builtin (CODE_FOR_sse3_lddqu, arglist,
16617 				       target, 1);
16618 
16619     case IX86_BUILTIN_VEC_INIT_V2SI:
16620     case IX86_BUILTIN_VEC_INIT_V4HI:
16621     case IX86_BUILTIN_VEC_INIT_V8QI:
16622       return ix86_expand_vec_init_builtin (TREE_TYPE (exp), arglist, target);
16623 
16624     case IX86_BUILTIN_VEC_EXT_V2DF:
16625     case IX86_BUILTIN_VEC_EXT_V2DI:
16626     case IX86_BUILTIN_VEC_EXT_V4SF:
16627     case IX86_BUILTIN_VEC_EXT_V4SI:
16628     case IX86_BUILTIN_VEC_EXT_V8HI:
16629     case IX86_BUILTIN_VEC_EXT_V16QI:
16630     case IX86_BUILTIN_VEC_EXT_V2SI:
16631     case IX86_BUILTIN_VEC_EXT_V4HI:
16632       return ix86_expand_vec_ext_builtin (arglist, target);
16633 
16634     case IX86_BUILTIN_VEC_SET_V8HI:
16635     case IX86_BUILTIN_VEC_SET_V4HI:
16636       return ix86_expand_vec_set_builtin (arglist);
16637 
16638     default:
16639       break;
16640     }
16641 
16642   for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
16643     if (d->code == fcode)
16644       {
16645 	/* Compares are treated specially.  */
16646 	if (d->icode == CODE_FOR_sse_maskcmpv4sf3
16647 	    || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3
16648 	    || d->icode == CODE_FOR_sse2_maskcmpv2df3
16649 	    || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
16650 	  return ix86_expand_sse_compare (d, arglist, target);
16651 
16652 	return ix86_expand_binop_builtin (d->icode, arglist, target);
16653       }
16654 
16655   for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
16656     if (d->code == fcode)
16657       return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
16658 
16659   for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
16660     if (d->code == fcode)
16661       return ix86_expand_sse_comi (d, arglist, target);
16662 
16663   gcc_unreachable ();
16664 }
16665 
16666 /* Store OPERAND to memory after reload is completed.  This means
16667    that we can't easily use assign_stack_local.  */
16668 rtx
16669 ix86_force_to_memory (enum machine_mode mode, rtx operand)
16670 {
16671   rtx result;
16672 
16673   gcc_assert (reload_completed);
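  /* With a red zone we can simply store below the stack pointer; otherwise
     push the operand, splitting DImode into two SImode pushes when not
     generating 64-bit code.  */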
16674   if (TARGET_RED_ZONE)
16675     {
16676       result = gen_rtx_MEM (mode,
16677 			    gen_rtx_PLUS (Pmode,
16678 					  stack_pointer_rtx,
16679 					  GEN_INT (-RED_ZONE_SIZE)));
16680       emit_move_insn (result, operand);
16681     }
16682   else if (!TARGET_RED_ZONE && TARGET_64BIT)
16683     {
16684       switch (mode)
16685 	{
16686 	case HImode:
16687 	case SImode:
16688 	  operand = gen_lowpart (DImode, operand);
16689 	  /* FALLTHRU */
16690 	case DImode:
16691 	  emit_insn (
16692 		      gen_rtx_SET (VOIDmode,
16693 				   gen_rtx_MEM (DImode,
16694 						gen_rtx_PRE_DEC (DImode,
16695 							stack_pointer_rtx)),
16696 				   operand));
16697 	  break;
16698 	default:
16699 	  gcc_unreachable ();
16700 	}
16701       result = gen_rtx_MEM (mode, stack_pointer_rtx);
16702     }
16703   else
16704     {
16705       switch (mode)
16706 	{
16707 	case DImode:
16708 	  {
16709 	    rtx operands[2];
16710 	    split_di (&operand, 1, operands, operands + 1);
16711 	    emit_insn (
16712 			gen_rtx_SET (VOIDmode,
16713 				     gen_rtx_MEM (SImode,
16714 						  gen_rtx_PRE_DEC (Pmode,
16715 							stack_pointer_rtx)),
16716 				     operands[1]));
16717 	    emit_insn (
16718 			gen_rtx_SET (VOIDmode,
16719 				     gen_rtx_MEM (SImode,
16720 						  gen_rtx_PRE_DEC (Pmode,
16721 							stack_pointer_rtx)),
16722 				     operands[0]));
16723 	  }
16724 	  break;
16725 	case HImode:
16726 	  /* Store HImodes as SImodes.  */
16727 	  operand = gen_lowpart (SImode, operand);
16728 	  /* FALLTHRU */
16729 	case SImode:
16730 	  emit_insn (
16731 		      gen_rtx_SET (VOIDmode,
16732 				   gen_rtx_MEM (GET_MODE (operand),
16733 						gen_rtx_PRE_DEC (SImode,
16734 							stack_pointer_rtx)),
16735 				   operand));
16736 	  break;
16737 	default:
16738 	  gcc_unreachable ();
16739 	}
16740       result = gen_rtx_MEM (mode, stack_pointer_rtx);
16741     }
16742   return result;
16743 }
16744 
16745 /* Free the operand from memory.  */
16746 void
16747 ix86_free_from_memory (enum machine_mode mode)
16748 {
16749   if (!TARGET_RED_ZONE)
16750     {
16751       int size;
16752 
16753       if (mode == DImode || TARGET_64BIT)
16754 	size = 8;
16755       else
16756 	size = 4;
16757       /* Use LEA to deallocate stack space.  In peephole2 it will be converted
16758          to a pop or add instruction if registers are available.  */
16759       emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
16760 			      gen_rtx_PLUS (Pmode, stack_pointer_rtx,
16761 					    GEN_INT (size))));
16762     }
16763 }
16764 
16765 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
16766    QImode must go into class Q_REGS.
16767    Narrow ALL_REGS to GENERAL_REGS.  This allows movsf and movdf
16768    to do mem-to-mem moves through integer regs.  */
16769 enum reg_class
16770 ix86_preferred_reload_class (rtx x, enum reg_class class)
16771 {
16772   enum machine_mode mode = GET_MODE (x);
16773 
16774   /* We're only allowed to return a subclass of CLASS.  Many of the
16775      following checks fail for NO_REGS, so eliminate that early.  */
16776   if (class == NO_REGS)
16777     return NO_REGS;
16778 
16779   /* All classes can load zeros.  */
16780   if (x == CONST0_RTX (mode))
16781     return class;
16782 
16783   /* Force constants into memory if we are loading a (nonzero) constant into
16784      an MMX or SSE register.  This is because there are no MMX/SSE instructions
16785      to load from a constant.  */
16786   if (CONSTANT_P (x)
16787       && (MAYBE_MMX_CLASS_P (class) || MAYBE_SSE_CLASS_P (class)))
16788     return NO_REGS;
16789 
16790   /* Prefer SSE regs only, if we can use them for math.  */
16791   if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
16792     return SSE_CLASS_P (class) ? class : NO_REGS;
16793 
16794   /* Floating-point constants need more complex checks.  */
16795   if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
16796     {
16797       /* General regs can load everything.  */
16798       if (reg_class_subset_p (class, GENERAL_REGS))
16799         return class;
16800 
16801       /* Floats can load 0 and 1 plus some others.  Note that we eliminated
16802 	 zero above.  We only want to wind up preferring 80387 registers if
16803 	 we plan on doing computation with them.  */
16804       if (TARGET_80387
16805 	  && standard_80387_constant_p (x))
16806 	{
16807 	  /* Limit class to non-sse.  */
16808 	  if (class == FLOAT_SSE_REGS)
16809 	    return FLOAT_REGS;
16810 	  if (class == FP_TOP_SSE_REGS)
16811 	    return FP_TOP_REG;
16812 	  if (class == FP_SECOND_SSE_REGS)
16813 	    return FP_SECOND_REG;
16814 	  if (class == FLOAT_INT_REGS || class == FLOAT_REGS)
16815 	    return class;
16816 	}
16817 
16818       return NO_REGS;
16819     }
16820 
16821   /* Generally when we see PLUS here, it's the function invariant
16822      (plus soft-fp const_int), which can only be computed into general
16823      regs.  */
16824   if (GET_CODE (x) == PLUS)
16825     return reg_class_subset_p (class, GENERAL_REGS) ? class : NO_REGS;
16826 
16827   /* QImode constants are easy to load, but non-constant QImode data
16828      must go into Q_REGS.  */
16829   if (GET_MODE (x) == QImode && !CONSTANT_P (x))
16830     {
16831       if (reg_class_subset_p (class, Q_REGS))
16832 	return class;
16833       if (reg_class_subset_p (Q_REGS, class))
16834 	return Q_REGS;
16835       return NO_REGS;
16836     }
16837 
16838   return class;
16839 }
16840 
16841 /* Discourage putting floating-point values in SSE registers unless
16842    SSE math is being used, and likewise for the 387 registers.  */
16843 enum reg_class
16844 ix86_preferred_output_reload_class (rtx x, enum reg_class class)
16845 {
16846   enum machine_mode mode = GET_MODE (x);
16847 
16848   /* Restrict the output reload class to the register bank that we are doing
16849      math on.  Rather than return a class that is not a subset of CLASS, reject
16850      the alternative: if reload cannot do this, it will still use its choice.  */
16851   mode = GET_MODE (x);
16852   if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
16853     return MAYBE_SSE_CLASS_P (class) ? SSE_REGS : NO_REGS;
16854 
16855   if (TARGET_80387 && SCALAR_FLOAT_MODE_P (mode))
16856     {
16857       if (class == FP_TOP_SSE_REGS)
16858 	return FP_TOP_REG;
16859       else if (class == FP_SECOND_SSE_REGS)
16860 	return FP_SECOND_REG;
16861       else
16862 	return FLOAT_CLASS_P (class) ? class : NO_REGS;
16863     }
16864 
16865   return class;
16866 }
16867 
16868 /* If we are copying between general and FP registers, we need a memory
16869    location. The same is true for SSE and MMX registers.
16870 
16871    The macro can't work reliably when one of the CLASSES is a class containing
16872    registers from multiple units (SSE, MMX, integer).  We avoid this by never
16873    combining those units in a single alternative in the machine description.
16874    Ensure that this constraint holds to avoid surprises.
16875 
16876    When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
16877    enforce these sanity checks.  */
16878 
16879 int
16880 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
16881 			      enum machine_mode mode, int strict)
16882 {
16883   if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
16884       || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
16885       || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
16886       || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
16887       || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
16888       || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
16889     {
16890       gcc_assert (!strict);
16891       return true;
16892     }
16893 
16894   if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
16895     return true;
16896 
16897   /* ??? This is a lie.  We do have moves between mmx/general, and between
16898      mmx/sse2.  But by saying we need secondary memory we discourage the
16899      register allocator from using the mmx registers unless needed.  */
16900   if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
16901     return true;
16902 
16903   if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
16904     {
16905       /* SSE1 doesn't have any direct moves from other classes.  */
16906       if (!TARGET_SSE2)
16907 	return true;
16908 
16909       /* If the target says that inter-unit moves are more expensive
16910 	 than moving through memory, then don't generate them.  */
16911       if (!TARGET_INTER_UNIT_MOVES && !optimize_size)
16912 	return true;
16913 
16914       /* Between SSE and general, we have moves no larger than word size.  */
16915       if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
16916 	return true;
16917 
16918       /* ??? For the cost of one register reformat penalty, we could use
16919 	 the same instructions to move SFmode and DFmode data, but the
16920 	 relevant move patterns don't support those alternatives.  */
16921       if (mode == SFmode || mode == DFmode)
16922 	return true;
16923     }
16924 
16925   return false;
16926 }
16927 
16928 /* Return true if the registers in CLASS cannot represent a change from
16929    mode FROM to mode TO.  */
16930 
16931 bool
16932 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
16933 			       enum reg_class class)
16934 {
16935   if (from == to)
16936     return false;
16937 
16938   /* x87 registers can't do subreg at all, as all values are reformatted
16939      to extended precision.  */
16940   if (MAYBE_FLOAT_CLASS_P (class))
16941     return true;
16942 
16943   if (MAYBE_SSE_CLASS_P (class) || MAYBE_MMX_CLASS_P (class))
16944     {
16945       /* Vector registers do not support QI or HImode loads.  If we don't
16946 	 disallow a change to these modes, reload will assume it's ok to
16947 	 drop the subreg from (subreg:SI (reg:HI 100) 0).  This affects
16948 	 the vec_dupv4hi pattern.  */
16949       if (GET_MODE_SIZE (from) < 4)
16950 	return true;
16951 
16952       /* Vector registers do not support subreg with nonzero offsets, which
16953 	 are otherwise valid for integer registers.  Since we can't see
16954 	 whether we have a nonzero offset from here, prohibit all
16955          nonparadoxical subregs changing size.  */
16956       if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
16957 	return true;
16958     }
16959 
16960   return false;
16961 }
16962 
16963 /* Return the cost of moving data from a register in class CLASS1 to
16964    one in class CLASS2.
16965 
16966    It is not required that the cost always equal 2 when CLASS1 is the same as
16967    CLASS2; on some machines it is expensive to move between registers if they
16968    are not general registers.  */
16969 
16970 int
16971 ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
16972 			 enum reg_class class2)
16973 {
16974   /* In case we require secondary memory, compute the cost of the store
16975      followed by the load.  To avoid bad register allocation choices, we need
16976      this to be *at least* as high as the symmetric MEMORY_MOVE_COST.  */
16977 
16978   if (ix86_secondary_memory_needed (class1, class2, mode, 0))
16979     {
16980       int cost = 1;
16981 
16982       cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
16983 		   MEMORY_MOVE_COST (mode, class1, 1));
16984       cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
16985 		   MEMORY_MOVE_COST (mode, class2, 1));
16986 
16987       /* When copying from a general purpose register we may emit multiple
16988          stores followed by a single load, causing a memory size mismatch stall.
16989          Count this as an arbitrarily high cost of 20.  */
16990       if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
16991 	cost += 20;
16992 
16993       /* In the case of FP/MMX moves, the registers actually overlap, and we
16994 	 have to switch modes in order to treat them differently.  */
16995       if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
16996           || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
16997 	cost += 20;
16998 
16999       return cost;
17000     }
17001 
17002   /* Moves between SSE/MMX and integer unit are expensive.  */
17003   if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
17004       || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
17005     return ix86_cost->mmxsse_to_integer;
17006   if (MAYBE_FLOAT_CLASS_P (class1))
17007     return ix86_cost->fp_move;
17008   if (MAYBE_SSE_CLASS_P (class1))
17009     return ix86_cost->sse_move;
17010   if (MAYBE_MMX_CLASS_P (class1))
17011     return ix86_cost->mmx_move;
17012   return 2;
17013 }
17014 
17015 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE.  */
17016 
17017 bool
17018 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
17019 {
17020   /* Only the flags register can hold CCmode values, and it holds nothing else.  */
17021   if (CC_REGNO_P (regno))
17022     return GET_MODE_CLASS (mode) == MODE_CC;
17023   if (GET_MODE_CLASS (mode) == MODE_CC
17024       || GET_MODE_CLASS (mode) == MODE_RANDOM
17025       || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
17026     return 0;
17027   if (FP_REGNO_P (regno))
17028     return VALID_FP_MODE_P (mode);
17029   if (SSE_REGNO_P (regno))
17030     {
17031       /* We implement the move patterns for all vector modes into and
17032 	 out of SSE registers, even when no operation instructions
17033 	 are available.  */
17034       return (VALID_SSE_REG_MODE (mode)
17035 	      || VALID_SSE2_REG_MODE (mode)
17036 	      || VALID_MMX_REG_MODE (mode)
17037 	      || VALID_MMX_REG_MODE_3DNOW (mode));
17038     }
17039   if (MMX_REGNO_P (regno))
17040     {
17041       /* We implement the move patterns for 3DNOW modes even in MMX mode,
17042 	 so if the register is available at all, then we can move data of
17043 	 the given mode into or out of it.  */
17044       return (VALID_MMX_REG_MODE (mode)
17045 	      || VALID_MMX_REG_MODE_3DNOW (mode));
17046     }
17047 
17048   if (mode == QImode)
17049     {
17050       /* Take care with QImode values: they can be in non-QI regs,
17051 	 but then they do cause partial register stalls.  */
17052       if (regno < 4 || TARGET_64BIT)
17053 	return 1;
17054       if (!TARGET_PARTIAL_REG_STALL)
17055 	return 1;
17056       return reload_in_progress || reload_completed;
17057     }
17058   /* We handle both integers and floats in the general purpose registers.  */
17059   else if (VALID_INT_MODE_P (mode))
17060     return 1;
17061   else if (VALID_FP_MODE_P (mode))
17062     return 1;
17063   /* Lots of MMX code casts 8 byte vector modes to DImode.  If we then go
17064      on to use that value in smaller contexts, this can easily force a
17065      pseudo to be allocated to GENERAL_REGS.  Since this is no worse than
17066      supporting DImode, allow it.  */
17067   else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
17068     return 1;
17069 
17070   return 0;
17071 }
17072 
17073 /* A subroutine of ix86_modes_tieable_p.  Return true if MODE is a
17074    tieable integer mode.  */
17075 
17076 static bool
17077 ix86_tieable_integer_mode_p (enum machine_mode mode)
17078 {
17079   switch (mode)
17080     {
17081     case HImode:
17082     case SImode:
17083       return true;
17084 
17085     case QImode:
17086       return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
17087 
17088     case DImode:
17089       return TARGET_64BIT;
17090 
17091     default:
17092       return false;
17093     }
17094 }
17095 
17096 /* Return true if MODE1 is accessible in a register that can hold MODE2
17097    without copying.  That is, all register classes that can hold MODE2
17098    can also hold MODE1.  */
17099 
17100 bool
17101 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
17102 {
17103   if (mode1 == mode2)
17104     return true;
17105 
17106   if (ix86_tieable_integer_mode_p (mode1)
17107       && ix86_tieable_integer_mode_p (mode2))
17108     return true;
17109 
17110   /* MODE2 being XFmode implies fp stack or general regs, which means we
17111      can tie any smaller floating point modes to it.  Note that we do not
17112      tie this with TFmode.  */
17113   if (mode2 == XFmode)
17114     return mode1 == SFmode || mode1 == DFmode;
17115 
17116   /* MODE2 being DFmode implies fp stack, general or sse regs, which means
17117      that we can tie it with SFmode.  */
17118   if (mode2 == DFmode)
17119     return mode1 == SFmode;
17120 
17121   /* If MODE2 is only appropriate for an SSE register, then tie with
17122      any other mode acceptable to SSE registers.  */
17123   if (GET_MODE_SIZE (mode2) >= 8
17124       && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
17125     return ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1);
17126 
17127   /* If MODE2 is appropriate for an MMX (or SSE) register, then tie
17128      with any other mode acceptable to MMX registers.  */
17129   if (GET_MODE_SIZE (mode2) == 8
17130       && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
17131     return ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1);
17132 
17133   return false;
17134 }
17135 
17136 /* Return the cost of moving data of mode M between a
17137    register and memory.  A value of 2 is the default; this cost is
17138    relative to those in `REGISTER_MOVE_COST'.
17139 
17140    If moving between registers and memory is more expensive than
17141    between two registers, you should define this macro to express the
17142    relative cost.
17143 
17144    Also model the increased cost of moving QImode values in registers
17145    outside the Q_REGS classes.
17146  */
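/* For example, moving an SFmode value between an x87 register and memory
   costs fp_load[0] or fp_store[0], while a 16 byte vector in SSE_REGS
   costs sse_load[2] or sse_store[2].  */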
17147 int
17148 ix86_memory_move_cost (enum machine_mode mode, enum reg_class class, int in)
17149 {
17150   if (FLOAT_CLASS_P (class))
17151     {
17152       int index;
17153       switch (mode)
17154 	{
17155 	  case SFmode:
17156 	    index = 0;
17157 	    break;
17158 	  case DFmode:
17159 	    index = 1;
17160 	    break;
17161 	  case XFmode:
17162 	    index = 2;
17163 	    break;
17164 	  default:
17165 	    return 100;
17166 	}
17167       return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
17168     }
17169   if (SSE_CLASS_P (class))
17170     {
17171       int index;
17172       switch (GET_MODE_SIZE (mode))
17173 	{
17174 	  case 4:
17175 	    index = 0;
17176 	    break;
17177 	  case 8:
17178 	    index = 1;
17179 	    break;
17180 	  case 16:
17181 	    index = 2;
17182 	    break;
17183 	  default:
17184 	    return 100;
17185 	}
17186       return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
17187     }
17188   if (MMX_CLASS_P (class))
17189     {
17190       int index;
17191       switch (GET_MODE_SIZE (mode))
17192 	{
17193 	  case 4:
17194 	    index = 0;
17195 	    break;
17196 	  case 8:
17197 	    index = 1;
17198 	    break;
17199 	  default:
17200 	    return 100;
17201 	}
17202       return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
17203     }
17204   switch (GET_MODE_SIZE (mode))
17205     {
17206       case 1:
17207 	if (in)
17208 	  return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
17209 		  : ix86_cost->movzbl_load);
17210 	else
17211 	  return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
17212 		  : ix86_cost->int_store[0] + 4);
17213 	break;
17214       case 2:
17215 	return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
17216       default:
17217 	/* Compute number of 32bit moves needed.  TFmode is moved as XFmode.  */
17218 	if (mode == TFmode)
17219 	  mode = XFmode;
17220 	return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
17221 		* (((int) GET_MODE_SIZE (mode)
17222 		    + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
17223     }
17224 }
17225 
17226 /* Compute a (partial) cost for rtx X.  Return true if the complete
17227    cost has been computed, and false if subexpressions should be
17228    scanned.  In either case, *TOTAL contains the cost result.  */
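/* For instance, (mult:SI (reg) (const_int 20)) is costed below as
   mult_init[2] + 2 * mult_bit plus the cost of the two operands, since
   MODE_INDEX (SImode) is 2 and 20 has two bits set.  */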
17229 
17230 static bool
17231 ix86_rtx_costs (rtx x, int code, int outer_code, int *total)
17232 {
17233   enum machine_mode mode = GET_MODE (x);
17234 
17235   switch (code)
17236     {
17237     case CONST_INT:
17238     case CONST:
17239     case LABEL_REF:
17240     case SYMBOL_REF:
17241       if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
17242 	*total = 3;
17243       else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
17244 	*total = 2;
17245       else if (flag_pic && SYMBOLIC_CONST (x)
17246 	       && (!TARGET_64BIT
17247 		   || (GET_CODE (x) != LABEL_REF
17248 		       && (GET_CODE (x) != SYMBOL_REF
17249 		           || !SYMBOL_REF_LOCAL_P (x)))))
17250 	*total = 1;
17251       else
17252 	*total = 0;
17253       return true;
17254 
17255     case CONST_DOUBLE:
17256       if (mode == VOIDmode)
17257 	*total = 0;
17258       else
17259 	switch (standard_80387_constant_p (x))
17260 	  {
17261 	  case 1: /* 0.0 */
17262 	    *total = 1;
17263 	    break;
17264 	  default: /* Other constants */
17265 	    *total = 2;
17266 	    break;
17267 	  case 0:
17268 	  case -1:
17269 	    /* Start with (MEM (SYMBOL_REF)), since that's where
17270 	       it'll probably end up.  Add a penalty for size.  */
17271 	    *total = (COSTS_N_INSNS (1)
17272 		      + (flag_pic != 0 && !TARGET_64BIT)
17273 		      + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
17274 	    break;
17275 	  }
17276       return true;
17277 
17278     case ZERO_EXTEND:
17279       /* The zero extension is often completely free on x86_64, so make
17280 	 it as cheap as possible.  */
17281       if (TARGET_64BIT && mode == DImode
17282 	  && GET_MODE (XEXP (x, 0)) == SImode)
17283 	*total = 1;
17284       else if (TARGET_ZERO_EXTEND_WITH_AND)
17285 	*total = ix86_cost->add;
17286       else
17287 	*total = ix86_cost->movzx;
17288       return false;
17289 
17290     case SIGN_EXTEND:
17291       *total = ix86_cost->movsx;
17292       return false;
17293 
17294     case ASHIFT:
17295       if (GET_CODE (XEXP (x, 1)) == CONST_INT
17296 	  && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
17297 	{
17298 	  HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
17299 	  if (value == 1)
17300 	    {
17301 	      *total = ix86_cost->add;
17302 	      return false;
17303 	    }
17304 	  if ((value == 2 || value == 3)
17305 	      && ix86_cost->lea <= ix86_cost->shift_const)
17306 	    {
17307 	      *total = ix86_cost->lea;
17308 	      return false;
17309 	    }
17310 	}
17311       /* FALLTHRU */
17312 
17313     case ROTATE:
17314     case ASHIFTRT:
17315     case LSHIFTRT:
17316     case ROTATERT:
17317       if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
17318 	{
17319 	  if (GET_CODE (XEXP (x, 1)) == CONST_INT)
17320 	    {
17321 	      if (INTVAL (XEXP (x, 1)) > 32)
17322 		*total = ix86_cost->shift_const + COSTS_N_INSNS (2);
17323 	      else
17324 		*total = ix86_cost->shift_const * 2;
17325 	    }
17326 	  else
17327 	    {
17328 	      if (GET_CODE (XEXP (x, 1)) == AND)
17329 		*total = ix86_cost->shift_var * 2;
17330 	      else
17331 		*total = ix86_cost->shift_var * 6 + COSTS_N_INSNS (2);
17332 	    }
17333 	}
17334       else
17335 	{
17336 	  if (GET_CODE (XEXP (x, 1)) == CONST_INT)
17337 	    *total = ix86_cost->shift_const;
17338 	  else
17339 	    *total = ix86_cost->shift_var;
17340 	}
17341       return false;
17342 
17343     case MULT:
17344       if (FLOAT_MODE_P (mode))
17345 	{
17346 	  *total = ix86_cost->fmul;
17347 	  return false;
17348 	}
17349       else
17350 	{
17351 	  rtx op0 = XEXP (x, 0);
17352 	  rtx op1 = XEXP (x, 1);
17353 	  int nbits;
17354 	  if (GET_CODE (XEXP (x, 1)) == CONST_INT)
17355 	    {
17356 	      unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
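	      /* Count the set bits of the constant multiplier;
		 value &= value - 1 clears the lowest set bit each time.  */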
17357 	      for (nbits = 0; value != 0; value &= value - 1)
17358 	        nbits++;
17359 	    }
17360 	  else
17361 	    /* This is arbitrary.  */
17362 	    nbits = 7;
17363 
17364 	  /* Compute costs correctly for widening multiplication.  */
17365 	  if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
17366 	      && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
17367 	         == GET_MODE_SIZE (mode))
17368 	    {
17369 	      int is_mulwiden = 0;
17370 	      enum machine_mode inner_mode = GET_MODE (op0);
17371 
17372 	      if (GET_CODE (op0) == GET_CODE (op1))
17373 		is_mulwiden = 1, op1 = XEXP (op1, 0);
17374 	      else if (GET_CODE (op1) == CONST_INT)
17375 		{
17376 		  if (GET_CODE (op0) == SIGN_EXTEND)
17377 		    is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
17378 			          == INTVAL (op1);
17379 		  else
17380 		    is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
17381 	        }
17382 
17383 	      if (is_mulwiden)
17384 	        op0 = XEXP (op0, 0), mode = GET_MODE (op0);
17385 	    }
17386 
17387   	  *total = (ix86_cost->mult_init[MODE_INDEX (mode)]
17388 		    + nbits * ix86_cost->mult_bit
17389 	            + rtx_cost (op0, outer_code) + rtx_cost (op1, outer_code));
17390 
17391           return true;
17392 	}
17393 
17394     case DIV:
17395     case UDIV:
17396     case MOD:
17397     case UMOD:
17398       if (FLOAT_MODE_P (mode))
17399 	*total = ix86_cost->fdiv;
17400       else
17401 	*total = ix86_cost->divide[MODE_INDEX (mode)];
17402       return false;
17403 
17404     case PLUS:
17405       if (FLOAT_MODE_P (mode))
17406 	*total = ix86_cost->fadd;
17407       else if (GET_MODE_CLASS (mode) == MODE_INT
17408 	       && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
17409 	{
17410 	  if (GET_CODE (XEXP (x, 0)) == PLUS
17411 	      && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
17412 	      && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
17413 	      && CONSTANT_P (XEXP (x, 1)))
17414 	    {
17415 	      HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
17416 	      if (val == 2 || val == 4 || val == 8)
17417 		{
17418 		  *total = ix86_cost->lea;
17419 		  *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
17420 		  *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
17421 				      outer_code);
17422 		  *total += rtx_cost (XEXP (x, 1), outer_code);
17423 		  return true;
17424 		}
17425 	    }
17426 	  else if (GET_CODE (XEXP (x, 0)) == MULT
17427 		   && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
17428 	    {
17429 	      HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
17430 	      if (val == 2 || val == 4 || val == 8)
17431 		{
17432 		  *total = ix86_cost->lea;
17433 		  *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
17434 		  *total += rtx_cost (XEXP (x, 1), outer_code);
17435 		  return true;
17436 		}
17437 	    }
17438 	  else if (GET_CODE (XEXP (x, 0)) == PLUS)
17439 	    {
17440 	      *total = ix86_cost->lea;
17441 	      *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
17442 	      *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
17443 	      *total += rtx_cost (XEXP (x, 1), outer_code);
17444 	      return true;
17445 	    }
17446 	}
17447       /* FALLTHRU */
17448 
17449     case MINUS:
17450       if (FLOAT_MODE_P (mode))
17451 	{
17452 	  *total = ix86_cost->fadd;
17453 	  return false;
17454 	}
17455       /* FALLTHRU */
17456 
17457     case AND:
17458     case IOR:
17459     case XOR:
17460       if (!TARGET_64BIT && mode == DImode)
17461 	{
17462 	  *total = (ix86_cost->add * 2
17463 		    + (rtx_cost (XEXP (x, 0), outer_code)
17464 		       << (GET_MODE (XEXP (x, 0)) != DImode))
17465 		    + (rtx_cost (XEXP (x, 1), outer_code)
17466 	               << (GET_MODE (XEXP (x, 1)) != DImode)));
17467 	  return true;
17468 	}
17469       /* FALLTHRU */
17470 
17471     case NEG:
17472       if (FLOAT_MODE_P (mode))
17473 	{
17474 	  *total = ix86_cost->fchs;
17475 	  return false;
17476 	}
17477       /* FALLTHRU */
17478 
17479     case NOT:
17480       if (!TARGET_64BIT && mode == DImode)
17481 	*total = ix86_cost->add * 2;
17482       else
17483 	*total = ix86_cost->add;
17484       return false;
17485 
17486     case COMPARE:
17487       if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
17488 	  && XEXP (XEXP (x, 0), 1) == const1_rtx
17489 	  && GET_CODE (XEXP (XEXP (x, 0), 2)) == CONST_INT
17490 	  && XEXP (x, 1) == const0_rtx)
17491 	{
17492 	  /* This kind of construct is implemented using test[bwl].
17493 	     Treat it as if we had an AND.  */
17494 	  *total = (ix86_cost->add
17495 		    + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code)
17496 		    + rtx_cost (const1_rtx, outer_code));
17497 	  return true;
17498 	}
17499       return false;
17500 
17501     case FLOAT_EXTEND:
17502       if (!TARGET_SSE_MATH
17503 	  || mode == XFmode
17504 	  || (mode == DFmode && !TARGET_SSE2))
17505 	/* For standard 80387 constants, raise the cost to prevent
17506 	   compress_float_constant() from generating a load from memory.  */
17507 	switch (standard_80387_constant_p (XEXP (x, 0)))
17508 	  {
17509 	  case -1:
17510 	  case 0:
17511 	    *total = 0;
17512 	    break;
17513 	  case 1: /* 0.0 */
17514 	    *total = 1;
17515 	    break;
17516 	  default:
17517 	    *total = (x86_ext_80387_constants & TUNEMASK
17518 		      || optimize_size
17519 		      ? 1 : 0);
17520 	  }
17521       return false;
17522 
17523     case ABS:
17524       if (FLOAT_MODE_P (mode))
17525 	*total = ix86_cost->fabs;
17526       return false;
17527 
17528     case SQRT:
17529       if (FLOAT_MODE_P (mode))
17530 	*total = ix86_cost->fsqrt;
17531       return false;
17532 
17533     case UNSPEC:
17534       if (XINT (x, 1) == UNSPEC_TP)
17535 	*total = 0;
17536       return false;
17537 
17538     default:
17539       return false;
17540     }
17541 }
17542 
17543 #if TARGET_MACHO
17544 
17545 static int current_machopic_label_num;
17546 
17547 /* Given a symbol name and its associated stub, write out the
17548    definition of the stub.  */
17549 
17550 void
17551 machopic_output_stub (FILE *file, const char *symb, const char *stub)
17552 {
17553   unsigned int length;
17554   char *binder_name, *symbol_name, lazy_ptr_name[32];
17555   int label = ++current_machopic_label_num;
17556 
17557   /* For 64-bit we shouldn't get here.  */
17558   gcc_assert (!TARGET_64BIT);
17559 
17560   /* Lose our funky encoding stuff so it doesn't contaminate the stub.  */
17561   symb = (*targetm.strip_name_encoding) (symb);
17562 
17563   length = strlen (stub);
17564   binder_name = alloca (length + 32);
17565   GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
17566 
17567   length = strlen (symb);
17568   symbol_name = alloca (length + 32);
17569   GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
17570 
17571   sprintf (lazy_ptr_name, "L%d$lz", label);
17572 
17573   if (MACHOPIC_PURE)
17574     switch_to_section (darwin_sections[machopic_picsymbol_stub_section]);
17575   else
17576     switch_to_section (darwin_sections[machopic_symbol_stub_section]);
17577 
17578   fprintf (file, "%s:\n", stub);
17579   fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
17580 
17581   if (MACHOPIC_PURE)
17582     {
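      /* Pure PIC stub: pick up the PC in %eax with a call/pop pair, load
	 the word at the lazy pointer %eax-relative into %edx and jump
	 through it.  */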
17583       fprintf (file, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label, label);
17584       fprintf (file, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
17585       fprintf (file, "\tjmp\t*%%edx\n");
17586     }
17587   else
17588     fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
17589 
17590   fprintf (file, "%s:\n", binder_name);
17591 
17592   if (MACHOPIC_PURE)
17593     {
17594       fprintf (file, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
17595       fprintf (file, "\tpushl\t%%eax\n");
17596     }
17597   else
17598     fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
17599 
17600   fprintf (file, "\tjmp\tdyld_stub_binding_helper\n");
17601 
17602   switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
17603   fprintf (file, "%s:\n", lazy_ptr_name);
17604   fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
17605   fprintf (file, "\t.long %s\n", binder_name);
17606 }
17607 
17608 void
17609 darwin_x86_file_end (void)
17610 {
17611   darwin_file_end ();
17612   ix86_file_end ();
17613 }
17614 #endif /* TARGET_MACHO */
17615 
17616 /* Order the registers for register allocator.  */
17617 
17618 void
17619 x86_order_regs_for_local_alloc (void)
17620 {
17621    int pos = 0;
17622    int i;
17623 
17624    /* First allocate the local general purpose registers.  */
17625    for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
17626      if (GENERAL_REGNO_P (i) && call_used_regs[i])
17627 	reg_alloc_order [pos++] = i;
17628 
17629    /* Global general purpose registers.  */
17630    for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
17631      if (GENERAL_REGNO_P (i) && !call_used_regs[i])
17632 	reg_alloc_order [pos++] = i;
17633 
17634    /* x87 registers come first in case we are doing FP math
17635       using them.  */
17636    if (!TARGET_SSE_MATH)
17637      for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
17638        reg_alloc_order [pos++] = i;
17639 
17640    /* SSE registers.  */
17641    for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
17642      reg_alloc_order [pos++] = i;
17643    for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
17644      reg_alloc_order [pos++] = i;
17645 
17646    /* x87 registers.  */
17647    if (TARGET_SSE_MATH)
17648      for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
17649        reg_alloc_order [pos++] = i;
17650 
17651    for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
17652      reg_alloc_order [pos++] = i;
17653 
17654    /* Initialize the rest of the array, as some registers are never
17655       allocated at all.  */
17656    while (pos < FIRST_PSEUDO_REGISTER)
17657      reg_alloc_order [pos++] = 0;
17658 }
17659 
17660 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
17661    struct attribute_spec.handler.  */
17662 static tree
17663 ix86_handle_struct_attribute (tree *node, tree name,
17664 			      tree args ATTRIBUTE_UNUSED,
17665 			      int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
17666 {
17667   tree *type = NULL;
17668   if (DECL_P (*node))
17669     {
17670       if (TREE_CODE (*node) == TYPE_DECL)
17671 	type = &TREE_TYPE (*node);
17672     }
17673   else
17674     type = node;
17675 
17676   if (!(type && (TREE_CODE (*type) == RECORD_TYPE
17677 		 || TREE_CODE (*type) == UNION_TYPE)))
17678     {
17679       warning (OPT_Wattributes, "%qs attribute ignored",
17680 	       IDENTIFIER_POINTER (name));
17681       *no_add_attrs = true;
17682     }
17683 
17684   else if ((is_attribute_p ("ms_struct", name)
17685 	    && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
17686 	   || ((is_attribute_p ("gcc_struct", name)
17687 		&& lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
17688     {
17689       warning (OPT_Wattributes, "%qs incompatible attribute ignored",
17690                IDENTIFIER_POINTER (name));
17691       *no_add_attrs = true;
17692     }
17693 
17694   return NULL_TREE;
17695 }
17696 
17697 static bool
17698 ix86_ms_bitfield_layout_p (tree record_type)
17699 {
17700   return (TARGET_MS_BITFIELD_LAYOUT &&
17701 	  !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
17702     || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
17703 }
17704 
17705 /* Returns an expression indicating where the this parameter is
17706    located on entry to the FUNCTION.  */
17707 
17708 static rtx
17709 x86_this_parameter (tree function)
17710 {
17711   tree type = TREE_TYPE (function);
17712 
17713   if (TARGET_64BIT)
17714     {
17715       int n = aggregate_value_p (TREE_TYPE (type), type) != 0;
17716       return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
17717     }
17718 
17719   if (ix86_function_regparm (type, function) > 0)
17720     {
17721       tree parm;
17722 
17723       parm = TYPE_ARG_TYPES (type);
17724       /* Figure out whether or not the function has a variable number of
17725 	 arguments.  */
17726       for (; parm; parm = TREE_CHAIN (parm))
17727 	if (TREE_VALUE (parm) == void_type_node)
17728 	  break;
17729       /* If not, the this parameter is in the first argument.  */
17730       if (parm)
17731 	{
17732 	  int regno = 0;
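	  /* With regparm the first (this) argument arrives in %eax;
	     fastcall passes it in %ecx, hard register 2.  */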
17733 	  if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
17734 	    regno = 2;
17735 	  return gen_rtx_REG (SImode, regno);
17736 	}
17737     }
17738 
17739   if (aggregate_value_p (TREE_TYPE (type), type))
17740     return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
17741   else
17742     return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
17743 }
17744 
17745 /* Determine whether x86_output_mi_thunk can succeed.  */
17746 
17747 static bool
17748 x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED,
17749 			 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
17750 			 HOST_WIDE_INT vcall_offset, tree function)
17751 {
17752   /* 64-bit can handle anything.  */
17753   if (TARGET_64BIT)
17754     return true;
17755 
17756   /* For 32-bit, everything's fine if we have one free register.  */
17757   if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
17758     return true;
17759 
17760   /* Need a free register for vcall_offset.  */
17761   if (vcall_offset)
17762     return false;
17763 
17764   /* Need a free register for GOT references.  */
17765   if (flag_pic && !(*targetm.binds_local_p) (function))
17766     return false;
17767 
17768   /* Otherwise ok.  */
17769   return true;
17770 }
17771 
17772 /* Output the assembler code for a thunk function.  THUNK_DECL is the
17773    declaration for the thunk function itself, FUNCTION is the decl for
17774    the target function.  DELTA is an immediate constant offset to be
17775    added to THIS.  If VCALL_OFFSET is nonzero, the word at
17776    *(*this + vcall_offset) should be added to THIS.  */
17777 
17778 static void
17779 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
17780 		     tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
17781 		     HOST_WIDE_INT vcall_offset, tree function)
17782 {
17783   rtx xops[3];
17784   rtx this = x86_this_parameter (function);
17785   rtx this_reg, tmp;
17786 
17787   /* If VCALL_OFFSET, we'll need THIS in a register.  Might as well
17788      pull it in now and let DELTA benefit.  */
17789   if (REG_P (this))
17790     this_reg = this;
17791   else if (vcall_offset)
17792     {
17793       /* Put the this parameter into %eax.  */
17794       xops[0] = this;
17795       xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
17796       output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
17797     }
17798   else
17799     this_reg = NULL_RTX;
17800 
17801   /* Adjust the this parameter by a fixed constant.  */
17802   if (delta)
17803     {
17804       xops[0] = GEN_INT (delta);
17805       xops[1] = this_reg ? this_reg : this;
17806       if (TARGET_64BIT)
17807 	{
17808 	  if (!x86_64_general_operand (xops[0], DImode))
17809 	    {
17810 	      tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
17811 	      xops[1] = tmp;
17812 	      output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
17813 	      xops[0] = tmp;
17814 	      xops[1] = this;
17815 	    }
17816 	  output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
17817 	}
17818       else
17819 	output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
17820     }
17821 
17822   /* Adjust the this parameter by a value stored in the vtable.  */
17823   if (vcall_offset)
17824     {
17825       if (TARGET_64BIT)
17826 	tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
17827       else
17828 	{
17829 	  int tmp_regno = 2 /* ECX */;
17830 	  if (lookup_attribute ("fastcall",
17831 	      TYPE_ATTRIBUTES (TREE_TYPE (function))))
17832 	    tmp_regno = 0 /* EAX */;
17833 	  tmp = gen_rtx_REG (SImode, tmp_regno);
17834 	}
17835 
17836       xops[0] = gen_rtx_MEM (Pmode, this_reg);
17837       xops[1] = tmp;
17838       if (TARGET_64BIT)
17839 	output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
17840       else
17841 	output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
17842 
17843       /* Adjust the this parameter.  */
17844       xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
17845       if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
17846 	{
17847 	  rtx tmp2 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
17848 	  xops[0] = GEN_INT (vcall_offset);
17849 	  xops[1] = tmp2;
17850 	  output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
17851 	  xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
17852 	}
17853       xops[1] = this_reg;
17854       if (TARGET_64BIT)
17855 	output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
17856       else
17857 	output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
17858     }
17859 
17860   /* If necessary, drop THIS back to its stack slot.  */
17861   if (this_reg && this_reg != this)
17862     {
17863       xops[0] = this_reg;
17864       xops[1] = this;
17865       output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
17866     }
17867 
17868   xops[0] = XEXP (DECL_RTL (function), 0);
17869   if (TARGET_64BIT)
17870     {
17871       if (!flag_pic || (*targetm.binds_local_p) (function))
17872 	output_asm_insn ("jmp\t%P0", xops);
17873       else
17874 	{
17875 	  tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
17876 	  tmp = gen_rtx_CONST (Pmode, tmp);
17877 	  tmp = gen_rtx_MEM (QImode, tmp);
17878 	  xops[0] = tmp;
17879 	  output_asm_insn ("jmp\t%A0", xops);
17880 	}
17881     }
17882   else
17883     {
17884       if (!flag_pic || (*targetm.binds_local_p) (function))
17885 	output_asm_insn ("jmp\t%P0", xops);
17886       else
17887 #if TARGET_MACHO
17888 	if (TARGET_MACHO)
17889 	  {
17890 	    rtx sym_ref = XEXP (DECL_RTL (function), 0);
17891 	    tmp = (gen_rtx_SYMBOL_REF
17892 		   (Pmode,
17893 		    machopic_indirection_name (sym_ref, /*stub_p=*/true)));
17894 	    tmp = gen_rtx_MEM (QImode, tmp);
17895 	    xops[0] = tmp;
17896 	    output_asm_insn ("jmp\t%0", xops);
17897 	  }
17898 	else
17899 #endif /* TARGET_MACHO */
17900 	{
17901 	  tmp = gen_rtx_REG (SImode, 2 /* ECX */);
17902 	  output_set_got (tmp, NULL_RTX);
17903 
17904 	  xops[1] = tmp;
17905 	  output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
17906 	  output_asm_insn ("jmp\t{*}%1", xops);
17907 	}
17908     }
17909 }
17910 
17911 static void
17912 x86_file_start (void)
17913 {
17914   default_file_start ();
17915 #if TARGET_MACHO
17916   darwin_file_start ();
17917 #endif
17918   if (X86_FILE_START_VERSION_DIRECTIVE)
17919     fputs ("\t.version\t\"01.01\"\n", asm_out_file);
17920   if (X86_FILE_START_FLTUSED)
17921     fputs ("\t.global\t__fltused\n", asm_out_file);
17922   if (ix86_asm_dialect == ASM_INTEL)
17923     fputs ("\t.intel_syntax\n", asm_out_file);
17924 }
17925 
17926 int
17927 x86_field_alignment (tree field, int computed)
17928 {
17929   enum machine_mode mode;
17930   tree type = TREE_TYPE (field);
17931 
17932   if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
17933     return computed;
17934   mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
17935 		    ? get_inner_array_type (type) : type);
17936   if (mode == DFmode || mode == DCmode
17937       || GET_MODE_CLASS (mode) == MODE_INT
17938       || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
17939     return MIN (32, computed);
17940   return computed;
17941 }
17942 
17943 /* Output assembler code to FILE to increment profiler label # LABELNO
17944    for profiling a function entry.  */
17945 void
17946 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
17947 {
17948   if (TARGET_64BIT)
17949     if (flag_pic)
17950       {
17951 #ifndef NO_PROFILE_COUNTERS
17952 	fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
17953 #endif
17954 	fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
17955       }
17956     else
17957       {
17958 #ifndef NO_PROFILE_COUNTERS
17959 	fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno);
17960 #endif
17961 	fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
17962       }
17963   else if (flag_pic)
17964     {
17965 #ifndef NO_PROFILE_COUNTERS
17966       fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
17967 	       LPREFIX, labelno, PROFILE_COUNT_REGISTER);
17968 #endif
17969       fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
17970     }
17971   else
17972     {
17973 #ifndef NO_PROFILE_COUNTERS
17974       fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
17975 	       PROFILE_COUNT_REGISTER);
17976 #endif
17977       fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
17978     }
17979 }
17980 
17981 /* We don't have exact information about the insn sizes, but we may assume
17982    quite safely that we are informed about all 1 byte insns and memory
17983    address sizes.  This is enough to eliminate unnecessary padding in
17984    99% of cases.  */
17985 
17986 static int
17987 min_insn_size (rtx insn)
17988 {
17989   int l = 0;
17990 
17991   if (!INSN_P (insn) || !active_insn_p (insn))
17992     return 0;
17993 
17994   /* Discard the alignment insns we have emitted ourselves and jump tables.  */
17995   if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
17996       && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
17997     return 0;
17998   if (GET_CODE (insn) == JUMP_INSN
17999       && (GET_CODE (PATTERN (insn)) == ADDR_VEC
18000 	  || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
18001     return 0;
18002 
18003   /* Important case - calls are always 5 bytes.
18004      It is common to have many calls in a row.  */
18005   if (GET_CODE (insn) == CALL_INSN
18006       && symbolic_reference_mentioned_p (PATTERN (insn))
18007       && !SIBLING_CALL_P (insn))
18008     return 5;
18009   if (get_attr_length (insn) <= 1)
18010     return 1;
18011 
18012   /* For normal instructions we may rely on the sizes of addresses
18013      and the presence of a symbol to require 4 bytes of encoding.
18014      This is not the case for jumps where references are PC relative.  */
18015   if (GET_CODE (insn) != JUMP_INSN)
18016     {
18017       l = get_attr_length_address (insn);
18018       if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
18019 	l = 4;
18020     }
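  /* At least one opcode byte plus the estimated address bytes; without an
     estimate, assume a 2 byte insn.  */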
18021   if (l)
18022     return 1+l;
18023   else
18024     return 2;
18025 }
18026 
18027 /* The AMD K8 core mispredicts jumps when there are more than 3 jumps in a
18028    16 byte window.  */
18029 
18030 static void
18031 ix86_avoid_jump_misspredicts (void)
18032 {
18033   rtx insn, start = get_insns ();
18034   int nbytes = 0, njumps = 0;
18035   int isjump = 0;
18036 
18037   /* Look for all minimal intervals of instructions containing 4 jumps.
18038      The intervals are bounded by START and INSN.  NBYTES is the total
18039      size of instructions in the interval including INSN and not including
18040      START.  When NBYTES is smaller than 16, it is possible that the ends
18041      of START and INSN fall into the same 16 byte window.
18042 
18043      The smallest offset in the window at which INSN can start is the case
18044      where START ends at offset 0; the offset of INSN is then NBYTES - sizeof (INSN).
18045      We add a p2align to the 16 byte window with maxskip 17 - NBYTES + sizeof (INSN).
18046      */
18047   for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
18048     {
18049 
18050       nbytes += min_insn_size (insn);
18051       if (dump_file)
18052         fprintf(dump_file, "Insn %i estimated to %i bytes\n",
18053 		INSN_UID (insn), min_insn_size (insn));
18054       if ((GET_CODE (insn) == JUMP_INSN
18055 	   && GET_CODE (PATTERN (insn)) != ADDR_VEC
18056 	   && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
18057 	  || GET_CODE (insn) == CALL_INSN)
18058 	njumps++;
18059       else
18060 	continue;
18061 
18062       while (njumps > 3)
18063 	{
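	  /* Shrink the window from the front until it again holds at most
	     three jumps; ISJUMP records whether the insn just dropped was
	     itself a jump or call.  */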
18064 	  start = NEXT_INSN (start);
18065 	  if ((GET_CODE (start) == JUMP_INSN
18066 	       && GET_CODE (PATTERN (start)) != ADDR_VEC
18067 	       && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
18068 	      || GET_CODE (start) == CALL_INSN)
18069 	    njumps--, isjump = 1;
18070 	  else
18071 	    isjump = 0;
18072 	  nbytes -= min_insn_size (start);
18073 	}
18074       gcc_assert (njumps >= 0);
18075       if (dump_file)
18076         fprintf (dump_file, "Interval %i to %i has %i bytes\n",
18077 		INSN_UID (start), INSN_UID (insn), nbytes);
18078 
18079       if (njumps == 3 && isjump && nbytes < 16)
18080 	{
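	  /* A fourth jump just fell off the front of the window and the
	     remaining three, including INSN, still fit within 16 bytes of
	     it; pad so that INSN lands in the next 16 byte window.  */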
18081 	  int padsize = 15 - nbytes + min_insn_size (insn);
18082 
18083 	  if (dump_file)
18084 	    fprintf (dump_file, "Padding insn %i by %i bytes!\n",
18085 		     INSN_UID (insn), padsize);
18086           emit_insn_before (gen_align (GEN_INT (padsize)), insn);
18087 	}
18088     }
18089 }
18090 
18091 /* AMD Athlon works faster
18092    when RET is not the destination of a conditional jump and is not directly
18093    preceded by another jump instruction.  We avoid the penalty by inserting
18094    a NOP just before such RET instructions.  */
18095 static void
18096 ix86_pad_returns (void)
18097 {
18098   edge e;
18099   edge_iterator ei;
18100 
18101   FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
18102     {
18103       basic_block bb = e->src;
18104       rtx ret = BB_END (bb);
18105       rtx prev;
18106       bool replace = false;
18107 
18108       if (GET_CODE (ret) != JUMP_INSN || GET_CODE (PATTERN (ret)) != RETURN
18109 	  || !maybe_hot_bb_p (bb))
18110 	continue;
18111       for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
18112 	if (active_insn_p (prev) || GET_CODE (prev) == CODE_LABEL)
18113 	  break;
18114       if (prev && GET_CODE (prev) == CODE_LABEL)
18115 	{
18116 	  edge e;
18117 	  edge_iterator ei;
18118 
18119 	  FOR_EACH_EDGE (e, ei, bb->preds)
18120 	    if (EDGE_FREQUENCY (e) && e->src->index >= 0
18121 		&& !(e->flags & EDGE_FALLTHRU))
18122 	      replace = true;
18123 	}
18124       if (!replace)
18125 	{
18126 	  prev = prev_active_insn (ret);
18127 	  if (prev
18128 	      && ((GET_CODE (prev) == JUMP_INSN && any_condjump_p (prev))
18129 		  || GET_CODE (prev) == CALL_INSN))
18130 	    replace = true;
18131 	  /* Empty functions get a branch mispredict even when the jump
18132 	     destination is not visible to us.  */
18133 	  if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
18134 	    replace = true;
18135 	}
18136       if (replace)
18137 	{
18138 	  emit_insn_before (gen_return_internal_long (), ret);
18139 	  delete_insn (ret);
18140 	}
18141     }
18142 }
18143 
18144 /* Implement machine specific optimizations.  We implement padding of returns
18145    for K8 CPUs and a pass to avoid 4 jumps in a single 16 byte window.  */
18146 static void
18147 ix86_reorg (void)
18148 {
18149   if (TARGET_PAD_RETURNS && optimize && !optimize_size)
18150     ix86_pad_returns ();
18151   if (TARGET_FOUR_JUMP_LIMIT && optimize && !optimize_size)
18152     ix86_avoid_jump_misspredicts ();
18153 }
18154 
18155 /* Return nonzero when a QImode register that must be represented via a REX
18156    prefix is used.  */
18157 bool
18158 x86_extended_QIreg_mentioned_p (rtx insn)
18159 {
18160   int i;
18161   extract_insn_cached (insn);
18162   for (i = 0; i < recog_data.n_operands; i++)
18163     if (REG_P (recog_data.operand[i])
18164 	&& REGNO (recog_data.operand[i]) >= 4)
18165        return true;
18166   return false;
18167 }
18168 
18169 /* Return nonzero when P points to a register encoded via a REX prefix.
18170    Called via for_each_rtx.  */
18171 static int
18172 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
18173 {
18174    unsigned int regno;
18175    if (!REG_P (*p))
18176      return 0;
18177    regno = REGNO (*p);
18178    return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
18179 }
18180 
18181 /* Return true when INSN mentions a register that must be encoded using a
18182    REX prefix.  */
18183 bool
18184 x86_extended_reg_mentioned_p (rtx insn)
18185 {
18186   return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
18187 }
18188 
18189 /* Generate an unsigned DImode/SImode to FP conversion.  This is the same code
18190    optabs would emit if we didn't have TFmode patterns.  */
18191 
18192 void
18193 x86_emit_floatuns (rtx operands[2])
18194 {
18195   rtx neglab, donelab, i0, i1, f0, in, out;
18196   enum machine_mode mode, inmode;
18197 
18198   inmode = GET_MODE (operands[1]);
18199   gcc_assert (inmode == SImode || inmode == DImode);
18200 
18201   out = operands[0];
18202   in = force_reg (inmode, operands[1]);
18203   mode = GET_MODE (out);
18204   neglab = gen_label_rtx ();
18205   donelab = gen_label_rtx ();
18206   i1 = gen_reg_rtx (Pmode);
18207   f0 = gen_reg_rtx (mode);
18208 
18209   emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, Pmode, 0, neglab);
18210 
18211   emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
18212   emit_jump_insn (gen_jump (donelab));
18213   emit_barrier ();
18214 
18215   emit_label (neglab);
18216 
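  /* The input has the sign bit set, so a signed conversion would go wrong.
     Halve it, folding the discarded low bit back in as a sticky bit so the
     final doubling still rounds correctly, convert, then double.  */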
18217   i0 = expand_simple_binop (Pmode, LSHIFTRT, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
18218   i1 = expand_simple_binop (Pmode, AND, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
18219   i0 = expand_simple_binop (Pmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
18220   expand_float (f0, i0, 0);
18221   emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
18222 
18223   emit_label (donelab);
18224 }
18225 
18226 /* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
18227    with all elements equal to VAL.  Return true if successful.  */
18228 
18229 static bool
18230 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
18231 				   rtx target, rtx val)
18232 {
18233   enum machine_mode smode, wsmode, wvmode;
18234   rtx x;
18235 
18236   switch (mode)
18237     {
18238     case V2SImode:
18239     case V2SFmode:
18240       if (!mmx_ok)
18241 	return false;
18242       /* FALLTHRU */
18243 
18244     case V2DFmode:
18245     case V2DImode:
18246     case V4SFmode:
18247     case V4SImode:
18248       val = force_reg (GET_MODE_INNER (mode), val);
18249       x = gen_rtx_VEC_DUPLICATE (mode, val);
18250       emit_insn (gen_rtx_SET (VOIDmode, target, x));
18251       return true;
18252 
18253     case V4HImode:
18254       if (!mmx_ok)
18255 	return false;
18256       if (TARGET_SSE || TARGET_3DNOW_A)
18257 	{
18258 	  val = gen_lowpart (SImode, val);
18259 	  x = gen_rtx_TRUNCATE (HImode, val);
18260 	  x = gen_rtx_VEC_DUPLICATE (mode, x);
18261 	  emit_insn (gen_rtx_SET (VOIDmode, target, x));
18262 	  return true;
18263 	}
18264       else
18265 	{
18266 	  smode = HImode;
18267 	  wsmode = SImode;
18268 	  wvmode = V2SImode;
18269 	  goto widen;
18270 	}
18271 
18272     case V8QImode:
18273       if (!mmx_ok)
18274 	return false;
18275       smode = QImode;
18276       wsmode = HImode;
18277       wvmode = V4HImode;
18278       goto widen;
18279     case V8HImode:
18280       if (TARGET_SSE2)
18281 	{
18282 	  rtx tmp1, tmp2;
18283 	  /* Extend HImode to SImode using a paradoxical SUBREG.  */
18284 	  tmp1 = gen_reg_rtx (SImode);
18285 	  emit_move_insn (tmp1, gen_lowpart (SImode, val));
18286 	  /* Insert the SImode value as low element of V4SImode vector. */
18287 	  tmp2 = gen_reg_rtx (V4SImode);
18288 	  tmp1 = gen_rtx_VEC_MERGE (V4SImode,
18289 				    gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
18290 				    CONST0_RTX (V4SImode),
18291 				    const1_rtx);
18292 	  emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
18293 	  /* Cast the V4SImode vector back to a V8HImode vector.  */
18294 	  tmp1 = gen_reg_rtx (V8HImode);
18295 	  emit_move_insn (tmp1, gen_lowpart (V8HImode, tmp2));
18296 	  /* Duplicate the low short through the whole low SImode word.  */
18297 	  emit_insn (gen_sse2_punpcklwd (tmp1, tmp1, tmp1));
18298 	  /* Cast the V8HImode vector back to a V4SImode vector.  */
18299 	  tmp2 = gen_reg_rtx (V4SImode);
18300 	  emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
18301 	  /* Replicate the low element of the V4SImode vector.  */
18302 	  emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
18303 	  /* Cast the V4SImode vector back to V8HImode, and store in target.  */
18304 	  emit_move_insn (target, gen_lowpart (V8HImode, tmp2));
18305 	  return true;
18306 	}
18307       smode = HImode;
18308       wsmode = SImode;
18309       wvmode = V4SImode;
18310       goto widen;
18311     case V16QImode:
18312       if (TARGET_SSE2)
18313 	{
18314 	  rtx tmp1, tmp2;
18315 	  /* Extend QImode to SImode using a paradoxical SUBREG.  */
18316 	  tmp1 = gen_reg_rtx (SImode);
18317 	  emit_move_insn (tmp1, gen_lowpart (SImode, val));
18318 	  /* Insert the SImode value as low element of V4SImode vector. */
18319 	  tmp2 = gen_reg_rtx (V4SImode);
18320 	  tmp1 = gen_rtx_VEC_MERGE (V4SImode,
18321 				    gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
18322 				    CONST0_RTX (V4SImode),
18323 				    const1_rtx);
18324 	  emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
18325 	  /* Cast the V4SImode vector back to a V16QImode vector.  */
18326 	  tmp1 = gen_reg_rtx (V16QImode);
18327 	  emit_move_insn (tmp1, gen_lowpart (V16QImode, tmp2));
18328 	  /* Duplicate the low byte through the whole low SImode word.  */
18329 	  emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
18330 	  emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
18331 	  /* Cast the V16QImode vector back to a V4SImode vector.  */
18332 	  tmp2 = gen_reg_rtx (V4SImode);
18333 	  emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
18334 	  /* Replicate the low element of the V4SImode vector.  */
18335 	  emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
18336 	  /* Cast the V4SImode vector back to V16QImode, and store in target.  */
18337 	  emit_move_insn (target, gen_lowpart (V16QImode, tmp2));
18338 	  return true;
18339 	}
18340       smode = QImode;
18341       wsmode = HImode;
18342       wvmode = V8HImode;
18343       goto widen;
18344     widen:
18345       /* Replicate the value once into the next wider mode and recurse.  */
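      /* For example, a V8QImode splat of V becomes a V4HImode splat of
	 (V << 8) | V.  */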
18346       val = convert_modes (wsmode, smode, val, true);
18347       x = expand_simple_binop (wsmode, ASHIFT, val,
18348 			       GEN_INT (GET_MODE_BITSIZE (smode)),
18349 			       NULL_RTX, 1, OPTAB_LIB_WIDEN);
18350       val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
18351 
18352       x = gen_reg_rtx (wvmode);
18353       if (!ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val))
18354 	gcc_unreachable ();
18355       emit_move_insn (target, gen_lowpart (mode, x));
18356       return true;
18357 
18358     default:
18359       return false;
18360     }
18361 }
18362 
18363 /* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
18364    whose ONE_VAR element is VAR, and other elements are zero.  Return true
18365    if successful.  */
18366 
18367 static bool
18368 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
18369 				     rtx target, rtx var, int one_var)
18370 {
18371   enum machine_mode vsimode;
18372   rtx new_target;
18373   rtx x, tmp;
18374 
18375   switch (mode)
18376     {
18377     case V2SFmode:
18378     case V2SImode:
18379       if (!mmx_ok)
18380 	return false;
18381       /* FALLTHRU */
18382 
18383     case V2DFmode:
18384     case V2DImode:
18385       if (one_var != 0)
18386 	return false;
18387       var = force_reg (GET_MODE_INNER (mode), var);
18388       x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
18389       emit_insn (gen_rtx_SET (VOIDmode, target, x));
18390       return true;
18391 
18392     case V4SFmode:
18393     case V4SImode:
18394       if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
18395 	new_target = gen_reg_rtx (mode);
18396       else
18397 	new_target = target;
18398       var = force_reg (GET_MODE_INNER (mode), var);
18399       x = gen_rtx_VEC_DUPLICATE (mode, var);
18400       x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
18401       emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
18402       if (one_var != 0)
18403 	{
18404 	  /* We need to shuffle the value to the correct position, so
18405 	     create a new pseudo to store the intermediate result.  */
18406 
18407 	  /* With SSE2, we can use the integer shuffle insns.  */
18408 	  if (mode != V4SFmode && TARGET_SSE2)
18409 	    {
18410 	      emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
18411 					    GEN_INT (1),
18412 					    GEN_INT (one_var == 1 ? 0 : 1),
18413 					    GEN_INT (one_var == 2 ? 0 : 1),
18414 					    GEN_INT (one_var == 3 ? 0 : 1)));
18415 	      if (target != new_target)
18416 		emit_move_insn (target, new_target);
18417 	      return true;
18418 	    }
18419 
18420 	  /* Otherwise convert the intermediate result to V4SFmode and
18421 	     use the SSE1 shuffle instructions.  */
18422 	  if (mode != V4SFmode)
18423 	    {
18424 	      tmp = gen_reg_rtx (V4SFmode);
18425 	      emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
18426 	    }
18427 	  else
18428 	    tmp = new_target;
18429 
18430 	  emit_insn (gen_sse_shufps_1 (tmp, tmp, tmp,
18431 				       GEN_INT (1),
18432 				       GEN_INT (one_var == 1 ? 0 : 1),
18433 				       GEN_INT (one_var == 2 ? 0+4 : 1+4),
18434 				       GEN_INT (one_var == 3 ? 0+4 : 1+4)));
18435 
18436 	  if (mode != V4SFmode)
18437 	    emit_move_insn (target, gen_lowpart (V4SImode, tmp));
18438 	  else if (tmp != target)
18439 	    emit_move_insn (target, tmp);
18440 	}
18441       else if (target != new_target)
18442 	emit_move_insn (target, new_target);
18443       return true;
18444 
18445     case V8HImode:
18446     case V16QImode:
18447       vsimode = V4SImode;
18448       goto widen;
18449     case V4HImode:
18450     case V8QImode:
18451       if (!mmx_ok)
18452 	return false;
18453       vsimode = V2SImode;
18454       goto widen;
18455     widen:
18456       if (one_var != 0)
18457 	return false;
18458 
18459       /* Zero extend the variable element to SImode and recurse.  */
18460       var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
18461 
18462       x = gen_reg_rtx (vsimode);
18463       if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
18464 						var, one_var))
18465 	gcc_unreachable ();
18466 
18467       emit_move_insn (target, gen_lowpart (mode, x));
18468       return true;
18469 
18470     default:
18471       return false;
18472     }
18473 }
18474 
18475 /* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
18476    consisting of the values in VALS.  It is known that all elements
18477    except ONE_VAR are constants.  Return true if successful.  */
18478 
18479 static bool
18480 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
18481 				 rtx target, rtx vals, int one_var)
18482 {
18483   rtx var = XVECEXP (vals, 0, one_var);
18484   enum machine_mode wmode;
18485   rtx const_vec, x;
18486 
18487   const_vec = copy_rtx (vals);
18488   XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
18489   const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
18490 
18491   switch (mode)
18492     {
18493     case V2DFmode:
18494     case V2DImode:
18495     case V2SFmode:
18496     case V2SImode:
18497       /* For the two element vectors, it's just as easy to use
18498 	 the general case.  */
18499       return false;
18500 
18501     case V4SFmode:
18502     case V4SImode:
18503     case V8HImode:
18504     case V4HImode:
18505       break;
18506 
18507     case V16QImode:
18508       wmode = V8HImode;
18509       goto widen;
18510     case V8QImode:
18511       wmode = V4HImode;
18512       goto widen;
18513     widen:
18514       /* There's no way to set one QImode entry easily.  Combine
18515 	 the variable value with its adjacent constant value, and
18516 	 promote to an HImode set.  */
18517       x = XVECEXP (vals, 0, one_var ^ 1);
18518       if (one_var & 1)
18519 	{
18520 	  var = convert_modes (HImode, QImode, var, true);
18521 	  var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
18522 				     NULL_RTX, 1, OPTAB_LIB_WIDEN);
18523 	  x = GEN_INT (INTVAL (x) & 0xff);
18524 	}
18525       else
18526 	{
18527 	  var = convert_modes (HImode, QImode, var, true);
18528 	  x = gen_int_mode (INTVAL (x) << 8, HImode);
18529 	}
18530       if (x != const0_rtx)
18531 	var = expand_simple_binop (HImode, IOR, var, x, var,
18532 				   1, OPTAB_LIB_WIDEN);
18533 
18534       x = gen_reg_rtx (wmode);
18535       emit_move_insn (x, gen_lowpart (wmode, const_vec));
18536       ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
18537 
18538       emit_move_insn (target, gen_lowpart (mode, x));
18539       return true;
18540 
18541     default:
18542       return false;
18543     }
18544 
18545   emit_move_insn (target, const_vec);
18546   ix86_expand_vector_set (mmx_ok, target, var, one_var);
18547   return true;
18548 }
18549 
18550 /* A subroutine of ix86_expand_vector_init.  Handle the most general case:
18551    all values variable, and none identical.  */
18552 
18553 static void
18554 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
18555 				 rtx target, rtx vals)
18556 {
18557   enum machine_mode half_mode = GET_MODE_INNER (mode);
18558   rtx op0 = NULL, op1 = NULL;
18559   bool use_vec_concat = false;
18560 
18561   switch (mode)
18562     {
18563     case V2SFmode:
18564     case V2SImode:
18565       if (!mmx_ok && !TARGET_SSE)
18566 	break;
18567       /* FALLTHRU */
18568 
18569     case V2DFmode:
18570     case V2DImode:
18571       /* For the two element vectors, we always implement VEC_CONCAT.  */
18572       op0 = XVECEXP (vals, 0, 0);
18573       op1 = XVECEXP (vals, 0, 1);
18574       use_vec_concat = true;
18575       break;
18576 
18577     case V4SFmode:
18578       half_mode = V2SFmode;
18579       goto half;
18580     case V4SImode:
18581       half_mode = V2SImode;
18582       goto half;
18583     half:
18584       {
18585 	rtvec v;
18586 
18587 	/* For V4SF and V4SI, we implement a concat of two V2 vectors.
18588 	   Recurse to load the two halves.  */
18589 
18590 	op0 = gen_reg_rtx (half_mode);
18591 	v = gen_rtvec (2, XVECEXP (vals, 0, 0), XVECEXP (vals, 0, 1));
18592 	ix86_expand_vector_init (false, op0, gen_rtx_PARALLEL (half_mode, v));
18593 
18594 	op1 = gen_reg_rtx (half_mode);
18595 	v = gen_rtvec (2, XVECEXP (vals, 0, 2), XVECEXP (vals, 0, 3));
18596 	ix86_expand_vector_init (false, op1, gen_rtx_PARALLEL (half_mode, v));
18597 
18598 	use_vec_concat = true;
18599       }
18600       break;
18601 
18602     case V8HImode:
18603     case V16QImode:
18604     case V4HImode:
18605     case V8QImode:
18606       break;
18607 
18608     default:
18609       gcc_unreachable ();
18610     }
18611 
18612   if (use_vec_concat)
18613     {
18614       if (!register_operand (op0, half_mode))
18615 	op0 = force_reg (half_mode, op0);
18616       if (!register_operand (op1, half_mode))
18617 	op1 = force_reg (half_mode, op1);
18618 
18619       emit_insn (gen_rtx_SET (VOIDmode, target,
18620 			      gen_rtx_VEC_CONCAT (mode, op0, op1)));
18621     }
18622   else
18623     {
18624       int i, j, n_elts, n_words, n_elt_per_word;
18625       enum machine_mode inner_mode;
18626       rtx words[4], shift;
18627 
18628       inner_mode = GET_MODE_INNER (mode);
18629       n_elts = GET_MODE_NUNITS (mode);
18630       n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
18631       n_elt_per_word = n_elts / n_words;
18632       shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
18633 
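      /* Pack N_ELT_PER_WORD elements into each word sized register by
	 shifting and ORing, then assemble the vector from those words.  */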
18634       for (i = 0; i < n_words; ++i)
18635 	{
18636 	  rtx word = NULL_RTX;
18637 
18638 	  for (j = 0; j < n_elt_per_word; ++j)
18639 	    {
18640 	      rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
18641 	      elt = convert_modes (word_mode, inner_mode, elt, true);
18642 
18643 	      if (j == 0)
18644 		word = elt;
18645 	      else
18646 		{
18647 		  word = expand_simple_binop (word_mode, ASHIFT, word, shift,
18648 					      word, 1, OPTAB_LIB_WIDEN);
18649 		  word = expand_simple_binop (word_mode, IOR, word, elt,
18650 					      word, 1, OPTAB_LIB_WIDEN);
18651 		}
18652 	    }
18653 
18654 	  words[i] = word;
18655 	}
18656 
18657       if (n_words == 1)
18658 	emit_move_insn (target, gen_lowpart (mode, words[0]));
18659       else if (n_words == 2)
18660 	{
18661 	  rtx tmp = gen_reg_rtx (mode);
18662 	  emit_insn (gen_rtx_CLOBBER (VOIDmode, tmp));
18663 	  emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
18664 	  emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
18665 	  emit_move_insn (target, tmp);
18666 	}
18667       else if (n_words == 4)
18668 	{
18669 	  rtx tmp = gen_reg_rtx (V4SImode);
18670 	  vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
18671 	  ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
18672 	  emit_move_insn (target, gen_lowpart (mode, tmp));
18673 	}
18674       else
18675 	gcc_unreachable ();
18676     }
18677 }
18678 
18679 /* Initialize vector TARGET via VALS.  Suppress the use of MMX
18680    instructions unless MMX_OK is true.  */
18681 
18682 void
18683 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
18684 {
18685   enum machine_mode mode = GET_MODE (target);
18686   enum machine_mode inner_mode = GET_MODE_INNER (mode);
18687   int n_elts = GET_MODE_NUNITS (mode);
18688   int n_var = 0, one_var = -1;
18689   bool all_same = true, all_const_zero = true;
18690   int i;
18691   rtx x;
18692 
18693   for (i = 0; i < n_elts; ++i)
18694     {
18695       x = XVECEXP (vals, 0, i);
18696       if (!CONSTANT_P (x))
18697 	n_var++, one_var = i;
18698       else if (x != CONST0_RTX (inner_mode))
18699 	all_const_zero = false;
18700       if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
18701 	all_same = false;
18702     }
18703 
18704   /* Constants are best loaded from the constant pool.  */
18705   if (n_var == 0)
18706     {
18707       emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
18708       return;
18709     }
18710 
18711   /* If all values are identical, broadcast the value.  */
18712   if (all_same
18713       && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
18714 					    XVECEXP (vals, 0, 0)))
18715     return;
18716 
18717   /* Values where only one field is non-constant are best loaded from
18718      the pool and overwritten via move later.  */
18719   if (n_var == 1)
18720     {
18721       if (all_const_zero
18722 	  && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
18723 						  XVECEXP (vals, 0, one_var),
18724 						  one_var))
18725 	return;
18726 
18727       if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
18728 	return;
18729     }
18730 
18731   ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
18732 }
18733 
18734 void
18735 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
18736 {
18737   enum machine_mode mode = GET_MODE (target);
18738   enum machine_mode inner_mode = GET_MODE_INNER (mode);
18739   bool use_vec_merge = false;
18740   rtx tmp;
18741 
18742   switch (mode)
18743     {
18744     case V2SFmode:
18745     case V2SImode:
18746       if (mmx_ok)
18747 	{
18748 	  tmp = gen_reg_rtx (GET_MODE_INNER (mode));
18749 	  ix86_expand_vector_extract (true, tmp, target, 1 - elt);
18750 	  if (elt == 0)
18751 	    tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
18752 	  else
18753 	    tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
18754 	  emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
18755 	  return;
18756 	}
18757       break;
18758 
18759     case V2DFmode:
18760     case V2DImode:
18761       {
18762 	rtx op0, op1;
18763 
18764 	/* For the two element vectors, we implement a VEC_CONCAT with
18765 	   the extraction of the other element.  */
18766 
18767 	tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
18768 	tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
18769 
18770 	if (elt == 0)
18771 	  op0 = val, op1 = tmp;
18772 	else
18773 	  op0 = tmp, op1 = val;
18774 
18775 	tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
18776 	emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
18777       }
18778       return;
18779 
18780     case V4SFmode:
18781       switch (elt)
18782 	{
18783 	case 0:
18784 	  use_vec_merge = true;
18785 	  break;
18786 
18787 	case 1:
18788 	  /* tmp = target = A B C D */
18789 	  tmp = copy_to_reg (target);
18790 	  /* target = A A B B */
18791 	  emit_insn (gen_sse_unpcklps (target, target, target));
18792 	  /* target = X A B B */
18793 	  ix86_expand_vector_set (false, target, val, 0);
18794 	  /* target = A X C D  */
18795 	  emit_insn (gen_sse_shufps_1 (target, target, tmp,
18796 				       GEN_INT (1), GEN_INT (0),
18797 				       GEN_INT (2+4), GEN_INT (3+4)));
18798 	  return;
18799 
18800 	case 2:
18801 	  /* tmp = target = A B C D */
18802 	  tmp = copy_to_reg (target);
18803 	  /* tmp = X B C D */
18804 	  ix86_expand_vector_set (false, tmp, val, 0);
18805 	  /* target = A B X D */
18806 	  emit_insn (gen_sse_shufps_1 (target, target, tmp,
18807 				       GEN_INT (0), GEN_INT (1),
18808 				       GEN_INT (0+4), GEN_INT (3+4)));
18809 	  return;
18810 
18811 	case 3:
18812 	  /* tmp = target = A B C D */
18813 	  tmp = copy_to_reg (target);
18814 	  /* tmp = X B C D */
18815 	  ix86_expand_vector_set (false, tmp, val, 0);
18816 	  /* target = A B C X */
18817 	  emit_insn (gen_sse_shufps_1 (target, target, tmp,
18818 				       GEN_INT (0), GEN_INT (1),
18819 				       GEN_INT (2+4), GEN_INT (0+4)));
18820 	  return;
18821 
18822 	default:
18823 	  gcc_unreachable ();
18824 	}
18825       break;
18826 
18827     case V4SImode:
18828       /* Element 0 handled by vec_merge below.  */
18829       if (elt == 0)
18830 	{
18831 	  use_vec_merge = true;
18832 	  break;
18833 	}
18834 
18835       if (TARGET_SSE2)
18836 	{
18837 	  /* With SSE2, use integer shuffles to swap element 0 and ELT,
18838 	     store into element 0, then shuffle them back.  */
18839 
18840 	  rtx order[4];
18841 
18842 	  order[0] = GEN_INT (elt);
18843 	  order[1] = const1_rtx;
18844 	  order[2] = const2_rtx;
18845 	  order[3] = GEN_INT (3);
18846 	  order[elt] = const0_rtx;
18847 
18848 	  emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
18849 					order[1], order[2], order[3]));
18850 
18851 	  ix86_expand_vector_set (false, target, val, 0);
18852 
18853 	  emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
18854 					order[1], order[2], order[3]));
18855 	}
18856       else
18857 	{
18858 	  /* For SSE1, we have to reuse the V4SF code.  */
18859 	  ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
18860 				  gen_lowpart (SFmode, val), elt);
18861 	}
18862       return;
18863 
18864     case V8HImode:
18865       use_vec_merge = TARGET_SSE2;
18866       break;
18867     case V4HImode:
18868       use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
18869       break;
18870 
18871     case V16QImode:
18872     case V8QImode:
18873     default:
18874       break;
18875     }
18876 
18877   if (use_vec_merge)
18878     {
18879       tmp = gen_rtx_VEC_DUPLICATE (mode, val);
18880       tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
18881       emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
18882     }
18883   else
18884     {
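      /* No direct insertion pattern applies, so go through memory:
	 spill the vector to a stack slot, store VAL into the ELT'th
	 element there, and reload the whole vector.  */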
18885       rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
18886 
18887       emit_move_insn (mem, target);
18888 
18889       tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
18890       emit_move_insn (tmp, val);
18891 
18892       emit_move_insn (target, mem);
18893     }
18894 }
18895 
18896 void
18897 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
18898 {
18899   enum machine_mode mode = GET_MODE (vec);
18900   enum machine_mode inner_mode = GET_MODE_INNER (mode);
18901   bool use_vec_extr = false;
18902   rtx tmp;
18903 
18904   switch (mode)
18905     {
18906     case V2SImode:
18907     case V2SFmode:
18908       if (!mmx_ok)
18909 	break;
18910       /* FALLTHRU */
18911 
18912     case V2DFmode:
18913     case V2DImode:
18914       use_vec_extr = true;
18915       break;
18916 
18917     case V4SFmode:
18918       switch (elt)
18919 	{
18920 	case 0:
18921 	  tmp = vec;
18922 	  break;
18923 
18924 	case 1:
18925 	case 3:
18926 	  tmp = gen_reg_rtx (mode);
18927 	  emit_insn (gen_sse_shufps_1 (tmp, vec, vec,
18928 				       GEN_INT (elt), GEN_INT (elt),
18929 				       GEN_INT (elt+4), GEN_INT (elt+4)));
18930 	  break;
18931 
18932 	case 2:
18933 	  tmp = gen_reg_rtx (mode);
18934 	  emit_insn (gen_sse_unpckhps (tmp, vec, vec));
18935 	  break;
18936 
18937 	default:
18938 	  gcc_unreachable ();
18939 	}
18940       vec = tmp;
18941       use_vec_extr = true;
18942       elt = 0;
18943       break;
18944 
18945     case V4SImode:
18946       if (TARGET_SSE2)
18947 	{
18948 	  switch (elt)
18949 	    {
18950 	    case 0:
18951 	      tmp = vec;
18952 	      break;
18953 
18954 	    case 1:
18955 	    case 3:
18956 	      tmp = gen_reg_rtx (mode);
18957 	      emit_insn (gen_sse2_pshufd_1 (tmp, vec,
18958 					    GEN_INT (elt), GEN_INT (elt),
18959 					    GEN_INT (elt), GEN_INT (elt)));
18960 	      break;
18961 
18962 	    case 2:
18963 	      tmp = gen_reg_rtx (mode);
18964 	      emit_insn (gen_sse2_punpckhdq (tmp, vec, vec));
18965 	      break;
18966 
18967 	    default:
18968 	      gcc_unreachable ();
18969 	    }
18970 	  vec = tmp;
18971 	  use_vec_extr = true;
18972 	  elt = 0;
18973 	}
18974       else
18975 	{
18976 	  /* For SSE1, we have to reuse the V4SF code.  */
18977 	  ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
18978 				      gen_lowpart (V4SFmode, vec), elt);
18979 	  return;
18980 	}
18981       break;
18982 
18983     case V8HImode:
18984       use_vec_extr = TARGET_SSE2;
18985       break;
18986     case V4HImode:
18987       use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
18988       break;
18989 
18990     case V16QImode:
18991     case V8QImode:
18992       /* ??? Could extract the appropriate HImode element and shift.  */
18993     default:
18994       break;
18995     }
18996 
18997   if (use_vec_extr)
18998     {
18999       tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
19000       tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
19001 
19002       /* Let the rtl optimizers know about the zero extension performed.  */
19003       if (inner_mode == HImode)
19004 	{
19005 	  tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
19006 	  target = gen_lowpart (SImode, target);
19007 	}
19008 
19009       emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
19010     }
19011   else
19012     {
19013       rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
19014 
19015       emit_move_insn (mem, vec);
19016 
19017       tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
19018       emit_move_insn (target, tmp);
19019     }
19020 }
19021 
19022 /* Expand a vector reduction on V4SFmode for SSE1.  FN is the binary
19023    pattern to reduce; DEST is the destination; IN is the input vector.  */
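
/* With (say) addition as FN and IN = { a, b, c, d }, the sequence below
   computes

     tmp1 = { c, d, c, d }			movhlps
     tmp2 = { a+c, b+d, c+c, d+d }		FN
     tmp3 = { b+d, b+d, b+d, b+d }		shufps (broadcast element 1)
     dest = { (a+c)+(b+d), ... }		FN

   leaving the full reduction in element 0 of DEST.  */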
19024 
19025 void
19026 ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
19027 {
19028   rtx tmp1, tmp2, tmp3;
19029 
19030   tmp1 = gen_reg_rtx (V4SFmode);
19031   tmp2 = gen_reg_rtx (V4SFmode);
19032   tmp3 = gen_reg_rtx (V4SFmode);
19033 
19034   emit_insn (gen_sse_movhlps (tmp1, in, in));
19035   emit_insn (fn (tmp2, tmp1, in));
19036 
19037   emit_insn (gen_sse_shufps_1 (tmp3, tmp2, tmp2,
19038 			       GEN_INT (1), GEN_INT (1),
19039 			       GEN_INT (1+4), GEN_INT (1+4)));
19040   emit_insn (fn (dest, tmp2, tmp3));
19041 }
19042 
19043 /* Target hook for scalar_mode_supported_p.  */
19044 static bool
19045 ix86_scalar_mode_supported_p (enum machine_mode mode)
19046 {
19047   if (DECIMAL_FLOAT_MODE_P (mode))
19048     return true;
19049   else
19050     return default_scalar_mode_supported_p (mode);
19051 }
19052 
19053 /* Implements target hook vector_mode_supported_p.  */
19054 static bool
19055 ix86_vector_mode_supported_p (enum machine_mode mode)
19056 {
19057   if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
19058     return true;
19059   if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
19060     return true;
19061   if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
19062     return true;
19063   if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
19064     return true;
19065   return false;
19066 }
19067 
19068 /* Worker function for TARGET_MD_ASM_CLOBBERS.
19069 
19070    We do this in the new i386 backend to maintain source compatibility
19071    with the old cc0-based compiler.  */
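
/* The effect is that every inline asm behaves as if the user had written

     asm ("..." : outputs : inputs : "flags", "fpsr", "dirflag");

   in addition to any clobbers given explicitly.  */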
19072 
19073 static tree
19074 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
19075 		      tree inputs ATTRIBUTE_UNUSED,
19076 		      tree clobbers)
19077 {
19078   clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
19079 			clobbers);
19080   clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
19081 			clobbers);
19082   clobbers = tree_cons (NULL_TREE, build_string (7, "dirflag"),
19083 			clobbers);
19084   return clobbers;
19085 }
19086 
19087 /* Return true if this goes in large data/bss.  */
19088 
19089 static bool
19090 ix86_in_large_data_p (tree exp)
19091 {
19092   if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
19093     return false;
19094 
19095   /* Functions are never large data.  */
19096   if (TREE_CODE (exp) == FUNCTION_DECL)
19097     return false;
19098 
19099   if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
19100     {
19101       const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
19102       if (strcmp (section, ".ldata") == 0
19103 	  || strcmp (section, ".lbss") == 0)
19104 	return true;
19105       return false;
19106     }
19107   else
19108     {
19109       HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
19110 
19111       /* If this is an incomplete type with size 0, then we can't put it
19112 	 in the normal data section: it might be too big when completed.  */
19113       if (!size || size > ix86_section_threshold)
19114 	return true;
19115     }
19116 
19117   return false;
19118 }
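
/* Mark symbols placed in the large data/bss sections, so that references
   to them know far (64-bit) addressing may be needed.  */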
19119 static void
19120 ix86_encode_section_info (tree decl, rtx rtl, int first)
19121 {
19122   default_encode_section_info (decl, rtl, first);
19123 
19124   if (TREE_CODE (decl) == VAR_DECL
19125       && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
19126       && ix86_in_large_data_p (decl))
19127     SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
19128 }
19129 
19130 /* Worker function for REVERSE_CONDITION.  */
19131 
19132 enum rtx_code
19133 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
19134 {
19135   return (mode != CCFPmode && mode != CCFPUmode
19136 	  ? reverse_condition (code)
19137 	  : reverse_condition_maybe_unordered (code));
19138 }
19139 
19140 /* Output code to perform an x87 FP register move, from OPERANDS[1]
19141    to OPERANDS[0].  */
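
/* If the source register dies with this move we can pop it: a popping
   fstp in the general case, or ffreep when the destination is %st(0) and
   the store degenerates to a plain pop.  Otherwise we load with fld when
   the destination is the stack top, or store with fst when the source
   is.  */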
19142 
19143 const char *
19144 output_387_reg_move (rtx insn, rtx *operands)
19145 {
19146   if (REG_P (operands[1])
19147       && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
19148     {
19149       if (REGNO (operands[0]) == FIRST_STACK_REG)
19150 	return output_387_ffreep (operands, 0);
19151       return "fstp\t%y0";
19152     }
19153   if (STACK_TOP_P (operands[0]))
19154     return "fld%z1\t%y1";
19155   return "fst\t%y0";
19156 }
19157 
19158 /* Output code to perform a conditional jump to LABEL, if the C2 flag in
19159    the FP status register is set.  */
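
/* FNSTSW makes the status word testable; C2 is bit 10, i.e. bit 2 of its
   upper byte.  When SAHF is available that byte is copied into EFLAGS,
   where C2 becomes the parity flag and the UNORDERED test below picks it
   up; otherwise we test the 0x04 bit of the upper byte directly.  */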
19160 
19161 void
19162 ix86_emit_fp_unordered_jump (rtx label)
19163 {
19164   rtx reg = gen_reg_rtx (HImode);
19165   rtx temp;
19166 
19167   emit_insn (gen_x86_fnstsw_1 (reg));
19168 
19169   if (TARGET_USE_SAHF)
19170     {
19171       emit_insn (gen_x86_sahf_1 (reg));
19172 
19173       temp = gen_rtx_REG (CCmode, FLAGS_REG);
19174       temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
19175     }
19176   else
19177     {
19178       emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
19179 
19180       temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
19181       temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
19182     }
19183 
19184   temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
19185 			      gen_rtx_LABEL_REF (VOIDmode, label),
19186 			      pc_rtx);
19187   temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
19188   emit_jump_insn (temp);
19189 }
19190 
19191 /* Output code to perform a log1p XFmode calculation.  */
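
/* fyl2xp1 computes y * log2 (x + 1) but is only specified for
   |x| < 1 - sqrt(2)/2 (about 0.29289), which is the constant compared
   against below.  Inside that range we use fyl2xp1 directly; outside it
   we form 1 + x and use fyl2x instead.  In both cases y is loaded with
   ln(2) via fldln2, so the result is the natural logarithm.  */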
19192 
19193 void ix86_emit_i387_log1p (rtx op0, rtx op1)
19194 {
19195   rtx label1 = gen_label_rtx ();
19196   rtx label2 = gen_label_rtx ();
19197 
19198   rtx tmp = gen_reg_rtx (XFmode);
19199   rtx tmp2 = gen_reg_rtx (XFmode);
19200 
19201   emit_insn (gen_absxf2 (tmp, op1));
19202   emit_insn (gen_cmpxf (tmp,
19203     CONST_DOUBLE_FROM_REAL_VALUE (
19204        REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
19205        XFmode)));
19206   emit_jump_insn (gen_bge (label1));
19207 
19208   emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
19209   emit_insn (gen_fyl2xp1_xf3 (op0, tmp2, op1));
19210   emit_jump (label2);
19211 
19212   emit_label (label1);
19213   emit_move_insn (tmp, CONST1_RTX (XFmode));
19214   emit_insn (gen_addxf3 (tmp, op1, tmp));
19215   emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
19216   emit_insn (gen_fyl2x_xf3 (op0, tmp2, tmp));
19217 
19218   emit_label (label2);
19219 }
19220 
19221 /* Solaris implementation of TARGET_ASM_NAMED_SECTION.  */
19222 
19223 static void
19224 i386_solaris_elf_named_section (const char *name, unsigned int flags,
19225 				tree decl)
19226 {
19227   /* With Binutils 2.15, the "@unwind" marker must be specified on
19228      every occurrence of the ".eh_frame" section, not just the first
19229      one.  */
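  /* I.e. for the 64-bit .eh_frame section we emit
     ".section .eh_frame,\"a\",@unwind" ("aw" when writable) instead of
     the default directive.  */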
19230   if (TARGET_64BIT
19231       && strcmp (name, ".eh_frame") == 0)
19232     {
19233       fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
19234 	       flags & SECTION_WRITE ? "aw" : "a");
19235       return;
19236     }
19237   default_elf_asm_named_section (name, flags, decl);
19238 }
19239 
19240 /* Return the mangling of TYPE if it is an extended fundamental type.  */
19241 
19242 static const char *
19243 ix86_mangle_fundamental_type (tree type)
19244 {
19245   switch (TYPE_MODE (type))
19246     {
19247     case TFmode:
19248       /* __float128 is "g".  */
19249       return "g";
19250     case XFmode:
19251       /* "long double" or __float80 is "e".  */
19252       return "e";
19253     default:
19254       return NULL;
19255     }
19256 }
19257 
19258 /* For 32-bit code we can save the PIC register setup by calling the
19259    hidden function __stack_chk_fail_local instead of calling
19260    __stack_chk_fail directly.  64-bit code doesn't need to set up any PIC
19261    register, so it is better to call __stack_chk_fail directly.  */
19262 
19263 static tree
19264 ix86_stack_protect_fail (void)
19265 {
19266   return TARGET_64BIT
19267 	 ? default_external_stack_protect_fail ()
19268 	 : default_hidden_stack_protect_fail ();
19269 }
19270 
19271 /* Select a format to encode pointers in exception handling data.  CODE
19272    is 0 for data, 1 for code labels, 2 for function pointers.  GLOBAL is
19273    true if the symbol may be affected by dynamic relocations.
19274 
19275    ??? All x86 object file formats are capable of representing this.
19276    After all, the relocation needed is the same as for the call insn.
19277    Whether or not a particular assembler allows us to enter such, I
19278    guess we'll have to see.  */
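
/* For example, 32-bit PIC code gets DW_EH_PE_pcrel | DW_EH_PE_sdata4
   (plus DW_EH_PE_indirect for global symbols), non-PIC small-model code
   gets DW_EH_PE_udata4, and everything else falls back to
   DW_EH_PE_absptr.  */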
19279 int
19280 asm_preferred_eh_data_format (int code, int global)
19281 {
19282   if (flag_pic)
19283     {
19284       int type = DW_EH_PE_sdata8;
19285       if (!TARGET_64BIT
19286 	  || ix86_cmodel == CM_SMALL_PIC
19287 	  || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
19288 	type = DW_EH_PE_sdata4;
19289       return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
19290     }
19291   if (ix86_cmodel == CM_SMALL
19292       || (ix86_cmodel == CM_MEDIUM && code))
19293     return DW_EH_PE_udata4;
19294   return DW_EH_PE_absptr;
19295 }
19296 
19297 #include "gt-i386.h"
19298