/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
   2002, 2003, 2004 Free Software Foundation, Inc.

This file is part of GNU CC.

GNU CC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GNU CC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GNU CC; see the file COPYING.  If not, write to
the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.  */

#include "config.h"
#include "system.h"
#include "rtl.h"
#include "tree.h"
#include "tm_p.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "real.h"
#include "insn-config.h"
#include "conditions.h"
#include "output.h"
#include "insn-attr.h"
#include "flags.h"
#include "except.h"
#include "function.h"
#include "recog.h"
#include "expr.h"
#include "optabs.h"
#include "toplev.h"
#include "basic-block.h"
#include "ggc.h"
#include "target.h"
#include "target-def.h"
#include "langhooks.h"

#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif

/* Processor costs (relative to an add) */
static const
struct processor_costs size_cost = {	/* costs for tuning for size */
  2,					/* cost of an add instruction */
  3,					/* cost of a lea instruction */
  2,					/* variable shift costs */
  3,					/* constant shift costs */
  3,					/* cost of starting a multiply */
  0,					/* cost of multiply per each bit set */
  3,					/* cost of a divide/mod */
  3,					/* cost of movsx */
  3,					/* cost of movzx */
  0,					/* "large" insn */
  2,					/* MOVE_RATIO */
  2,					/* cost for loading QImode using movzbl */
  {2, 2, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 2, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {2, 2, 2},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {2, 2, 2},				/* cost of storing fp registers */
  3,					/* cost of moving MMX register */
  {3, 3},				/* cost of loading MMX registers
					   in SImode and DImode */
  {3, 3},				/* cost of storing MMX registers
					   in SImode and DImode */
  3,					/* cost of moving SSE register */
  {3, 3, 3},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {3, 3, 3},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  0,					/* size of prefetch block */
  0,					/* number of parallel prefetches */
  2,					/* cost of FADD and FSUB insns.  */
  2,					/* cost of FMUL instruction.  */
  2,					/* cost of FDIV instruction.  */
  2,					/* cost of FABS instruction.  */
  2,					/* cost of FCHS instruction.  */
  2,					/* cost of FSQRT instruction.  */
};

/* Processor costs (relative to an add) */
static const
struct processor_costs i386_cost = {	/* 386 specific costs */
  1,					/* cost of an add instruction */
  1,					/* cost of a lea instruction */
  3,					/* variable shift costs */
  2,					/* constant shift costs */
  6,					/* cost of starting a multiply */
  1,					/* cost of multiply per each bit set */
  23,					/* cost of a divide/mod */
  3,					/* cost of movsx */
  2,					/* cost of movzx */
  15,					/* "large" insn */
  3,					/* MOVE_RATIO */
  4,					/* cost for loading QImode using movzbl */
  {2, 4, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 4, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {8, 8, 8},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {8, 8, 8},				/* cost of storing fp registers */
  2,					/* cost of moving MMX register */
  {4, 8},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 8},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 8, 16},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 8, 16},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  0,					/* size of prefetch block */
  0,					/* number of parallel prefetches */
  23,					/* cost of FADD and FSUB insns.  */
  27,					/* cost of FMUL instruction.  */
  88,					/* cost of FDIV instruction.  */
  22,					/* cost of FABS instruction.  */
  24,					/* cost of FCHS instruction.  */
  122,					/* cost of FSQRT instruction.  */
};

static const
struct processor_costs i486_cost = {	/* 486 specific costs */
  1,					/* cost of an add instruction */
  1,					/* cost of a lea instruction */
  3,					/* variable shift costs */
  2,					/* constant shift costs */
  12,					/* cost of starting a multiply */
  1,					/* cost of multiply per each bit set */
  40,					/* cost of a divide/mod */
  3,					/* cost of movsx */
  2,					/* cost of movzx */
  15,					/* "large" insn */
  3,					/* MOVE_RATIO */
  4,					/* cost for loading QImode using movzbl */
  {2, 4, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 4, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {8, 8, 8},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {8, 8, 8},				/* cost of storing fp registers */
  2,					/* cost of moving MMX register */
  {4, 8},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 8},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 8, 16},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 8, 16},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  0,					/* size of prefetch block */
  0,					/* number of parallel prefetches */
  8,					/* cost of FADD and FSUB insns.  */
  16,					/* cost of FMUL instruction.  */
  73,					/* cost of FDIV instruction.  */
  3,					/* cost of FABS instruction.  */
  3,					/* cost of FCHS instruction.  */
  83,					/* cost of FSQRT instruction.  */
};

static const
struct processor_costs pentium_cost = {
  1,					/* cost of an add instruction */
  1,					/* cost of a lea instruction */
  4,					/* variable shift costs */
  1,					/* constant shift costs */
  11,					/* cost of starting a multiply */
  0,					/* cost of multiply per each bit set */
  25,					/* cost of a divide/mod */
  3,					/* cost of movsx */
  2,					/* cost of movzx */
  8,					/* "large" insn */
  6,					/* MOVE_RATIO */
  6,					/* cost for loading QImode using movzbl */
  {2, 4, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 4, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {2, 2, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 6},				/* cost of storing fp registers */
  8,					/* cost of moving MMX register */
  {8, 8},				/* cost of loading MMX registers
					   in SImode and DImode */
  {8, 8},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 8, 16},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 8, 16},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  0,					/* size of prefetch block */
  0,					/* number of parallel prefetches */
  3,					/* cost of FADD and FSUB insns.  */
  3,					/* cost of FMUL instruction.  */
  39,					/* cost of FDIV instruction.  */
  1,					/* cost of FABS instruction.  */
  1,					/* cost of FCHS instruction.  */
  70,					/* cost of FSQRT instruction.  */
};

static const
struct processor_costs pentiumpro_cost = {
  1,					/* cost of an add instruction */
  1,					/* cost of a lea instruction */
  1,					/* variable shift costs */
  1,					/* constant shift costs */
  4,					/* cost of starting a multiply */
  0,					/* cost of multiply per each bit set */
  17,					/* cost of a divide/mod */
  1,					/* cost of movsx */
  1,					/* cost of movzx */
  8,					/* "large" insn */
  6,					/* MOVE_RATIO */
  2,					/* cost for loading QImode using movzbl */
  {4, 4, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 2, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {2, 2, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 6},				/* cost of storing fp registers */
  2,					/* cost of moving MMX register */
  {2, 2},				/* cost of loading MMX registers
					   in SImode and DImode */
  {2, 2},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {2, 2, 8},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {2, 2, 8},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  32,					/* size of prefetch block */
  6,					/* number of parallel prefetches */
  3,					/* cost of FADD and FSUB insns.  */
  5,					/* cost of FMUL instruction.  */
  56,					/* cost of FDIV instruction.  */
  2,					/* cost of FABS instruction.  */
  2,					/* cost of FCHS instruction.  */
  56,					/* cost of FSQRT instruction.  */
};

static const
struct processor_costs k6_cost = {
  1,					/* cost of an add instruction */
  2,					/* cost of a lea instruction */
  1,					/* variable shift costs */
  1,					/* constant shift costs */
  3,					/* cost of starting a multiply */
  0,					/* cost of multiply per each bit set */
  18,					/* cost of a divide/mod */
  2,					/* cost of movsx */
  2,					/* cost of movzx */
  8,					/* "large" insn */
  4,					/* MOVE_RATIO */
  3,					/* cost for loading QImode using movzbl */
  {4, 5, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 3, 2},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {6, 6, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 4},				/* cost of storing fp registers */
  2,					/* cost of moving MMX register */
  {2, 2},				/* cost of loading MMX registers
					   in SImode and DImode */
  {2, 2},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {2, 2, 8},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {2, 2, 8},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  6,					/* MMX or SSE register to integer */
  32,					/* size of prefetch block */
  1,					/* number of parallel prefetches */
  2,					/* cost of FADD and FSUB insns.  */
  2,					/* cost of FMUL instruction.  */
  56,					/* cost of FDIV instruction.  */
  2,					/* cost of FABS instruction.  */
  2,					/* cost of FCHS instruction.  */
  56,					/* cost of FSQRT instruction.  */
};

static const
struct processor_costs athlon_cost = {
  1,					/* cost of an add instruction */
  2,					/* cost of a lea instruction */
  1,					/* variable shift costs */
  1,					/* constant shift costs */
  5,					/* cost of starting a multiply */
  0,					/* cost of multiply per each bit set */
  42,					/* cost of a divide/mod */
  1,					/* cost of movsx */
  1,					/* cost of movzx */
  8,					/* "large" insn */
  9,					/* MOVE_RATIO */
  4,					/* cost for loading QImode using movzbl */
  {3, 4, 3},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {3, 4, 3},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {4, 4, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {6, 6, 8},				/* cost of storing fp registers */
  2,					/* cost of moving MMX register */
  {4, 4},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 4},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 4, 6},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 4, 5},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  5,					/* MMX or SSE register to integer */
  64,					/* size of prefetch block */
  6,					/* number of parallel prefetches */
  4,					/* cost of FADD and FSUB insns.  */
  4,					/* cost of FMUL instruction.  */
  24,					/* cost of FDIV instruction.  */
  2,					/* cost of FABS instruction.  */
  2,					/* cost of FCHS instruction.  */
  35,					/* cost of FSQRT instruction.  */
};

static const
struct processor_costs pentium4_cost = {
  1,					/* cost of an add instruction */
  1,					/* cost of a lea instruction */
  8,					/* variable shift costs */
  8,					/* constant shift costs */
  30,					/* cost of starting a multiply */
  0,					/* cost of multiply per each bit set */
  112,					/* cost of a divide/mod */
  1,					/* cost of movsx */
  1,					/* cost of movzx */
  16,					/* "large" insn */
  6,					/* MOVE_RATIO */
  2,					/* cost for loading QImode using movzbl */
  {4, 5, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 3, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {2, 2, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 6},				/* cost of storing fp registers */
  2,					/* cost of moving MMX register */
  {2, 2},				/* cost of loading MMX registers
					   in SImode and DImode */
  {2, 2},				/* cost of storing MMX registers
					   in SImode and DImode */
  12,					/* cost of moving SSE register */
  {12, 12, 12},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {2, 2, 8},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  10,					/* MMX or SSE register to integer */
  64,					/* size of prefetch block */
  6,					/* number of parallel prefetches */
  5,					/* cost of FADD and FSUB insns.  */
  7,					/* cost of FMUL instruction.  */
  43,					/* cost of FDIV instruction.  */
  2,					/* cost of FABS instruction.  */
  2,					/* cost of FCHS instruction.  */
  43,					/* cost of FSQRT instruction.  */
};

const struct processor_costs *ix86_cost = &pentium_cost;
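
/* A sketch of how the tables above are consumed (an assumption about
   the cost macros in i386.h, not something spelled out in this file):
   override_options () below repoints ix86_cost at the table matching
   the selected -mcpu, and the rtx cost machinery then reads fields
   through the pointer, e.g. ix86_cost->add or ix86_cost->mult_init.  */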

/* Processor feature/optimization bitmasks.  */
#define m_386 (1<<PROCESSOR_I386)
#define m_486 (1<<PROCESSOR_I486)
#define m_PENT (1<<PROCESSOR_PENTIUM)
#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
#define m_K6  (1<<PROCESSOR_K6)
#define m_ATHLON  (1<<PROCESSOR_ATHLON)
#define m_PENT4  (1<<PROCESSOR_PENTIUM4)

const int x86_use_leave = m_386 | m_K6 | m_ATHLON;
const int x86_push_memory = m_386 | m_K6 | m_ATHLON | m_PENT4;
const int x86_zero_extend_with_and = m_486 | m_PENT;
const int x86_movx = m_ATHLON | m_PPRO | m_PENT4 /* m_386 | m_K6 */;
const int x86_double_with_add = ~m_386;
const int x86_use_bit_test = m_386;
const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON | m_K6;
const int x86_cmove = m_PPRO | m_ATHLON | m_PENT4;
const int x86_3dnow_a = m_ATHLON;
const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON | m_PENT4;
const int x86_branch_hints = m_PENT4;
const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4;
const int x86_partial_reg_stall = m_PPRO;
const int x86_use_loop = m_K6;
const int x86_use_fiop = ~(m_PPRO | m_ATHLON | m_PENT);
const int x86_use_mov0 = m_K6;
const int x86_use_cltd = ~(m_PENT | m_K6);
const int x86_read_modify_write = ~m_PENT;
const int x86_read_modify = ~(m_PENT | m_PPRO);
const int x86_split_long_moves = m_PPRO;
const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON;
const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
const int x86_single_stringop = m_386 | m_PENT4;
const int x86_qimode_math = ~(0);
const int x86_promote_qi_regs = 0;
const int x86_himode_math = ~(m_PPRO);
const int x86_promote_hi_regs = m_PPRO;
const int x86_sub_esp_4 = m_ATHLON | m_PPRO | m_PENT4;
const int x86_sub_esp_8 = m_ATHLON | m_PPRO | m_386 | m_486 | m_PENT4;
const int x86_add_esp_4 = m_ATHLON | m_K6 | m_PENT4;
const int x86_add_esp_8 = m_ATHLON | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4;
const int x86_integer_DFmode_moves = ~(m_ATHLON | m_PENT4 | m_PPRO);
const int x86_partial_reg_dependency = m_ATHLON | m_PENT4;
const int x86_memory_mismatch_stall = m_ATHLON | m_PENT4;
const int x86_accumulate_outgoing_args = m_ATHLON | m_PENT4 | m_PPRO;
const int x86_prologue_using_move = m_ATHLON | m_PENT4 | m_PPRO;
const int x86_epilogue_using_move = m_ATHLON | m_PENT4 | m_PPRO;
const int x86_decompose_lea = m_PENT4;
const int x86_shift1 = ~m_486;
const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON | m_PENT4;
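
/* Each mask above is a set of processors.  A feature test ands the
   mask with a one-hot cpu mask, e.g. (a hypothetical sketch of the
   TARGET_* wrappers that i386.h presumably provides):

     #define TARGET_USE_LEAVE (x86_use_leave & CPUMASK)

   where CPUMASK is (1 << ix86_cpu); compare the
   x86_accumulate_outgoing_args and x86_arch_always_fancy_math_387
   tests in override_options () below.  */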

/* In case the average insn count for a single function invocation is
   lower than this constant, emit fast (but longer) prologue and
   epilogue code.  */
#define FAST_PROLOGUE_INSN_COUNT 30

/* Set by prologue expander and used by epilogue expander to determine
   the style used.  */
static int use_fast_prologue_epilogue;

/* Names for the 8-bit (low), 8-bit (high), and 16-bit registers,
   respectively.  */
static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
static const char *const hi_reg_name[] = HI_REGISTER_NAMES;

/* Array of the smallest class containing reg number REGNO, indexed by
   REGNO.  Used by REGNO_REG_CLASS in i386.h.  */

enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
{
  /* ax, dx, cx, bx */
  AREG, DREG, CREG, BREG,
  /* si, di, bp, sp */
  SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
  /* FP registers */
  FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
  FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
  /* arg pointer */
  NON_Q_REGS,
  /* flags, fpsr, dirflag, frame */
  NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  SSE_REGS, SSE_REGS,
  MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
  MMX_REGS, MMX_REGS,
  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  SSE_REGS, SSE_REGS,
};

/* The "default" register map used in 32bit mode.  */

int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 2, 1, 3, 6, 7, 4, 5,		/* general regs */
  12, 13, 14, 15, 16, 17, 18, 19,	/* fp regs */
  -1, -1, -1, -1, -1,			/* arg, flags, fpsr, dir, frame */
  21, 22, 23, 24, 25, 26, 27, 28,	/* SSE */
  29, 30, 31, 32, 33, 34, 35, 36,	/* MMX */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended integer registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended SSE registers */
};
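
/* Entries of -1 in the maps above and below mark registers that have
   no encoding in the debug format; the maps are presumably consumed
   via DBX_REGISTER_NUMBER in i386.h, which translates a gcc register
   number into the debugger's numbering.  */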

static int const x86_64_int_parameter_registers[6] =
{
  5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
  FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
};

static int const x86_64_int_return_registers[4] =
{
  0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
};

/* The "default" register map used in 64bit mode.  */
int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 1, 2, 3, 4, 5, 6, 7,		/* general regs */
  33, 34, 35, 36, 37, 38, 39, 40,	/* fp regs */
  -1, -1, -1, -1, -1,			/* arg, flags, fpsr, dir, frame */
  17, 18, 19, 20, 21, 22, 23, 24,	/* SSE */
  41, 42, 43, 44, 45, 46, 47, 48,	/* MMX */
  8, 9, 10, 11, 12, 13, 14, 15,		/* extended integer registers */
  25, 26, 27, 28, 29, 30, 31, 32,	/* extended SSE registers */
};

/* Define the register numbers to be used in Dwarf debugging information.
   The SVR4 reference port C compiler uses the following register numbers
   in its Dwarf output code:
	0 for %eax (gcc regno = 0)
	1 for %ecx (gcc regno = 2)
	2 for %edx (gcc regno = 1)
	3 for %ebx (gcc regno = 3)
	4 for %esp (gcc regno = 7)
	5 for %ebp (gcc regno = 6)
	6 for %esi (gcc regno = 4)
	7 for %edi (gcc regno = 5)
   The following three DWARF register numbers are never generated by
   the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
   believes these numbers have these meanings.
	8  for %eip    (no gcc equivalent)
	9  for %eflags (gcc regno = 17)
	10 for %trapno (no gcc equivalent)
   It is not at all clear how we should number the FP stack registers
   for the x86 architecture.  If the version of SDB on x86/svr4 were
   a bit less brain dead with respect to floating-point then we would
   have a precedent to follow with respect to DWARF register numbers
   for x86 FP registers, but the SDB on x86/svr4 is so completely
   broken with respect to FP registers that it is hardly worth thinking
   of it as something to strive for compatibility with.
   The version of x86/svr4 SDB I have at the moment does (partially)
   seem to believe that DWARF register number 11 is associated with
   the x86 register %st(0), but that's about all.  Higher DWARF
   register numbers don't seem to be associated with anything in
   particular, and even for DWARF regno 11, SDB only seems to under-
   stand that it should say that a variable lives in %st(0) (when
   asked via an `=' command) if we said it was in DWARF regno 11,
   but SDB still prints garbage when asked for the value of the
   variable in question (via a `/' command).
   (Also note that the labels SDB prints for various FP stack regs
   when doing an `x' command are all wrong.)
   Note that these problems generally don't affect the native SVR4
   C compiler because it doesn't allow the use of -O with -g and
   because when it is *not* optimizing, it allocates a memory
   location for each floating-point variable, and the memory
   location is what gets described in the DWARF AT_location
   attribute for the variable in question.
   Regardless of the severe mental illness of the x86/svr4 SDB, we
   do something sensible here and we use the following DWARF
   register numbers.  Note that these are all stack-top-relative
   numbers.
	11 for %st(0) (gcc regno = 8)
	12 for %st(1) (gcc regno = 9)
	13 for %st(2) (gcc regno = 10)
	14 for %st(3) (gcc regno = 11)
	15 for %st(4) (gcc regno = 12)
	16 for %st(5) (gcc regno = 13)
	17 for %st(6) (gcc regno = 14)
	18 for %st(7) (gcc regno = 15)
*/
int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 2, 1, 3, 6, 7, 5, 4,		/* general regs */
  11, 12, 13, 14, 15, 16, 17, 18,	/* fp regs */
  -1, 9, -1, -1, -1,			/* arg, flags, fpsr, dir, frame */
  21, 22, 23, 24, 25, 26, 27, 28,	/* SSE registers */
  29, 30, 31, 32, 33, 34, 35, 36,	/* MMX registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended integer registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended SSE registers */
};

/* Test and compare insns in i386.md store the information needed to
   generate branch and scc insns here.  */

rtx ix86_compare_op0 = NULL_RTX;
rtx ix86_compare_op1 = NULL_RTX;

/* The encoding characters for the four TLS models present in ELF.  */

static char const tls_model_chars[] = " GLil";

#define MAX_386_STACK_LOCALS 3
/* Size of the register save area.  */
#define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
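
/* Assuming the x86-64 ABI values REGPARM_MAX == 6 and
   SSE_REGPARM_MAX == 8 with UNITS_PER_WORD == 8, this works out to
   6 * 8 + 8 * 16 = 176 bytes, the size of the ABI's register save
   area for varargs functions.  */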

/* Define the structure for the machine field in struct function.  */
struct machine_function GTY(())
{
  rtx stack_locals[(int) MAX_MACHINE_MODE][MAX_386_STACK_LOCALS];
  const char *some_ld_name;
  int save_varrargs_registers;
  int accesses_prev_frame;
};
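
/* One of these is allocated per function by ix86_init_machine_status ()
   and reached through cfun->machine (see the accessor macros below);
   the GTY(()) marker exposes the rtx fields to the garbage collector.  */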

#define ix86_stack_locals (cfun->machine->stack_locals)
#define ix86_save_varrargs_registers (cfun->machine->save_varrargs_registers)

/* Structure describing stack frame layout.
   Stack grows downward:

   [arguments]
					      <- ARG_POINTER
   saved pc

   saved frame pointer if frame_pointer_needed
					      <- HARD_FRAME_POINTER
   [saved regs]

   [padding1]          \
		        )
   [va_arg registers]  (
		        > to_allocate	      <- FRAME_POINTER
   [frame]	       (
		        )
   [padding2]	       /
  */
struct ix86_frame
{
  int nregs;
  int padding1;
  int va_arg_size;
  HOST_WIDE_INT frame;
  int padding2;
  int outgoing_arguments_size;
  int red_zone_size;

  HOST_WIDE_INT to_allocate;
  /* The offsets relative to ARG_POINTER.  */
  HOST_WIDE_INT frame_pointer_offset;
  HOST_WIDE_INT hard_frame_pointer_offset;
  HOST_WIDE_INT stack_pointer_offset;

  HOST_WIDE_INT local_size;
};
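
/* ix86_compute_frame_layout () below fills this structure in for the
   current function; the *_offset fields are relative to ARG_POINTER,
   as the picture above shows.  */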

/* Used to enable/disable debugging features.  */
const char *ix86_debug_arg_string, *ix86_debug_addr_string;
/* Code model option as passed by user.  */
const char *ix86_cmodel_string;
/* Parsed value.  */
enum cmodel ix86_cmodel;
/* Asm dialect.  */
const char *ix86_asm_string;
enum asm_dialect ix86_asm_dialect = ASM_ATT;
/* TLS dialect.  */
const char *ix86_tls_dialect_string;
enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;

/* Which unit we are generating floating point math for.  */
enum fpmath_unit ix86_fpmath;

/* Which cpu are we scheduling for.  */
enum processor_type ix86_cpu;
/* Which instruction set architecture to use.  */
enum processor_type ix86_arch;

/* Strings to hold which cpu and instruction set architecture to use.  */
const char *ix86_cpu_string;		/* for -mcpu=<xxx> */
const char *ix86_arch_string;		/* for -march=<xxx> */
const char *ix86_fpmath_string;		/* for -mfpmath=<xxx> */

/* # of registers to use to pass arguments.  */
const char *ix86_regparm_string;

/* True if the SSE prefetch instruction is not a NOOP.  */
int x86_prefetch_sse;

/* ix86_regparm_string as a number */
int ix86_regparm;

/* Alignment to use for loops and jumps:  */

/* Power of two alignment for loops.  */
const char *ix86_align_loops_string;

/* Power of two alignment for non-loop jumps.  */
const char *ix86_align_jumps_string;

/* Power of two alignment for stack boundary in bytes.  */
const char *ix86_preferred_stack_boundary_string;

/* Preferred alignment for stack boundary in bits.  */
int ix86_preferred_stack_boundary;

/* Values 1-5: see jump.c */
int ix86_branch_cost;
const char *ix86_branch_cost_string;

/* Power of two alignment for functions.  */
const char *ix86_align_funcs_string;

/* Prefix built by ASM_GENERATE_INTERNAL_LABEL.  */
static char internal_label_prefix[16];
static int internal_label_prefix_len;

static int local_symbolic_operand PARAMS ((rtx, enum machine_mode));
static int tls_symbolic_operand_1 PARAMS ((rtx, enum tls_model));
static void output_pic_addr_const PARAMS ((FILE *, rtx, int));
static void put_condition_code PARAMS ((enum rtx_code, enum machine_mode,
				       int, int, FILE *));
static const char *get_some_local_dynamic_name PARAMS ((void));
static int get_some_local_dynamic_name_1 PARAMS ((rtx *, void *));
static rtx maybe_get_pool_constant PARAMS ((rtx));
static rtx ix86_expand_int_compare PARAMS ((enum rtx_code, rtx, rtx));
static enum rtx_code ix86_prepare_fp_compare_args PARAMS ((enum rtx_code,
							   rtx *, rtx *));
static bool ix86_fixed_condition_code_regs PARAMS ((unsigned int *,
						    unsigned int *));
static enum machine_mode ix86_cc_modes_compatible PARAMS ((enum machine_mode,
							   enum machine_mode));
static rtx get_thread_pointer PARAMS ((void));
static void get_pc_thunk_name PARAMS ((char [32], unsigned int));
static rtx gen_push PARAMS ((rtx));
static int memory_address_length PARAMS ((rtx addr));
static int ix86_flags_dependant PARAMS ((rtx, rtx, enum attr_type));
static int ix86_agi_dependant PARAMS ((rtx, rtx, enum attr_type));
static enum attr_ppro_uops ix86_safe_ppro_uops PARAMS ((rtx));
static void ix86_dump_ppro_packet PARAMS ((FILE *));
static void ix86_reorder_insn PARAMS ((rtx *, rtx *));
static struct machine_function * ix86_init_machine_status PARAMS ((void));
static int ix86_split_to_parts PARAMS ((rtx, rtx *, enum machine_mode));
static int ix86_nsaved_regs PARAMS ((void));
static void ix86_emit_save_regs PARAMS ((void));
static void ix86_emit_save_regs_using_mov PARAMS ((rtx, HOST_WIDE_INT));
static void ix86_emit_restore_regs_using_mov PARAMS ((rtx, int, int));
static void ix86_output_function_epilogue PARAMS ((FILE *, HOST_WIDE_INT));
static void ix86_set_move_mem_attrs_1 PARAMS ((rtx, rtx, rtx, rtx, rtx));
static void ix86_sched_reorder_ppro PARAMS ((rtx *, rtx *));
static HOST_WIDE_INT ix86_GOT_alias_set PARAMS ((void));
static void ix86_adjust_counter PARAMS ((rtx, HOST_WIDE_INT));
static rtx ix86_expand_aligntest PARAMS ((rtx, int));
static void ix86_expand_strlensi_unroll_1 PARAMS ((rtx, rtx));
static int ix86_issue_rate PARAMS ((void));
static int ix86_adjust_cost PARAMS ((rtx, rtx, rtx, int));
static void ix86_sched_init PARAMS ((FILE *, int, int));
static int ix86_sched_reorder PARAMS ((FILE *, int, rtx *, int *, int));
static int ix86_variable_issue PARAMS ((FILE *, int, rtx, int));
static int ia32_use_dfa_pipeline_interface PARAMS ((void));
static int ia32_multipass_dfa_lookahead PARAMS ((void));
static void ix86_init_mmx_sse_builtins PARAMS ((void));
static rtx x86_this_parameter PARAMS ((tree));
static void x86_output_mi_thunk PARAMS ((FILE *, tree, HOST_WIDE_INT,
					 HOST_WIDE_INT, tree));
static bool x86_can_output_mi_thunk PARAMS ((tree, HOST_WIDE_INT,
					     HOST_WIDE_INT, tree));

struct ix86_address
{
  rtx base, index, disp;
  HOST_WIDE_INT scale;
};
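
/* ix86_decompose_address () below splits an address rtx into the
   canonical x86 form base + index * scale + disp; any of the three
   rtx fields may be NULL when that component is absent.  */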

static int ix86_decompose_address PARAMS ((rtx, struct ix86_address *));
static bool ix86_cannot_force_const_mem PARAMS ((rtx));

static void ix86_encode_section_info PARAMS ((tree, int)) ATTRIBUTE_UNUSED;
static const char *ix86_strip_name_encoding PARAMS ((const char *))
     ATTRIBUTE_UNUSED;

struct builtin_description;
static rtx ix86_expand_sse_comi PARAMS ((const struct builtin_description *,
					 tree, rtx));
static rtx ix86_expand_sse_compare PARAMS ((const struct builtin_description *,
					    tree, rtx));
static rtx ix86_expand_unop1_builtin PARAMS ((enum insn_code, tree, rtx));
static rtx ix86_expand_unop_builtin PARAMS ((enum insn_code, tree, rtx, int));
static rtx ix86_expand_binop_builtin PARAMS ((enum insn_code, tree, rtx));
static rtx ix86_expand_store_builtin PARAMS ((enum insn_code, tree));
static rtx safe_vector_operand PARAMS ((rtx, enum machine_mode));
static enum rtx_code ix86_fp_compare_code_to_integer PARAMS ((enum rtx_code));
static void ix86_fp_comparison_codes PARAMS ((enum rtx_code code,
					      enum rtx_code *,
					      enum rtx_code *,
					      enum rtx_code *));
static rtx ix86_expand_fp_compare PARAMS ((enum rtx_code, rtx, rtx, rtx,
					  rtx *, rtx *));
static int ix86_fp_comparison_arithmetics_cost PARAMS ((enum rtx_code code));
static int ix86_fp_comparison_fcomi_cost PARAMS ((enum rtx_code code));
static int ix86_fp_comparison_sahf_cost PARAMS ((enum rtx_code code));
static int ix86_fp_comparison_cost PARAMS ((enum rtx_code code));
static unsigned int ix86_select_alt_pic_regnum PARAMS ((void));
static int ix86_save_reg PARAMS ((unsigned int, int));
static void ix86_compute_frame_layout PARAMS ((struct ix86_frame *));
static int ix86_comp_type_attributes PARAMS ((tree, tree));
static int ix86_fntype_regparm PARAMS ((tree));
const struct attribute_spec ix86_attribute_table[];
static tree ix86_handle_cdecl_attribute PARAMS ((tree *, tree, tree, int, bool *));
static tree ix86_handle_regparm_attribute PARAMS ((tree *, tree, tree, int, bool *));
static int ix86_value_regno PARAMS ((enum machine_mode));
static bool contains_128bit_aligned_vector_p PARAMS ((tree));

#if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
static void ix86_svr3_asm_out_constructor PARAMS ((rtx, int));
#endif

/* Register class used for passing a given 64bit part of the argument.
   These represent classes as documented by the psABI, with the
   exception of the SSESF and SSEDF classes, which are basically the
   SSE class: gcc just uses an SFmode or DFmode move instead of DImode
   to avoid reformatting penalties.

   Similarly we play games with INTEGERSI_CLASS to use cheaper SImode
   moves whenever possible (the upper half is padding).  */
enum x86_64_reg_class
  {
    X86_64_NO_CLASS,
    X86_64_INTEGER_CLASS,
    X86_64_INTEGERSI_CLASS,
    X86_64_SSE_CLASS,
    X86_64_SSESF_CLASS,
    X86_64_SSEDF_CLASS,
    X86_64_SSEUP_CLASS,
    X86_64_X87_CLASS,
    X86_64_X87UP_CLASS,
    X86_64_MEMORY_CLASS
  };
static const char * const x86_64_reg_class_name[] =
   {"no", "integer", "integerSI", "sse", "sseSF", "sseDF", "sseup", "x87", "x87up", "no"};

#define MAX_CLASSES 4
static int classify_argument PARAMS ((enum machine_mode, tree,
				      enum x86_64_reg_class [MAX_CLASSES],
				      int));
static int examine_argument PARAMS ((enum machine_mode, tree, int, int *,
				     int *));
static rtx construct_container PARAMS ((enum machine_mode, tree, int, int, int,
					const int *, int));
static enum x86_64_reg_class merge_classes PARAMS ((enum x86_64_reg_class,
						    enum x86_64_reg_class));

/* Initialize the GCC target structure.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
#ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
#  undef TARGET_MERGE_DECL_ATTRIBUTES
#  define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
#endif

#undef TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS ix86_init_builtins

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN ix86_expand_builtin

#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue

#undef TARGET_ASM_OPEN_PAREN
#define TARGET_ASM_OPEN_PAREN ""
#undef TARGET_ASM_CLOSE_PAREN
#define TARGET_ASM_CLOSE_PAREN ""

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
#ifdef ASM_QUAD
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
#endif

#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE ix86_variable_issue
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT ix86_sched_init
#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER ix86_sched_reorder
#undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
#define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE \
  ia32_use_dfa_pipeline_interface
#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
  ia32_multipass_dfa_lookahead

#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif
#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk

#undef TARGET_FIXED_CONDITION_CODE_REGS
#define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
#undef TARGET_CC_MODES_COMPATIBLE
#define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible

struct gcc_target targetm = TARGET_INITIALIZER;
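
/* The #undef/#define pairs above replace the TARGET_INITIALIZER
   defaults (from target-def.h) with the i386-specific hooks, so
   targetm ends up pointing at the functions defined in this file.  */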

/* The svr4 ABI for the i386 says that records and unions are returned
   in memory.  */
#ifndef DEFAULT_PCC_STRUCT_RETURN
#define DEFAULT_PCC_STRUCT_RETURN 1
#endif

/* Sometimes certain combinations of command options do not make
   sense on a particular target machine.  You can define a macro
   `OVERRIDE_OPTIONS' to take account of this.  This macro, if
   defined, is executed once just after all the command options have
   been parsed.

   Don't use this macro to turn on various extra optimizations for
   `-O'.  That is what `OPTIMIZATION_OPTIONS' is for.  */

void
override_options ()
{
  int i;
  /* Comes from final.c -- no real reason to change it.  */
#define MAX_CODE_ALIGN 16

  static struct ptt
    {
      const struct processor_costs *cost;	/* Processor costs */
      const int target_enable;			/* Target flags to enable.  */
      const int target_disable;			/* Target flags to disable.  */
      const int align_loop;			/* Default alignments.  */
      const int align_loop_max_skip;
      const int align_jump;
      const int align_jump_max_skip;
      const int align_func;
      const int branch_cost;
    }
  const processor_target_table[PROCESSOR_max] =
    {
      {&i386_cost, 0, 0, 4, 3, 4, 3, 4, 1},
      {&i486_cost, 0, 0, 16, 15, 16, 15, 16, 1},
      {&pentium_cost, 0, 0, 16, 7, 16, 7, 16, 1},
      {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16, 1},
      {&k6_cost, 0, 0, 32, 7, 32, 7, 32, 1},
      {&athlon_cost, 0, 0, 16, 7, 64, 7, 16, 1},
      {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0, 1}
    };

  static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
  static struct pta
    {
      const char *const name;		/* processor name or nickname.  */
      const enum processor_type processor;
      const enum pta_flags
	{
	  PTA_SSE = 1,
	  PTA_SSE2 = 2,
	  PTA_SSE3 = 4,
	  PTA_MMX = 8,
	  PTA_PREFETCH_SSE = 16,
	  PTA_3DNOW = 32,
	  PTA_3DNOW_A = 64
	} flags;
    }
  const processor_alias_table[] =
    {
      {"i386", PROCESSOR_I386, 0},
      {"i486", PROCESSOR_I486, 0},
      {"i586", PROCESSOR_PENTIUM, 0},
      {"pentium", PROCESSOR_PENTIUM, 0},
      {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
      {"winchip-c6", PROCESSOR_I486, PTA_MMX},
      {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
      {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
      {"i686", PROCESSOR_PENTIUMPRO, 0},
      {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
      {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
      {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
      {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
				       | PTA_MMX | PTA_PREFETCH_SSE},
      {"prescott", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2 | PTA_SSE3
				       | PTA_MMX | PTA_PREFETCH_SSE},
      {"nocona", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2 | PTA_SSE3
				     | PTA_MMX | PTA_PREFETCH_SSE},
      {"k6", PROCESSOR_K6, PTA_MMX},
      {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
      {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
      {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
				   | PTA_3DNOW_A},
      {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
					 | PTA_3DNOW | PTA_3DNOW_A},
      {"x86-64", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_SSE},
      {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
				    | PTA_3DNOW_A | PTA_SSE},
      {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
				      | PTA_3DNOW_A | PTA_SSE},
      {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
				      | PTA_3DNOW_A | PTA_SSE},
    };

  int const pta_size = ARRAY_SIZE (processor_alias_table);
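
  /* As a worked example of how the table is applied below:
     -march=athlon-xp selects PROCESSOR_ATHLON and, unless overridden
     explicitly on the command line, turns on MASK_MMX, MASK_3DNOW,
     MASK_3DNOW_A and MASK_SSE, plus x86_prefetch_sse.  */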

  /* By default our XFmode is the 80-bit extended format.  If we use
     TFmode instead, it's also the 80-bit format, but with padding.  */
  real_format_for_mode[XFmode - QFmode] = &ieee_extended_intel_96_format;
  real_format_for_mode[TFmode - QFmode] = &ieee_extended_intel_128_format;

  /* Set the default values for switches whose default depends on TARGET_64BIT
     in case they weren't overwritten by command line options.  */
  if (TARGET_64BIT)
    {
      if (flag_omit_frame_pointer == 2)
	flag_omit_frame_pointer = 1;
      if (flag_asynchronous_unwind_tables == 2)
	flag_asynchronous_unwind_tables = 1;
      if (flag_pcc_struct_return == 2)
	flag_pcc_struct_return = 0;
    }
  else
    {
      if (flag_omit_frame_pointer == 2)
	flag_omit_frame_pointer = 0;
      if (flag_asynchronous_unwind_tables == 2)
	flag_asynchronous_unwind_tables = 0;
      if (flag_pcc_struct_return == 2)
	flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
    }

#ifdef SUBTARGET_OVERRIDE_OPTIONS
  SUBTARGET_OVERRIDE_OPTIONS;
#endif

  if (!ix86_cpu_string && ix86_arch_string)
    ix86_cpu_string = ix86_arch_string;
  if (!ix86_cpu_string)
    ix86_cpu_string = cpu_names [TARGET_CPU_DEFAULT];
  if (!ix86_arch_string)
    ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";

  if (ix86_cmodel_string != 0)
    {
      if (!strcmp (ix86_cmodel_string, "small"))
	ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
      else if (flag_pic)
	sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
      else if (!strcmp (ix86_cmodel_string, "32"))
	ix86_cmodel = CM_32;
      else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
	ix86_cmodel = CM_KERNEL;
      else if (!strcmp (ix86_cmodel_string, "medium") && !flag_pic)
	ix86_cmodel = CM_MEDIUM;
      else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
	ix86_cmodel = CM_LARGE;
      else
	error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
    }
  else
    {
      ix86_cmodel = CM_32;
      if (TARGET_64BIT)
	ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
    }
  if (ix86_asm_string != 0)
    {
      if (!strcmp (ix86_asm_string, "intel"))
	ix86_asm_dialect = ASM_INTEL;
      else if (!strcmp (ix86_asm_string, "att"))
	ix86_asm_dialect = ASM_ATT;
      else
	error ("bad value (%s) for -masm= switch", ix86_asm_string);
    }
  if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
    error ("code model `%s' not supported in the %s bit mode",
	   ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
  if (ix86_cmodel == CM_LARGE)
    sorry ("code model `large' not supported yet");
  if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
    sorry ("%i-bit mode not compiled in",
	   (target_flags & MASK_64BIT) ? 64 : 32);

  for (i = 0; i < pta_size; i++)
    if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
      {
	ix86_arch = processor_alias_table[i].processor;
	/* Default cpu tuning to the architecture.  */
	ix86_cpu = ix86_arch;
	if (processor_alias_table[i].flags & PTA_MMX
	    && !(target_flags_explicit & MASK_MMX))
	  target_flags |= MASK_MMX;
	if (processor_alias_table[i].flags & PTA_3DNOW
	    && !(target_flags_explicit & MASK_3DNOW))
	  target_flags |= MASK_3DNOW;
	if (processor_alias_table[i].flags & PTA_3DNOW_A
	    && !(target_flags_explicit & MASK_3DNOW_A))
	  target_flags |= MASK_3DNOW_A;
	if (processor_alias_table[i].flags & PTA_SSE
	    && !(target_flags_explicit & MASK_SSE))
	  target_flags |= MASK_SSE;
	if (processor_alias_table[i].flags & PTA_SSE2
	    && !(target_flags_explicit & MASK_SSE2))
	  target_flags |= MASK_SSE2;
	if (processor_alias_table[i].flags & PTA_SSE3
	    && !(target_flags_explicit & MASK_SSE3))
	  target_flags |= MASK_SSE3;
	if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
	  x86_prefetch_sse = true;
	break;
      }

  if (i == pta_size)
    error ("bad value (%s) for -march= switch", ix86_arch_string);

  for (i = 0; i < pta_size; i++)
    if (! strcmp (ix86_cpu_string, processor_alias_table[i].name))
      {
	ix86_cpu = processor_alias_table[i].processor;
	/* Test PTA_PREFETCH_SSE inside the loop so we never read
	   processor_alias_table[pta_size] when the -mcpu value does
	   not match any entry.  */
	if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
	  x86_prefetch_sse = true;
	break;
      }
  if (i == pta_size)
    error ("bad value (%s) for -mcpu= switch", ix86_cpu_string);

  if (optimize_size)
    ix86_cost = &size_cost;
  else
    ix86_cost = processor_target_table[ix86_cpu].cost;
  target_flags |= processor_target_table[ix86_cpu].target_enable;
  target_flags &= ~processor_target_table[ix86_cpu].target_disable;

  /* Arrange to set up i386_stack_locals for all functions.  */
  init_machine_status = ix86_init_machine_status;

  /* Validate -mregparm= value.  */
  if (ix86_regparm_string)
    {
      i = atoi (ix86_regparm_string);
      if (i < 0 || i > REGPARM_MAX)
	error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
      else
	ix86_regparm = i;
    }
  else if (TARGET_64BIT)
    ix86_regparm = REGPARM_MAX;

  /* If the user has provided any of the -malign-* options,
     warn and use that value only if -falign-* is not set.
     Remove this code in GCC 3.2 or later.  */
  if (ix86_align_loops_string)
    {
      warning ("-malign-loops is obsolete, use -falign-loops");
      if (align_loops == 0)
	{
	  i = atoi (ix86_align_loops_string);
	  if (i < 0 || i > MAX_CODE_ALIGN)
	    error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
	  else
	    align_loops = 1 << i;
	}
    }

  if (ix86_align_jumps_string)
    {
      warning ("-malign-jumps is obsolete, use -falign-jumps");
      if (align_jumps == 0)
	{
	  i = atoi (ix86_align_jumps_string);
	  if (i < 0 || i > MAX_CODE_ALIGN)
	    error ("-malign-jumps=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
	  else
	    align_jumps = 1 << i;
	}
    }

  if (ix86_align_funcs_string)
    {
      warning ("-malign-functions is obsolete, use -falign-functions");
      if (align_functions == 0)
	{
	  i = atoi (ix86_align_funcs_string);
	  if (i < 0 || i > MAX_CODE_ALIGN)
	    error ("-malign-functions=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
	  else
	    align_functions = 1 << i;
	}
    }

  /* Default align_* from the processor table.  */
  if (align_loops == 0)
    {
      align_loops = processor_target_table[ix86_cpu].align_loop;
      align_loops_max_skip = processor_target_table[ix86_cpu].align_loop_max_skip;
    }
  if (align_jumps == 0)
    {
      align_jumps = processor_target_table[ix86_cpu].align_jump;
      align_jumps_max_skip = processor_target_table[ix86_cpu].align_jump_max_skip;
    }
  if (align_functions == 0)
    {
      align_functions = processor_target_table[ix86_cpu].align_func;
    }

  /* Validate -mpreferred-stack-boundary= value, or provide default.
     The default of 128 bits is for Pentium III's SSE __m128, but we
     don't want additional code to keep the stack aligned when
     optimizing for code size.  */
  ix86_preferred_stack_boundary = (optimize_size
				   ? TARGET_64BIT ? 128 : 32
				   : 128);
  if (ix86_preferred_stack_boundary_string)
    {
      i = atoi (ix86_preferred_stack_boundary_string);
      if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
	error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
	       TARGET_64BIT ? 4 : 2);
      else
	ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
    }
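
  /* For example, -mpreferred-stack-boundary=4 yields
     (1 << 4) * BITS_PER_UNIT = 128 bits, i.e. a 16-byte boundary.  */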

  /* Validate -mbranch-cost= value, or provide default.  */
  ix86_branch_cost = processor_target_table[ix86_cpu].branch_cost;
  if (ix86_branch_cost_string)
    {
      i = atoi (ix86_branch_cost_string);
      if (i < 0 || i > 5)
	error ("-mbranch-cost=%d is not between 0 and 5", i);
      else
	ix86_branch_cost = i;
    }

  if (ix86_tls_dialect_string)
    {
      if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
	ix86_tls_dialect = TLS_DIALECT_GNU;
      else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
	ix86_tls_dialect = TLS_DIALECT_SUN;
      else
	error ("bad value (%s) for -mtls-dialect= switch",
	       ix86_tls_dialect_string);
    }

  /* Keep nonleaf frame pointers.  */
  if (TARGET_OMIT_LEAF_FRAME_POINTER)
    flag_omit_frame_pointer = 1;

  /* If we're doing fast math, we don't care about comparison order
     wrt NaNs.  This lets us use a shorter comparison sequence.  */
  if (flag_unsafe_math_optimizations)
    target_flags &= ~MASK_IEEE_FP;

  /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
     since the insns won't need emulation.  */
  if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
    target_flags &= ~MASK_NO_FANCY_MATH_387;

  /* Turn on SSE2 builtins for -msse3.  */
  if (TARGET_SSE3)
    target_flags |= MASK_SSE2;

  /* Turn on SSE builtins for -msse2.  */
  if (TARGET_SSE2)
    target_flags |= MASK_SSE;

  if (TARGET_64BIT)
    {
      if (TARGET_ALIGN_DOUBLE)
	error ("-malign-double makes no sense in the 64bit mode");
      if (TARGET_RTD)
	error ("-mrtd calling convention not supported in the 64bit mode");
      /* Enable by default the SSE and MMX builtins.  */
      target_flags |= (MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE);
      ix86_fpmath = FPMATH_SSE;
    }
  else
    ix86_fpmath = FPMATH_387;

  if (ix86_fpmath_string != 0)
    {
      if (! strcmp (ix86_fpmath_string, "387"))
	ix86_fpmath = FPMATH_387;
      else if (! strcmp (ix86_fpmath_string, "sse"))
	{
	  if (!TARGET_SSE)
	    {
	      warning ("SSE instruction set disabled, using 387 arithmetics");
	      ix86_fpmath = FPMATH_387;
	    }
	  else
	    ix86_fpmath = FPMATH_SSE;
	}
      else if (! strcmp (ix86_fpmath_string, "387,sse")
	       || ! strcmp (ix86_fpmath_string, "sse,387"))
	{
	  if (!TARGET_SSE)
	    {
	      warning ("SSE instruction set disabled, using 387 arithmetics");
	      ix86_fpmath = FPMATH_387;
	    }
	  else if (!TARGET_80387)
	    {
	      warning ("387 instruction set disabled, using SSE arithmetics");
	      ix86_fpmath = FPMATH_SSE;
	    }
	  else
	    ix86_fpmath = FPMATH_SSE | FPMATH_387;
	}
      else
	error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
    }

  /* It makes no sense to ask for just SSE builtins, so MMX is also turned
     on by -msse.  */
  if (TARGET_SSE)
    {
      target_flags |= MASK_MMX;
      x86_prefetch_sse = true;
    }

  /* If it has 3DNow! it also has MMX, so MMX is also turned on by -m3dnow.  */
  if (TARGET_3DNOW)
    {
      target_flags |= MASK_MMX;
      /* If we are targeting the Athlon architecture, enable the 3DNow!/MMX
	 extensions it adds.  */
      if (x86_3dnow_a & (1 << ix86_arch))
	target_flags |= MASK_3DNOW_A;
    }
  if ((x86_accumulate_outgoing_args & CPUMASK)
      && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
      && !optimize_size)
    target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;

  /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix.  */
  {
    char *p;
    ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
    p = strchr (internal_label_prefix, 'X');
    internal_label_prefix_len = p - internal_label_prefix;
    *p = '\0';
  }
}
1366 
1367 void
optimization_options(level,size)1368 optimization_options (level, size)
1369      int level;
1370      int size ATTRIBUTE_UNUSED;
1371 {
1372   /* For -O2 and beyond, turn off -fschedule-insns by default.  It tends to
1373      make the problem with not enough registers even worse.  */
1374 #ifdef INSN_SCHEDULING
1375   if (level > 1)
1376     flag_schedule_insns = 0;
1377 #endif
1378 
1379   /* The default values of these switches depend on TARGET_64BIT,
1380      which is not known at this point.  Mark these values with 2 and
1381      let the user override them.  If there is no command line option
1382      specifying them, we will set the defaults in override_options.  */
1383   if (optimize >= 1)
1384     flag_omit_frame_pointer = 2;
1385   flag_pcc_struct_return = 2;
1386   flag_asynchronous_unwind_tables = 2;
1387 }
1388 
1389 /* Table of valid machine attributes.  */
1390 const struct attribute_spec ix86_attribute_table[] =
1391 {
1392   /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
1393   /* Stdcall attribute says callee is responsible for popping arguments
1394      if they are not variable.  */
1395   { "stdcall",   0, 0, false, true,  true,  ix86_handle_cdecl_attribute },
1396   /* Cdecl attribute says the callee is a normal C declaration */
1397   { "cdecl",     0, 0, false, true,  true,  ix86_handle_cdecl_attribute },
1398   /* Regparm attribute specifies how many integer arguments are to be
1399      passed in registers.  */
1400   { "regparm",   1, 1, false, true,  true,  ix86_handle_regparm_attribute },
1401 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
1402   { "dllimport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1403   { "dllexport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1404   { "shared",    0, 0, true,  false, false, ix86_handle_shared_attribute },
1405 #endif
1406   { NULL,        0, 0, false, false, false, NULL }
1407 };
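/* Illustrative sketch, not from the original source: on a 32-bit
   target these attributes appear in user code roughly as follows
   (the function names are hypothetical):

     int __attribute__((stdcall)) s_add (int a, int b);      callee pops 8 bytes
     int __attribute__((cdecl))   c_add (int a, int b);      caller pops args
     int __attribute__((regparm(3))) r_add (int a, int b, int c);
                                          arguments arrive in %eax, %edx, %ecx

   "regparm" takes exactly one integer constant argument, which the
   handler below checks against REGPARM_MAX.  */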
1408 
1409 /* Handle a "cdecl" or "stdcall" attribute;
1410    arguments as in struct attribute_spec.handler.  */
1411 static tree
1412 ix86_handle_cdecl_attribute (node, name, args, flags, no_add_attrs)
1413      tree *node;
1414      tree name;
1415      tree args ATTRIBUTE_UNUSED;
1416      int flags ATTRIBUTE_UNUSED;
1417      bool *no_add_attrs;
1418 {
1419   if (TREE_CODE (*node) != FUNCTION_TYPE
1420       && TREE_CODE (*node) != METHOD_TYPE
1421       && TREE_CODE (*node) != FIELD_DECL
1422       && TREE_CODE (*node) != TYPE_DECL)
1423     {
1424       warning ("`%s' attribute only applies to functions",
1425 	       IDENTIFIER_POINTER (name));
1426       *no_add_attrs = true;
1427     }
1428 
1429   if (TARGET_64BIT)
1430     {
1431       warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
1432       *no_add_attrs = true;
1433     }
1434 
1435   return NULL_TREE;
1436 }
1437 
1438 /* Handle a "regparm" attribute;
1439    arguments as in struct attribute_spec.handler.  */
1440 static tree
1441 ix86_handle_regparm_attribute (node, name, args, flags, no_add_attrs)
1442      tree *node;
1443      tree name;
1444      tree args;
1445      int flags ATTRIBUTE_UNUSED;
1446      bool *no_add_attrs;
1447 {
1448   if (TREE_CODE (*node) != FUNCTION_TYPE
1449       && TREE_CODE (*node) != METHOD_TYPE
1450       && TREE_CODE (*node) != FIELD_DECL
1451       && TREE_CODE (*node) != TYPE_DECL)
1452     {
1453       warning ("`%s' attribute only applies to functions",
1454 	       IDENTIFIER_POINTER (name));
1455       *no_add_attrs = true;
1456     }
1457   else
1458     {
1459       tree cst;
1460 
1461       cst = TREE_VALUE (args);
1462       if (TREE_CODE (cst) != INTEGER_CST)
1463 	{
1464 	  warning ("`%s' attribute requires an integer constant argument",
1465 		   IDENTIFIER_POINTER (name));
1466 	  *no_add_attrs = true;
1467 	}
1468       else if (compare_tree_int (cst, REGPARM_MAX) > 0)
1469 	{
1470 	  warning ("argument to `%s' attribute larger than %d",
1471 		   IDENTIFIER_POINTER (name), REGPARM_MAX);
1472 	  *no_add_attrs = true;
1473 	}
1474     }
1475 
1476   return NULL_TREE;
1477 }
1478 
1479 /* Return 0 if the attributes for two types are incompatible, 1 if they
1480    are compatible, and 2 if they are nearly compatible (which causes a
1481    warning to be generated).  */
1482 
1483 static int
1484 ix86_comp_type_attributes (type1, type2)
1485      tree type1;
1486      tree type2;
1487 {
1488   /* Check for mismatch of non-default calling convention.  */
1489   const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
1490 
1491   if (TREE_CODE (type1) != FUNCTION_TYPE)
1492     return 1;
1493 
1494   /* Check for mismatched calling conventions (cdecl vs stdcall).  */
1495   if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
1496       != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
1497     return 0;
1498   return 1;
1499 }
1500 
1501 /* Return the regparm value for a function with the indicated TYPE.  */
1502 
1503 static int
1504 ix86_fntype_regparm (type)
1505      tree type;
1506 {
1507   tree attr;
1508 
1509   attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
1510   if (attr)
1511     return TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1512   else
1513     return ix86_regparm;
1514 }
1515 
1516 /* Value is the number of bytes of arguments automatically
1517    popped when returning from a subroutine call.
1518    FUNDECL is the declaration node of the function (as a tree),
1519    FUNTYPE is the data type of the function (as a tree),
1520    or for a library call it is an identifier node for the subroutine name.
1521    SIZE is the number of bytes of arguments passed on the stack.
1522 
1523    On the 80386, the RTD insn may be used to pop them if the number
1524      of args is fixed, but if the number is variable then the caller
1525      must pop them all.  RTD can't be used for library calls now
1526      because the library is compiled with the Unix compiler.
1527    Use of RTD is a selectable option, since it is incompatible with
1528    standard Unix calling sequences.  If the option is not selected,
1529    the caller must always pop the args.
1530 
1531    The attribute stdcall is equivalent to RTD on a per module basis.  */
1532 
1533 int
1534 ix86_return_pops_args (fundecl, funtype, size)
1535      tree fundecl;
1536      tree funtype;
1537      int size;
1538 {
1539   int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
1540 
1541     /* Cdecl functions override -mrtd, and never pop the stack.  */
1542   if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
1543 
1544     /* Stdcall functions will pop the stack if not variable args.  */
1545     if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype)))
1546       rtd = 1;
1547 
1548     if (rtd
1549         && (TYPE_ARG_TYPES (funtype) == NULL_TREE
1550 	    || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
1551 		== void_type_node)))
1552       return size;
1553   }
1554 
1555   /* Lose any fake structure return argument if it is passed on the stack.  */
1556   if (aggregate_value_p (TREE_TYPE (funtype))
1557       && !TARGET_64BIT)
1558     {
1559       int nregs = ix86_fntype_regparm (funtype);
1560 
1561       if (!nregs)
1562 	return GET_MODE_SIZE (Pmode);
1563     }
1564 
1565   return 0;
1566 }
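/* Worked example for the rules above (editorial sketch): for
     int __attribute__((stdcall)) f (int a, int b);
   the argument list is fixed, so the function returns SIZE = 8 and the
   callee pops both arguments (ret $8).  For a stdcall function taking
   "..." the list is variable, 0 is returned, and the caller pops.  */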
1567 
1568 /* Argument support functions.  */
1569 
1570 /* Return true when register may be used to pass function parameters.  */
1571 bool
1572 ix86_function_arg_regno_p (regno)
1573      int regno;
1574 {
1575   int i;
1576   if (!TARGET_64BIT)
1577     return (regno < REGPARM_MAX
1578 	    || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
1579   if (SSE_REGNO_P (regno) && TARGET_SSE)
1580     return true;
1581   /* RAX is used as hidden argument to va_arg functions.  */
1582   if (!regno)
1583     return true;
1584   for (i = 0; i < REGPARM_MAX; i++)
1585     if (regno == x86_64_int_parameter_registers[i])
1586       return true;
1587   return false;
1588 }
1589 
1590 /* Initialize a variable CUM of type CUMULATIVE_ARGS
1591    for a call to a function whose data type is FNTYPE.
1592    For a library call, FNTYPE is 0.  */
1593 
1594 void
1595 init_cumulative_args (cum, fntype, libname)
1596      CUMULATIVE_ARGS *cum;	/* Argument info to initialize */
1597      tree fntype;		/* tree ptr for function decl */
1598      rtx libname;		/* SYMBOL_REF of library name or 0 */
1599 {
1600   static CUMULATIVE_ARGS zero_cum;
1601   tree param, next_param;
1602 
1603   if (TARGET_DEBUG_ARG)
1604     {
1605       fprintf (stderr, "\ninit_cumulative_args (");
1606       if (fntype)
1607 	fprintf (stderr, "fntype code = %s, ret code = %s",
1608 		 tree_code_name[(int) TREE_CODE (fntype)],
1609 		 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
1610       else
1611 	fprintf (stderr, "no fntype");
1612 
1613       if (libname)
1614 	fprintf (stderr, ", libname = %s", XSTR (libname, 0));
1615     }
1616 
1617   *cum = zero_cum;
1618 
1619   /* Set up the number of registers to use for passing arguments.  */
1620   cum->nregs = ix86_regparm;
1621   cum->sse_nregs = SSE_REGPARM_MAX;
1622   if (fntype && !TARGET_64BIT)
1623     {
1624       tree attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (fntype));
1625 
1626       if (attr)
1627 	cum->nregs = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1628     }
1629   cum->maybe_vaarg = false;
1630 
1631   /* Determine if this function has variable arguments.  This is
1632      indicated by the last argument being 'void_type_node' if there
1633      are no variable arguments.  If there are variable arguments, then
1634      we won't pass anything in registers.  */
1635 
1636   if (cum->nregs)
1637     {
1638       for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
1639 	   param != 0; param = next_param)
1640 	{
1641 	  next_param = TREE_CHAIN (param);
1642 	  if (next_param == 0 && TREE_VALUE (param) != void_type_node)
1643 	    {
1644 	      if (!TARGET_64BIT)
1645 		cum->nregs = 0;
1646 	      cum->maybe_vaarg = true;
1647 	    }
1648 	}
1649     }
1650   if ((!fntype && !libname)
1651       || (fntype && !TYPE_ARG_TYPES (fntype)))
1652     cum->maybe_vaarg = 1;
1653 
1654   if (TARGET_DEBUG_ARG)
1655     fprintf (stderr, ", nregs=%d )\n", cum->nregs);
1656 
1657   return;
1658 }
1659 
1660 /* x86-64 register passing implementation.  See the x86-64 ABI for details.
1661    The goal of this code is to classify each eightbyte of an incoming argument
1662    by register class and assign registers accordingly.  */
1663 
1664 /* Return the union class of CLASS1 and CLASS2.
1665    See the x86-64 PS ABI for details.  */
1666 
1667 static enum x86_64_reg_class
1668 merge_classes (class1, class2)
1669      enum x86_64_reg_class class1, class2;
1670 {
1671   /* Rule #1: If both classes are equal, this is the resulting class.  */
1672   if (class1 == class2)
1673     return class1;
1674 
1675   /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
1676      the other class.  */
1677   if (class1 == X86_64_NO_CLASS)
1678     return class2;
1679   if (class2 == X86_64_NO_CLASS)
1680     return class1;
1681 
1682   /* Rule #3: If one of the classes is MEMORY, the result is MEMORY.  */
1683   if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
1684     return X86_64_MEMORY_CLASS;
1685 
1686   /* Rule #4: If one of the classes is INTEGER, the result is INTEGER.  */
1687   if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
1688       || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
1689     return X86_64_INTEGERSI_CLASS;
1690   if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
1691       || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
1692     return X86_64_INTEGER_CLASS;
1693 
1694   /* Rule #5: If one of the classes is X87 or X87UP class, MEMORY is used.  */
1695   if (class1 == X86_64_X87_CLASS || class1 == X86_64_X87UP_CLASS
1696       || class2 == X86_64_X87_CLASS || class2 == X86_64_X87UP_CLASS)
1697     return X86_64_MEMORY_CLASS;
1698 
1699   /* Rule #6: Otherwise class SSE is used.  */
1700   return X86_64_SSE_CLASS;
1701 }
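/* Worked example (editorial sketch, hypothetical struct): classifying
     struct s { int i; float f; };
   puts INTEGERSI (the int at offset 0) and SSE (the float at offset 4,
   not 64-bit aligned) into the same eightbyte; rule #4 merges them to
   INTEGER, so the whole struct is passed in one general register.  */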
1702 
1703 /* Classify the argument of type TYPE and mode MODE.
1704    CLASSES will be filled by the register class used to pass each word
1705    of the operand.  The number of words is returned.  In case the parameter
1706    should be passed in memory, 0 is returned. As a special case for zero
1707    sized containers, classes[0] will be NO_CLASS and 1 is returned.
1708 
1709    BIT_OFFSET is used internally for handling records; it specifies the
1710    offset in bits modulo 256 to avoid overflow cases.
1711 
1712    See the x86-64 PS ABI for details.
1713 */
1714 
1715 static int
1716 classify_argument (mode, type, classes, bit_offset)
1717      enum machine_mode mode;
1718      tree type;
1719      enum x86_64_reg_class classes[MAX_CLASSES];
1720      int bit_offset;
1721 {
1722   int bytes =
1723     (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1724   int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1725 
1726   /* Variable sized entities are always passed/returned in memory.  */
1727   if (bytes < 0)
1728     return 0;
1729 
1730   if (mode != VOIDmode
1731       && MUST_PASS_IN_STACK (mode, type))
1732     return 0;
1733 
1734   if (type && AGGREGATE_TYPE_P (type))
1735     {
1736       int i;
1737       tree field;
1738       enum x86_64_reg_class subclasses[MAX_CLASSES];
1739 
1740       /* On x86-64 we pass structures larger than 16 bytes on the stack.  */
1741       if (bytes > 16)
1742 	return 0;
1743 
1744       for (i = 0; i < words; i++)
1745 	classes[i] = X86_64_NO_CLASS;
1746 
1747       /* Zero sized arrays or structures are NO_CLASS.  We return 0 to
1748 	 signal the memory class, so handle this as a special case.  */
1749       if (!words)
1750 	{
1751 	  classes[0] = X86_64_NO_CLASS;
1752 	  return 1;
1753 	}
1754 
1755       /* Classify each field of record and merge classes.  */
1756       if (TREE_CODE (type) == RECORD_TYPE)
1757 	{
1758 	  /* For C++ classes, first merge in the fields of the base classes.  */
1759 	  if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
1760 	    {
1761 	      tree bases = TYPE_BINFO_BASETYPES (type);
1762 	      int n_bases = TREE_VEC_LENGTH (bases);
1763 	      int basenum;
1764 
1765 	      for (basenum = 0; basenum < n_bases; ++basenum)
1766 		{
1767 		   tree binfo = TREE_VEC_ELT (bases, basenum);
1768 		   int num;
1769 		   int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
1770 		   tree type = BINFO_TYPE (binfo);
1771 
1772 		   num = classify_argument (TYPE_MODE (type),
1773 					    type, subclasses,
1774 					    (offset + bit_offset) % 256);
1775 		   if (!num)
1776 		     return 0;
1777 		   for (i = 0; i < num; i++)
1778 		     {
1779 		       int pos = (offset + (bit_offset % 64)) / 8 / 8;
1780 		       classes[i + pos] =
1781 			 merge_classes (subclasses[i], classes[i + pos]);
1782 		     }
1783 		}
1784 	    }
1785 	  /* And now merge the fields of the structure.  */
1786 	  for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
1787 	    {
1788 	      if (TREE_CODE (field) == FIELD_DECL)
1789 		{
1790 		  int num;
1791 
1792 		  /* Bitfields are always classified as integer.  Handle them
1793 		     early, since later code would consider them to be
1794 		     misaligned integers.  */
1795 		  if (DECL_BIT_FIELD (field))
1796 		    {
1797 		      for (i = int_bit_position (field) / 8 / 8;
1798 			   i < (int_bit_position (field)
1799 			        + tree_low_cst (DECL_SIZE (field), 0)
1800 			       	+ 63) / 8 / 8; i++)
1801 			classes[i] =
1802 			  merge_classes (X86_64_INTEGER_CLASS,
1803 					 classes[i]);
1804 		    }
1805 		  else
1806 		    {
1807 		      num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
1808 					       TREE_TYPE (field), subclasses,
1809 					       (int_bit_position (field)
1810 						+ bit_offset) % 256);
1811 		      if (!num)
1812 			return 0;
1813 		      for (i = 0; i < num; i++)
1814 			{
1815 			  int pos =
1816 			    (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
1817 			  classes[i + pos] =
1818 			    merge_classes (subclasses[i], classes[i + pos]);
1819 			}
1820 		    }
1821 		}
1822 	    }
1823 	}
1824       /* Arrays are handled as small records.  */
1825       else if (TREE_CODE (type) == ARRAY_TYPE)
1826 	{
1827 	  int num;
1828 	  num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
1829 				   TREE_TYPE (type), subclasses, bit_offset);
1830 	  if (!num)
1831 	    return 0;
1832 
1833 	  /* The partial classes are now full classes.  */
1834 	  if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
1835 	    subclasses[0] = X86_64_SSE_CLASS;
1836 	  if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
1837 	    subclasses[0] = X86_64_INTEGER_CLASS;
1838 
1839 	  for (i = 0; i < words; i++)
1840 	    classes[i] = subclasses[i % num];
1841 	}
1842       /* Unions are similar to RECORD_TYPE but offset is always 0.  */
1843       else if (TREE_CODE (type) == UNION_TYPE
1844 	       || TREE_CODE (type) == QUAL_UNION_TYPE)
1845 	{
1846 	  /* For C++ classes, first merge in the fields of the base classes.  */
1847 	  if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
1848 	    {
1849 	      tree bases = TYPE_BINFO_BASETYPES (type);
1850 	      int n_bases = TREE_VEC_LENGTH (bases);
1851 	      int basenum;
1852 
1853 	      for (basenum = 0; basenum < n_bases; ++basenum)
1854 		{
1855 		   tree binfo = TREE_VEC_ELT (bases, basenum);
1856 		   int num;
1857 		   int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
1858 		   tree type = BINFO_TYPE (binfo);
1859 
1860 		   num = classify_argument (TYPE_MODE (type),
1861 					    type, subclasses,
1862 					    (offset + (bit_offset % 64)) % 256);
1863 		   if (!num)
1864 		     return 0;
1865 		   for (i = 0; i < num; i++)
1866 		     {
1867 		       int pos = (offset + (bit_offset % 64)) / 8 / 8;
1868 		       classes[i + pos] =
1869 			 merge_classes (subclasses[i], classes[i + pos]);
1870 		     }
1871 		}
1872 	    }
1873 	  for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
1874 	    {
1875 	      if (TREE_CODE (field) == FIELD_DECL)
1876 		{
1877 		  int num;
1878 		  num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
1879 					   TREE_TYPE (field), subclasses,
1880 					   bit_offset);
1881 		  if (!num)
1882 		    return 0;
1883 		  for (i = 0; i < num; i++)
1884 		    classes[i] = merge_classes (subclasses[i], classes[i]);
1885 		}
1886 	    }
1887 	}
1888       else
1889 	abort ();
1890 
1891       /* Final merger cleanup.  */
1892       for (i = 0; i < words; i++)
1893 	{
1894 	  /* If one class is MEMORY, everything should be passed in
1895 	     memory.  */
1896 	  if (classes[i] == X86_64_MEMORY_CLASS)
1897 	    return 0;
1898 
1899 	  /* The X86_64_SSEUP_CLASS should always be preceded by
1900 	     X86_64_SSE_CLASS.  */
1901 	  if (classes[i] == X86_64_SSEUP_CLASS
1902 	      && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
1903 	    classes[i] = X86_64_SSE_CLASS;
1904 
1905 	  /*  X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS.  */
1906 	  if (classes[i] == X86_64_X87UP_CLASS
1907 	      && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
1908 	    classes[i] = X86_64_SSE_CLASS;
1909 	}
1910       return words;
1911     }
1912 
1913   /* Compute the alignment needed.  We align all types to their natural
1914      boundaries, except XFmode, which is treated as 128-bit aligned here.  */
1915   if (mode != VOIDmode && mode != BLKmode)
1916     {
1917       int mode_alignment = GET_MODE_BITSIZE (mode);
1918 
1919       if (mode == XFmode)
1920 	mode_alignment = 128;
1921       else if (mode == XCmode)
1922 	mode_alignment = 256;
1923       if (COMPLEX_MODE_P (mode))
1924 	mode_alignment /= 2;
1925       /* Misaligned fields are always returned in memory.  */
1926       if (bit_offset % mode_alignment)
1927 	return 0;
1928     }
1929 
1930   /* Classification of atomic types.  */
1931   switch (mode)
1932     {
1933     case DImode:
1934     case SImode:
1935     case HImode:
1936     case QImode:
1937     case CSImode:
1938     case CHImode:
1939     case CQImode:
1940       if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
1941 	classes[0] = X86_64_INTEGERSI_CLASS;
1942       else
1943 	classes[0] = X86_64_INTEGER_CLASS;
1944       return 1;
1945     case CDImode:
1946     case TImode:
1947       classes[0] = classes[1] = X86_64_INTEGER_CLASS;
1948       return 2;
1949     case CTImode:
1950       classes[0] = classes[1] = X86_64_INTEGER_CLASS;
1951       classes[2] = classes[3] = X86_64_INTEGER_CLASS;
1952       return 4;
1953     case SFmode:
1954       if (!(bit_offset % 64))
1955 	classes[0] = X86_64_SSESF_CLASS;
1956       else
1957 	classes[0] = X86_64_SSE_CLASS;
1958       return 1;
1959     case DFmode:
1960       classes[0] = X86_64_SSEDF_CLASS;
1961       return 1;
1962     case TFmode:
1963       classes[0] = X86_64_X87_CLASS;
1964       classes[1] = X86_64_X87UP_CLASS;
1965       return 2;
1966     case TCmode:
1967       classes[0] = X86_64_X87_CLASS;
1968       classes[1] = X86_64_X87UP_CLASS;
1969       classes[2] = X86_64_X87_CLASS;
1970       classes[3] = X86_64_X87UP_CLASS;
1971       return 4;
1972     case DCmode:
1973       classes[0] = X86_64_SSEDF_CLASS;
1974       classes[1] = X86_64_SSEDF_CLASS;
1975       return 2;
1976     case SCmode:
1977       classes[0] = X86_64_SSE_CLASS;
1978       return 1;
1979     case V4SFmode:
1980     case V4SImode:
1981     case V16QImode:
1982     case V8HImode:
1983     case V2DFmode:
1984     case V2DImode:
1985       classes[0] = X86_64_SSE_CLASS;
1986       classes[1] = X86_64_SSEUP_CLASS;
1987       return 2;
1988     case V2SFmode:
1989     case V2SImode:
1990     case V4HImode:
1991     case V8QImode:
1992       return 0;
1993     case BLKmode:
1994     case VOIDmode:
1995       return 0;
1996     default:
1997       abort ();
1998     }
1999 }
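/* Worked example (editorial sketch, hypothetical struct): classifying
     struct p { double x; double y; };    16 bytes, BLKmode
   yields two eightbytes of class SSEDF, so the struct travels in two
   SSE registers.  A 24-byte struct fails the "bytes > 16" test above
   and gets 0, i.e. it is passed in memory.  */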
2000 
2001 /* Examine the argument and set the number of registers required in each
2002    class.  Return 0 iff the parameter should be passed in memory.  */
2003 static int
2004 examine_argument (mode, type, in_return, int_nregs, sse_nregs)
2005      enum machine_mode mode;
2006      tree type;
2007      int *int_nregs, *sse_nregs;
2008      int in_return;
2009 {
2010   enum x86_64_reg_class class[MAX_CLASSES];
2011   int n = classify_argument (mode, type, class, 0);
2012 
2013   *int_nregs = 0;
2014   *sse_nregs = 0;
2015   if (!n)
2016     return 0;
2017   for (n--; n >= 0; n--)
2018     switch (class[n])
2019       {
2020       case X86_64_INTEGER_CLASS:
2021       case X86_64_INTEGERSI_CLASS:
2022 	(*int_nregs)++;
2023 	break;
2024       case X86_64_SSE_CLASS:
2025       case X86_64_SSESF_CLASS:
2026       case X86_64_SSEDF_CLASS:
2027 	(*sse_nregs)++;
2028 	break;
2029       case X86_64_NO_CLASS:
2030       case X86_64_SSEUP_CLASS:
2031 	break;
2032       case X86_64_X87_CLASS:
2033       case X86_64_X87UP_CLASS:
2034 	if (!in_return)
2035 	  return 0;
2036 	break;
2037       case X86_64_MEMORY_CLASS:
2038 	abort ();
2039       }
2040   return 1;
2041 }
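/* Example (editorial sketch): for struct { long l; double d; } the
   classes are INTEGER and SSEDF, giving *int_nregs = 1 and
   *sse_nregs = 1.  A TFmode argument classifies as X87/X87UP, so with
   IN_RETURN == 0 the function returns 0 and the value goes to memory.  */
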
2042 /* Construct container for the argument used by GCC interface.  See
2043    FUNCTION_ARG for the detailed description.  */
2044 static rtx
2045 construct_container (mode, type, in_return, nintregs, nsseregs, intreg, sse_regno)
2046      enum machine_mode mode;
2047      tree type;
2048      int in_return;
2049      int nintregs, nsseregs;
2050      const int * intreg;
2051      int sse_regno;
2052 {
2053   enum machine_mode tmpmode;
2054   int bytes =
2055     (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2056   enum x86_64_reg_class class[MAX_CLASSES];
2057   int n;
2058   int i;
2059   int nexps = 0;
2060   int needed_sseregs, needed_intregs;
2061   rtx exp[MAX_CLASSES];
2062   rtx ret;
2063 
2064   n = classify_argument (mode, type, class, 0);
2065   if (TARGET_DEBUG_ARG)
2066     {
2067       if (!n)
2068 	fprintf (stderr, "Memory class\n");
2069       else
2070 	{
2071 	  fprintf (stderr, "Classes:");
2072 	  for (i = 0; i < n; i++)
2073 	    {
2074 	      fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
2075 	    }
2076 	   fprintf (stderr, "\n");
2077 	}
2078     }
2079   if (!n)
2080     return NULL;
2081   if (!examine_argument (mode, type, in_return, &needed_intregs, &needed_sseregs))
2082     return NULL;
2083   if (needed_intregs > nintregs || needed_sseregs > nsseregs)
2084     return NULL;
2085 
2086   /* First construct simple cases.  Avoid SCmode, since we want to use
2087      a single register to pass this type.  */
2088   if (n == 1 && mode != SCmode)
2089     switch (class[0])
2090       {
2091       case X86_64_INTEGER_CLASS:
2092       case X86_64_INTEGERSI_CLASS:
2093 	return gen_rtx_REG (mode, intreg[0]);
2094       case X86_64_SSE_CLASS:
2095       case X86_64_SSESF_CLASS:
2096       case X86_64_SSEDF_CLASS:
2097 	return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2098       case X86_64_X87_CLASS:
2099 	return gen_rtx_REG (mode, FIRST_STACK_REG);
2100       case X86_64_NO_CLASS:
2101 	/* Zero sized array, struct or class.  */
2102 	return NULL;
2103       default:
2104 	abort ();
2105       }
2106   if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS
2107       && mode != BLKmode)
2108     return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2109   if (n == 2
2110       && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
2111     return gen_rtx_REG (TFmode, FIRST_STACK_REG);
2112   if (n == 2 && class[0] == X86_64_INTEGER_CLASS
2113       && class[1] == X86_64_INTEGER_CLASS
2114       && (mode == CDImode || mode == TImode)
2115       && intreg[0] + 1 == intreg[1])
2116     return gen_rtx_REG (mode, intreg[0]);
2117   if (n == 4
2118       && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS
2119       && class[2] == X86_64_X87_CLASS && class[3] == X86_64_X87UP_CLASS
2120       && mode != BLKmode)
2121     return gen_rtx_REG (TCmode, FIRST_STACK_REG);
2122 
2123   /* Otherwise figure out the entries of the PARALLEL.  */
2124   for (i = 0; i < n; i++)
2125     {
2126       switch (class[i])
2127         {
2128 	  case X86_64_NO_CLASS:
2129 	    break;
2130 	  case X86_64_INTEGER_CLASS:
2131 	  case X86_64_INTEGERSI_CLASS:
2132 	    /* Merge TImodes on aligned occasions here too.  */
2133 	    if (i * 8 + 8 > bytes)
2134 	      tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
2135 	    else if (class[i] == X86_64_INTEGERSI_CLASS)
2136 	      tmpmode = SImode;
2137 	    else
2138 	      tmpmode = DImode;
2139 	    /* We've requested 24 bytes we don't have mode for.  Use DImode.  */
2140 	    if (tmpmode == BLKmode)
2141 	      tmpmode = DImode;
2142 	    exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2143 					       gen_rtx_REG (tmpmode, *intreg),
2144 					       GEN_INT (i*8));
2145 	    intreg++;
2146 	    break;
2147 	  case X86_64_SSESF_CLASS:
2148 	    exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2149 					       gen_rtx_REG (SFmode,
2150 							    SSE_REGNO (sse_regno)),
2151 					       GEN_INT (i*8));
2152 	    sse_regno++;
2153 	    break;
2154 	  case X86_64_SSEDF_CLASS:
2155 	    exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2156 					       gen_rtx_REG (DFmode,
2157 							    SSE_REGNO (sse_regno)),
2158 					       GEN_INT (i*8));
2159 	    sse_regno++;
2160 	    break;
2161 	  case X86_64_SSE_CLASS:
2162 	    if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
2163 	      tmpmode = TImode;
2164 	    else
2165 	      tmpmode = DImode;
2166 	    exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2167 					       gen_rtx_REG (tmpmode,
2168 							    SSE_REGNO (sse_regno)),
2169 					       GEN_INT (i*8));
2170 	    if (tmpmode == TImode)
2171 	      i++;
2172 	    sse_regno++;
2173 	    break;
2174 	  default:
2175 	    abort ();
2176 	}
2177     }
2178   ret =  gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
2179   for (i = 0; i < nexps; i++)
2180     XVECEXP (ret, 0, i) = exp [i];
2181   return ret;
2182 }
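/* Example (editorial sketch): for struct { long l; double d; } passed
   as the first argument, the PARALLEL built above looks roughly like

     (parallel:BLK [(expr_list (reg:DI di) (const_int 0))
                    (expr_list (reg:DF xmm0) (const_int 8))])

   i.e. eightbyte 0 lives in a general register and eightbyte 1 in an
   SSE register.  */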
2183 
2184 /* Update the data in CUM to advance over an argument
2185    of mode MODE and data type TYPE.
2186    (TYPE is null for libcalls where that information may not be available.)  */
2187 
2188 void
2189 function_arg_advance (cum, mode, type, named)
2190      CUMULATIVE_ARGS *cum;	/* current arg information */
2191      enum machine_mode mode;	/* current arg mode */
2192      tree type;			/* type of the argument or 0 if lib support */
2193      int named;			/* whether or not the argument was named */
2194 {
2195   int bytes =
2196     (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2197   int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2198 
2199   if (TARGET_DEBUG_ARG)
2200     fprintf (stderr,
2201 	     "function_adv (sz=%d, wds=%2d, nregs=%d, mode=%s, named=%d)\n\n",
2202 	     words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2203   if (TARGET_64BIT)
2204     {
2205       int int_nregs, sse_nregs;
2206       if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
2207 	cum->words += words;
2208       else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
2209 	{
2210 	  cum->nregs -= int_nregs;
2211 	  cum->sse_nregs -= sse_nregs;
2212 	  cum->regno += int_nregs;
2213 	  cum->sse_regno += sse_nregs;
2214 	}
2215       else
2216 	cum->words += words;
2217     }
2218   else
2219     {
2220       if (TARGET_SSE && mode == TImode)
2221 	{
2222 	  cum->sse_words += words;
2223 	  cum->sse_nregs -= 1;
2224 	  cum->sse_regno += 1;
2225 	  if (cum->sse_nregs <= 0)
2226 	    {
2227 	      cum->sse_nregs = 0;
2228 	      cum->sse_regno = 0;
2229 	    }
2230 	}
2231       else
2232 	{
2233 	  cum->words += words;
2234 	  cum->nregs -= words;
2235 	  cum->regno += words;
2236 
2237 	  if (cum->nregs <= 0)
2238 	    {
2239 	      cum->nregs = 0;
2240 	      cum->regno = 0;
2241 	    }
2242 	}
2243     }
2244   return;
2245 }
2246 
2247 /* Define where to put the arguments to a function.
2248    Value is zero to push the argument on the stack,
2249    or a hard register in which to store the argument.
2250 
2251    MODE is the argument's machine mode.
2252    TYPE is the data type of the argument (as a tree).
2253     This is null for libcalls where that information may
2254     not be available.
2255    CUM is a variable of type CUMULATIVE_ARGS which gives info about
2256     the preceding args and about the function being called.
2257    NAMED is nonzero if this argument is a named parameter
2258     (otherwise it is an extra parameter matching an ellipsis).  */
2259 
2260 rtx
2261 function_arg (cum, mode, type, named)
2262      CUMULATIVE_ARGS *cum;	/* current arg information */
2263      enum machine_mode mode;	/* current arg mode */
2264      tree type;			/* type of the argument or 0 if lib support */
2265      int named;			/* != 0 for normal args, == 0 for ... args */
2266 {
2267   rtx ret   = NULL_RTX;
2268   int bytes =
2269     (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2270   int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2271 
2272   /* Handle a hidden AL argument containing the number of registers for
2273      varargs x86-64 functions.  For the i386 ABI just return constm1_rtx to
2274      avoid any AL settings.  */
2275   if (mode == VOIDmode)
2276     {
2277       if (TARGET_64BIT)
2278 	return GEN_INT (cum->maybe_vaarg
2279 			? (cum->sse_nregs < 0
2280 			   ? SSE_REGPARM_MAX
2281 			   : cum->sse_regno)
2282 			: -1);
2283       else
2284 	return constm1_rtx;
2285     }
2286   if (TARGET_64BIT)
2287     ret = construct_container (mode, type, 0, cum->nregs, cum->sse_nregs,
2288 			       &x86_64_int_parameter_registers [cum->regno],
2289 			       cum->sse_regno);
2290   else
2291     switch (mode)
2292       {
2293 	/* For now, pass fp/complex values on the stack.  */
2294       default:
2295 	break;
2296 
2297       case BLKmode:
2298 	if (bytes < 0)
2299 	  break;
2300 	/* FALLTHRU */
2301       case DImode:
2302       case SImode:
2303       case HImode:
2304       case QImode:
2305 	if (words <= cum->nregs)
2306 	  ret = gen_rtx_REG (mode, cum->regno);
2307 	break;
2308       case TImode:
2309 	if (cum->sse_nregs)
2310 	  ret = gen_rtx_REG (mode, cum->sse_regno);
2311 	break;
2312       }
2313 
2314   if (TARGET_DEBUG_ARG)
2315     {
2316       fprintf (stderr,
2317 	       "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
2318 	       words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2319 
2320       if (ret)
2321 	print_simple_rtl (stderr, ret);
2322       else
2323 	fprintf (stderr, ", stack");
2324 
2325       fprintf (stderr, " )\n");
2326     }
2327 
2328   return ret;
2329 }
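/* Usage sketch (editorial): for a 64-bit call f (int a, double b),
   function_arg returns (reg:SI di) for A and (reg:DF xmm0) for B,
   while function_arg_advance decrements cum->nregs and cum->sse_nregs.
   Once the relevant register class is exhausted, construct_container
   returns NULL and the argument is passed on the stack.  */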
2330 
2331 /* Return true when TYPE should be 128-bit aligned for the 32-bit argument
2332    passing ABI.  */
2333 static bool
2334 contains_128bit_aligned_vector_p (type)
2335      tree type;
2336 {
2337   enum machine_mode mode = TYPE_MODE (type);
2338   if (SSE_REG_MODE_P (mode)
2339       && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
2340     return true;
2341   if (TYPE_ALIGN (type) < 128)
2342     return false;
2343 
2344   if (AGGREGATE_TYPE_P (type))
2345     {
2346       /* Walk the aggregates recursively.  */
2347       if (TREE_CODE (type) == RECORD_TYPE
2348 	  || TREE_CODE (type) == UNION_TYPE
2349 	  || TREE_CODE (type) == QUAL_UNION_TYPE)
2350 	{
2351 	  tree field;
2352 
2353 	  if (TYPE_BINFO (type) != NULL
2354 	      && TYPE_BINFO_BASETYPES (type) != NULL)
2355 	    {
2356 	      tree bases = TYPE_BINFO_BASETYPES (type);
2357 	      int n_bases = TREE_VEC_LENGTH (bases);
2358 	      int i;
2359 
2360 	      for (i = 0; i < n_bases; ++i)
2361 		{
2362 		  tree binfo = TREE_VEC_ELT (bases, i);
2363 		  tree type = BINFO_TYPE (binfo);
2364 
2365 		  if (contains_128bit_aligned_vector_p (type))
2366 		    return true;
2367 		}
2368 	    }
2369 	  /* And now merge the fields of the structure.  */
2370 	  for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2371 	    {
2372 	      if (TREE_CODE (field) == FIELD_DECL
2373 		  && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
2374 		return true;
2375 	    }
2376 	}
2377       /* Just in case some language passes arrays by value.  */
2378       else if (TREE_CODE (type) == ARRAY_TYPE)
2379 	{
2380 	  if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
2381 	    return true;
2382 	}
2383       else
2384 	abort ();
2385     }
2386   return false;
2387 }
2388 
2389 /* A C expression that indicates when an argument must be passed by
2390    reference.  If nonzero for an argument, a copy of that argument is
2391    made in memory and a pointer to the argument is passed instead of
2392    the argument itself.  The pointer is passed in whatever way is
2393    appropriate for passing a pointer to that type.  */
2394 
2395 int
2396 function_arg_pass_by_reference (cum, mode, type, named)
2397      CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED;
2398      enum machine_mode mode ATTRIBUTE_UNUSED;
2399      tree type;
2400      int named ATTRIBUTE_UNUSED;
2401 {
2402   if (!TARGET_64BIT)
2403     return 0;
2404 
2405   if (type && int_size_in_bytes (type) == -1)
2406     {
2407       if (TARGET_DEBUG_ARG)
2408 	fprintf (stderr, "function_arg_pass_by_reference\n");
2409       return 1;
2410     }
2411 
2412   return 0;
2413 }
2414 
2415 /* Gives the alignment boundary, in bits, of an argument with the specified mode
2416    and type.   */
2417 
2418 int
2419 ix86_function_arg_boundary (mode, type)
2420      enum machine_mode mode;
2421      tree type;
2422 {
2423   int align;
2424   if (type)
2425     align = TYPE_ALIGN (type);
2426   else
2427     align = GET_MODE_ALIGNMENT (mode);
2428   if (align < PARM_BOUNDARY)
2429     align = PARM_BOUNDARY;
2430   if (!TARGET_64BIT)
2431     {
2432       /* i386 ABI defines all arguments to be 4 byte aligned.  We have to
2433 	 make an exception for SSE modes since these require 128bit
2434 	 alignment.
2435 
2436 	 The handling here differs from field_alignment.  ICC aligns MMX
2437 	 arguments to 4 byte boundaries, while structure fields are aligned
2438 	 to 8 byte boundaries.  */
2439       if (!type)
2440 	{
2441 	  if (!SSE_REG_MODE_P (mode))
2442 	    align = PARM_BOUNDARY;
2443 	}
2444       else
2445 	{
2446 	  if (!contains_128bit_aligned_vector_p (type))
2447 	    align = PARM_BOUNDARY;
2448 	}
2449       if (align != PARM_BOUNDARY && !TARGET_SSE)
2450 	abort();
2451     }
2452   if (align > 128)
2453     align = 128;
2454   return align;
2455 }
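/* Example (editorial sketch): under the 32-bit ABI a plain int gets
   PARM_BOUNDARY (32 bits), while an __m128 argument, or a struct
   containing one, is aligned to 128 bits.  The final clamp means no
   argument is ever aligned beyond 128 bits.  */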
2456 
2457 /* Return true if N is a possible register number of function value.  */
2458 bool
2459 ix86_function_value_regno_p (regno)
2460      int regno;
2461 {
2462   if (!TARGET_64BIT)
2463     {
2464       return ((regno) == 0
2465 	      || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
2466 	      || ((regno) == FIRST_SSE_REG && TARGET_SSE));
2467     }
2468   return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
2469 	  || ((regno) == FIRST_SSE_REG && TARGET_SSE)
2470 	  || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
2471 }
2472 
2473 /* Define how to find the value returned by a function.
2474    VALTYPE is the data type of the value (as a tree).
2475    If the precise function being called is known, FUNC is its FUNCTION_DECL;
2476    otherwise, FUNC is 0.  */
2477 rtx
2478 ix86_function_value (valtype)
2479      tree valtype;
2480 {
2481   if (TARGET_64BIT)
2482     {
2483       rtx ret = construct_container (TYPE_MODE (valtype), valtype, 1,
2484 				     REGPARM_MAX, SSE_REGPARM_MAX,
2485 				     x86_64_int_return_registers, 0);
2486       /* For zero sized structures, construct_container returns NULL, but we
2487          need to keep the rest of the compiler happy by returning a meaningful value.  */
2488       if (!ret)
2489 	ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
2490       return ret;
2491     }
2492   else
2493     return gen_rtx_REG (TYPE_MODE (valtype),
2494 			ix86_value_regno (TYPE_MODE (valtype)));
2495 }
2496 
2497 /* Return nonzero iff TYPE is returned in memory.  */
2498 int
2499 ix86_return_in_memory (type)
2500      tree type;
2501 {
2502   int needed_intregs, needed_sseregs, size;
2503   enum machine_mode mode = TYPE_MODE (type);
2504 
2505   if (TARGET_64BIT)
2506     return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
2507 
2508   if (mode == BLKmode)
2509     return 1;
2510 
2511   size = int_size_in_bytes (type);
2512 
2513   if (VECTOR_MODE_P (mode) || mode == TImode)
2514     {
2515       /* User-created vectors small enough to fit in EAX.  */
2516       if (size < 8)
2517 	return 0;
2518 
2519       /* MMX/3dNow values are returned on the stack, since we've
2520 	 got to EMMS/FEMMS before returning.  */
2521       if (size == 8)
2522 	return 1;
2523 
2524       /* SSE values are returned in XMM0.  */
2525       /* ??? Except when it doesn't exist?  We have a choice of
2526 	 either (1) being abi incompatible with a -march switch,
2527 	 or (2) generating an error here.  Given no good solution,
2528 	 I think the safest thing is one warning.  The user won't
2529 	 be able to use -Werror, but...  */
2530       if (size == 16)
2531 	{
2532 	  static bool warned;
2533 
2534 	  if (TARGET_SSE)
2535 	    return 0;
2536 
2537 	  if (!warned)
2538 	    {
2539 	      warned = true;
2540 	      warning ("SSE vector return without SSE enabled changes the ABI");
2541 	    }
2542 	  return 1;
2543 	}
2544     }
2545 
2546   if (mode == TFmode)
2547     return 0;
2548   if (size > 12)
2549     return 1;
2550   return 0;
2551 }
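/* Examples of the 32-bit rules above (editorial sketch): a 4-byte
   vector is returned in %eax (size < 8); an 8-byte MMX value goes to
   memory because of the EMMS problem; a 16-byte vector is returned in
   %xmm0 when SSE is enabled; and an ordinary 16-byte struct has
   BLKmode and is returned in memory.  */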
2552 
2553 /* Define how to find the value returned by a library function
2554    assuming the value has mode MODE.  */
2555 rtx
2556 ix86_libcall_value (mode)
2557    enum machine_mode mode;
2558 {
2559   if (TARGET_64BIT)
2560     {
2561       switch (mode)
2562 	{
2563 	  case SFmode:
2564 	  case SCmode:
2565 	  case DFmode:
2566 	  case DCmode:
2567 	    return gen_rtx_REG (mode, FIRST_SSE_REG);
2568 	  case TFmode:
2569 	  case TCmode:
2570 	    return gen_rtx_REG (mode, FIRST_FLOAT_REG);
2571 	  default:
2572 	    return gen_rtx_REG (mode, 0);
2573 	}
2574     }
2575   else
2576    return gen_rtx_REG (mode, ix86_value_regno (mode));
2577 }
2578 
2579 /* Given a mode, return the register to use for a return value.  */
2580 
2581 static int
2582 ix86_value_regno (mode)
2583      enum machine_mode mode;
2584 {
2585   /* Floating point return values in %st(0).  */
2586   if (GET_MODE_CLASS (mode) == MODE_FLOAT && TARGET_FLOAT_RETURNS_IN_80387)
2587     return FIRST_FLOAT_REG;
2588   /* 16-byte vector modes in %xmm0.  See ix86_return_in_memory for where
2589      we prevent this case when sse is not available.  */
2590   if (mode == TImode || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
2591     return FIRST_SSE_REG;
2592   /* Everything else in %eax.  */
2593   return 0;
2594 }
2595 
2596 /* Create the va_list data type.  */
2597 
2598 tree
2599 ix86_build_va_list ()
2600 {
2601   tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
2602 
2603   /* For i386 we use plain pointer to argument area.  */
2604   if (!TARGET_64BIT)
2605     return build_pointer_type (char_type_node);
2606 
2607   record = (*lang_hooks.types.make_type) (RECORD_TYPE);
2608   type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
2609 
2610   f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
2611 		      unsigned_type_node);
2612   f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
2613 		      unsigned_type_node);
2614   f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
2615 		      ptr_type_node);
2616   f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
2617 		      ptr_type_node);
2618 
2619   DECL_FIELD_CONTEXT (f_gpr) = record;
2620   DECL_FIELD_CONTEXT (f_fpr) = record;
2621   DECL_FIELD_CONTEXT (f_ovf) = record;
2622   DECL_FIELD_CONTEXT (f_sav) = record;
2623 
2624   TREE_CHAIN (record) = type_decl;
2625   TYPE_NAME (record) = type_decl;
2626   TYPE_FIELDS (record) = f_gpr;
2627   TREE_CHAIN (f_gpr) = f_fpr;
2628   TREE_CHAIN (f_fpr) = f_ovf;
2629   TREE_CHAIN (f_ovf) = f_sav;
2630 
2631   layout_type (record);
2632 
2633   /* The correct type is an array type of one element.  */
2634   return build_array_type (record, build_index_type (size_zero_node));
2635 }
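/* The record built above matches the x86-64 ABI's well-known
   declaration; as a C sketch:

     typedef struct __va_list_tag {
       unsigned int gp_offset;
       unsigned int fp_offset;
       void *overflow_arg_area;
       void *reg_save_area;
     } va_list[1];

   Making va_list an array of one element gives it pointer semantics
   when passed to functions while remaining a single object.  */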
2636 
2637 /* Perform any actions needed for a function that is receiving a
2638    variable number of arguments.
2639 
2640    CUM is as above.
2641 
2642    MODE and TYPE are the mode and type of the current parameter.
2643 
2644    PRETEND_SIZE is a variable that should be set to the amount of stack
2645    that must be pushed by the prolog to pretend that our caller pushed
2646    it.
2647 
2648    Normally, this macro will push all remaining incoming registers on the
2649    stack and set PRETEND_SIZE to the length of the registers pushed.  */
2650 
2651 void
2652 ix86_setup_incoming_varargs (cum, mode, type, pretend_size, no_rtl)
2653      CUMULATIVE_ARGS *cum;
2654      enum machine_mode mode;
2655      tree type;
2656      int *pretend_size ATTRIBUTE_UNUSED;
2657      int no_rtl;
2658 
2659 {
2660   CUMULATIVE_ARGS next_cum;
2661   rtx save_area = NULL_RTX, mem;
2662   rtx label;
2663   rtx label_ref;
2664   rtx tmp_reg;
2665   rtx nsse_reg;
2666   int set;
2667   tree fntype;
2668   int stdarg_p;
2669   int i;
2670 
2671   if (!TARGET_64BIT)
2672     return;
2673 
2674   /* Indicate that we need to allocate stack space for the varargs save area.  */
2675   ix86_save_varrargs_registers = 1;
2676 
2677   cfun->stack_alignment_needed = 128;
2678 
2679   fntype = TREE_TYPE (current_function_decl);
2680   stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
2681 	      && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
2682 		  != void_type_node));
2683 
2684   /* For varargs, we do not want to skip the dummy va_dcl argument.
2685      For stdargs, we do want to skip the last named argument.  */
2686   next_cum = *cum;
2687   if (stdarg_p)
2688     function_arg_advance (&next_cum, mode, type, 1);
2689 
2690   if (!no_rtl)
2691     save_area = frame_pointer_rtx;
2692 
2693   set = get_varargs_alias_set ();
2694 
2695   for (i = next_cum.regno; i < ix86_regparm; i++)
2696     {
2697       mem = gen_rtx_MEM (Pmode,
2698 			 plus_constant (save_area, i * UNITS_PER_WORD));
2699       set_mem_alias_set (mem, set);
2700       emit_move_insn (mem, gen_rtx_REG (Pmode,
2701 					x86_64_int_parameter_registers[i]));
2702     }
2703 
2704   if (next_cum.sse_nregs)
2705     {
2706       /* Now emit code to save SSE registers.  The AX parameter contains the
2707 	 number of SSE parameter registers used to call this function.  We use
2708 	 the sse_prologue_save insn template, which produces a computed jump
2709 	 across the SSE saves.  We need some preparation work to get this working.  */
2710 
2711       label = gen_label_rtx ();
2712       label_ref = gen_rtx_LABEL_REF (Pmode, label);
2713 
2714       /* Compute the address to jump to:
2715          label - eax*4 + nnamed_sse_arguments*4  */
2716       tmp_reg = gen_reg_rtx (Pmode);
2717       nsse_reg = gen_reg_rtx (Pmode);
2718       emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
2719       emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
2720 			      gen_rtx_MULT (Pmode, nsse_reg,
2721 					    GEN_INT (4))));
2722       if (next_cum.sse_regno)
2723 	emit_move_insn
2724 	  (nsse_reg,
2725 	   gen_rtx_CONST (DImode,
2726 			  gen_rtx_PLUS (DImode,
2727 					label_ref,
2728 					GEN_INT (next_cum.sse_regno * 4))));
2729       else
2730 	emit_move_insn (nsse_reg, label_ref);
2731       emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
2732 
2733       /* Compute the address of the memory block we save into.  We always use
2734 	 a pointer pointing 127 bytes past the first byte to store, which keeps
2735 	 the displacement within a signed byte and the instruction size at 4 bytes.  */
2736       tmp_reg = gen_reg_rtx (Pmode);
2737       emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
2738 			      plus_constant (save_area,
2739 					     8 * REGPARM_MAX + 127)));
2740       mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
2741       set_mem_alias_set (mem, set);
2742       set_mem_align (mem, BITS_PER_WORD);
2743 
2744       /* And finally do the dirty job!  */
2745       emit_insn (gen_sse_prologue_save (mem, nsse_reg,
2746 					GEN_INT (next_cum.sse_regno), label));
2747     }
2748 
2749 }
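/* Layout sketch (editorial) of the register save area built above,
   relative to frame_pointer_rtx:

     bytes   0 ..  47 : rdi, rsi, rdx, rcx, r8, r9   (8 bytes each)
     bytes  48 .. 175 : xmm0 .. xmm7                 (16 bytes each)

   The gp_offset and fp_offset fields of the va_list index into this
   block.  */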
2750 
2751 /* Implement va_start.  */
2752 
2753 void
2754 ix86_va_start (valist, nextarg)
2755      tree valist;
2756      rtx nextarg;
2757 {
2758   HOST_WIDE_INT words, n_gpr, n_fpr;
2759   tree f_gpr, f_fpr, f_ovf, f_sav;
2760   tree gpr, fpr, ovf, sav, t;
2761 
2762   /* Only the 64-bit target needs something special.  */
2763   if (!TARGET_64BIT)
2764     {
2765       std_expand_builtin_va_start (valist, nextarg);
2766       return;
2767     }
2768 
2769   f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
2770   f_fpr = TREE_CHAIN (f_gpr);
2771   f_ovf = TREE_CHAIN (f_fpr);
2772   f_sav = TREE_CHAIN (f_ovf);
2773 
2774   valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
2775   gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
2776   fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
2777   ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
2778   sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
2779 
2780   /* Count number of gp and fp argument registers used.  */
2781   words = current_function_args_info.words;
2782   n_gpr = current_function_args_info.regno;
2783   n_fpr = current_function_args_info.sse_regno;
2784 
2785   if (TARGET_DEBUG_ARG)
2786     fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
2787 	     (int) words, (int) n_gpr, (int) n_fpr);
2788 
2789   t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
2790 	     build_int_2 (n_gpr * 8, 0));
2791   TREE_SIDE_EFFECTS (t) = 1;
2792   expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2793 
2794   t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
2795 	     build_int_2 (n_fpr * 16 + 8*REGPARM_MAX, 0));
2796   TREE_SIDE_EFFECTS (t) = 1;
2797   expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2798 
2799   /* Find the overflow area.  */
2800   t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
2801   if (words != 0)
2802     t = build (PLUS_EXPR, TREE_TYPE (ovf), t,
2803 	       build_int_2 (words * UNITS_PER_WORD, 0));
2804   t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
2805   TREE_SIDE_EFFECTS (t) = 1;
2806   expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2807 
2808   /* Find the register save area.
2809      The function prologue saves it right above the stack frame.  */
2810   t = make_tree (TREE_TYPE (sav), frame_pointer_rtx);
2811   t = build (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
2812   TREE_SIDE_EFFECTS (t) = 1;
2813   expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2814 }
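/* Example (editorial sketch): in a function declared
     void f (int a, ...)
   one general register is consumed by the named argument, so va_start
   sets gp_offset = 8, fp_offset = 8 * REGPARM_MAX = 48, points
   overflow_arg_area at the first stack-passed argument, and points
   reg_save_area at the block saved by the prologue.  */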
2815 
2816 /* Implement va_arg.  */
2817 rtx
2818 ix86_va_arg (valist, type)
2819      tree valist, type;
2820 {
2821   static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
2822   tree f_gpr, f_fpr, f_ovf, f_sav;
2823   tree gpr, fpr, ovf, sav, t;
2824   int size, rsize;
2825   rtx lab_false, lab_over = NULL_RTX;
2826   rtx addr_rtx, r;
2827   rtx container;
2828   int indirect_p = 0;
2829 
2830   /* Only the 64-bit target needs something special.  */
2831   if (!TARGET_64BIT)
2832     {
2833       return std_expand_builtin_va_arg (valist, type);
2834     }
2835 
2836   f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
2837   f_fpr = TREE_CHAIN (f_gpr);
2838   f_ovf = TREE_CHAIN (f_fpr);
2839   f_sav = TREE_CHAIN (f_ovf);
2840 
2841   valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
2842   gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
2843   fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
2844   ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
2845   sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
2846 
2847   size = int_size_in_bytes (type);
2848   if (size == -1)
2849     {
2850       /* Passed by reference.  */
2851       indirect_p = 1;
2852       type = build_pointer_type (type);
2853       size = int_size_in_bytes (type);
2854     }
2855   rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2856 
2857   container = construct_container (TYPE_MODE (type), type, 0,
2858 				   REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
2859   /*
2860    * Pull the value out of the saved registers ...
2861    */
2862 
2863   addr_rtx = gen_reg_rtx (Pmode);
2864 
2865   if (container)
2866     {
2867       rtx int_addr_rtx, sse_addr_rtx;
2868       int needed_intregs, needed_sseregs;
2869       int need_temp;
2870 
2871       lab_over = gen_label_rtx ();
2872       lab_false = gen_label_rtx ();
2873 
2874       examine_argument (TYPE_MODE (type), type, 0,
2875 		        &needed_intregs, &needed_sseregs);
2876 
2877 
2878       need_temp = ((needed_intregs && TYPE_ALIGN (type) > 64)
2879 		   || TYPE_ALIGN (type) > 128);
2880 
2881       /* In case we are passing a structure, verify that it is a consecutive
2882          block in the register save area.  If not, we need to do moves.  */
2883       if (!need_temp && !REG_P (container))
2884 	{
2885 	  /* Verify that all registers are strictly consecutive.  */
2886 	  if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
2887 	    {
2888 	      int i;
2889 
2890 	      for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
2891 		{
2892 		  rtx slot = XVECEXP (container, 0, i);
2893 		  if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
2894 		      || INTVAL (XEXP (slot, 1)) != i * 16)
2895 		    need_temp = 1;
2896 		}
2897 	    }
2898 	  else
2899 	    {
2900 	      int i;
2901 
2902 	      for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
2903 		{
2904 		  rtx slot = XVECEXP (container, 0, i);
2905 		  if (REGNO (XEXP (slot, 0)) != (unsigned int) i
2906 		      || INTVAL (XEXP (slot, 1)) != i * 8)
2907 		    need_temp = 1;
2908 		}
2909 	    }
2910 	}
2911       if (!need_temp)
2912 	{
2913 	  int_addr_rtx = addr_rtx;
2914 	  sse_addr_rtx = addr_rtx;
2915 	}
2916       else
2917 	{
2918 	  int_addr_rtx = gen_reg_rtx (Pmode);
2919 	  sse_addr_rtx = gen_reg_rtx (Pmode);
2920 	}
2921       /* First ensure that we fit completely in registers.  */
2922       if (needed_intregs)
2923 	{
2924 	  emit_cmp_and_jump_insns (expand_expr
2925 				   (gpr, NULL_RTX, SImode, EXPAND_NORMAL),
2926 				   GEN_INT ((REGPARM_MAX - needed_intregs +
2927 					     1) * 8), GE, const1_rtx, SImode,
2928 				   1, lab_false);
2929 	}
2930       if (needed_sseregs)
2931 	{
2932 	  emit_cmp_and_jump_insns (expand_expr
2933 				   (fpr, NULL_RTX, SImode, EXPAND_NORMAL),
2934 				   GEN_INT ((SSE_REGPARM_MAX -
2935 					     needed_sseregs + 1) * 16 +
2936 					    REGPARM_MAX * 8), GE, const1_rtx,
2937 				   SImode, 1, lab_false);
2938 	}
2939 
2940       /* Compute index to start of area used for integer regs.  */
2941       if (needed_intregs)
2942 	{
2943 	  t = build (PLUS_EXPR, ptr_type_node, sav, gpr);
2944 	  r = expand_expr (t, int_addr_rtx, Pmode, EXPAND_NORMAL);
2945 	  if (r != int_addr_rtx)
2946 	    emit_move_insn (int_addr_rtx, r);
2947 	}
2948       if (needed_sseregs)
2949 	{
2950 	  t = build (PLUS_EXPR, ptr_type_node, sav, fpr);
2951 	  r = expand_expr (t, sse_addr_rtx, Pmode, EXPAND_NORMAL);
2952 	  if (r != sse_addr_rtx)
2953 	    emit_move_insn (sse_addr_rtx, r);
2954 	}
2955       if (need_temp)
2956 	{
2957 	  int i;
2958 	  rtx mem;
2959 	  rtx x;
2960 
2961 	  /* Never use the memory itself, as it has the alias set.  */
2962 	  x = XEXP (assign_temp (type, 0, 1, 0), 0);
2963 	  mem = gen_rtx_MEM (BLKmode, x);
2964 	  force_operand (x, addr_rtx);
2965 	  set_mem_alias_set (mem, get_varargs_alias_set ());
2966 	  set_mem_align (mem, BITS_PER_UNIT);
2967 
2968 	  for (i = 0; i < XVECLEN (container, 0); i++)
2969 	    {
2970 	      rtx slot = XVECEXP (container, 0, i);
2971 	      rtx reg = XEXP (slot, 0);
2972 	      enum machine_mode mode = GET_MODE (reg);
2973 	      rtx src_addr;
2974 	      rtx src_mem;
2975 	      int src_offset;
2976 	      rtx dest_mem;
2977 
2978 	      if (SSE_REGNO_P (REGNO (reg)))
2979 		{
2980 		  src_addr = sse_addr_rtx;
2981 		  src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
2982 		}
2983 	      else
2984 		{
2985 		  src_addr = int_addr_rtx;
2986 		  src_offset = REGNO (reg) * 8;
2987 		}
2988 	      src_mem = gen_rtx_MEM (mode, src_addr);
2989 	      set_mem_alias_set (src_mem, get_varargs_alias_set ());
2990 	      src_mem = adjust_address (src_mem, mode, src_offset);
2991 	      dest_mem = adjust_address (mem, mode, INTVAL (XEXP (slot, 1)));
2992 	      emit_move_insn (dest_mem, src_mem);
2993 	    }
2994 	}
2995 
2996       if (needed_intregs)
2997 	{
2998 	  t =
2999 	    build (PLUS_EXPR, TREE_TYPE (gpr), gpr,
3000 		   build_int_2 (needed_intregs * 8, 0));
3001 	  t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
3002 	  TREE_SIDE_EFFECTS (t) = 1;
3003 	  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3004 	}
3005       if (needed_sseregs)
3006 	{
3007 	  t =
3008 	    build (PLUS_EXPR, TREE_TYPE (fpr), fpr,
3009 		   build_int_2 (needed_sseregs * 16, 0));
3010 	  t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
3011 	  TREE_SIDE_EFFECTS (t) = 1;
3012 	  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3013 	}
3014 
3015       emit_jump_insn (gen_jump (lab_over));
3016       emit_barrier ();
3017       emit_label (lab_false);
3018     }
3019 
3020   /* ... otherwise out of the overflow area.  */
3021 
3022   /* Care for on-stack alignment if needed.  */
3023   if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64)
3024     t = ovf;
3025   else
3026     {
3027       HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
3028       t = build (PLUS_EXPR, TREE_TYPE (ovf), ovf, build_int_2 (align - 1, 0));
3029       t = build (BIT_AND_EXPR, TREE_TYPE (t), t, build_int_2 (-align, -1));
3030     }
3031   t = save_expr (t);
3032 
3033   r = expand_expr (t, addr_rtx, Pmode, EXPAND_NORMAL);
3034   if (r != addr_rtx)
3035     emit_move_insn (addr_rtx, r);
3036 
3037   t =
3038     build (PLUS_EXPR, TREE_TYPE (t), t,
3039 	   build_int_2 (rsize * UNITS_PER_WORD, 0));
3040   t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
3041   TREE_SIDE_EFFECTS (t) = 1;
3042   expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3043 
3044   if (container)
3045     emit_label (lab_over);
3046 
3047   if (indirect_p)
3048     {
3049       r = gen_rtx_MEM (Pmode, addr_rtx);
3050       set_mem_alias_set (r, get_varargs_alias_set ());
3051       emit_move_insn (addr_rtx, r);
3052     }
3053 
3054   return addr_rtx;
3055 }
3056 
3057 /* Return nonzero if OP is either a i387 or SSE fp register.  */
3058 int
3059 any_fp_register_operand (op, mode)
3060      rtx op;
3061      enum machine_mode mode ATTRIBUTE_UNUSED;
3062 {
3063   return ANY_FP_REG_P (op);
3064 }
3065 
3066 /* Return nonzero if OP is an i387 fp register.  */
3067 int
3068 fp_register_operand (op, mode)
3069      rtx op;
3070      enum machine_mode mode ATTRIBUTE_UNUSED;
3071 {
3072   return FP_REG_P (op);
3073 }
3074 
3075 /* Return nonzero if OP is a non-fp register_operand.  */
3076 int
3077 register_and_not_any_fp_reg_operand (op, mode)
3078      rtx op;
3079      enum machine_mode mode;
3080 {
3081   return register_operand (op, mode) && !ANY_FP_REG_P (op);
3082 }
3083 
3084 /* Return nonzero if OP is a register operand other than an
3085    i387 fp register.  */
3086 int
3087 register_and_not_fp_reg_operand (op, mode)
3088      rtx op;
3089      enum machine_mode mode;
3090 {
3091   return register_operand (op, mode) && !FP_REG_P (op);
3092 }
3093 
3094 /* Return nonzero if OP is a general operand representable on x86_64.  */
3095 
3096 int
3097 x86_64_general_operand (op, mode)
3098      rtx op;
3099      enum machine_mode mode;
3100 {
3101   if (!TARGET_64BIT)
3102     return general_operand (op, mode);
3103   if (nonimmediate_operand (op, mode))
3104     return 1;
3105   return x86_64_sign_extended_value (op);
3106 }
3107 
3108 /* Return nonzero if OP is a general operand representable on x86_64
3109    as either a sign-extended or zero-extended constant.  */
3110 
3111 int
3112 x86_64_szext_general_operand (op, mode)
3113      rtx op;
3114      enum machine_mode mode;
3115 {
3116   if (!TARGET_64BIT)
3117     return general_operand (op, mode);
3118   if (nonimmediate_operand (op, mode))
3119     return 1;
3120   return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
3121 }
3122 
3123 /* Return nonzero if OP is a nonmemory operand representable on x86_64.  */
3124 
3125 int
3126 x86_64_nonmemory_operand (op, mode)
3127      rtx op;
3128      enum machine_mode mode;
3129 {
3130   if (!TARGET_64BIT)
3131     return nonmemory_operand (op, mode);
3132   if (register_operand (op, mode))
3133     return 1;
3134   return x86_64_sign_extended_value (op);
3135 }
3136 
3137 /* Return nonzero if OP is a nonmemory operand acceptable to the movabs patterns.  */
3138 
3139 int
3140 x86_64_movabs_operand (op, mode)
3141      rtx op;
3142      enum machine_mode mode;
3143 {
3144   if (!TARGET_64BIT || !flag_pic)
3145     return nonmemory_operand (op, mode);
3146   if (register_operand (op, mode) || x86_64_sign_extended_value (op))
3147     return 1;
3148   if (CONSTANT_P (op) && !symbolic_reference_mentioned_p (op))
3149     return 1;
3150   return 0;
3151 }
3152 
3153 /* Return nonzero if OPNUM's MEM should be matched
3154    in movabs* patterns.  */
3155 
3156 int
3157 ix86_check_movabs (insn, opnum)
3158      rtx insn;
3159      int opnum;
3160 {
3161   rtx set, mem;
3162 
3163   set = PATTERN (insn);
3164   if (GET_CODE (set) == PARALLEL)
3165     set = XVECEXP (set, 0, 0);
3166   if (GET_CODE (set) != SET)
3167     abort ();
3168   mem = XEXP (set, opnum);
3169   while (GET_CODE (mem) == SUBREG)
3170     mem = SUBREG_REG (mem);
3171   if (GET_CODE (mem) != MEM)
3172     abort ();
3173   return (volatile_ok || !MEM_VOLATILE_P (mem));
3174 }
3175 
3176 /* Return nonzero if OP is a nonmemory operand representable on x86_64 as a sign- or zero-extended constant.  */
3177 
3178 int
3179 x86_64_szext_nonmemory_operand (op, mode)
3180      rtx op;
3181      enum machine_mode mode;
3182 {
3183   if (!TARGET_64BIT)
3184     return nonmemory_operand (op, mode);
3185   if (register_operand (op, mode))
3186     return 1;
3187   return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
3188 }
3189 
3190 /* Return nonzero if OP is an immediate operand representable on x86_64.  */
3191 
3192 int
3193 x86_64_immediate_operand (op, mode)
3194      rtx op;
3195      enum machine_mode mode;
3196 {
3197   if (!TARGET_64BIT)
3198     return immediate_operand (op, mode);
3199   return x86_64_sign_extended_value (op);
3200 }
3201 
3202 /* Return nonzero if OP is an immediate operand representable on x86_64 as a zero-extended constant.  */
3203 
3204 int
3205 x86_64_zext_immediate_operand (op, mode)
3206      rtx op;
3207      enum machine_mode mode ATTRIBUTE_UNUSED;
3208 {
3209   return x86_64_zero_extended_value (op);
3210 }
3211 
3212 /* Return nonzero if OP is (const_int 1), else return zero.  */
3213 
3214 int
3215 const_int_1_operand (op, mode)
3216      rtx op;
3217      enum machine_mode mode ATTRIBUTE_UNUSED;
3218 {
3219   return (GET_CODE (op) == CONST_INT && INTVAL (op) == 1);
3220 }
3221 
3222 /* Return nonzero if OP is a CONST_INT >= 1 and <= 31 (a valid operand
3223    for shift & compare patterns, as shifting by 0 does not change flags),
3224    else return zero.  */
3225 
3226 int
3227 const_int_1_31_operand (op, mode)
3228      rtx op;
3229      enum machine_mode mode ATTRIBUTE_UNUSED;
3230 {
3231   return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 1 && INTVAL (op) <= 31);
3232 }
3233 
3234 /* Returns 1 if OP is either a symbol reference or a sum of a symbol
3235    reference and a constant.  */
3236 
3237 int
3238 symbolic_operand (op, mode)
3239      register rtx op;
3240      enum machine_mode mode ATTRIBUTE_UNUSED;
3241 {
3242   switch (GET_CODE (op))
3243     {
3244     case SYMBOL_REF:
3245     case LABEL_REF:
3246       return 1;
3247 
3248     case CONST:
3249       op = XEXP (op, 0);
3250       if (GET_CODE (op) == SYMBOL_REF
3251 	  || GET_CODE (op) == LABEL_REF
3252 	  || (GET_CODE (op) == UNSPEC
3253 	      && (XINT (op, 1) == UNSPEC_GOT
3254 		  || XINT (op, 1) == UNSPEC_GOTOFF
3255 		  || XINT (op, 1) == UNSPEC_GOTPCREL)))
3256 	return 1;
3257       if (GET_CODE (op) != PLUS
3258 	  || GET_CODE (XEXP (op, 1)) != CONST_INT)
3259 	return 0;
3260 
3261       op = XEXP (op, 0);
3262       if (GET_CODE (op) == SYMBOL_REF
3263 	  || GET_CODE (op) == LABEL_REF)
3264 	return 1;
3265       /* Only @GOTOFF gets offsets.  */
3266       if (GET_CODE (op) != UNSPEC
3267 	  || XINT (op, 1) != UNSPEC_GOTOFF)
3268 	return 0;
3269 
3270       op = XVECEXP (op, 0, 0);
3271       if (GET_CODE (op) == SYMBOL_REF
3272 	  || GET_CODE (op) == LABEL_REF)
3273 	return 1;
3274       return 0;
3275 
3276     default:
3277       return 0;
3278     }
3279 }
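
/* [Editorial note, not part of the original source: the shapes accepted
   above are, for example,
       (symbol_ref "x")
       (label_ref ...)
       (const (plus (symbol_ref "x") (const_int 8)))
       (const (unspec [(symbol_ref "x")] UNSPEC_GOTOFF))
   i.e. a symbol or label, possibly wrapped in CONST with a CONST_INT
   offset, where only @GOTOFF unspecs may carry an offset.]  */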
3280 
3281 /* Return true if the operand contains a @GOT or @GOTOFF reference.  */
3282 
3283 int
3284 pic_symbolic_operand (op, mode)
3285      register rtx op;
3286      enum machine_mode mode ATTRIBUTE_UNUSED;
3287 {
3288   if (GET_CODE (op) != CONST)
3289     return 0;
3290   op = XEXP (op, 0);
3291   if (TARGET_64BIT)
3292     {
3293       if (GET_CODE (XEXP (op, 0)) == UNSPEC)
3294 	return 1;
3295     }
3296   else
3297     {
3298       if (GET_CODE (op) == UNSPEC)
3299 	return 1;
3300       if (GET_CODE (op) != PLUS
3301 	  || GET_CODE (XEXP (op, 1)) != CONST_INT)
3302 	return 0;
3303       op = XEXP (op, 0);
3304       if (GET_CODE (op) == UNSPEC)
3305 	return 1;
3306     }
3307   return 0;
3308 }
3309 
3310 /* Return true if OP is a symbolic operand that resolves locally.  */
3311 
3312 static int
3313 local_symbolic_operand (op, mode)
3314      rtx op;
3315      enum machine_mode mode ATTRIBUTE_UNUSED;
3316 {
3317   if (GET_CODE (op) == CONST
3318       && GET_CODE (XEXP (op, 0)) == PLUS
3319       && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
3320     op = XEXP (XEXP (op, 0), 0);
3321 
3322   if (GET_CODE (op) == LABEL_REF)
3323     return 1;
3324 
3325   if (GET_CODE (op) != SYMBOL_REF)
3326     return 0;
3327 
3328   /* These we've been told are local by varasm and encode_section_info
3329      respectively.  */
3330   if (CONSTANT_POOL_ADDRESS_P (op) || SYMBOL_REF_FLAG (op))
3331     return 1;
3332 
3333   /* There is, however, a not insubstantial body of code in the rest of
3334      the compiler that assumes it can just stick the results of
3335      ASM_GENERATE_INTERNAL_LABEL in a symbol_ref and have done.  */
3336   /* ??? This is a hack.  Should update the body of the compiler to
3337      always create a DECL and invoke targetm.encode_section_info.  */
3338   if (strncmp (XSTR (op, 0), internal_label_prefix,
3339 	       internal_label_prefix_len) == 0)
3340     return 1;
3341 
3342   return 0;
3343 }
3344 
3345 /* Test for various thread-local symbols.  See ix86_encode_section_info. */
3346 
3347 int
3348 tls_symbolic_operand (op, mode)
3349      register rtx op;
3350      enum machine_mode mode ATTRIBUTE_UNUSED;
3351 {
3352   const char *symbol_str;
3353 
3354   if (GET_CODE (op) != SYMBOL_REF)
3355     return 0;
3356   symbol_str = XSTR (op, 0);
3357 
3358   if (symbol_str[0] != '%')
3359     return 0;
3360   return strchr (tls_model_chars, symbol_str[1]) - tls_model_chars;
3361 }
3362 
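/* [Editorial comment, inferred from the code below and not in the
   original source: helper for the per-model TLS predicates that follow.
   ix86_encode_section_info tags thread-local symbol names with a '%'
   prefix followed by a model character from tls_model_chars, so this
   simply checks for the character corresponding to KIND.]  */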
3363 static int
3364 tls_symbolic_operand_1 (op, kind)
3365      rtx op;
3366      enum tls_model kind;
3367 {
3368   const char *symbol_str;
3369 
3370   if (GET_CODE (op) != SYMBOL_REF)
3371     return 0;
3372   symbol_str = XSTR (op, 0);
3373 
3374   return symbol_str[0] == '%' && symbol_str[1] == tls_model_chars[kind];
3375 }
3376 
3377 int
3378 global_dynamic_symbolic_operand (op, mode)
3379      register rtx op;
3380      enum machine_mode mode ATTRIBUTE_UNUSED;
3381 {
3382   return tls_symbolic_operand_1 (op, TLS_MODEL_GLOBAL_DYNAMIC);
3383 }
3384 
3385 int
3386 local_dynamic_symbolic_operand (op, mode)
3387      register rtx op;
3388      enum machine_mode mode ATTRIBUTE_UNUSED;
3389 {
3390   return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_DYNAMIC);
3391 }
3392 
3393 int
3394 initial_exec_symbolic_operand (op, mode)
3395      register rtx op;
3396      enum machine_mode mode ATTRIBUTE_UNUSED;
3397 {
3398   return tls_symbolic_operand_1 (op, TLS_MODEL_INITIAL_EXEC);
3399 }
3400 
3401 int
3402 local_exec_symbolic_operand (op, mode)
3403      register rtx op;
3404      enum machine_mode mode ATTRIBUTE_UNUSED;
3405 {
3406   return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_EXEC);
3407 }
3408 
3409 /* Test for a valid operand for a call instruction.  Don't allow the
3410    arg pointer register or virtual regs since they may decay into
3411    reg + const, which the patterns can't handle.  */
3412 
3413 int
3414 call_insn_operand (op, mode)
3415      rtx op;
3416      enum machine_mode mode ATTRIBUTE_UNUSED;
3417 {
3418   /* Disallow indirect through a virtual register.  This leads to
3419      compiler aborts when trying to eliminate them.  */
3420   if (GET_CODE (op) == REG
3421       && (op == arg_pointer_rtx
3422 	  || op == frame_pointer_rtx
3423 	  || (REGNO (op) >= FIRST_PSEUDO_REGISTER
3424 	      && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
3425     return 0;
3426 
3427   /* Disallow `call 1234'.  Due to varying assembler lameness this
3428      gets either rejected or translated to `call .+1234'.  */
3429   if (GET_CODE (op) == CONST_INT)
3430     return 0;
3431 
3432   /* Explicitly allow SYMBOL_REF even if pic.  */
3433   if (GET_CODE (op) == SYMBOL_REF)
3434     return 1;
3435 
3436   /* Otherwise we can allow any general_operand in the address.  */
3437   return general_operand (op, Pmode);
3438 }
3439 
3440 int
3441 constant_call_address_operand (op, mode)
3442      rtx op;
3443      enum machine_mode mode ATTRIBUTE_UNUSED;
3444 {
3445   if (GET_CODE (op) == CONST
3446       && GET_CODE (XEXP (op, 0)) == PLUS
3447       && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
3448     op = XEXP (XEXP (op, 0), 0);
3449   return GET_CODE (op) == SYMBOL_REF;
3450 }
3451 
3452 /* Match exactly zero and one.  */
3453 
3454 int
3455 const0_operand (op, mode)
3456      register rtx op;
3457      enum machine_mode mode;
3458 {
3459   return op == CONST0_RTX (mode);
3460 }
3461 
3462 int
3463 const1_operand (op, mode)
3464      register rtx op;
3465      enum machine_mode mode ATTRIBUTE_UNUSED;
3466 {
3467   return op == const1_rtx;
3468 }
3469 
3470 /* Match 2, 4, or 8.  Used for leal multiplicands.  */
3471 
3472 int
3473 const248_operand (op, mode)
3474      register rtx op;
3475      enum machine_mode mode ATTRIBUTE_UNUSED;
3476 {
3477   return (GET_CODE (op) == CONST_INT
3478 	  && (INTVAL (op) == 2 || INTVAL (op) == 4 || INTVAL (op) == 8));
3479 }
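
/* [Editorial example, not in the original source: these are the explicit
   multiplicands that fold into the SIB byte's scale field, as in

       leal (%eax,%ebx,4), %ecx        # ecx = eax + ebx*4

   the hardware encodes only scales 1, 2, 4 and 8, and a multiplicand of
   1 needs no scaling at all.]  */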
3480 
3481 /* True if this is a constant appropriate for an increment or decrement.  */
3482 
3483 int
3484 incdec_operand (op, mode)
3485      register rtx op;
3486      enum machine_mode mode ATTRIBUTE_UNUSED;
3487 {
3488   /* On Pentium4, the inc and dec operations cause an extra dependency on
3489      the flags register, since the carry flag is not set.  */
3490   if (TARGET_PENTIUM4 && !optimize_size)
3491     return 0;
3492   return op == const1_rtx || op == constm1_rtx;
3493 }
3494 
3495 /* Return nonzero if OP is acceptable as an operand of the DImode shift
3496    expander.  */
3497 
3498 int
3499 shiftdi_operand (op, mode)
3500      rtx op;
3501      enum machine_mode mode ATTRIBUTE_UNUSED;
3502 {
3503   if (TARGET_64BIT)
3504     return nonimmediate_operand (op, mode);
3505   else
3506     return register_operand (op, mode);
3507 }
3508 
3509 /* Return false if this is the stack pointer, or any other fake
3510    register eliminable to the stack pointer.  Otherwise, this is
3511    a register operand.
3512 
3513    This is used to prevent esp from being used as an index reg,
3514    which would only happen in pathological cases.  */
3515 
3516 int
3517 reg_no_sp_operand (op, mode)
3518      register rtx op;
3519      enum machine_mode mode;
3520 {
3521   rtx t = op;
3522   if (GET_CODE (t) == SUBREG)
3523     t = SUBREG_REG (t);
3524   if (t == stack_pointer_rtx || t == arg_pointer_rtx || t == frame_pointer_rtx)
3525     return 0;
3526 
3527   return register_operand (op, mode);
3528 }
3529 
3530 int
3531 mmx_reg_operand (op, mode)
3532      register rtx op;
3533      enum machine_mode mode ATTRIBUTE_UNUSED;
3534 {
3535   return MMX_REG_P (op);
3536 }
3537 
3538 /* Return false if this is any eliminable register.  Otherwise
3539    general_operand.  */
3540 
3541 int
3542 general_no_elim_operand (op, mode)
3543      register rtx op;
3544      enum machine_mode mode;
3545 {
3546   rtx t = op;
3547   if (GET_CODE (t) == SUBREG)
3548     t = SUBREG_REG (t);
3549   if (t == arg_pointer_rtx || t == frame_pointer_rtx
3550       || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3551       || t == virtual_stack_dynamic_rtx)
3552     return 0;
3553   if (REG_P (t)
3554       && REGNO (t) >= FIRST_VIRTUAL_REGISTER
3555       && REGNO (t) <= LAST_VIRTUAL_REGISTER)
3556     return 0;
3557 
3558   return general_operand (op, mode);
3559 }
3560 
3561 /* Return false if this is any eliminable register.  Otherwise
3562    register_operand or const_int.  */
3563 
3564 int
3565 nonmemory_no_elim_operand (op, mode)
3566      register rtx op;
3567      enum machine_mode mode;
3568 {
3569   rtx t = op;
3570   if (GET_CODE (t) == SUBREG)
3571     t = SUBREG_REG (t);
3572   if (t == arg_pointer_rtx || t == frame_pointer_rtx
3573       || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3574       || t == virtual_stack_dynamic_rtx)
3575     return 0;
3576 
3577   return GET_CODE (op) == CONST_INT || register_operand (op, mode);
3578 }
3579 
3580 /* Return false if this is any eliminable register or stack register,
3581    otherwise work like register_operand.  */
3582 
3583 int
3584 index_register_operand (op, mode)
3585      register rtx op;
3586      enum machine_mode mode;
3587 {
3588   rtx t = op;
3589   if (GET_CODE (t) == SUBREG)
3590     t = SUBREG_REG (t);
3591   if (!REG_P (t))
3592     return 0;
3593   if (t == arg_pointer_rtx
3594       || t == frame_pointer_rtx
3595       || t == virtual_incoming_args_rtx
3596       || t == virtual_stack_vars_rtx
3597       || t == virtual_stack_dynamic_rtx
3598       || REGNO (t) == STACK_POINTER_REGNUM)
3599     return 0;
3600 
3601   return general_operand (op, mode);
3602 }
3603 
3604 /* Return true if op is a Q_REGS class register.  */
3605 
3606 int
3607 q_regs_operand (op, mode)
3608      register rtx op;
3609      enum machine_mode mode;
3610 {
3611   if (mode != VOIDmode && GET_MODE (op) != mode)
3612     return 0;
3613   if (GET_CODE (op) == SUBREG)
3614     op = SUBREG_REG (op);
3615   return ANY_QI_REG_P (op);
3616 }
3617 
3618 /* Return true if op is a flags register.  */
3619 
3620 int
3621 flags_reg_operand (op, mode)
3622      register rtx op;
3623      enum machine_mode mode;
3624 {
3625   if (mode != VOIDmode && GET_MODE (op) != mode)
3626     return 0;
3627   return (GET_CODE (op) == REG
3628 	  && REGNO (op) == FLAGS_REG
3629 	  && GET_MODE (op) != VOIDmode);
3630 }
3631 
3632 /* Return true if op is a NON_Q_REGS class register.  */
3633 
3634 int
3635 non_q_regs_operand (op, mode)
3636      register rtx op;
3637      enum machine_mode mode;
3638 {
3639   if (mode != VOIDmode && GET_MODE (op) != mode)
3640     return 0;
3641   if (GET_CODE (op) == SUBREG)
3642     op = SUBREG_REG (op);
3643   return NON_QI_REG_P (op);
3644 }
3645 
3646 /* Return 1 when OP is an operand acceptable for a standard SSE move.  */
3647 int
3648 vector_move_operand (op, mode)
3649      rtx op;
3650      enum machine_mode mode;
3651 {
3652   if (nonimmediate_operand (op, mode))
3653     return 1;
3654   if (GET_MODE (op) != mode && mode != VOIDmode)
3655     return 0;
3656   return (op == CONST0_RTX (GET_MODE (op)));
3657 }
3658 
3659 /* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS
3660    insns.  */
3661 int
3662 sse_comparison_operator (op, mode)
3663      rtx op;
3664      enum machine_mode mode ATTRIBUTE_UNUSED;
3665 {
3666   enum rtx_code code = GET_CODE (op);
3667   switch (code)
3668     {
3669     /* Operations supported directly.  */
3670     case EQ:
3671     case LT:
3672     case LE:
3673     case UNORDERED:
3674     case NE:
3675     case UNGE:
3676     case UNGT:
3677     case ORDERED:
3678       return 1;
3679     /* These are equivalent to ones above in non-IEEE comparisons.  */
3680     case UNEQ:
3681     case UNLT:
3682     case UNLE:
3683     case LTGT:
3684     case GE:
3685     case GT:
3686       return !TARGET_IEEE_FP;
3687     default:
3688       return 0;
3689     }
3690 }
3691 /* Return 1 if OP is a valid comparison operator in valid mode.  */
3692 int
3693 ix86_comparison_operator (op, mode)
3694      register rtx op;
3695      enum machine_mode mode;
3696 {
3697   enum machine_mode inmode;
3698   enum rtx_code code = GET_CODE (op);
3699   if (mode != VOIDmode && GET_MODE (op) != mode)
3700     return 0;
3701   if (GET_RTX_CLASS (code) != '<')
3702     return 0;
3703   inmode = GET_MODE (XEXP (op, 0));
3704 
3705   if (inmode == CCFPmode || inmode == CCFPUmode)
3706     {
3707       enum rtx_code second_code, bypass_code;
3708       ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3709       return (bypass_code == NIL && second_code == NIL);
3710     }
3711   switch (code)
3712     {
3713     case EQ: case NE:
3714       return 1;
3715     case LT: case GE:
3716       if (inmode == CCmode || inmode == CCGCmode
3717 	  || inmode == CCGOCmode || inmode == CCNOmode)
3718 	return 1;
3719       return 0;
3720     case LTU: case GTU: case LEU: case ORDERED: case UNORDERED: case GEU:
3721       if (inmode == CCmode)
3722 	return 1;
3723       return 0;
3724     case GT: case LE:
3725       if (inmode == CCmode || inmode == CCGCmode || inmode == CCNOmode)
3726 	return 1;
3727       return 0;
3728     default:
3729       return 0;
3730     }
3731 }
3732 
3733 /* Return 1 if OP is a comparison operator that can be issued by fcmov.  */
3734 
3735 int
3736 fcmov_comparison_operator (op, mode)
3737     register rtx op;
3738     enum machine_mode mode;
3739 {
3740   enum machine_mode inmode;
3741   enum rtx_code code = GET_CODE (op);
3742   if (mode != VOIDmode && GET_MODE (op) != mode)
3743     return 0;
3744   if (GET_RTX_CLASS (code) != '<')
3745     return 0;
3746   inmode = GET_MODE (XEXP (op, 0));
3747   if (inmode == CCFPmode || inmode == CCFPUmode)
3748     {
3749       enum rtx_code second_code, bypass_code;
3750       ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3751       if (bypass_code != NIL || second_code != NIL)
3752 	return 0;
3753       code = ix86_fp_compare_code_to_integer (code);
3754     }
3755   /* The i387 supports just a limited set of condition codes.  */
3756   switch (code)
3757     {
3758     case LTU: case GTU: case LEU: case GEU:
3759       if (inmode == CCmode || inmode == CCFPmode || inmode == CCFPUmode)
3760 	return 1;
3761       return 0;
3762     case ORDERED: case UNORDERED:
3763     case EQ: case NE:
3764       return 1;
3765     default:
3766       return 0;
3767     }
3768 }
3769 
3770 /* Return 1 if OP is a binary operator that can be promoted to wider mode.  */
3771 
3772 int
3773 promotable_binary_operator (op, mode)
3774      register rtx op;
3775      enum machine_mode mode ATTRIBUTE_UNUSED;
3776 {
3777   switch (GET_CODE (op))
3778     {
3779     case MULT:
3780       /* Modern CPUs have the same latency for HImode and SImode multiply,
3781          but the 386 and 486 do HImode multiplies faster.  */
3782       return ix86_cpu > PROCESSOR_I486;
3783     case PLUS:
3784     case AND:
3785     case IOR:
3786     case XOR:
3787     case ASHIFT:
3788       return 1;
3789     default:
3790       return 0;
3791     }
3792 }
3793 
3794 /* Nearly general operand, but accept any const_double, since we wish
3795    to be able to drop them into memory rather than have them get pulled
3796    into registers.  */
3797 
3798 int
3799 cmp_fp_expander_operand (op, mode)
3800      register rtx op;
3801      enum machine_mode mode;
3802 {
3803   if (mode != VOIDmode && mode != GET_MODE (op))
3804     return 0;
3805   if (GET_CODE (op) == CONST_DOUBLE)
3806     return 1;
3807   return general_operand (op, mode);
3808 }
3809 
3810 /* Match an SI or HImode register for a zero_extract.  */
3811 
3812 int
3813 ext_register_operand (op, mode)
3814      register rtx op;
3815      enum machine_mode mode ATTRIBUTE_UNUSED;
3816 {
3817   int regno;
3818   if ((!TARGET_64BIT || GET_MODE (op) != DImode)
3819       && GET_MODE (op) != SImode && GET_MODE (op) != HImode)
3820     return 0;
3821 
3822   if (!register_operand (op, VOIDmode))
3823     return 0;
3824 
3825   /* Be careful to accept only registers having upper parts.  */
3826   regno = REG_P (op) ? REGNO (op) : REGNO (SUBREG_REG (op));
3827   return (regno > LAST_VIRTUAL_REGISTER || regno < 4);
3828 }
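
/* [Editorial note, not in the original source: hard registers 0-3 are
   %eax, %edx, %ecx and %ebx, the only ones whose high byte (%ah etc.) is
   addressable, which is what a zero_extract of bits 8..15 requires;
   regnos above LAST_VIRTUAL_REGISTER are pseudos that the allocator can
   still place in such a register.]  */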
3829 
3830 /* Return 1 if this is a valid binary floating-point operation.
3831    OP is the expression matched, and MODE is its mode.  */
3832 
3833 int
3834 binary_fp_operator (op, mode)
3835     register rtx op;
3836     enum machine_mode mode;
3837 {
3838   if (mode != VOIDmode && mode != GET_MODE (op))
3839     return 0;
3840 
3841   switch (GET_CODE (op))
3842     {
3843     case PLUS:
3844     case MINUS:
3845     case MULT:
3846     case DIV:
3847       return GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT;
3848 
3849     default:
3850       return 0;
3851     }
3852 }
3853 
3854 int
3855 mult_operator (op, mode)
3856     register rtx op;
3857     enum machine_mode mode ATTRIBUTE_UNUSED;
3858 {
3859   return GET_CODE (op) == MULT;
3860 }
3861 
3862 int
3863 div_operator (op, mode)
3864     register rtx op;
3865     enum machine_mode mode ATTRIBUTE_UNUSED;
3866 {
3867   return GET_CODE (op) == DIV;
3868 }
3869 
3870 int
3871 arith_or_logical_operator (op, mode)
3872       rtx op;
3873       enum machine_mode mode;
3874 {
3875   return ((mode == VOIDmode || GET_MODE (op) == mode)
3876           && (GET_RTX_CLASS (GET_CODE (op)) == 'c'
3877               || GET_RTX_CLASS (GET_CODE (op)) == '2'));
3878 }
3879 
3880 /* Returns 1 if OP is a memory operand with a displacement.  */
3881 
3882 int
3883 memory_displacement_operand (op, mode)
3884      register rtx op;
3885      enum machine_mode mode;
3886 {
3887   struct ix86_address parts;
3888 
3889   if (! memory_operand (op, mode))
3890     return 0;
3891 
3892   if (! ix86_decompose_address (XEXP (op, 0), &parts))
3893     abort ();
3894 
3895   return parts.disp != NULL_RTX;
3896 }
3897 
3898 /* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
3899    re-recognize the operand to avoid a copy_to_mode_reg that will fail.
3900 
3901    ??? It seems likely that this will only work because cmpsi is an
3902    expander, and no actual insns use this.  */
3903 
3904 int
3905 cmpsi_operand (op, mode)
3906       rtx op;
3907       enum machine_mode mode;
3908 {
3909   if (nonimmediate_operand (op, mode))
3910     return 1;
3911 
3912   if (GET_CODE (op) == AND
3913       && GET_MODE (op) == SImode
3914       && GET_CODE (XEXP (op, 0)) == ZERO_EXTRACT
3915       && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
3916       && GET_CODE (XEXP (XEXP (op, 0), 2)) == CONST_INT
3917       && INTVAL (XEXP (XEXP (op, 0), 1)) == 8
3918       && INTVAL (XEXP (XEXP (op, 0), 2)) == 8
3919       && GET_CODE (XEXP (op, 1)) == CONST_INT)
3920     return 1;
3921 
3922   return 0;
3923 }
3924 
3925 /* Returns 1 if OP is a memory operand that cannot be represented by the
3926    modRM array.  */
3927 
3928 int
3929 long_memory_operand (op, mode)
3930      register rtx op;
3931      enum machine_mode mode;
3932 {
3933   if (! memory_operand (op, mode))
3934     return 0;
3935 
3936   return memory_address_length (op) != 0;
3937 }
3938 
3939 /* Return nonzero if the rtx is known to be aligned.  */
3940 
3941 int
3942 aligned_operand (op, mode)
3943      rtx op;
3944      enum machine_mode mode;
3945 {
3946   struct ix86_address parts;
3947 
3948   if (!general_operand (op, mode))
3949     return 0;
3950 
3951   /* Registers and immediate operands are always "aligned".  */
3952   if (GET_CODE (op) != MEM)
3953     return 1;
3954 
3955   /* Don't even try to do any aligned optimizations with volatiles.  */
3956   if (MEM_VOLATILE_P (op))
3957     return 0;
3958 
3959   op = XEXP (op, 0);
3960 
3961   /* Pushes and pops are only valid on the stack pointer.  */
3962   if (GET_CODE (op) == PRE_DEC
3963       || GET_CODE (op) == POST_INC)
3964     return 1;
3965 
3966   /* Decode the address.  */
3967   if (! ix86_decompose_address (op, &parts))
3968     abort ();
3969 
3970   /* Look for some component that isn't known to be aligned.  */
3971   if (parts.index)
3972     {
3973       if (parts.scale < 4
3974 	  && REGNO_POINTER_ALIGN (REGNO (parts.index)) < 32)
3975 	return 0;
3976     }
3977   if (parts.base)
3978     {
3979       if (REGNO_POINTER_ALIGN (REGNO (parts.base)) < 32)
3980 	return 0;
3981     }
3982   if (parts.disp)
3983     {
3984       if (GET_CODE (parts.disp) != CONST_INT
3985 	  || (INTVAL (parts.disp) & 3) != 0)
3986 	return 0;
3987     }
3988 
3989   /* Didn't find one -- this must be an aligned address.  */
3990   return 1;
3991 }
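
/* [Editorial examples of the test above, not in the original source,
   assuming %esp is known to be 4-byte aligned and %eax is not:

       (mem (plus (reg esp) (const_int 8)))   -> aligned
       (mem (plus (reg eax) (const_int 8)))   -> not aligned (base reg)
       (mem (plus (reg esp) (const_int 2)))   -> not aligned (disp & 3)  ]  */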
3992 
3993 int
3994 compare_operator (op, mode)
3995      rtx op;
3996      enum machine_mode mode ATTRIBUTE_UNUSED;
3997 {
3998   return GET_CODE (op) == COMPARE;
3999 }
4000 
4001 /* Return true if the constant is something that can be loaded with
4002    a special instruction.  Only handle 0.0 and 1.0; others are less
4003    worthwhile.  */
4004 
4005 int
4006 standard_80387_constant_p (x)
4007      rtx x;
4008 {
4009   if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
4010     return -1;
4011   /* Note that the 80387 has other constants, such as pi, that we should
4012      support too.  On some machines these are much slower to load as a
4013      standard constant than to load from doubles in memory.  */
4014   if (x == CONST0_RTX (GET_MODE (x)))
4015     return 1;
4016   if (x == CONST1_RTX (GET_MODE (x)))
4017     return 2;
4018   return 0;
4019 }
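
/* [Editorial note, not in the original source: the return values 1 and 2
   above are used by the move patterns to select the single-instruction
   x87 loads, roughly

       fldz            # push +0.0, for return value 1
       fld1            # push +1.0, for return value 2

   which is why only 0.0 and 1.0 are recognized here.]  */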
4020 
4021 /* Return 1 if X is an FP constant we can load into an SSE register
4022    without using memory.  */
4023 int
4024 standard_sse_constant_p (x)
4025      rtx x;
4026 {
4027   if (x == const0_rtx)
4028     return 1;
4029   return (x == CONST0_RTX (GET_MODE (x)));
4030 }
4031 
4032 /* Returns 1 if OP contains a symbol reference.  */
4033 
4034 int
4035 symbolic_reference_mentioned_p (op)
4036      rtx op;
4037 {
4038   register const char *fmt;
4039   register int i;
4040 
4041   if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
4042     return 1;
4043 
4044   fmt = GET_RTX_FORMAT (GET_CODE (op));
4045   for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
4046     {
4047       if (fmt[i] == 'E')
4048 	{
4049 	  register int j;
4050 
4051 	  for (j = XVECLEN (op, i) - 1; j >= 0; j--)
4052 	    if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
4053 	      return 1;
4054 	}
4055 
4056       else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
4057 	return 1;
4058     }
4059 
4060   return 0;
4061 }
4062 
4063 /* Return 1 if it is appropriate to emit `ret' instructions in the
4064    body of a function.  Do this only if the epilogue is simple, needing a
4065    couple of insns.  Prior to reloading, we can't tell how many registers
4066    must be saved, so return 0 then.  Return 0 if there is no frame
4067    marker to de-allocate.
4068 
4069    If NON_SAVING_SETJMP is defined and true, then it is not possible
4070    for the epilogue to be simple, so return 0.  This is a special case
4071    since NON_SAVING_SETJMP will not cause regs_ever_live to change
4072    until final, but jump_optimize may need to know sooner if a
4073    `return' is OK.  */
4074 
4075 int
4076 ix86_can_use_return_insn_p ()
4077 {
4078   struct ix86_frame frame;
4079 
4080 #ifdef NON_SAVING_SETJMP
4081   if (NON_SAVING_SETJMP && current_function_calls_setjmp)
4082     return 0;
4083 #endif
4084 
4085   if (! reload_completed || frame_pointer_needed)
4086     return 0;
4087 
4088   /* Don't allow more than 32k bytes to be popped, since that's all we
4089      can do with one instruction.  */
4090   if (current_function_pops_args
4091       && current_function_args_size >= 32768)
4092     return 0;
4093 
4094   ix86_compute_frame_layout (&frame);
4095   return frame.to_allocate == 0 && frame.nregs == 0;
4096 }
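
/* [Editorial note, not in the original source: the 32k limit above comes
   from the "ret $imm16" encoding, whose pop count is a 16-bit
   immediate.]  */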
4097 
4098 /* Return 1 if VALUE can be stored in the sign extended immediate field.  */
4099 int
4100 x86_64_sign_extended_value (value)
4101      rtx value;
4102 {
4103   switch (GET_CODE (value))
4104     {
4105       /* CONST_DOUBLEs never match, since HOST_BITS_PER_WIDE_INT is known
4106          to be at least 32 and thus all acceptable constants are
4107 	 represented as CONST_INTs.  */
4108       case CONST_INT:
4109 	if (HOST_BITS_PER_WIDE_INT == 32)
4110 	  return 1;
4111 	else
4112 	  {
4113 	    HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (value), DImode);
4114 	    return trunc_int_for_mode (val, SImode) == val;
4115 	  }
4116 	break;
4117 
4118       /* For certain code models, the symbolic references are known to fit.
4119 	 In the CM_SMALL_PIC model we know it fits if it is local to the
4120 	 shared library.  Don't count TLS SYMBOL_REFs here, since they fit
4121 	 only when inside an UNSPEC handled below.  */
4122       case SYMBOL_REF:
4123 	return (ix86_cmodel == CM_SMALL || ix86_cmodel == CM_KERNEL);
4124 
4125       /* For certain code models, the code is near as well.  */
4126       case LABEL_REF:
4127 	return (ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM
4128 		|| ix86_cmodel == CM_KERNEL);
4129 
4130       /* We also may accept the offsetted memory references in certain special
4131          cases.  */
4132       case CONST:
4133 	if (GET_CODE (XEXP (value, 0)) == UNSPEC)
4134 	  switch (XINT (XEXP (value, 0), 1))
4135 	    {
4136 	    case UNSPEC_GOTPCREL:
4137 	    case UNSPEC_DTPOFF:
4138 	    case UNSPEC_GOTNTPOFF:
4139 	    case UNSPEC_NTPOFF:
4140 	      return 1;
4141 	    default:
4142 	      break;
4143 	    }
4144 	if (GET_CODE (XEXP (value, 0)) == PLUS)
4145 	  {
4146 	    rtx op1 = XEXP (XEXP (value, 0), 0);
4147 	    rtx op2 = XEXP (XEXP (value, 0), 1);
4148 	    HOST_WIDE_INT offset;
4149 
4150 	    if (ix86_cmodel == CM_LARGE)
4151 	      return 0;
4152 	    if (GET_CODE (op2) != CONST_INT)
4153 	      return 0;
4154 	    offset = trunc_int_for_mode (INTVAL (op2), DImode);
4155 	    switch (GET_CODE (op1))
4156 	      {
4157 		case SYMBOL_REF:
4158 		  /* For CM_SMALL assume that the latest object is 16MB below
4159 		     the end of the 31-bit boundary.  We may also accept pretty
4160 		     large negative constants, knowing that all objects are
4161 		     in the positive half of the address space.  */
4162 		  if (ix86_cmodel == CM_SMALL
4163 		      && offset < 16*1024*1024
4164 		      && trunc_int_for_mode (offset, SImode) == offset)
4165 		    return 1;
4166 		  /* For CM_KERNEL we know that all objects reside in the
4167 		     negative half of the 32-bit address space.  We may not
4168 		     accept negative offsets, since they may push the address
4169 		     out of range, but we may accept pretty large positive ones.  */
4170 		  if (ix86_cmodel == CM_KERNEL
4171 		      && offset > 0
4172 		      && trunc_int_for_mode (offset, SImode) == offset)
4173 		    return 1;
4174 		  break;
4175 		case LABEL_REF:
4176 		  /* These conditions are similar to SYMBOL_REF ones, just the
4177 		     constraints for code models differ.  */
4178 		  if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
4179 		      && offset < 16*1024*1024
4180 		      && trunc_int_for_mode (offset, SImode) == offset)
4181 		    return 1;
4182 		  if (ix86_cmodel == CM_KERNEL
4183 		      && offset > 0
4184 		      && trunc_int_for_mode (offset, SImode) == offset)
4185 		    return 1;
4186 		  break;
4187 		case UNSPEC:
4188 		  switch (XINT (op1, 1))
4189 		    {
4190 		    case UNSPEC_DTPOFF:
4191 		    case UNSPEC_NTPOFF:
4192 		      if (offset > 0
4193 			  && trunc_int_for_mode (offset, SImode) == offset)
4194 			return 1;
4195 		    }
4196 		  break;
4197 		default:
4198 		  return 0;
4199 	      }
4200 	  }
4201 	return 0;
4202       default:
4203 	return 0;
4204     }
4205 }
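
/* [Editorial worked example, not in the original source: a CONST_INT
   fits the sign-extended field exactly when its low 32 bits sign-extend
   back to the full 64-bit value:

       0x000000007fffffff   fits     (top bit of low word clear)
       0x0000000080000000   no fit   (sign-extends to 0xffffffff80000000)
       0xffffffff80000000   fits     (it is that sign extension)   ]  */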
4206 
4207 /* Return 1 if VALUE can be stored in the zero extended immediate field.  */
4208 int
4209 x86_64_zero_extended_value (value)
4210      rtx value;
4211 {
4212   switch (GET_CODE (value))
4213     {
4214       case CONST_DOUBLE:
4215 	if (HOST_BITS_PER_WIDE_INT == 32)
4216 	  return  (GET_MODE (value) == VOIDmode
4217 		   && !CONST_DOUBLE_HIGH (value));
4218 	else
4219 	  return 0;
4220       case CONST_INT:
4221 	if (HOST_BITS_PER_WIDE_INT == 32)
4222 	  return INTVAL (value) >= 0;
4223 	else
4224 	  return !(INTVAL (value) & ~(HOST_WIDE_INT) 0xffffffff);
4225 	break;
4226 
4227       /* For certain code models, the symbolic references are known to fit.  */
4228       case SYMBOL_REF:
4229 	return ix86_cmodel == CM_SMALL;
4230 
4231       /* For certain code models, the code is near as well.  */
4232       case LABEL_REF:
4233 	return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM;
4234 
4235       /* We also may accept the offsetted memory references in certain special
4236          cases.  */
4237       case CONST:
4238 	if (GET_CODE (XEXP (value, 0)) == PLUS)
4239 	  {
4240 	    rtx op1 = XEXP (XEXP (value, 0), 0);
4241 	    rtx op2 = XEXP (XEXP (value, 0), 1);
4242 
4243 	    if (ix86_cmodel == CM_LARGE)
4244 	      return 0;
4245 	    switch (GET_CODE (op1))
4246 	      {
4247 		case SYMBOL_REF:
4248 		    return 0;
4249 		  /* For the small code model we may accept pretty large positive
4250 		     offsets, since one bit is available for free.  Negative
4251 		     offsets are limited by the size of the NULL pointer area
4252 		     specified by the ABI.  */
4253 		  if (ix86_cmodel == CM_SMALL
4254 		      && GET_CODE (op2) == CONST_INT
4255 		      && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
4256 		      && (trunc_int_for_mode (INTVAL (op2), SImode)
4257 			  == INTVAL (op2)))
4258 		    return 1;
4259 	          /* ??? For the kernel, we may accept adjustment of
4260 		     -0x10000000, since we know that it will just convert
4261 		     negative address space to positive, but perhaps this
4262 		     is not worthwhile.  */
4263 		  break;
4264 		case LABEL_REF:
4265 		  /* These conditions are similar to SYMBOL_REF ones, just the
4266 		     constraints for code models differ.  */
4267 		  if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
4268 		      && GET_CODE (op2) == CONST_INT
4269 		      && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
4270 		      && (trunc_int_for_mode (INTVAL (op2), SImode)
4271 			  == INTVAL (op2)))
4272 		    return 1;
4273 		  break;
4274 		default:
4275 		  return 0;
4276 	      }
4277 	  }
4278 	return 0;
4279       default:
4280 	return 0;
4281     }
4282 }
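
/* [Editorial worked example, not in the original source: here the upper
   32 bits must all be clear, matching what a movl with implicit zero
   extension can produce:

       0x00000000ffffffff   fits
       0x0000000100000000   no fit   ]  */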
4283 
4284 /* Value should be nonzero if functions must have frame pointers.
4285    Zero means the frame pointer need not be set up (and parms may
4286    be accessed via the stack pointer) in functions that seem suitable.  */
4287 
4288 int
4289 ix86_frame_pointer_required ()
4290 {
4291   /* If we accessed previous frames, then the generated code expects
4292      to be able to access the saved ebp value in our frame.  */
4293   if (cfun->machine->accesses_prev_frame)
4294     return 1;
4295 
4296   /* Several x86 os'es need a frame pointer for other reasons,
4297      usually pertaining to setjmp.  */
4298   if (SUBTARGET_FRAME_POINTER_REQUIRED)
4299     return 1;
4300 
4301   /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
4302      the frame pointer by default.  Turn it back on now if we've not
4303      got a leaf function.  */
4304   if (TARGET_OMIT_LEAF_FRAME_POINTER
4305       && (!current_function_is_leaf))
4306     return 1;
4307 
4308   if (current_function_profile)
4309     return 1;
4310 
4311   return 0;
4312 }
4313 
4314 /* Record that the current function accesses previous call frames.  */
4315 
4316 void
4317 ix86_setup_frame_addresses ()
4318 {
4319   cfun->machine->accesses_prev_frame = 1;
4320 }
4321 
4322 #if defined(HAVE_GAS_HIDDEN) && (defined(SUPPORTS_ONE_ONLY) && SUPPORTS_ONE_ONLY)
4323 # define USE_HIDDEN_LINKONCE 1
4324 #else
4325 # define USE_HIDDEN_LINKONCE 0
4326 #endif
4327 
4328 static int pic_labels_used;
4329 
4330 /* Fills in the label name that should be used for a pc thunk for
4331    the given register.  */
4332 
4333 static void
4334 get_pc_thunk_name (name, regno)
4335      char name[32];
4336      unsigned int regno;
4337 {
4338   if (USE_HIDDEN_LINKONCE)
4339     sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
4340   else
4341     ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
4342 }
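
/* [Editorial example, not in the original source: with GCC's i386
   register naming (0=ax, 1=dx, 2=cx, 3=bx, ...), the linkonce case
   yields names such as "__i686.get_pc_thunk.bx" for %ebx; otherwise an
   internal label built from the prefix "LPR" and the register number is
   generated.]  */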
4343 
4344 
4345 /* This function generates code for -fpic that loads %ebx with
4346    the return address of the caller and then returns.  */
4347 
4348 void
4349 ix86_asm_file_end (file)
4350      FILE *file;
4351 {
4352   rtx xops[2];
4353   int regno;
4354 
4355   for (regno = 0; regno < 8; ++regno)
4356     {
4357       char name[32];
4358 
4359       if (! ((pic_labels_used >> regno) & 1))
4360 	continue;
4361 
4362       get_pc_thunk_name (name, regno);
4363 
4364       if (USE_HIDDEN_LINKONCE)
4365 	{
4366 	  tree decl;
4367 
4368 	  decl = build_decl (FUNCTION_DECL, get_identifier (name),
4369 			     error_mark_node);
4370 	  TREE_PUBLIC (decl) = 1;
4371 	  TREE_STATIC (decl) = 1;
4372 	  DECL_ONE_ONLY (decl) = 1;
4373 
4374 	  (*targetm.asm_out.unique_section) (decl, 0);
4375 	  named_section (decl, NULL, 0);
4376 
4377 	  (*targetm.asm_out.globalize_label) (file, name);
4378 	  fputs ("\t.hidden\t", file);
4379 	  assemble_name (file, name);
4380 	  fputc ('\n', file);
4381 	  ASM_DECLARE_FUNCTION_NAME (file, name, decl);
4382 	}
4383       else
4384 	{
4385 	  text_section ();
4386 	  ASM_OUTPUT_LABEL (file, name);
4387 	}
4388 
4389       xops[0] = gen_rtx_REG (SImode, regno);
4390       xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
4391       output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
4392       output_asm_insn ("ret", xops);
4393     }
4394 
4395 #ifdef SUBTARGET_FILE_END
4396   SUBTARGET_FILE_END (file);
4397 #endif
4398 }
4399 
4400 /* Emit code for the SET_GOT patterns.  */
4401 
4402 const char *
4403 output_set_got (dest)
4404      rtx dest;
4405 {
4406   rtx xops[3];
4407 
4408   xops[0] = dest;
4409   xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
4410 
4411   if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
4412     {
4413       xops[2] = gen_rtx_LABEL_REF (Pmode, gen_label_rtx ());
4414 
4415       if (!flag_pic)
4416 	output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
4417       else
4418 	output_asm_insn ("call\t%a2", xops);
4419 
4420 #if TARGET_MACHO
4421       /* Output the "canonical" label name ("Lxx$pb") here too.  This
4422          is what will be referred to by the Mach-O PIC subsystem.  */
4423       ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
4424 #endif
4425       ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L",
4426 				 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
4427 
4428       if (flag_pic)
4429 	output_asm_insn ("pop{l}\t%0", xops);
4430     }
4431   else
4432     {
4433       char name[32];
4434       get_pc_thunk_name (name, REGNO (dest));
4435       pic_labels_used |= 1 << REGNO (dest);
4436 
4437       xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
4438       xops[2] = gen_rtx_MEM (QImode, xops[2]);
4439       output_asm_insn ("call\t%X2", xops);
4440     }
4441 
4442   if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
4443     output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
4444   else if (!TARGET_MACHO)
4445     output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %a1+(.-%a2)}", xops);
4446 
4447   return "";
4448 }
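
/* [Editorial sketch, not in the original source, of the classic
   non-thunk PIC sequence emitted above (assuming %ebx as DEST and no
   deep branch prediction):

       call  .L2                                    # push address of .L2
   .L2: popl  %ebx                                   # ebx = .L2
       addl  $_GLOBAL_OFFSET_TABLE_+[.-.L2], %ebx   # ebx = &GOT

   the thunk variant instead calls __i686.get_pc_thunk.bx and adds the
   GOT symbol directly.]  */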
4449 
4450 /* Generate a "push" pattern for input ARG.  */
4451 
4452 static rtx
4453 gen_push (arg)
4454      rtx arg;
4455 {
4456   return gen_rtx_SET (VOIDmode,
4457 		      gen_rtx_MEM (Pmode,
4458 				   gen_rtx_PRE_DEC (Pmode,
4459 						    stack_pointer_rtx)),
4460 		      arg);
4461 }
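
/* [Editorial note, not in the original source: on 32-bit targets the
   RTL built above is

       (set (mem:SI (pre_dec:SI (reg:SI esp))) arg)

   which the backend's output patterns print as a single pushl.]  */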
4462 
4463 /* Return >= 0 if there is an unused call-clobbered register available
4464    for the entire function.  */
4465 
4466 static unsigned int
4467 ix86_select_alt_pic_regnum ()
4468 {
4469   if (current_function_is_leaf && !current_function_profile)
4470     {
4471       int i;
4472       for (i = 2; i >= 0; --i)
4473         if (!regs_ever_live[i])
4474 	  return i;
4475     }
4476 
4477   return INVALID_REGNUM;
4478 }
4479 
4480 /* Return 1 if we need to save REGNO.  */
4481 static int
4482 ix86_save_reg (regno, maybe_eh_return)
4483      unsigned int regno;
4484      int maybe_eh_return;
4485 {
4486   if (pic_offset_table_rtx
4487       && regno == REAL_PIC_OFFSET_TABLE_REGNUM
4488       && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
4489 	  || current_function_profile
4490 	  || current_function_calls_eh_return
4491 	  || current_function_uses_const_pool))
4492     {
4493       if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
4494 	return 0;
4495       return 1;
4496     }
4497 
4498   if (current_function_calls_eh_return && maybe_eh_return)
4499     {
4500       unsigned i;
4501       for (i = 0; ; i++)
4502 	{
4503 	  unsigned test = EH_RETURN_DATA_REGNO (i);
4504 	  if (test == INVALID_REGNUM)
4505 	    break;
4506 	  if (test == regno)
4507 	    return 1;
4508 	}
4509     }
4510 
4511   return (regs_ever_live[regno]
4512 	  && !call_used_regs[regno]
4513 	  && !fixed_regs[regno]
4514 	  && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
4515 }
4516 
4517 /* Return number of registers to be saved on the stack.  */
4518 
4519 static int
4520 ix86_nsaved_regs ()
4521 {
4522   int nregs = 0;
4523   int regno;
4524 
4525   for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4526     if (ix86_save_reg (regno, true))
4527       nregs++;
4528   return nregs;
4529 }
4530 
4531 /* Return the offset between two registers, one to be eliminated, and the other
4532    its replacement, at the start of a routine.  */
4533 
4534 HOST_WIDE_INT
4535 ix86_initial_elimination_offset (from, to)
4536      int from;
4537      int to;
4538 {
4539   struct ix86_frame frame;
4540   ix86_compute_frame_layout (&frame);
4541 
4542   if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4543     return frame.hard_frame_pointer_offset;
4544   else if (from == FRAME_POINTER_REGNUM
4545 	   && to == HARD_FRAME_POINTER_REGNUM)
4546     return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
4547   else
4548     {
4549       if (to != STACK_POINTER_REGNUM)
4550 	abort ();
4551       else if (from == ARG_POINTER_REGNUM)
4552 	return frame.stack_pointer_offset;
4553       else if (from != FRAME_POINTER_REGNUM)
4554 	abort ();
4555       else
4556 	return frame.stack_pointer_offset - frame.frame_pointer_offset;
4557     }
4558 }
4559 
4560 /* Fill structure ix86_frame about frame of currently computed function.  */
4561 
4562 static void
4563 ix86_compute_frame_layout (frame)
4564      struct ix86_frame *frame;
4565 {
4566   HOST_WIDE_INT total_size;
4567   int stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
4568   int offset;
4569   int preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
4570   HOST_WIDE_INT size = get_frame_size ();
4571 
4572   frame->local_size = size;
4573   frame->nregs = ix86_nsaved_regs ();
4574   total_size = size;
4575 
4576   /* Skip return address and saved base pointer.  */
4577   offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
4578 
4579   frame->hard_frame_pointer_offset = offset;
4580 
4581   /* Do some sanity checking of stack_alignment_needed and
4582      preferred_alignment, since the i386 port is the only one using these
4583      features, and they may break easily.  */
4584 
4585   if (size && !stack_alignment_needed)
4586     abort ();
4587   if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
4588     abort ();
4589   if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
4590     abort ();
4591   if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
4592     abort ();
4593 
4594   if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
4595     stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
4596 
4597   /* Register save area */
4598   offset += frame->nregs * UNITS_PER_WORD;
4599 
4600   /* Va-arg area */
4601   if (ix86_save_varrargs_registers)
4602     {
4603       offset += X86_64_VARARGS_SIZE;
4604       frame->va_arg_size = X86_64_VARARGS_SIZE;
4605     }
4606   else
4607     frame->va_arg_size = 0;
4608 
4609   /* Align start of frame for local function.  */
4610   frame->padding1 = ((offset + stack_alignment_needed - 1)
4611 		     & -stack_alignment_needed) - offset;
4612 
4613   offset += frame->padding1;
4614 
4615   /* Frame pointer points here.  */
4616   frame->frame_pointer_offset = offset;
4617 
4618   offset += size;
4619 
4620   /* Add the outgoing arguments area.  Can be skipped if we eliminated
4621      all the function calls as dead code.
4622      Skipping is, however, impossible when the function calls alloca.  The
4623      alloca expander assumes that the last current_function_outgoing_args_size
4624      bytes of the stack frame are unused.  */
4625   if (ACCUMULATE_OUTGOING_ARGS
4626       && (!current_function_is_leaf || current_function_calls_alloca))
4627     {
4628       offset += current_function_outgoing_args_size;
4629       frame->outgoing_arguments_size = current_function_outgoing_args_size;
4630     }
4631   else
4632     frame->outgoing_arguments_size = 0;
4633 
4634   /* Align stack boundary.  Only needed if we're calling another function
4635      or using alloca.  */
4636   if (!current_function_is_leaf || current_function_calls_alloca)
4637     frame->padding2 = ((offset + preferred_alignment - 1)
4638 		       & -preferred_alignment) - offset;
4639   else
4640     frame->padding2 = 0;
4641 
4642   offset += frame->padding2;
4643 
4644   /* We've reached end of stack frame.  */
4645   frame->stack_pointer_offset = offset;
4646 
4647   /* Size the prologue needs to allocate.  */
4648   frame->to_allocate =
4649     (size + frame->padding1 + frame->padding2
4650      + frame->outgoing_arguments_size + frame->va_arg_size);
4651 
4652   if (TARGET_64BIT && TARGET_RED_ZONE && current_function_sp_is_unchanging
4653       && current_function_is_leaf)
4654     {
4655       frame->red_zone_size = frame->to_allocate;
4656       if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
4657 	frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
4658     }
4659   else
4660     frame->red_zone_size = 0;
4661   frame->to_allocate -= frame->red_zone_size;
4662   frame->stack_pointer_offset -= frame->red_zone_size;
4663 #if 0
4664   fprintf (stderr, "nregs: %i\n", frame->nregs);
4665   fprintf (stderr, "size: %i\n", size);
4666   fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
4667   fprintf (stderr, "padding1: %i\n", frame->padding1);
4668   fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
4669   fprintf (stderr, "padding2: %i\n", frame->padding2);
4670   fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
4671   fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
4672   fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
4673   fprintf (stderr, "hard_frame_pointer_offset: %i\n",
4674 	   frame->hard_frame_pointer_offset);
4675   fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
4676 #endif
4677 }
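
/* [Editorial sketch, not in the original source, of the layout computed
   above (stack grows down; offsets are measured from the incoming
   return address):

       return address             <- offset 0
       saved %ebp (if needed)     <- hard_frame_pointer_offset
       saved registers            (nregs words)
       va_arg register save area
       padding1                   <- frame_pointer_offset
       local variables            (size bytes)
       outgoing argument area
       padding2                   <- stack_pointer_offset (pre red zone)  ]  */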
4678 
4679 /* Emit code to save registers in the prologue.  */
4680 
4681 static void
4682 ix86_emit_save_regs ()
4683 {
4684   register int regno;
4685   rtx insn;
4686 
4687   for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4688     if (ix86_save_reg (regno, true))
4689       {
4690 	insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
4691 	RTX_FRAME_RELATED_P (insn) = 1;
4692       }
4693 }
4694 
4695 /* Emit code to save registers using MOV insns.  First register
4696    is saved at POINTER + OFFSET.  */
4697 static void
4698 ix86_emit_save_regs_using_mov (pointer, offset)
4699      rtx pointer;
4700      HOST_WIDE_INT offset;
4701 {
4702   int regno;
4703   rtx insn;
4704 
4705   for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4706     if (ix86_save_reg (regno, true))
4707       {
4708 	insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
4709 					       Pmode, offset),
4710 			       gen_rtx_REG (Pmode, regno));
4711 	RTX_FRAME_RELATED_P (insn) = 1;
4712 	offset += UNITS_PER_WORD;
4713       }
4714 }
4715 
4716 /* Expand the prologue into a bunch of separate insns.  */
4717 
4718 void
4719 ix86_expand_prologue ()
4720 {
4721   rtx insn;
4722   bool pic_reg_used;
4723   struct ix86_frame frame;
4724   int use_mov = 0;
4725   HOST_WIDE_INT allocate;
4726 
4727   if (!optimize_size)
4728     {
4729       use_fast_prologue_epilogue
4730 	 = !expensive_function_p (FAST_PROLOGUE_INSN_COUNT);
4731       if (TARGET_PROLOGUE_USING_MOVE)
4732         use_mov = use_fast_prologue_epilogue;
4733     }
4734   ix86_compute_frame_layout (&frame);
4735 
4736   if (warn_stack_larger_than && frame.local_size > stack_larger_than_size)
4737     warning ("stack usage is %d bytes", frame.local_size);
4738 
4739   /* Note: AT&T enter does NOT have reversed args.  Enter is probably
4740      slower on all targets.  Also sdb doesn't like it.  */
4741 
4742   if (frame_pointer_needed)
4743     {
4744       insn = emit_insn (gen_push (hard_frame_pointer_rtx));
4745       RTX_FRAME_RELATED_P (insn) = 1;
4746 
4747       insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
4748       RTX_FRAME_RELATED_P (insn) = 1;
4749     }
4750 
4751   allocate = frame.to_allocate;
4752   /* In case we are dealing only with a single register and an empty
4753      frame, a push is equivalent to the mov+add sequence.  */
4754   if (allocate == 0 && frame.nregs <= 1)
4755     use_mov = 0;
4756 
4757   if (!use_mov)
4758     ix86_emit_save_regs ();
4759   else
4760     allocate += frame.nregs * UNITS_PER_WORD;
4761 
4762   if (allocate == 0)
4763     ;
4764   else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
4765     {
4766       insn = emit_insn (gen_pro_epilogue_adjust_stack
4767 			(stack_pointer_rtx, stack_pointer_rtx,
4768 			 GEN_INT (-allocate)));
4769       RTX_FRAME_RELATED_P (insn) = 1;
4770     }
4771   else
4772     {
4773       /* ??? Is this only valid for Win32?  */
4774 
4775       rtx arg0, sym;
4776 
4777       if (TARGET_64BIT)
4778 	abort ();
4779 
4780       arg0 = gen_rtx_REG (SImode, 0);
4781       emit_move_insn (arg0, GEN_INT (allocate));
4782 
4783       sym = gen_rtx_MEM (FUNCTION_MODE,
4784 			 gen_rtx_SYMBOL_REF (Pmode, "_alloca"));
4785       insn = emit_call_insn (gen_call (sym, const0_rtx, constm1_rtx));
4786 
4787       CALL_INSN_FUNCTION_USAGE (insn)
4788 	= gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_USE (VOIDmode, arg0),
4789 			     CALL_INSN_FUNCTION_USAGE (insn));
4790 
4791       /* Don't allow the scheduling pass to move insns across the
4792          __alloca call.  */
4793       emit_insn (gen_blockage (const0_rtx));
4794     }
4795   if (use_mov)
4796     {
4797       if (!frame_pointer_needed || !frame.to_allocate)
4798         ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
4799       else
4800         ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
4801 				       -frame.nregs * UNITS_PER_WORD);
4802     }
4803 
4804 #ifdef SUBTARGET_PROLOGUE
4805   SUBTARGET_PROLOGUE;
4806 #endif
4807 
4808   pic_reg_used = false;
4809   if (pic_offset_table_rtx
4810       && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
4811 	  || current_function_profile))
4812     {
4813       unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
4814 
4815       if (alt_pic_reg_used != INVALID_REGNUM)
4816 	REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
4817 
4818       pic_reg_used = true;
4819     }
4820 
4821   if (pic_reg_used)
4822     {
4823       insn = emit_insn (gen_set_got (pic_offset_table_rtx));
4824 
4825       /* Even with accurate pre-reload life analysis, we can wind up
4826 	 deleting all references to the pic register after reload.
4827	 Consider what happens if cross-jumping unifies two sides of a branch
4828	 controlled by a comparison against the only read from a global.
4829	 In that case, allow the set_got to be deleted, though we're
4830 	 too late to do anything about the ebx save in the prologue.  */
4831       REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
4832     }
4833 
4834   /* Prevent function calls from being scheduled before the call to mcount.
4835      In the pic_reg_used case, make sure that the got load isn't deleted.  */
4836   if (current_function_profile)
4837     emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
4838 }
4839 
4840 /* Emit code to restore saved registers using MOV insns.  First register
4841    is restored from POINTER + OFFSET.  */
4842 static void
4843 ix86_emit_restore_regs_using_mov (pointer, offset, maybe_eh_return)
4844      rtx pointer;
4845      int offset;
4846      int maybe_eh_return;
4847 {
4848   int regno;
4849 
4850   for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4851     if (ix86_save_reg (regno, maybe_eh_return))
4852       {
4853 	emit_move_insn (gen_rtx_REG (Pmode, regno),
4854 			adjust_address (gen_rtx_MEM (Pmode, pointer),
4855 					Pmode, offset));
4856 	offset += UNITS_PER_WORD;
4857       }
4858 }
4859 
4860 /* Restore function stack, frame, and registers.  */
4861 
4862 void
4863 ix86_expand_epilogue (style)
4864      int style;
4865 {
4866   int regno;
4867   int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
4868   struct ix86_frame frame;
4869   HOST_WIDE_INT offset;
4870 
4871   ix86_compute_frame_layout (&frame);
4872 
4873   /* Calculate start of saved registers relative to ebp.  Special care
4874      must be taken for the normal return case of a function using
4875      eh_return: the eax and edx registers are marked as saved, but not
4876      restored along this path.  */
4877   offset = frame.nregs;
4878   if (current_function_calls_eh_return && style != 2)
4879     offset -= 2;
4880   offset *= -UNITS_PER_WORD;
4881 
4882   /* If we're only restoring one register and sp is not valid, then
4883      use a move instruction to restore the register, since it's
4884      less work than reloading sp and popping the register.
4885 
4886      The default code results in a stack adjustment using an add/lea
4887      instruction, while this code results in a LEAVE instruction (or its
4888      discrete equivalent), so it is profitable in some other cases as
4889      well, especially when there are no registers to restore.  We also
4890      use this code when TARGET_USE_LEAVE and there is exactly one
4891      register to pop.  This heuristic may need some tuning in the future.  */
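  /* For reference (standard IA-32 semantics, not specific to this file):
		leave
     is architecturally equivalent to
		mov %ebp, %esp
		pop %ebp
     which is why the frame-pointer paths below may pick either form.  */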
4892   if ((!sp_valid && frame.nregs <= 1)
4893       || (TARGET_EPILOGUE_USING_MOVE
4894 	  && use_fast_prologue_epilogue
4895 	  && (frame.nregs > 1 || frame.to_allocate))
4896       || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
4897       || (frame_pointer_needed && TARGET_USE_LEAVE
4898 	  && use_fast_prologue_epilogue && frame.nregs == 1)
4899       || current_function_calls_eh_return)
4900     {
4901       /* Restore registers.  We can use ebp or esp to address the memory
4902 	 locations.  If both are available, default to ebp, since offsets
4903	 are known to be small.  The only exception is esp pointing directly to
4904	 the end of the block of saved registers, where we may simplify the
4905	 addressing mode.  */
4906 
4907       if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
4908 	ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
4909 					  frame.to_allocate, style == 2);
4910       else
4911 	ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
4912 					  offset, style == 2);
4913 
4914       /* eh_return epilogues need %ecx added to the stack pointer.  */
4915       if (style == 2)
4916 	{
4917 	  rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
4918 
4919 	  if (frame_pointer_needed)
4920 	    {
4921 	      tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
4922 	      tmp = plus_constant (tmp, UNITS_PER_WORD);
4923 	      emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
4924 
4925 	      tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
4926 	      emit_move_insn (hard_frame_pointer_rtx, tmp);
4927 
4928 	      emit_insn (gen_pro_epilogue_adjust_stack
4929 			 (stack_pointer_rtx, sa, const0_rtx));
4930 	    }
4931 	  else
4932 	    {
4933 	      tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
4934 	      tmp = plus_constant (tmp, (frame.to_allocate
4935                                          + frame.nregs * UNITS_PER_WORD));
4936 	      emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
4937 	    }
4938 	}
4939       else if (!frame_pointer_needed)
4940 	emit_insn (gen_pro_epilogue_adjust_stack
4941 		   (stack_pointer_rtx, stack_pointer_rtx,
4942 		    GEN_INT (frame.to_allocate
4943 			     + frame.nregs * UNITS_PER_WORD)));
4944       /* If not an i386, mov & pop is faster than "leave".  */
4945       else if (TARGET_USE_LEAVE || optimize_size || !use_fast_prologue_epilogue)
4946 	emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
4947       else
4948 	{
4949 	  emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
4950 						    hard_frame_pointer_rtx,
4951 						    const0_rtx));
4952 	  if (TARGET_64BIT)
4953 	    emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
4954 	  else
4955 	    emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
4956 	}
4957     }
4958   else
4959     {
4960       /* First step is to deallocate the stack frame so that we can
4961 	 pop the registers.  */
4962       if (!sp_valid)
4963 	{
4964 	  if (!frame_pointer_needed)
4965 	    abort ();
4966           emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
4967 						    hard_frame_pointer_rtx,
4968 						    GEN_INT (offset)));
4969 	}
4970       else if (frame.to_allocate)
4971 	emit_insn (gen_pro_epilogue_adjust_stack
4972 		   (stack_pointer_rtx, stack_pointer_rtx,
4973 		    GEN_INT (frame.to_allocate)));
4974 
4975       for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4976 	if (ix86_save_reg (regno, false))
4977 	  {
4978 	    if (TARGET_64BIT)
4979 	      emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
4980 	    else
4981 	      emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
4982 	  }
4983       if (frame_pointer_needed)
4984 	{
4985 	  /* Leave results in shorter dependency chains on CPUs that are
4986 	     able to grok it fast.  */
4987 	  if (TARGET_USE_LEAVE)
4988 	    emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
4989 	  else if (TARGET_64BIT)
4990 	    emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
4991 	  else
4992 	    emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
4993 	}
4994     }
4995 
4996   /* Sibcall epilogues don't want a return instruction.  */
4997   if (style == 0)
4998     return;
4999 
5000   if (current_function_pops_args && current_function_args_size)
5001     {
5002       rtx popc = GEN_INT (current_function_pops_args);
5003 
5004       /* i386 can only pop 64K bytes.  If asked to pop more, pop
5005 	 return address, do explicit add, and jump indirectly to the
5006 	 caller.  */
5007 
5008       if (current_function_pops_args >= 65536)
5009 	{
5010 	  rtx ecx = gen_rtx_REG (SImode, 2);
5011 
5012 	  /* There is no "pascal" calling convention in the 64-bit ABI.  */
5013 	  if (TARGET_64BIT)
5014 	    abort ();
5015 
5016 	  emit_insn (gen_popsi1 (ecx));
5017 	  emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
5018 	  emit_jump_insn (gen_return_indirect_internal (ecx));
5019 	}
5020       else
5021 	emit_jump_insn (gen_return_pop_internal (popc));
5022     }
5023   else
5024     emit_jump_insn (gen_return_internal ());
5025 }
5026 
5027 /* Undo any changes the function may have made to the PIC register number.  */
5028 
5029 static void
5030 ix86_output_function_epilogue (file, size)
5031      FILE *file ATTRIBUTE_UNUSED;
5032      HOST_WIDE_INT size ATTRIBUTE_UNUSED;
5033 {
5034   if (pic_offset_table_rtx)
5035     REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
5036 }
5037 
5038 /* Extract the parts of an RTL expression that is a valid memory address
5039    for an instruction.  Return 0 if the structure of the address is
5040    grossly off.  Return -1 if the address contains ASHIFT, so it is not
5041    strictly valid, but still used for computing the length of an lea
5042    instruction.  */
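/* For example (an illustrative decomposition): the address

     (plus (plus (mult (reg A) (const_int 4)) (reg B)) (const_int 8))

   fills OUT with base = (reg B), index = (reg A), scale = 4 and
   disp = (const_int 8), i.e. 8(B,A,4) in AT&T syntax.  */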
5043 
5044 static int
5045 ix86_decompose_address (addr, out)
5046      register rtx addr;
5047      struct ix86_address *out;
5048 {
5049   rtx base = NULL_RTX;
5050   rtx index = NULL_RTX;
5051   rtx disp = NULL_RTX;
5052   HOST_WIDE_INT scale = 1;
5053   rtx scale_rtx = NULL_RTX;
5054   int retval = 1;
5055 
5056   if (GET_CODE (addr) == REG || GET_CODE (addr) == SUBREG)
5057     base = addr;
5058   else if (GET_CODE (addr) == PLUS)
5059     {
5060       rtx op0 = XEXP (addr, 0);
5061       rtx op1 = XEXP (addr, 1);
5062       enum rtx_code code0 = GET_CODE (op0);
5063       enum rtx_code code1 = GET_CODE (op1);
5064 
5065       if (code0 == REG || code0 == SUBREG)
5066 	{
5067 	  if (code1 == REG || code1 == SUBREG)
5068 	    index = op0, base = op1;	/* index + base */
5069 	  else
5070 	    base = op0, disp = op1;	/* base + displacement */
5071 	}
5072       else if (code0 == MULT)
5073 	{
5074 	  index = XEXP (op0, 0);
5075 	  scale_rtx = XEXP (op0, 1);
5076 	  if (code1 == REG || code1 == SUBREG)
5077 	    base = op1;			/* index*scale + base */
5078 	  else
5079 	    disp = op1;			/* index*scale + disp */
5080 	}
5081       else if (code0 == PLUS && GET_CODE (XEXP (op0, 0)) == MULT)
5082 	{
5083 	  index = XEXP (XEXP (op0, 0), 0);	/* index*scale + base + disp */
5084 	  scale_rtx = XEXP (XEXP (op0, 0), 1);
5085 	  base = XEXP (op0, 1);
5086 	  disp = op1;
5087 	}
5088       else if (code0 == PLUS)
5089 	{
5090 	  index = XEXP (op0, 0);	/* index + base + disp */
5091 	  base = XEXP (op0, 1);
5092 	  disp = op1;
5093 	}
5094       else
5095 	return 0;
5096     }
5097   else if (GET_CODE (addr) == MULT)
5098     {
5099       index = XEXP (addr, 0);		/* index*scale */
5100       scale_rtx = XEXP (addr, 1);
5101     }
5102   else if (GET_CODE (addr) == ASHIFT)
5103     {
5104       rtx tmp;
5105 
5106       /* We're called for lea too, which implements ashift on occasion.  */
5107       index = XEXP (addr, 0);
5108       tmp = XEXP (addr, 1);
5109       if (GET_CODE (tmp) != CONST_INT)
5110 	return 0;
5111       scale = INTVAL (tmp);
5112       if ((unsigned HOST_WIDE_INT) scale > 3)
5113 	return 0;
5114       scale = 1 << scale;
5115       retval = -1;
5116     }
5117   else
5118     disp = addr;			/* displacement */
5119 
5120   /* Extract the integral value of scale.  */
5121   if (scale_rtx)
5122     {
5123       if (GET_CODE (scale_rtx) != CONST_INT)
5124 	return 0;
5125       scale = INTVAL (scale_rtx);
5126     }
5127 
5128   /* Allow arg pointer and stack pointer as index if there is no scaling.  */
5129   if (base && index && scale == 1
5130       && (index == arg_pointer_rtx || index == frame_pointer_rtx
5131           || index == stack_pointer_rtx))
5132     {
5133       rtx tmp = base;
5134       base = index;
5135       index = tmp;
5136     }
5137 
5138   /* Special case: %ebp cannot be encoded as a base without a displacement.  */
5139   if ((base == hard_frame_pointer_rtx
5140        || base == frame_pointer_rtx
5141        || base == arg_pointer_rtx) && !disp)
5142     disp = const0_rtx;
5143 
5144   /* Special case: on K6, [%esi] causes the instruction to be vector decoded.
5145      Avoid this by transforming to [%esi+0].  */
5146   if (ix86_cpu == PROCESSOR_K6 && !optimize_size
5147       && base && !index && !disp
5148       && REG_P (base)
5149       && REGNO_REG_CLASS (REGNO (base)) == SIREG)
5150     disp = const0_rtx;
5151 
5152   /* Special case: encode reg+reg instead of reg*2.  */
5153   if (!base && index && scale && scale == 2)
5154     base = index, scale = 1;
5155 
5156   /* Special case: scaling cannot be encoded without base or displacement.  */
5157   if (!base && !disp && index && scale != 1)
5158     disp = const0_rtx;
5159 
5160   out->base = base;
5161   out->index = index;
5162   out->disp = disp;
5163   out->scale = scale;
5164 
5165   return retval;
5166 }
5167 
5168 /* Return cost of the memory address x.
5169    For i386, it is better to use a complex address than to let gcc copy
5170    the address into a reg and make a new pseudo.  But not if the address
5171    requires two regs - that would mean more pseudos with longer
5172    lifetimes.  */
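/* As a worked example (hand-evaluated from the rules below, so treat it as
   a sketch): for 4(%ebx) the initial cost of 1 drops to 0, because a
   nonzero displacement is present and %ebx is a hard register.  An address
   combining two distinct pseudo registers starts at 1, gains 1 for using
   pseudos and 1 more for using two different ones, ending at 3 before any
   K6 penalty.  */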
5173 int
5174 ix86_address_cost (x)
5175      rtx x;
5176 {
5177   struct ix86_address parts;
5178   int cost = 1;
5179 
5180   if (!ix86_decompose_address (x, &parts))
5181     abort ();
5182 
5183   /* More complex memory references are better.  */
5184   if (parts.disp && parts.disp != const0_rtx)
5185     cost--;
5186 
5187   /* Attempt to minimize number of registers in the address.  */
5188   if ((parts.base
5189        && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
5190       || (parts.index
5191 	  && (!REG_P (parts.index)
5192 	      || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
5193     cost++;
5194 
5195   if (parts.base
5196       && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
5197       && parts.index
5198       && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
5199       && parts.base != parts.index)
5200     cost++;
5201 
5202   /* The AMD-K6 doesn't like addresses with the ModR/M byte set to
5203      00_xxx_100b, since its predecode logic can't detect the length of such
5204      instructions and decoding degenerates to the vector decoder.  Increase
5205      the cost of such addresses here.  The penalty is minimally 2 cycles.
5206      It may be worthwhile to split such addresses or even refuse them entirely.
5207 
5208      The following addressing modes are affected:
5209       [base+scale*index]
5210       [scale*index+disp]
5211       [base+index]
5212 
5213      The first and last cases may be avoidable by explicitly coding a zero
5214      into the memory address, but I don't have an AMD-K6 machine handy to
5215      check this theory.  */
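  /* Concretely (an encoding sketch based on the IA-32 ModR/M format):
     rewriting [%ebx+%esi] as 0(%ebx,%esi) replaces the 00_xxx_100b ModR/M
     byte with the 01_xxx_100b form plus an 8-bit displacement, trading one
     byte of code size for a predictable decode, if the theory above
     holds.  */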
5216 
5217   if (TARGET_K6
5218       && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
5219 	  || (parts.disp && !parts.base && parts.index && parts.scale != 1)
5220 	  || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
5221     cost += 10;
5222 
5223   return cost;
5224 }
5225 
5226 /* If X is a machine specific address (i.e. a symbol or label being
5227    referenced as a displacement from the GOT implemented using an
5228    UNSPEC), then return the base term.  Otherwise return X.  */
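/* For instance (an illustrative RTL sketch): on x86-64 the address

     (const (unspec [(symbol_ref "x")] UNSPEC_GOTPCREL))

   yields (symbol_ref "x") as its base term, while anything that is not one
   of our GOT unspecs is returned unchanged.  */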
5229 
5230 rtx
5231 ix86_find_base_term (x)
5232      rtx x;
5233 {
5234   rtx term;
5235 
5236   if (TARGET_64BIT)
5237     {
5238       if (GET_CODE (x) != CONST)
5239 	return x;
5240       term = XEXP (x, 0);
5241       if (GET_CODE (term) == PLUS
5242 	  && (GET_CODE (XEXP (term, 1)) == CONST_INT
5243 	      || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
5244 	term = XEXP (term, 0);
5245       if (GET_CODE (term) != UNSPEC
5246 	  || XINT (term, 1) != UNSPEC_GOTPCREL)
5247 	return x;
5248 
5249       term = XVECEXP (term, 0, 0);
5250 
5251       if (GET_CODE (term) != SYMBOL_REF
5252 	  && GET_CODE (term) != LABEL_REF)
5253 	return x;
5254 
5255       return term;
5256     }
5257 
5258   if (GET_CODE (x) != PLUS
5259       || XEXP (x, 0) != pic_offset_table_rtx
5260       || GET_CODE (XEXP (x, 1)) != CONST)
5261     return x;
5262 
5263   term = XEXP (XEXP (x, 1), 0);
5264 
5265   if (GET_CODE (term) == PLUS && GET_CODE (XEXP (term, 1)) == CONST_INT)
5266     term = XEXP (term, 0);
5267 
5268   if (GET_CODE (term) != UNSPEC
5269       || XINT (term, 1) != UNSPEC_GOTOFF)
5270     return x;
5271 
5272   term = XVECEXP (term, 0, 0);
5273 
5274   if (GET_CODE (term) != SYMBOL_REF
5275       && GET_CODE (term) != LABEL_REF)
5276     return x;
5277 
5278   return term;
5279 }
5280 
5281 /* Determine if a given RTX is a valid constant.  We already know this
5282    satisfies CONSTANT_P.  */
5283 
5284 bool
5285 legitimate_constant_p (x)
5286      rtx x;
5287 {
5288   rtx inner;
5289 
5290   switch (GET_CODE (x))
5291     {
5292     case SYMBOL_REF:
5293       /* TLS symbols are not constant.  */
5294       if (tls_symbolic_operand (x, Pmode))
5295 	return false;
5296       break;
5297 
5298     case CONST:
5299       inner = XEXP (x, 0);
5300 
5301       /* Offsets of TLS symbols are never valid.
5302 	 Discourage CSE from creating them.  */
5303       if (GET_CODE (inner) == PLUS
5304 	  && tls_symbolic_operand (XEXP (inner, 0), Pmode))
5305 	return false;
5306 
5307       /* Only some unspecs are valid as "constants".  */
5308       if (GET_CODE (inner) == UNSPEC)
5309 	switch (XINT (inner, 1))
5310 	  {
5311 	  case UNSPEC_TPOFF:
5312 	    return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
5313 	  default:
5314 	    return false;
5315 	  }
5316       break;
5317 
5318     default:
5319       break;
5320     }
5321 
5322   /* Otherwise we handle everything else in the move patterns.  */
5323   return true;
5324 }
5325 
5326 /* Determine if it's legal to put X into the constant pool.  This
5327    is not possible for the address of thread-local symbols, which
5328    is checked above.  */
5329 
5330 static bool
5331 ix86_cannot_force_const_mem (x)
5332      rtx x;
5333 {
5334   return !legitimate_constant_p (x);
5335 }
5336 
5337 /* Determine if a given RTX is a valid constant address.  */
5338 
5339 bool
5340 constant_address_p (x)
5341      rtx x;
5342 {
5343   switch (GET_CODE (x))
5344     {
5345     case LABEL_REF:
5346     case CONST_INT:
5347       return true;
5348 
5349     case CONST_DOUBLE:
5350       return TARGET_64BIT;
5351 
5352     case CONST:
5353       /* For Mach-O, really believe the CONST.  */
5354       if (TARGET_MACHO)
5355 	return true;
5356       /* Otherwise fall through.  */
5357     case SYMBOL_REF:
5358       return !flag_pic && legitimate_constant_p (x);
5359 
5360     default:
5361       return false;
5362     }
5363 }
5364 
5365 /* Nonzero if the constant value X is a legitimate general operand
5366    when generating PIC code.  It is given that flag_pic is on and
5367    that X satisfies CONSTANT_P or is a CONST_DOUBLE.  */
5368 
5369 bool
5370 legitimate_pic_operand_p (x)
5371      rtx x;
5372 {
5373   rtx inner;
5374 
5375   switch (GET_CODE (x))
5376     {
5377     case CONST:
5378       inner = XEXP (x, 0);
5379 
5380       /* Only some unspecs are valid as "constants".  */
5381       if (GET_CODE (inner) == UNSPEC)
5382 	switch (XINT (inner, 1))
5383 	  {
5384 	  case UNSPEC_TPOFF:
5385 	    return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
5386 	  default:
5387 	    return false;
5388 	  }
5389       /* FALLTHRU */
5390 
5391     case SYMBOL_REF:
5392     case LABEL_REF:
5393       return legitimate_pic_address_disp_p (x);
5394 
5395     default:
5396       return true;
5397     }
5398 }
5399 
5400 /* Determine if a given CONST RTX is a valid memory displacement
5401    in PIC mode.  */
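/* For example (a sketch of the 32-bit case): the displacement

     (const (unspec [(symbol_ref "x")] UNSPEC_GOTOFF))

   is accepted when "x" is a local symbol, whereas a bare (symbol_ref "x")
   is not a legitimate PIC displacement here.  */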
5402 
5403 int
5404 legitimate_pic_address_disp_p (disp)
5405      register rtx disp;
5406 {
5407   bool saw_plus;
5408 
5409   /* In 64bit mode we can allow direct addresses of symbols and labels
5410      when they are not dynamic symbols.  */
5411   if (TARGET_64BIT)
5412     {
5413       /* TLS references should always be enclosed in UNSPEC.  */
5414       if (tls_symbolic_operand (disp, GET_MODE (disp)))
5415 	return 0;
5416       if (GET_CODE (disp) == SYMBOL_REF
5417 	  && ix86_cmodel == CM_SMALL_PIC
5418 	  && (CONSTANT_POOL_ADDRESS_P (disp)
5419 	      || SYMBOL_REF_FLAG (disp)))
5420 	return 1;
5421       if (GET_CODE (disp) == LABEL_REF)
5422 	return 1;
5423       if (GET_CODE (disp) == CONST
5424 	  && GET_CODE (XEXP (disp, 0)) == PLUS
5425 	  && ((GET_CODE (XEXP (XEXP (disp, 0), 0)) == SYMBOL_REF
5426 	       && ix86_cmodel == CM_SMALL_PIC
5427 	       && (CONSTANT_POOL_ADDRESS_P (XEXP (XEXP (disp, 0), 0))
5428 		   || SYMBOL_REF_FLAG (XEXP (XEXP (disp, 0), 0))))
5429 	      || GET_CODE (XEXP (XEXP (disp, 0), 0)) == LABEL_REF)
5430 	  && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT
5431 	  && INTVAL (XEXP (XEXP (disp, 0), 1)) < 16*1024*1024
5432 	  && INTVAL (XEXP (XEXP (disp, 0), 1)) >= -16*1024*1024)
5433 	return 1;
5434     }
5435   if (GET_CODE (disp) != CONST)
5436     return 0;
5437   disp = XEXP (disp, 0);
5438 
5439   if (TARGET_64BIT)
5440     {
5441       /* It is not safe to allow PLUS expressions here; that would exceed the
5442          allowed displacement range of GOT references.  We should not need these anyway.  */
5443       if (GET_CODE (disp) != UNSPEC
5444 	  || XINT (disp, 1) != UNSPEC_GOTPCREL)
5445 	return 0;
5446 
5447       if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
5448 	  && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
5449 	return 0;
5450       return 1;
5451     }
5452 
5453   saw_plus = false;
5454   if (GET_CODE (disp) == PLUS)
5455     {
5456       if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
5457 	return 0;
5458       disp = XEXP (disp, 0);
5459       saw_plus = true;
5460     }
5461 
5462   /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O.  */
5463   if (TARGET_MACHO && GET_CODE (disp) == MINUS)
5464     {
5465       if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
5466           || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
5467         if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
5468           {
5469             const char *sym_name = XSTR (XEXP (disp, 1), 0);
5470             if (strstr (sym_name, "$pb") != 0)
5471               return 1;
5472           }
5473     }
5474 
5475   if (GET_CODE (disp) != UNSPEC)
5476     return 0;
5477 
5478   switch (XINT (disp, 1))
5479     {
5480     case UNSPEC_GOT:
5481       if (saw_plus)
5482 	return false;
5483       return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
5484     case UNSPEC_GOTOFF:
5485       return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5486     case UNSPEC_GOTTPOFF:
5487     case UNSPEC_GOTNTPOFF:
5488     case UNSPEC_INDNTPOFF:
5489       if (saw_plus)
5490 	return false;
5491       return initial_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5492     case UNSPEC_NTPOFF:
5493       return local_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5494     case UNSPEC_DTPOFF:
5495       return local_dynamic_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5496     }
5497 
5498   return 0;
5499 }
5500 
5501 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
5502    memory address for an instruction.  The MODE argument is the machine mode
5503    for the MEM expression that wants to use this address.
5504 
5505    It only recognizes addresses in canonical form.  LEGITIMIZE_ADDRESS should
5506    convert common non-canonical forms to canonical form so that they will
5507    be recognized.  */
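/* As a concrete illustration (hand-traced through the checks below): the
   canonical form of 8(%ebx,%eax,4) decomposes into a valid base, index and
   scale and is accepted, while an address using %esp as the index, or a
   scale of 3, fails with one of the report_error reasons.  */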
5508 
5509 int
5510 legitimate_address_p (mode, addr, strict)
5511      enum machine_mode mode;
5512      register rtx addr;
5513      int strict;
5514 {
5515   struct ix86_address parts;
5516   rtx base, index, disp;
5517   HOST_WIDE_INT scale;
5518   const char *reason = NULL;
5519   rtx reason_rtx = NULL_RTX;
5520 
5521   if (TARGET_DEBUG_ADDR)
5522     {
5523       fprintf (stderr,
5524 	       "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
5525 	       GET_MODE_NAME (mode), strict);
5526       debug_rtx (addr);
5527     }
5528 
5529   if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_TP)
5530     {
5531       if (TARGET_DEBUG_ADDR)
5532 	fprintf (stderr, "Success.\n");
5533       return TRUE;
5534     }
5535 
5536   if (ix86_decompose_address (addr, &parts) <= 0)
5537     {
5538       reason = "decomposition failed";
5539       goto report_error;
5540     }
5541 
5542   base = parts.base;
5543   index = parts.index;
5544   disp = parts.disp;
5545   scale = parts.scale;
5546 
5547   /* Validate base register.
5548 
5549      Don't allow SUBREGs here; they can lead to spill failures when the base
5550      is one word out of a two word structure, which is represented internally
5551      as a DImode int.  */
5552 
5553   if (base)
5554     {
5555       reason_rtx = base;
5556 
5557       if (GET_CODE (base) != REG)
5558 	{
5559 	  reason = "base is not a register";
5560 	  goto report_error;
5561 	}
5562 
5563       if (GET_MODE (base) != Pmode)
5564 	{
5565 	  reason = "base is not in Pmode";
5566 	  goto report_error;
5567 	}
5568 
5569       if ((strict && ! REG_OK_FOR_BASE_STRICT_P (base))
5570 	  || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (base)))
5571 	{
5572 	  reason = "base is not valid";
5573 	  goto report_error;
5574 	}
5575     }
5576 
5577   /* Validate index register.
5578 
5579      Don't allow SUBREGs here; they can lead to spill failures when the index
5580      is one word out of a two word structure, which is represented internally
5581      as a DImode int.  */
5582 
5583   if (index)
5584     {
5585       reason_rtx = index;
5586 
5587       if (GET_CODE (index) != REG)
5588 	{
5589 	  reason = "index is not a register";
5590 	  goto report_error;
5591 	}
5592 
5593       if (GET_MODE (index) != Pmode)
5594 	{
5595 	  reason = "index is not in Pmode";
5596 	  goto report_error;
5597 	}
5598 
5599       if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (index))
5600 	  || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (index)))
5601 	{
5602 	  reason = "index is not valid";
5603 	  goto report_error;
5604 	}
5605     }
5606 
5607   /* Validate scale factor.  */
5608   if (scale != 1)
5609     {
5610       reason_rtx = GEN_INT (scale);
5611       if (!index)
5612 	{
5613 	  reason = "scale without index";
5614 	  goto report_error;
5615 	}
5616 
5617       if (scale != 2 && scale != 4 && scale != 8)
5618 	{
5619 	  reason = "scale is not a valid multiplier";
5620 	  goto report_error;
5621 	}
5622     }
5623 
5624   /* Validate displacement.  */
5625   if (disp)
5626     {
5627       reason_rtx = disp;
5628 
5629       if (GET_CODE (disp) == CONST
5630 	  && GET_CODE (XEXP (disp, 0)) == UNSPEC)
5631 	switch (XINT (XEXP (disp, 0), 1))
5632 	  {
5633 	  case UNSPEC_GOT:
5634 	  case UNSPEC_GOTOFF:
5635 	  case UNSPEC_GOTPCREL:
5636 	    if (!flag_pic)
5637 	      abort ();
5638 	    goto is_legitimate_pic;
5639 
5640 	  case UNSPEC_GOTTPOFF:
5641 	  case UNSPEC_GOTNTPOFF:
5642 	  case UNSPEC_INDNTPOFF:
5643 	  case UNSPEC_NTPOFF:
5644 	  case UNSPEC_DTPOFF:
5645 	    break;
5646 
5647 	  default:
5648 	    reason = "invalid address unspec";
5649 	    goto report_error;
5650 	  }
5651 
5652       else if (flag_pic && (SYMBOLIC_CONST (disp)
5653 #if TARGET_MACHO
5654 			    && !machopic_operand_p (disp)
5655 #endif
5656 			    ))
5657 	{
5658 	is_legitimate_pic:
5659 	  if (TARGET_64BIT && (index || base))
5660 	    {
5661 	      /* foo@dtpoff(%rX) is ok.  */
5662 	      if (GET_CODE (disp) != CONST
5663 		  || GET_CODE (XEXP (disp, 0)) != PLUS
5664 		  || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
5665 		  || GET_CODE (XEXP (XEXP (disp, 0), 1)) != CONST_INT
5666 		  || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
5667 		      && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
5668 		{
5669 		  reason = "non-constant pic memory reference";
5670 		  goto report_error;
5671 		}
5672 	    }
5673 	  else if (! legitimate_pic_address_disp_p (disp))
5674 	    {
5675 	      reason = "displacement is an invalid pic construct";
5676 	      goto report_error;
5677 	    }
5678 
5679           /* This code used to verify that a symbolic pic displacement
5680 	     includes the pic_offset_table_rtx register.
5681 
5682	     While this is a good idea, unfortunately these constructs may
5683	     be created by the "adds using lea" optimization for incorrect
5684	     code like:
5685 
5686 	     int a;
5687 	     int foo(int i)
5688 	       {
5689 	         return *(&a+i);
5690 	       }
5691 
5692	     This code is nonsensical, but results in addressing the
5693	     GOT table with a pic_offset_table_rtx base.  We can't
5694	     easily refuse it, since it gets matched by the
5695	     "addsi3" pattern, which later gets split to an lea when
5696	     the output register differs from the input.  While this
5697	     could be handled by a separate addsi pattern for this case
5698	     that never results in an lea, disabling this test seems to be
5699	     the easier and correct fix for the crash.  */
5700 	}
5701       else if (!CONSTANT_ADDRESS_P (disp))
5702 	{
5703 	  reason = "displacement is not constant";
5704 	  goto report_error;
5705 	}
5706       else if (TARGET_64BIT && !x86_64_sign_extended_value (disp))
5707 	{
5708 	  reason = "displacement is out of range";
5709 	  goto report_error;
5710 	}
5711       else if (!TARGET_64BIT && GET_CODE (disp) == CONST_DOUBLE)
5712 	{
5713 	  reason = "displacement is a const_double";
5714 	  goto report_error;
5715 	}
5716     }
5717 
5718   /* Everything looks valid.  */
5719   if (TARGET_DEBUG_ADDR)
5720     fprintf (stderr, "Success.\n");
5721   return TRUE;
5722 
5723  report_error:
5724   if (TARGET_DEBUG_ADDR)
5725     {
5726       fprintf (stderr, "Error: %s\n", reason);
5727       debug_rtx (reason_rtx);
5728     }
5729   return FALSE;
5730 }
5731 
5732 /* Return a unique alias set for the GOT.  */
5733 
5734 static HOST_WIDE_INT
5735 ix86_GOT_alias_set ()
5736 {
5737   static HOST_WIDE_INT set = -1;
5738   if (set == -1)
5739     set = new_alias_set ();
5740   return set;
5741 }
5742 
5743 /* Return a legitimate reference for ORIG (an address) using the
5744    register REG.  If REG is 0, a new pseudo is generated.
5745 
5746    There are two types of references that must be handled:
5747 
5748    1. Global data references must load the address from the GOT, via
5749       the PIC reg.  An insn is emitted to do this load, and the reg is
5750       returned.
5751 
5752    2. Static data references, constant pool addresses, and code labels
5753       compute the address as an offset from the GOT, whose base is in
5754       the PIC reg.  Static data objects have SYMBOL_REF_FLAG set to
5755       differentiate them from global data objects.  The returned
5756       address is the PIC reg + an unspec constant.
5757 
5758    GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
5759    reg also appears in the address.  */
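/* For instance (illustrative shapes of the RTL produced on the 32-bit
   path): a global "x" becomes the load

     (mem (plus pic_reg (const (unspec [(symbol_ref "x")] UNSPEC_GOT))))

   while a local or static "y" becomes the address computation

     (plus pic_reg (const (unspec [(symbol_ref "y")] UNSPEC_GOTOFF)))

   with no memory reference needed.  */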
5760 
5761 rtx
5762 legitimize_pic_address (orig, reg)
5763      rtx orig;
5764      rtx reg;
5765 {
5766   rtx addr = orig;
5767   rtx new = orig;
5768   rtx base;
5769 
5770 #if TARGET_MACHO
5771   if (reg == 0)
5772     reg = gen_reg_rtx (Pmode);
5773   /* Use the generic Mach-O PIC machinery.  */
5774   return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
5775 #endif
5776 
5777   if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
5778     new = addr;
5779   else if (!TARGET_64BIT && local_symbolic_operand (addr, Pmode))
5780     {
5781       /* This symbol may be referenced via a displacement from the PIC
5782 	 base address (@GOTOFF).  */
5783 
5784       if (reload_in_progress)
5785 	regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5786       new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
5787       new = gen_rtx_CONST (Pmode, new);
5788       new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5789 
5790       if (reg != 0)
5791 	{
5792 	  emit_move_insn (reg, new);
5793 	  new = reg;
5794 	}
5795     }
5796   else if (GET_CODE (addr) == SYMBOL_REF)
5797     {
5798       if (TARGET_64BIT)
5799 	{
5800 	  new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
5801 	  new = gen_rtx_CONST (Pmode, new);
5802 	  new = gen_rtx_MEM (Pmode, new);
5803 	  RTX_UNCHANGING_P (new) = 1;
5804 	  set_mem_alias_set (new, ix86_GOT_alias_set ());
5805 
5806 	  if (reg == 0)
5807 	    reg = gen_reg_rtx (Pmode);
5808 	  /* Use gen_movsi directly; otherwise the address is loaded
5809 	     into a register for CSE.  We don't want to CSE these addresses;
5810 	     instead we CSE addresses from the GOT table, so skip this.  */
5811 	  emit_insn (gen_movsi (reg, new));
5812 	  new = reg;
5813 	}
5814       else
5815 	{
5816 	  /* This symbol must be referenced via a load from the
5817 	     Global Offset Table (@GOT).  */
5818 
5819 	  if (reload_in_progress)
5820 	    regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5821 	  new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
5822 	  new = gen_rtx_CONST (Pmode, new);
5823 	  new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5824 	  new = gen_rtx_MEM (Pmode, new);
5825 	  RTX_UNCHANGING_P (new) = 1;
5826 	  set_mem_alias_set (new, ix86_GOT_alias_set ());
5827 
5828 	  if (reg == 0)
5829 	    reg = gen_reg_rtx (Pmode);
5830 	  emit_move_insn (reg, new);
5831 	  new = reg;
5832 	}
5833     }
5834   else
5835     {
5836       if (GET_CODE (addr) == CONST)
5837 	{
5838 	  addr = XEXP (addr, 0);
5839 
5840 	  /* We must match stuff we generated before.  Assume the only
5841 	     unspecs that can get here are ours.  Not that we could do
5842 	     anything with them anyway...  */
5843 	  if (GET_CODE (addr) == UNSPEC
5844 	      || (GET_CODE (addr) == PLUS
5845 		  && GET_CODE (XEXP (addr, 0)) == UNSPEC))
5846 	    return orig;
5847 	  if (GET_CODE (addr) != PLUS)
5848 	    abort ();
5849 	}
5850       if (GET_CODE (addr) == PLUS)
5851 	{
5852 	  rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
5853 
5854 	  /* Check first to see if this is a constant offset from a @GOTOFF
5855 	     symbol reference.  */
5856 	  if (local_symbolic_operand (op0, Pmode)
5857 	      && GET_CODE (op1) == CONST_INT)
5858 	    {
5859 	      if (!TARGET_64BIT)
5860 		{
5861 		  if (reload_in_progress)
5862 		    regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5863 		  new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
5864 					UNSPEC_GOTOFF);
5865 		  new = gen_rtx_PLUS (Pmode, new, op1);
5866 		  new = gen_rtx_CONST (Pmode, new);
5867 		  new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5868 
5869 		  if (reg != 0)
5870 		    {
5871 		      emit_move_insn (reg, new);
5872 		      new = reg;
5873 		    }
5874 		}
5875 	      else
5876 		{
5877 		  if (INTVAL (op1) < -16*1024*1024
5878 		      || INTVAL (op1) >= 16*1024*1024)
5879 		    new = gen_rtx_PLUS (Pmode, op0, force_reg (Pmode, op1));
5880 		}
5881 	    }
5882 	  else
5883 	    {
5884 	      base = legitimize_pic_address (XEXP (addr, 0), reg);
5885 	      new  = legitimize_pic_address (XEXP (addr, 1),
5886 					     base == reg ? NULL_RTX : reg);
5887 
5888 	      if (GET_CODE (new) == CONST_INT)
5889 		new = plus_constant (base, INTVAL (new));
5890 	      else
5891 		{
5892 		  if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
5893 		    {
5894 		      base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
5895 		      new = XEXP (new, 1);
5896 		    }
5897 		  new = gen_rtx_PLUS (Pmode, base, new);
5898 		}
5899 	    }
5900 	}
5901     }
5902   return new;
5903 }
5904 
5905 static void
5906 ix86_encode_section_info (decl, first)
5907      tree decl;
5908      int first ATTRIBUTE_UNUSED;
5909 {
5910   bool local_p = (*targetm.binds_local_p) (decl);
5911   rtx rtl, symbol;
5912 
5913   rtl = DECL_P (decl) ? DECL_RTL (decl) : TREE_CST_RTL (decl);
5914   if (GET_CODE (rtl) != MEM)
5915     return;
5916   symbol = XEXP (rtl, 0);
5917   if (GET_CODE (symbol) != SYMBOL_REF)
5918     return;
5919 
5920   /* For basic x86, if using PIC, mark a SYMBOL_REF for a non-global
5921      symbol so that we may access it directly in the GOT.  */
5922 
5923   if (flag_pic)
5924     SYMBOL_REF_FLAG (symbol) = local_p;
5925 
5926   /* For ELF, encode thread-local data with %[GLil] for "global dynamic",
5927      "local dynamic", "initial exec" or "local exec" TLS models
5928      respectively.  */
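  /* E.g. (assuming tls_model_chars maps the four models to the G, L, i and
     l characters named above), a global-dynamic variable "foo" is renamed
     "%Gfoo" below, and ix86_strip_name_encoding undoes the prefix when the
     name is printed.  */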
5929 
5930   if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL (decl))
5931     {
5932       const char *symbol_str;
5933       char *newstr;
5934       size_t len;
5935       enum tls_model kind = decl_tls_model (decl);
5936 
5937       if (TARGET_64BIT && ! flag_pic)
5938 	{
5939 	  /* x86-64 doesn't allow non-pic code for shared libraries,
5940 	     so don't generate GD/LD TLS models for non-pic code.  */
5941 	  switch (kind)
5942 	    {
5943 	    case TLS_MODEL_GLOBAL_DYNAMIC:
5944 	      kind = TLS_MODEL_INITIAL_EXEC; break;
5945 	    case TLS_MODEL_LOCAL_DYNAMIC:
5946 	      kind = TLS_MODEL_LOCAL_EXEC; break;
5947 	    default:
5948 	      break;
5949 	    }
5950 	}
5951 
5952       symbol_str = XSTR (symbol, 0);
5953 
5954       if (symbol_str[0] == '%')
5955 	{
5956 	  if (symbol_str[1] == tls_model_chars[kind])
5957 	    return;
5958 	  symbol_str += 2;
5959 	}
5960       len = strlen (symbol_str) + 1;
5961       newstr = alloca (len + 2);
5962 
5963       newstr[0] = '%';
5964       newstr[1] = tls_model_chars[kind];
5965       memcpy (newstr + 2, symbol_str, len);
5966 
5967       XSTR (symbol, 0) = ggc_alloc_string (newstr, len + 2 - 1);
5968     }
5969 }
5970 
5971 /* Undo the above when printing symbol names.  */
5972 
5973 static const char *
5974 ix86_strip_name_encoding (str)
5975      const char *str;
5976 {
5977   if (str[0] == '%')
5978     str += 2;
5979   if (str [0] == '*')
5980     str += 1;
5981   return str;
5982 }
5983 
5984 /* Load the thread pointer into a register.  */
5985 
5986 static rtx
5987 get_thread_pointer ()
5988 {
5989   rtx tp;
5990 
5991   tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
5992   tp = gen_rtx_MEM (Pmode, tp);
5993   RTX_UNCHANGING_P (tp) = 1;
5994   set_mem_alias_set (tp, ix86_GOT_alias_set ());
5995   tp = force_reg (Pmode, tp);
5996 
5997   return tp;
5998 }
5999 
6000 /* Try machine-dependent ways of modifying an illegitimate address
6001    to be legitimate.  If we find one, return the new, valid address.
6002    This macro is used in only one place: `memory_address' in explow.c.
6003 
6004    OLDX is the address as it was before break_out_memory_refs was called.
6005    In some cases it is useful to look at this to decide what needs to be done.
6006 
6007    MODE and WIN are passed so that this macro can use
6008    GO_IF_LEGITIMATE_ADDRESS.
6009 
6010    It is always safe for this macro to do nothing.  It exists to recognize
6011    opportunities to optimize the output.
6012 
6013    For the 80386, we handle X+REG by loading X into a register R and
6014    using R+REG.  R will go in a general reg and indexing will be used.
6015    However, if REG is a broken-out memory address or multiplication,
6016    nothing needs to be done because REG can certainly go in a general reg.
6017 
6018    When -fpic is used, special handling is needed for symbolic references.
6019    See comments by legitimize_pic_address in i386.c for details.  */
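/* For example (a sketch of the non-PIC path below): the address
   (plus (ashift (reg I) (const_int 2)) (reg B)) is first rewritten as
   (plus (mult (reg I) (const_int 4)) (reg B)), which matches the
   scaled-index form that GO_IF_LEGITIMATE_ADDRESS accepts.  */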
6020 
6021 rtx
6022 legitimize_address (x, oldx, mode)
6023      register rtx x;
6024      register rtx oldx ATTRIBUTE_UNUSED;
6025      enum machine_mode mode;
6026 {
6027   int changed = 0;
6028   unsigned log;
6029 
6030   if (TARGET_DEBUG_ADDR)
6031     {
6032       fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
6033 	       GET_MODE_NAME (mode));
6034       debug_rtx (x);
6035     }
6036 
6037   log = tls_symbolic_operand (x, mode);
6038   if (log)
6039     {
6040       rtx dest, base, off, pic;
6041       int type;
6042 
6043       switch (log)
6044         {
6045         case TLS_MODEL_GLOBAL_DYNAMIC:
6046 	  dest = gen_reg_rtx (Pmode);
6047 	  if (TARGET_64BIT)
6048 	    {
6049 	      rtx rax = gen_rtx_REG (Pmode, 0), insns;
6050 
6051 	      start_sequence ();
6052 	      emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
6053 	      insns = get_insns ();
6054 	      end_sequence ();
6055 
6056 	      emit_libcall_block (insns, dest, rax, x);
6057 	    }
6058 	  else
6059 	    emit_insn (gen_tls_global_dynamic_32 (dest, x));
6060 	  break;
6061 
6062         case TLS_MODEL_LOCAL_DYNAMIC:
6063 	  base = gen_reg_rtx (Pmode);
6064 	  if (TARGET_64BIT)
6065 	    {
6066 	      rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
6067 
6068 	      start_sequence ();
6069 	      emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
6070 	      insns = get_insns ();
6071 	      end_sequence ();
6072 
6073 	      note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
6074 	      note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
6075 	      emit_libcall_block (insns, base, rax, note);
6076 	    }
6077 	  else
6078 	    emit_insn (gen_tls_local_dynamic_base_32 (base));
6079 
6080 	  off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
6081 	  off = gen_rtx_CONST (Pmode, off);
6082 
6083 	  return gen_rtx_PLUS (Pmode, base, off);
6084 
6085         case TLS_MODEL_INITIAL_EXEC:
6086 	  if (TARGET_64BIT)
6087 	    {
6088 	      pic = NULL;
6089 	      type = UNSPEC_GOTNTPOFF;
6090 	    }
6091 	  else if (flag_pic)
6092 	    {
6093 	      if (reload_in_progress)
6094 		regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6095 	      pic = pic_offset_table_rtx;
6096 	      type = TARGET_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
6097 	    }
6098 	  else if (!TARGET_GNU_TLS)
6099 	    {
6100 	      pic = gen_reg_rtx (Pmode);
6101 	      emit_insn (gen_set_got (pic));
6102 	      type = UNSPEC_GOTTPOFF;
6103 	    }
6104 	  else
6105 	    {
6106 	      pic = NULL;
6107 	      type = UNSPEC_INDNTPOFF;
6108 	    }
6109 
6110 	  base = get_thread_pointer ();
6111 
6112 	  off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
6113 	  off = gen_rtx_CONST (Pmode, off);
6114 	  if (pic)
6115 	    off = gen_rtx_PLUS (Pmode, pic, off);
6116 	  off = gen_rtx_MEM (Pmode, off);
6117 	  RTX_UNCHANGING_P (off) = 1;
6118 	  set_mem_alias_set (off, ix86_GOT_alias_set ());
6119 	  dest = gen_reg_rtx (Pmode);
6120 
6121 	  if (TARGET_64BIT || TARGET_GNU_TLS)
6122 	    {
6123 	      emit_move_insn (dest, off);
6124 	      return gen_rtx_PLUS (Pmode, base, dest);
6125 	    }
6126 	  else
6127 	    emit_insn (gen_subsi3 (dest, base, off));
6128 	  break;
6129 
6130         case TLS_MODEL_LOCAL_EXEC:
6131 	  base = get_thread_pointer ();
6132 
6133 	  off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
6134 				(TARGET_64BIT || TARGET_GNU_TLS)
6135 				? UNSPEC_NTPOFF : UNSPEC_TPOFF);
6136 	  off = gen_rtx_CONST (Pmode, off);
6137 
6138 	  if (TARGET_64BIT || TARGET_GNU_TLS)
6139 	    return gen_rtx_PLUS (Pmode, base, off);
6140 	  else
6141 	    {
6142 	      dest = gen_reg_rtx (Pmode);
6143 	      emit_insn (gen_subsi3 (dest, base, off));
6144 	    }
6145 	  break;
6146 
6147 	default:
6148 	  abort ();
6149         }
6150 
6151       return dest;
6152     }
6153 
6154   if (flag_pic && SYMBOLIC_CONST (x))
6155     return legitimize_pic_address (x, 0);
6156 
6157   /* Canonicalize shifts by 0, 1, 2, 3 into multiply.  */
6158   if (GET_CODE (x) == ASHIFT
6159       && GET_CODE (XEXP (x, 1)) == CONST_INT
6160       && (log = (unsigned) exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
6161     {
6162       changed = 1;
6163       x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
6164 			GEN_INT (1 << log));
6165     }
6166 
6167   if (GET_CODE (x) == PLUS)
6168     {
6169       /* Canonicalize shifts by 0, 1, 2, 3 into multiply.  */
6170 
6171       if (GET_CODE (XEXP (x, 0)) == ASHIFT
6172 	  && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
6173 	  && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
6174 	{
6175 	  changed = 1;
6176 	  XEXP (x, 0) = gen_rtx_MULT (Pmode,
6177 				      force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
6178 				      GEN_INT (1 << log));
6179 	}
6180 
6181       if (GET_CODE (XEXP (x, 1)) == ASHIFT
6182 	  && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
6183 	  && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
6184 	{
6185 	  changed = 1;
6186 	  XEXP (x, 1) = gen_rtx_MULT (Pmode,
6187 				      force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
6188 				      GEN_INT (1 << log));
6189 	}
6190 
6191       /* Put multiply first if it isn't already.  */
6192       if (GET_CODE (XEXP (x, 1)) == MULT)
6193 	{
6194 	  rtx tmp = XEXP (x, 0);
6195 	  XEXP (x, 0) = XEXP (x, 1);
6196 	  XEXP (x, 1) = tmp;
6197 	  changed = 1;
6198 	}
6199 
6200       /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
6201 	 into (plus (plus (mult (reg) (const)) (reg)) (const)).  This can be
6202 	 created by virtual register instantiation, register elimination, and
6203 	 similar optimizations.  */
6204       if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
6205 	{
6206 	  changed = 1;
6207 	  x = gen_rtx_PLUS (Pmode,
6208 			    gen_rtx_PLUS (Pmode, XEXP (x, 0),
6209 					  XEXP (XEXP (x, 1), 0)),
6210 			    XEXP (XEXP (x, 1), 1));
6211 	}
6212 
6213       /* Canonicalize
6214 	 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
6215 	 into (plus (plus (mult (reg) (const)) (reg)) (const)).  */
6216       else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
6217 	       && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
6218 	       && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
6219 	       && CONSTANT_P (XEXP (x, 1)))
6220 	{
6221 	  rtx constant;
6222 	  rtx other = NULL_RTX;
6223 
6224 	  if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6225 	    {
6226 	      constant = XEXP (x, 1);
6227 	      other = XEXP (XEXP (XEXP (x, 0), 1), 1);
6228 	    }
6229 	  else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
6230 	    {
6231 	      constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
6232 	      other = XEXP (x, 1);
6233 	    }
6234 	  else
6235 	    constant = 0;
6236 
6237 	  if (constant)
6238 	    {
6239 	      changed = 1;
6240 	      x = gen_rtx_PLUS (Pmode,
6241 				gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
6242 					      XEXP (XEXP (XEXP (x, 0), 1), 0)),
6243 				plus_constant (other, INTVAL (constant)));
6244 	    }
6245 	}
6246 
6247       if (changed && legitimate_address_p (mode, x, FALSE))
6248 	return x;
6249 
6250       if (GET_CODE (XEXP (x, 0)) == MULT)
6251 	{
6252 	  changed = 1;
6253 	  XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
6254 	}
6255 
6256       if (GET_CODE (XEXP (x, 1)) == MULT)
6257 	{
6258 	  changed = 1;
6259 	  XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
6260 	}
6261 
6262       if (changed
6263 	  && GET_CODE (XEXP (x, 1)) == REG
6264 	  && GET_CODE (XEXP (x, 0)) == REG)
6265 	return x;
6266 
6267       if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
6268 	{
6269 	  changed = 1;
6270 	  x = legitimize_pic_address (x, 0);
6271 	}
6272 
6273       if (changed && legitimate_address_p (mode, x, FALSE))
6274 	return x;
6275 
6276       if (GET_CODE (XEXP (x, 0)) == REG)
6277 	{
6278 	  register rtx temp = gen_reg_rtx (Pmode);
6279 	  register rtx val  = force_operand (XEXP (x, 1), temp);
6280 	  if (val != temp)
6281 	    emit_move_insn (temp, val);
6282 
6283 	  XEXP (x, 1) = temp;
6284 	  return x;
6285 	}
6286 
6287       else if (GET_CODE (XEXP (x, 1)) == REG)
6288 	{
6289 	  register rtx temp = gen_reg_rtx (Pmode);
6290 	  register rtx val  = force_operand (XEXP (x, 0), temp);
6291 	  if (val != temp)
6292 	    emit_move_insn (temp, val);
6293 
6294 	  XEXP (x, 0) = temp;
6295 	  return x;
6296 	}
6297     }
6298 
6299   return x;
6300 }
6301 
6302 /* Print an integer constant expression in assembler syntax.  Addition
6303    and subtraction are the only arithmetic that may appear in these
6304    expressions.  FILE is the stdio stream to write to, X is the rtx, and
6305    CODE is the operand print code from the output string.  */
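/* For instance (sample output under the AT&T dialect): the expression
   (const (unspec [(symbol_ref "x")] UNSPEC_GOTOFF)) prints as "x@GOTOFF",
   and a SYMBOL_REF with code 'P' and SYMBOL_REF_FLAG clear prints as
   "x@PLT".  */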
6306 
6307 static void
6308 output_pic_addr_const (file, x, code)
6309      FILE *file;
6310      rtx x;
6311      int code;
6312 {
6313   char buf[256];
6314 
6315   switch (GET_CODE (x))
6316     {
6317     case PC:
6318       if (flag_pic)
6319 	putc ('.', file);
6320       else
6321 	abort ();
6322       break;
6323 
6324     case SYMBOL_REF:
6325       assemble_name (file, XSTR (x, 0));
6326       if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_FLAG (x))
6327 	fputs ("@PLT", file);
6328       break;
6329 
6330     case LABEL_REF:
6331       x = XEXP (x, 0);
6332       /* FALLTHRU */
6333     case CODE_LABEL:
6334       ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
6335       assemble_name (asm_out_file, buf);
6336       break;
6337 
6338     case CONST_INT:
6339       fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
6340       break;
6341 
6342     case CONST:
6343       /* This used to output parentheses around the expression,
6344 	 but that does not work on the 386 (either ATT or BSD assembler).  */
6345       output_pic_addr_const (file, XEXP (x, 0), code);
6346       break;
6347 
6348     case CONST_DOUBLE:
6349       if (GET_MODE (x) == VOIDmode)
6350 	{
6351 	  /* We can use %d if the number is <32 bits and positive.  */
6352 	  if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
6353 	    fprintf (file, "0x%lx%08lx",
6354 		     (unsigned long) CONST_DOUBLE_HIGH (x),
6355 		     (unsigned long) CONST_DOUBLE_LOW (x));
6356 	  else
6357 	    fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
6358 	}
6359       else
6360 	/* We can't handle floating point constants;
6361 	   PRINT_OPERAND must handle them.  */
6362 	output_operand_lossage ("floating constant misused");
6363       break;
6364 
6365     case PLUS:
6366       /* Some assemblers need integer constants to appear first.  */
6367       if (GET_CODE (XEXP (x, 0)) == CONST_INT)
6368 	{
6369 	  output_pic_addr_const (file, XEXP (x, 0), code);
6370 	  putc ('+', file);
6371 	  output_pic_addr_const (file, XEXP (x, 1), code);
6372 	}
6373       else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6374 	{
6375 	  output_pic_addr_const (file, XEXP (x, 1), code);
6376 	  putc ('+', file);
6377 	  output_pic_addr_const (file, XEXP (x, 0), code);
6378 	}
6379       else
6380 	abort ();
6381       break;
6382 
6383     case MINUS:
6384       if (!TARGET_MACHO)
6385 	putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
6386       output_pic_addr_const (file, XEXP (x, 0), code);
6387       putc ('-', file);
6388       output_pic_addr_const (file, XEXP (x, 1), code);
6389       if (!TARGET_MACHO)
6390 	putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
6391       break;
6392 
6393      case UNSPEC:
6394        if (XVECLEN (x, 0) != 1)
6395 	 abort ();
6396        output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
6397        switch (XINT (x, 1))
6398 	{
6399 	case UNSPEC_GOT:
6400 	  fputs ("@GOT", file);
6401 	  break;
6402 	case UNSPEC_GOTOFF:
6403 	  fputs ("@GOTOFF", file);
6404 	  break;
6405 	case UNSPEC_GOTPCREL:
6406 	  fputs ("@GOTPCREL(%rip)", file);
6407 	  break;
6408 	case UNSPEC_GOTTPOFF:
6409 	  /* FIXME: This might be @TPOFF in Sun ld too.  */
6410 	  fputs ("@GOTTPOFF", file);
6411 	  break;
6412 	case UNSPEC_TPOFF:
6413 	  fputs ("@TPOFF", file);
6414 	  break;
6415 	case UNSPEC_NTPOFF:
6416 	  if (TARGET_64BIT)
6417 	    fputs ("@TPOFF", file);
6418 	  else
6419 	    fputs ("@NTPOFF", file);
6420 	  break;
6421 	case UNSPEC_DTPOFF:
6422 	  fputs ("@DTPOFF", file);
6423 	  break;
6424 	case UNSPEC_GOTNTPOFF:
6425 	  if (TARGET_64BIT)
6426 	    fputs ("@GOTTPOFF(%rip)", file);
6427 	  else
6428 	    fputs ("@GOTNTPOFF", file);
6429 	  break;
6430 	case UNSPEC_INDNTPOFF:
6431 	  fputs ("@INDNTPOFF", file);
6432 	  break;
6433 	default:
6434 	  output_operand_lossage ("invalid UNSPEC as operand");
6435 	  break;
6436 	}
6437        break;
6438 
6439     default:
6440       output_operand_lossage ("invalid expression as operand");
6441     }
6442 }
6443 
6444 /* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
6445    We need to handle our special PIC relocations.  */
6446 
6447 void
6448 i386_dwarf_output_addr_const (file, x)
6449      FILE *file;
6450      rtx x;
6451 {
6452 #ifdef ASM_QUAD
6453   fprintf (file, "%s", TARGET_64BIT ? ASM_QUAD : ASM_LONG);
6454 #else
6455   if (TARGET_64BIT)
6456     abort ();
6457   fprintf (file, "%s", ASM_LONG);
6458 #endif
6459   if (flag_pic)
6460     output_pic_addr_const (file, x, '\0');
6461   else
6462     output_addr_const (file, x);
6463   fputc ('\n', file);
6464 }
6465 
6466 /* This is called from dwarf2out.c via ASM_OUTPUT_DWARF_DTPREL.
6467    We need to emit DTP-relative relocations.  */
6468 
6469 void
6470 i386_output_dwarf_dtprel (file, size, x)
6471      FILE *file;
6472      int size;
6473      rtx x;
6474 {
6475   fputs (ASM_LONG, file);
6476   output_addr_const (file, x);
6477   fputs ("@DTPOFF", file);
6478   switch (size)
6479     {
6480     case 4:
6481       break;
6482     case 8:
6483       fputs (", 0", file);
6484       break;
6485     default:
6486       abort ();
6487    }
6488 }
6489 
6490 /* In the name of slightly smaller debug output, and to cater to
6491    general assembler lossage, recognize PIC+GOTOFF and turn it back
6492    into a direct symbol reference.  */
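/* E.g. (an illustrative round trip): the 32-bit PIC address

     (plus pic_reg (const (unspec [(symbol_ref "x")] UNSPEC_GOTOFF)))

   simplifies back to (symbol_ref "x") for debug output, provided the
   original rtx was not a MEM.  */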
6493 
6494 rtx
6495 i386_simplify_dwarf_addr (orig_x)
6496      rtx orig_x;
6497 {
6498   rtx x = orig_x, y;
6499 
6500   if (GET_CODE (x) == MEM)
6501     x = XEXP (x, 0);
6502 
6503   if (TARGET_64BIT)
6504     {
6505       if (GET_CODE (x) != CONST
6506 	  || GET_CODE (XEXP (x, 0)) != UNSPEC
6507 	  || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
6508 	  || GET_CODE (orig_x) != MEM)
6509 	return orig_x;
6510       return XVECEXP (XEXP (x, 0), 0, 0);
6511     }
6512 
6513   if (GET_CODE (x) != PLUS
6514       || GET_CODE (XEXP (x, 1)) != CONST)
6515     return orig_x;
6516 
6517   if (GET_CODE (XEXP (x, 0)) == REG
6518       && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
6519     /* %ebx + GOT/GOTOFF */
6520     y = NULL;
6521   else if (GET_CODE (XEXP (x, 0)) == PLUS)
6522     {
6523       /* %ebx + %reg * scale + GOT/GOTOFF */
6524       y = XEXP (x, 0);
6525       if (GET_CODE (XEXP (y, 0)) == REG
6526 	  && REGNO (XEXP (y, 0)) == PIC_OFFSET_TABLE_REGNUM)
6527 	y = XEXP (y, 1);
6528       else if (GET_CODE (XEXP (y, 1)) == REG
6529 	       && REGNO (XEXP (y, 1)) == PIC_OFFSET_TABLE_REGNUM)
6530 	y = XEXP (y, 0);
6531       else
6532 	return orig_x;
6533       if (GET_CODE (y) != REG
6534 	  && GET_CODE (y) != MULT
6535 	  && GET_CODE (y) != ASHIFT)
6536 	return orig_x;
6537     }
6538   else
6539     return orig_x;
6540 
6541   x = XEXP (XEXP (x, 1), 0);
6542   if (GET_CODE (x) == UNSPEC
6543       && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6544 	  || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
6545     {
6546       if (y)
6547 	return gen_rtx_PLUS (Pmode, y, XVECEXP (x, 0, 0));
6548       return XVECEXP (x, 0, 0);
6549     }
6550 
6551   if (GET_CODE (x) == PLUS
6552       && GET_CODE (XEXP (x, 0)) == UNSPEC
6553       && GET_CODE (XEXP (x, 1)) == CONST_INT
6554       && ((XINT (XEXP (x, 0), 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6555 	  || (XINT (XEXP (x, 0), 1) == UNSPEC_GOTOFF
6556 	      && GET_CODE (orig_x) != MEM)))
6557     {
6558       x = gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
6559       if (y)
6560 	return gen_rtx_PLUS (Pmode, y, x);
6561       return x;
6562     }
6563 
6564   return orig_x;
6565 }
6566 
6567 static void
6568 put_condition_code (code, mode, reverse, fp, file)
6569      enum rtx_code code;
6570      enum machine_mode mode;
6571      int reverse, fp;
6572      FILE *file;
6573 {
6574   const char *suffix;
6575 
6576   if (mode == CCFPmode || mode == CCFPUmode)
6577     {
6578       enum rtx_code second_code, bypass_code;
6579       ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
6580       if (bypass_code != NIL || second_code != NIL)
6581 	abort ();
6582       code = ix86_fp_compare_code_to_integer (code);
6583       mode = CCmode;
6584     }
6585   if (reverse)
6586     code = reverse_condition (code);
6587 
6588   switch (code)
6589     {
6590     case EQ:
6591       suffix = "e";
6592       break;
6593     case NE:
6594       suffix = "ne";
6595       break;
6596     case GT:
6597       if (mode != CCmode && mode != CCNOmode && mode != CCGCmode)
6598 	abort ();
6599       suffix = "g";
6600       break;
6601     case GTU:
6602       /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
6603 	 Those same assemblers have the same but opposite lossage on cmov.  */
6604       if (mode != CCmode)
6605 	abort ();
6606       suffix = fp ? "nbe" : "a";
6607       break;
6608     case LT:
6609       if (mode == CCNOmode || mode == CCGOCmode)
6610 	suffix = "s";
6611       else if (mode == CCmode || mode == CCGCmode)
6612 	suffix = "l";
6613       else
6614 	abort ();
6615       break;
6616     case LTU:
6617       if (mode != CCmode)
6618 	abort ();
6619       suffix = "b";
6620       break;
6621     case GE:
6622       if (mode == CCNOmode || mode == CCGOCmode)
6623 	suffix = "ns";
6624       else if (mode == CCmode || mode == CCGCmode)
6625 	suffix = "ge";
6626       else
6627 	abort ();
6628       break;
6629     case GEU:
6630       /* ??? As above.  */
6631       if (mode != CCmode)
6632 	abort ();
6633       suffix = fp ? "nb" : "ae";
6634       break;
6635     case LE:
6636       if (mode != CCmode && mode != CCGCmode && mode != CCNOmode)
6637 	abort ();
6638       suffix = "le";
6639       break;
6640     case LEU:
6641       if (mode != CCmode)
6642 	abort ();
6643       suffix = "be";
6644       break;
6645     case UNORDERED:
6646       suffix = fp ? "u" : "p";
6647       break;
6648     case ORDERED:
6649       suffix = fp ? "nu" : "np";
6650       break;
6651     default:
6652       abort ();
6653     }
6654   fputs (suffix, file);
6655 }
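
/* Hedged examples of the mapping above: (EQ ...) prints "e", so a
   "set%C0" template becomes "sete"; (GTU ...) in CCmode prints "nbe"
   for fcmov and "a" otherwise; with REVERSE nonzero, EQ prints "ne".  */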
6656 
6657 void
6658 print_reg (x, code, file)
6659      rtx x;
6660      int code;
6661      FILE *file;
6662 {
6663   if (REGNO (x) == ARG_POINTER_REGNUM
6664       || REGNO (x) == FRAME_POINTER_REGNUM
6665       || REGNO (x) == FLAGS_REG
6666       || REGNO (x) == FPSR_REG)
6667     abort ();
6668 
6669   if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
6670     putc ('%', file);
6671 
6672   if (code == 'w' || MMX_REG_P (x))
6673     code = 2;
6674   else if (code == 'b')
6675     code = 1;
6676   else if (code == 'k')
6677     code = 4;
6678   else if (code == 'q')
6679     code = 8;
6680   else if (code == 'y')
6681     code = 3;
6682   else if (code == 'h')
6683     code = 0;
6684   else
6685     code = GET_MODE_SIZE (GET_MODE (x));
6686 
6687   /* Irritatingly, the AMD extended registers use a different naming
6688      convention from the normal registers.  */
6689   if (REX_INT_REG_P (x))
6690     {
6691       if (!TARGET_64BIT)
6692 	abort ();
6693       switch (code)
6694 	{
6695 	  case 0:
6696 	    error ("extended registers have no high halves");
6697 	    break;
6698 	  case 1:
6699 	    fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
6700 	    break;
6701 	  case 2:
6702 	    fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
6703 	    break;
6704 	  case 4:
6705 	    fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
6706 	    break;
6707 	  case 8:
6708 	    fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
6709 	    break;
6710 	  default:
6711 	    error ("unsupported operand size for extended register");
6712 	    break;
6713 	}
6714       return;
6715     }
6716   switch (code)
6717     {
6718     case 3:
6719       if (STACK_TOP_P (x))
6720 	{
6721 	  fputs ("st(0)", file);
6722 	  break;
6723 	}
6724       /* FALLTHRU */
6725     case 8:
6726     case 4:
6727     case 12:
6728       if (! ANY_FP_REG_P (x))
6729 	putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
6730       /* FALLTHRU */
6731     case 16:
6732     case 2:
6733       fputs (hi_reg_name[REGNO (x)], file);
6734       break;
6735     case 1:
6736       fputs (qi_reg_name[REGNO (x)], file);
6737       break;
6738     case 0:
6739       fputs (qi_high_reg_name[REGNO (x)], file);
6740       break;
6741     default:
6742       abort ();
6743     }
6744 }
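
/* Hedged examples, assuming the AT&T dialect: for register %eax,
   code 'b' prints "%al", 'w' prints "%ax", 'k' prints "%eax", and 'q'
   (64-bit mode) prints "%rax".  An extended register such as r8 with
   code 'k' prints "%r8d", per the AMD convention noted above.  */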
6745 
6746 /* Locate some local-dynamic symbol still in use by this function
6747    so that we can print its name in some tls_local_dynamic_base
6748    pattern.  */
6749 
6750 static const char *
6751 get_some_local_dynamic_name ()
6752 {
6753   rtx insn;
6754 
6755   if (cfun->machine->some_ld_name)
6756     return cfun->machine->some_ld_name;
6757 
6758   for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
6759     if (INSN_P (insn)
6760 	&& for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
6761       return cfun->machine->some_ld_name;
6762 
6763   abort ();
6764 }
6765 
6766 static int
6767 get_some_local_dynamic_name_1 (px, data)
6768      rtx *px;
6769      void *data ATTRIBUTE_UNUSED;
6770 {
6771   rtx x = *px;
6772 
6773   if (GET_CODE (x) == SYMBOL_REF
6774       && local_dynamic_symbolic_operand (x, Pmode))
6775     {
6776       cfun->machine->some_ld_name = XSTR (x, 0);
6777       return 1;
6778     }
6779 
6780   return 0;
6781 }
6782 
6783 /* Meaning of CODE:
6784    L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
6785    C -- print opcode suffix for set/cmov insn.
6786    c -- like C, but print reversed condition
6787    F,f -- likewise, but for floating-point.
6788    O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
6789         otherwise nothing
6790    R -- print the prefix for register names.
6791    z -- print the opcode suffix for the size of the current operand.
6792    * -- print a star (in certain assembler syntax)
6793    A -- print an absolute memory reference.
6794    w -- print the operand as if it's a "word" (HImode) even if it isn't.
6795    s -- print a shift double count, followed by the assembler's argument
6796 	delimiter.
6797    b -- print the QImode name of the register for the indicated operand.
6798 	%b0 would print %al if operands[0] is reg 0.
6799    w --  likewise, print the HImode name of the register.
6800    k --  likewise, print the SImode name of the register.
6801    q --  likewise, print the DImode name of the register.
6802    h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
6803    y -- print "st(0)" instead of "st" as a register.
6804    D -- print condition for SSE cmp instruction.
6805    P -- if PIC, print an @PLT suffix.
6806    X -- don't print any sort of PIC '@' suffix for a symbol.
6807    & -- print some in-use local-dynamic symbol name.
6808  */
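
/* A hedged example of these codes in an output template: with an SImode
   register operand, "mov%z0\t{%1, %0|%0, %1}" emits "movl" in AT&T
   syntax, and "%k1" prints the SImode name of operand 1's register.  */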
6809 
6810 void
6811 print_operand (file, x, code)
6812      FILE *file;
6813      rtx x;
6814      int code;
6815 {
6816   if (code)
6817     {
6818       switch (code)
6819 	{
6820 	case '*':
6821 	  if (ASSEMBLER_DIALECT == ASM_ATT)
6822 	    putc ('*', file);
6823 	  return;
6824 
6825 	case '&':
6826 	  assemble_name (file, get_some_local_dynamic_name ());
6827 	  return;
6828 
6829 	case 'A':
6830 	  if (ASSEMBLER_DIALECT == ASM_ATT)
6831 	    putc ('*', file);
6832 	  else if (ASSEMBLER_DIALECT == ASM_INTEL)
6833 	    {
6834 	      /* Intel syntax.  For absolute addresses, registers should not
6835 		 be surrounded by brackets.  */
6836 	      if (GET_CODE (x) != REG)
6837 		{
6838 		  putc ('[', file);
6839 		  PRINT_OPERAND (file, x, 0);
6840 		  putc (']', file);
6841 		  return;
6842 		}
6843 	    }
6844 	  else
6845 	    abort ();
6846 
6847 	  PRINT_OPERAND (file, x, 0);
6848 	  return;
6849 
6850 
6851 	case 'L':
6852 	  if (ASSEMBLER_DIALECT == ASM_ATT)
6853 	    putc ('l', file);
6854 	  return;
6855 
6856 	case 'W':
6857 	  if (ASSEMBLER_DIALECT == ASM_ATT)
6858 	    putc ('w', file);
6859 	  return;
6860 
6861 	case 'B':
6862 	  if (ASSEMBLER_DIALECT == ASM_ATT)
6863 	    putc ('b', file);
6864 	  return;
6865 
6866 	case 'Q':
6867 	  if (ASSEMBLER_DIALECT == ASM_ATT)
6868 	    putc ('l', file);
6869 	  return;
6870 
6871 	case 'S':
6872 	  if (ASSEMBLER_DIALECT == ASM_ATT)
6873 	    putc ('s', file);
6874 	  return;
6875 
6876 	case 'T':
6877 	  if (ASSEMBLER_DIALECT == ASM_ATT)
6878 	    putc ('t', file);
6879 	  return;
6880 
6881 	case 'z':
6882 	  /* 387 opcodes don't get size suffixes if the operands are
6883 	     registers.  */
6884 	  if (STACK_REG_P (x))
6885 	    return;
6886 
6887 	  /* Likewise if using Intel opcodes.  */
6888 	  if (ASSEMBLER_DIALECT == ASM_INTEL)
6889 	    return;
6890 
6891 	  /* Derive the opcode suffix from the size of the operand.  */
6892 	  switch (GET_MODE_SIZE (GET_MODE (x)))
6893 	    {
6894 	    case 2:
6895 #ifdef HAVE_GAS_FILDS_FISTS
6896 	      putc ('s', file);
6897 #endif
6898 	      return;
6899 
6900 	    case 4:
6901 	      if (GET_MODE (x) == SFmode)
6902 		{
6903 		  putc ('s', file);
6904 		  return;
6905 		}
6906 	      else
6907 		putc ('l', file);
6908 	      return;
6909 
6910 	    case 12:
6911 	    case 16:
6912 	      putc ('t', file);
6913 	      return;
6914 
6915 	    case 8:
6916 	      if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
6917 		{
6918 #ifdef GAS_MNEMONICS
6919 		  putc ('q', file);
6920 #else
6921 		  putc ('l', file);
6922 		  putc ('l', file);
6923 #endif
6924 		}
6925 	      else
6926 	        putc ('l', file);
6927 	      return;
6928 
6929 	    default:
6930 	      abort ();
6931 	    }
6932 
6933 	case 'b':
6934 	case 'w':
6935 	case 'k':
6936 	case 'q':
6937 	case 'h':
6938 	case 'y':
6939 	case 'X':
6940 	case 'P':
6941 	  break;
6942 
6943 	case 's':
6944 	  if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
6945 	    {
6946 	      PRINT_OPERAND (file, x, 0);
6947 	      putc (',', file);
6948 	    }
6949 	  return;
6950 
6951 	case 'D':
6952 	  /* A little bit of braindamage here.  The SSE compare instructions
6953 	     use completely different names for the comparisons than the
6954 	     fp conditional moves do.  */
6955 	  switch (GET_CODE (x))
6956 	    {
6957 	    case EQ:
6958 	    case UNEQ:
6959 	      fputs ("eq", file);
6960 	      break;
6961 	    case LT:
6962 	    case UNLT:
6963 	      fputs ("lt", file);
6964 	      break;
6965 	    case LE:
6966 	    case UNLE:
6967 	      fputs ("le", file);
6968 	      break;
6969 	    case UNORDERED:
6970 	      fputs ("unord", file);
6971 	      break;
6972 	    case NE:
6973 	    case LTGT:
6974 	      fputs ("neq", file);
6975 	      break;
6976 	    case UNGE:
6977 	    case GE:
6978 	      fputs ("nlt", file);
6979 	      break;
6980 	    case UNGT:
6981 	    case GT:
6982 	      fputs ("nle", file);
6983 	      break;
6984 	    case ORDERED:
6985 	      fputs ("ord", file);
6986 	      break;
6987 	    default:
6988 	      abort ();
6989 	      break;
6990 	    }
6991 	  return;
6992 	case 'O':
6993 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
6994 	  if (ASSEMBLER_DIALECT == ASM_ATT)
6995 	    {
6996 	      switch (GET_MODE (x))
6997 		{
6998 		case HImode: putc ('w', file); break;
6999 		case SImode:
7000 		case SFmode: putc ('l', file); break;
7001 		case DImode:
7002 		case DFmode: putc ('q', file); break;
7003 		default: abort ();
7004 		}
7005 	      putc ('.', file);
7006 	    }
7007 #endif
7008 	  return;
7009 	case 'C':
7010 	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
7011 	  return;
7012 	case 'F':
7013 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7014 	  if (ASSEMBLER_DIALECT == ASM_ATT)
7015 	    putc ('.', file);
7016 #endif
7017 	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
7018 	  return;
7019 
7020 	  /* Like above, but with the condition reversed.  */
7021 	case 'c':
7022 	  /* Check to see if argument to %c is really a constant
7023 	     and not a condition code which needs to be reversed.  */
7024 	  if (GET_RTX_CLASS (GET_CODE (x)) != '<')
7025 	    {
7026 	      output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
7027 	      return;
7028 	    }
7029 	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
7030 	  return;
7031 	case 'f':
7032 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7033 	  if (ASSEMBLER_DIALECT == ASM_ATT)
7034 	    putc ('.', file);
7035 #endif
7036 	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
7037 	  return;
7038 	case '+':
7039 	  {
7040 	    rtx x;
7041 
7042 	    if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
7043 	      return;
7044 
7045 	    x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
7046 	    if (x)
7047 	      {
7048 		int pred_val = INTVAL (XEXP (x, 0));
7049 
7050 		if (pred_val < REG_BR_PROB_BASE * 45 / 100
7051 		    || pred_val > REG_BR_PROB_BASE * 55 / 100)
7052 		  {
7053 		    int taken = pred_val > REG_BR_PROB_BASE / 2;
7054 		    int cputaken = final_forward_branch_p (current_output_insn) == 0;
7055 
7056 		    /* Emit hints only in the case the default branch prediction
7057 		       heuristics would fail.  */
7058 		    if (taken != cputaken)
7059 		      {
7060 			/* We use 3e (DS) prefix for taken branches and
7061 			   2e (CS) prefix for not taken branches.  */
7062 			if (taken)
7063 			  fputs ("ds ; ", file);
7064 			else
7065 			  fputs ("cs ; ", file);
7066 		      }
7067 		  }
7068 	      }
7069 	    return;
7070 	  }
7071 	default:
7072 	    output_operand_lossage ("invalid operand code `%c'", code);
7073 	}
7074     }
7075 
7076   if (GET_CODE (x) == REG)
7077     {
7078       PRINT_REG (x, code, file);
7079     }
7080 
7081   else if (GET_CODE (x) == MEM)
7082     {
7083       /* No `byte ptr' prefix for call instructions.  */
7084       if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
7085 	{
7086 	  const char * size;
7087 	  switch (GET_MODE_SIZE (GET_MODE (x)))
7088 	    {
7089 	    case 1: size = "BYTE"; break;
7090 	    case 2: size = "WORD"; break;
7091 	    case 4: size = "DWORD"; break;
7092 	    case 8: size = "QWORD"; break;
7093 	    case 12: size = "XWORD"; break;
7094 	    case 16: size = "XMMWORD"; break;
7095 	    default:
7096 	      abort ();
7097 	    }
7098 
7099 	  /* Check for explicit size override (codes 'b', 'w' and 'k')  */
7100 	  if (code == 'b')
7101 	    size = "BYTE";
7102 	  else if (code == 'w')
7103 	    size = "WORD";
7104 	  else if (code == 'k')
7105 	    size = "DWORD";
7106 
7107 	  fputs (size, file);
7108 	  fputs (" PTR ", file);
7109 	}
7110 
7111       x = XEXP (x, 0);
7112       if (flag_pic && CONSTANT_ADDRESS_P (x))
7113 	output_pic_addr_const (file, x, code);
7114       /* Avoid (%rip) for call operands.  */
7115       else if (CONSTANT_ADDRESS_P (x) && code == 'P'
7116 	       && GET_CODE (x) != CONST_INT)
7117 	output_addr_const (file, x);
7118       else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
7119 	output_operand_lossage ("invalid constraints for operand");
7120       else
7121 	output_address (x);
7122     }
7123 
7124   else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
7125     {
7126       REAL_VALUE_TYPE r;
7127       long l;
7128 
7129       REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7130       REAL_VALUE_TO_TARGET_SINGLE (r, l);
7131 
7132       if (ASSEMBLER_DIALECT == ASM_ATT)
7133 	putc ('$', file);
7134       fprintf (file, "0x%lx", l);
7135     }
7136 
7137   /* These float cases don't actually occur as immediate operands.  */
7138   else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
7139     {
7140       char dstr[30];
7141 
7142       real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
7143       fprintf (file, "%s", dstr);
7144     }
7145 
7146   else if (GET_CODE (x) == CONST_DOUBLE
7147 	   && (GET_MODE (x) == XFmode || GET_MODE (x) == TFmode))
7148     {
7149       char dstr[30];
7150 
7151       real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
7152       fprintf (file, "%s", dstr);
7153     }
7154 
7155   else
7156     {
7157       if (code != 'P')
7158 	{
7159 	  if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
7160 	    {
7161 	      if (ASSEMBLER_DIALECT == ASM_ATT)
7162 		putc ('$', file);
7163 	    }
7164 	  else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
7165 		   || GET_CODE (x) == LABEL_REF)
7166 	    {
7167 	      if (ASSEMBLER_DIALECT == ASM_ATT)
7168 		putc ('$', file);
7169 	      else
7170 		fputs ("OFFSET FLAT:", file);
7171 	    }
7172 	}
7173       if (GET_CODE (x) == CONST_INT)
7174 	fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
7175       else if (flag_pic)
7176 	output_pic_addr_const (file, x, code);
7177       else
7178 	output_addr_const (file, x);
7179     }
7180 }
7181 
7182 /* Print a memory operand whose address is ADDR.  */
7183 
7184 void
7185 print_operand_address (file, addr)
7186      FILE *file;
7187      register rtx addr;
7188 {
7189   struct ix86_address parts;
7190   rtx base, index, disp;
7191   int scale;
7192 
7193   if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_TP)
7194     {
7195       if (ASSEMBLER_DIALECT == ASM_INTEL)
7196 	fputs ("DWORD PTR ", file);
7197       if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
7198 	putc ('%', file);
7199       if (TARGET_64BIT)
7200 	fputs ("fs:0", file);
7201       else
7202 	fputs ("gs:0", file);
7203       return;
7204     }
7205 
7206   if (! ix86_decompose_address (addr, &parts))
7207     abort ();
7208 
7209   base = parts.base;
7210   index = parts.index;
7211   disp = parts.disp;
7212   scale = parts.scale;
7213 
7214   if (!base && !index)
7215     {
7216       /* Displacement only requires special attention.  */
7217 
7218       if (GET_CODE (disp) == CONST_INT)
7219 	{
7220 	  if (ASSEMBLER_DIALECT == ASM_INTEL)
7221 	    {
7222 	      if (USER_LABEL_PREFIX[0] == 0)
7223 		putc ('%', file);
7224 	      fputs ("ds:", file);
7225 	    }
7226 	  fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (addr));
7227 	}
7228       else if (flag_pic)
7229 	output_pic_addr_const (file, addr, 0);
7230       else
7231 	output_addr_const (file, addr);
7232 
7233       /* Use the one byte shorter RIP-relative addressing for 64-bit mode.  */
7234       if (TARGET_64BIT
7235 	  && ((GET_CODE (addr) == SYMBOL_REF
7236 	       && ! tls_symbolic_operand (addr, GET_MODE (addr)))
7237 	      || GET_CODE (addr) == LABEL_REF
7238 	      || (GET_CODE (addr) == CONST
7239 		  && GET_CODE (XEXP (addr, 0)) == PLUS
7240 		  && (GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
7241 		      || GET_CODE (XEXP (XEXP (addr, 0), 0)) == LABEL_REF)
7242 		  && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)))
7243 	fputs ("(%rip)", file);
7244     }
7245   else
7246     {
7247       if (ASSEMBLER_DIALECT == ASM_ATT)
7248 	{
7249 	  if (disp)
7250 	    {
7251 	      if (flag_pic)
7252 		output_pic_addr_const (file, disp, 0);
7253 	      else if (GET_CODE (disp) == LABEL_REF)
7254 		output_asm_label (disp);
7255 	      else
7256 		output_addr_const (file, disp);
7257 	    }
7258 
7259 	  putc ('(', file);
7260 	  if (base)
7261 	    PRINT_REG (base, 0, file);
7262 	  if (index)
7263 	    {
7264 	      putc (',', file);
7265 	      PRINT_REG (index, 0, file);
7266 	      if (scale != 1)
7267 		fprintf (file, ",%d", scale);
7268 	    }
7269 	  putc (')', file);
7270 	}
7271       else
7272 	{
7273 	  rtx offset = NULL_RTX;
7274 
7275 	  if (disp)
7276 	    {
7277 	      /* Pull out the offset of a symbol; print any symbol itself.  */
7278 	      if (GET_CODE (disp) == CONST
7279 		  && GET_CODE (XEXP (disp, 0)) == PLUS
7280 		  && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
7281 		{
7282 		  offset = XEXP (XEXP (disp, 0), 1);
7283 		  disp = gen_rtx_CONST (VOIDmode,
7284 					XEXP (XEXP (disp, 0), 0));
7285 		}
7286 
7287 	      if (flag_pic)
7288 		output_pic_addr_const (file, disp, 0);
7289 	      else if (GET_CODE (disp) == LABEL_REF)
7290 		output_asm_label (disp);
7291 	      else if (GET_CODE (disp) == CONST_INT)
7292 		offset = disp;
7293 	      else
7294 		output_addr_const (file, disp);
7295 	    }
7296 
7297 	  putc ('[', file);
7298 	  if (base)
7299 	    {
7300 	      PRINT_REG (base, 0, file);
7301 	      if (offset)
7302 		{
7303 		  if (INTVAL (offset) >= 0)
7304 		    putc ('+', file);
7305 		  fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
7306 		}
7307 	    }
7308 	  else if (offset)
7309 	    fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
7310 	  else
7311 	    putc ('0', file);
7312 
7313 	  if (index)
7314 	    {
7315 	      putc ('+', file);
7316 	      PRINT_REG (index, 0, file);
7317 	      if (scale != 1)
7318 		fprintf (file, "*%d", scale);
7319 	    }
7320 	  putc (']', file);
7321 	}
7322     }
7323 }
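
/* Hedged example: base %eax, index %ebx, scale 4 and displacement 8
   print as "8(%eax,%ebx,4)" in AT&T syntax and as "[eax+8+ebx*4]" in
   Intel syntax (ignoring any user label prefix differences).  */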
7324 
7325 bool
7326 output_addr_const_extra (file, x)
7327      FILE *file;
7328      rtx x;
7329 {
7330   rtx op;
7331 
7332   if (GET_CODE (x) != UNSPEC)
7333     return false;
7334 
7335   op = XVECEXP (x, 0, 0);
7336   switch (XINT (x, 1))
7337     {
7338     case UNSPEC_GOTTPOFF:
7339       output_addr_const (file, op);
7340       /* FIXME: This might be @TPOFF in Sun ld.  */
7341       fputs ("@GOTTPOFF", file);
7342       break;
7343     case UNSPEC_TPOFF:
7344       output_addr_const (file, op);
7345       fputs ("@TPOFF", file);
7346       break;
7347     case UNSPEC_NTPOFF:
7348       output_addr_const (file, op);
7349       if (TARGET_64BIT)
7350 	fputs ("@TPOFF", file);
7351       else
7352 	fputs ("@NTPOFF", file);
7353       break;
7354     case UNSPEC_DTPOFF:
7355       output_addr_const (file, op);
7356       fputs ("@DTPOFF", file);
7357       break;
7358     case UNSPEC_GOTNTPOFF:
7359       output_addr_const (file, op);
7360       if (TARGET_64BIT)
7361 	fputs ("@GOTTPOFF(%rip)", file);
7362       else
7363 	fputs ("@GOTNTPOFF", file);
7364       break;
7365     case UNSPEC_INDNTPOFF:
7366       output_addr_const (file, op);
7367       fputs ("@INDNTPOFF", file);
7368       break;
7369 
7370     default:
7371       return false;
7372     }
7373 
7374   return true;
7375 }
7376 
7377 /* Split one or more DImode RTL references into pairs of SImode
7378    references.  The RTL can be REG, offsettable MEM, integer constant, or
7379    CONST_DOUBLE.  "operands" is a pointer to an array of DImode RTL to
7380    split and "num" is its length.  lo_half and hi_half are output arrays
7381    that parallel "operands".  */
7382 
7383 void
7384 split_di (operands, num, lo_half, hi_half)
7385      rtx operands[];
7386      int num;
7387      rtx lo_half[], hi_half[];
7388 {
7389   while (num--)
7390     {
7391       rtx op = operands[num];
7392 
7393       /* simplify_subreg refuses to split volatile memory references,
7394          but we still have to handle them.  */
7395       if (GET_CODE (op) == MEM)
7396 	{
7397 	  lo_half[num] = adjust_address (op, SImode, 0);
7398 	  hi_half[num] = adjust_address (op, SImode, 4);
7399 	}
7400       else
7401 	{
7402 	  lo_half[num] = simplify_gen_subreg (SImode, op,
7403 					      GET_MODE (op) == VOIDmode
7404 					      ? DImode : GET_MODE (op), 0);
7405 	  hi_half[num] = simplify_gen_subreg (SImode, op,
7406 					      GET_MODE (op) == VOIDmode
7407 					      ? DImode : GET_MODE (op), 4);
7408 	}
7409     }
7410 }
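
/* Hedged example: for op = (reg:DI x), the code above yields
   lo_half = (subreg:SI (reg:DI x) 0) and hi_half = (subreg:SI (reg:DI x) 4),
   i.e. the low and high 32-bit words at byte offsets 0 and 4 on this
   little-endian target.  */
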
7411 /* Split one or more TImode RTL references into pairs of DImode
7412    references.  The RTL can be REG, offsettable MEM, integer constant, or
7413    CONST_DOUBLE.  "operands" is a pointer to an array of TImode RTL to
7414    split and "num" is its length.  lo_half and hi_half are output arrays
7415    that parallel "operands".  */
7416 
7417 void
7418 split_ti (operands, num, lo_half, hi_half)
7419      rtx operands[];
7420      int num;
7421      rtx lo_half[], hi_half[];
7422 {
7423   while (num--)
7424     {
7425       rtx op = operands[num];
7426 
7427       /* simplify_subreg refuses to split volatile memory references, but we
7428          still have to handle them.  */
7429       if (GET_CODE (op) == MEM)
7430 	{
7431 	  lo_half[num] = adjust_address (op, DImode, 0);
7432 	  hi_half[num] = adjust_address (op, DImode, 8);
7433 	}
7434       else
7435 	{
7436 	  lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
7437 	  hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
7438 	}
7439     }
7440 }
7441 
7442 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
7443    MINUS, MULT or DIV.  OPERANDS are the insn operands, where operands[3]
7444    is the expression of the binary operation.  The output may either be
7445    emitted here, or returned to the caller, like all output_* functions.
7446 
7447    There is no guarantee that the operands are the same mode, as they
7448    might be within FLOAT or FLOAT_EXTEND expressions.  */
7449 
7450 #ifndef SYSV386_COMPAT
7451 /* Set to 1 for compatibility with brain-damaged assemblers.  No-one
7452    wants to fix the assemblers because that causes incompatibility
7453    with gcc.  No-one wants to fix gcc because that causes
7454    incompatibility with assemblers...  You can use the option of
7455    -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way.  */
7456 #define SYSV386_COMPAT 1
7457 #endif
7458 
7459 const char *
7460 output_387_binary_op (insn, operands)
7461      rtx insn;
7462      rtx *operands;
7463 {
7464   static char buf[30];
7465   const char *p;
7466   const char *ssep;
7467   int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]) | SSE_REG_P (operands[2]);
7468 
7469 #ifdef ENABLE_CHECKING
7470   /* Even if we do not want to check the inputs, this documents input
7471      constraints, which helps in understanding the following code.  */
7472   if (STACK_REG_P (operands[0])
7473       && ((REG_P (operands[1])
7474 	   && REGNO (operands[0]) == REGNO (operands[1])
7475 	   && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
7476 	  || (REG_P (operands[2])
7477 	      && REGNO (operands[0]) == REGNO (operands[2])
7478 	      && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
7479       && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
7480     ; /* ok */
7481   else if (!is_sse)
7482     abort ();
7483 #endif
7484 
7485   switch (GET_CODE (operands[3]))
7486     {
7487     case PLUS:
7488       if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7489 	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7490 	p = "fiadd";
7491       else
7492 	p = "fadd";
7493       ssep = "add";
7494       break;
7495 
7496     case MINUS:
7497       if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7498 	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7499 	p = "fisub";
7500       else
7501 	p = "fsub";
7502       ssep = "sub";
7503       break;
7504 
7505     case MULT:
7506       if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7507 	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7508 	p = "fimul";
7509       else
7510 	p = "fmul";
7511       ssep = "mul";
7512       break;
7513 
7514     case DIV:
7515       if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7516 	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7517 	p = "fidiv";
7518       else
7519 	p = "fdiv";
7520       ssep = "div";
7521       break;
7522 
7523     default:
7524       abort ();
7525     }
7526 
7527   if (is_sse)
7528    {
7529       strlcpy (buf, ssep, sizeof buf);
7530       if (GET_MODE (operands[0]) == SFmode)
7531 	strlcat (buf, "ss\t{%2, %0|%0, %2}", sizeof buf);
7532       else
7533 	strlcat (buf, "sd\t{%2, %0|%0, %2}", sizeof buf);
7534       return buf;
7535    }
7536   strlcpy (buf, p, sizeof buf);
7537 
7538   switch (GET_CODE (operands[3]))
7539     {
7540     case MULT:
7541     case PLUS:
7542       if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
7543 	{
7544 	  rtx temp = operands[2];
7545 	  operands[2] = operands[1];
7546 	  operands[1] = temp;
7547 	}
7548 
7549       /* We now know that operands[0] == operands[1].  */
7550 
7551       if (GET_CODE (operands[2]) == MEM)
7552 	{
7553 	  p = "%z2\t%2";
7554 	  break;
7555 	}
7556 
7557       if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7558 	{
7559 	  if (STACK_TOP_P (operands[0]))
7560 	    /* How is it that we are storing to a dead operand[2]?
7561 	       Well, presumably operands[1] is dead too.  We can't
7562 	       store the result to st(0) as st(0) gets popped on this
7563 	       instruction.  Instead store to operands[2] (which I
7564 	       think has to be st(1)).  st(1) will be popped later.
7565 	       gcc <= 2.8.1 didn't have this check and generated
7566 	       assembly code that the Unixware assembler rejected.  */
7567 	    p = "p\t{%0, %2|%2, %0}";	/* st(1) = st(0) op st(1); pop */
7568 	  else
7569 	    p = "p\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0); pop */
7570 	  break;
7571 	}
7572 
7573       if (STACK_TOP_P (operands[0]))
7574 	p = "\t{%y2, %0|%0, %y2}";	/* st(0) = st(0) op st(r2) */
7575       else
7576 	p = "\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0) */
7577       break;
7578 
7579     case MINUS:
7580     case DIV:
7581       if (GET_CODE (operands[1]) == MEM)
7582 	{
7583 	  p = "r%z1\t%1";
7584 	  break;
7585 	}
7586 
7587       if (GET_CODE (operands[2]) == MEM)
7588 	{
7589 	  p = "%z2\t%2";
7590 	  break;
7591 	}
7592 
7593       if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7594 	{
7595 #if SYSV386_COMPAT
7596 	  /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
7597 	     derived assemblers, confusingly reverse the direction of
7598 	     the operation for fsub{r} and fdiv{r} when the
7599 	     destination register is not st(0).  The Intel assembler
7600 	     doesn't have this brain damage.  Read !SYSV386_COMPAT to
7601 	     figure out what the hardware really does.  */
7602 	  if (STACK_TOP_P (operands[0]))
7603 	    p = "{p\t%0, %2|rp\t%2, %0}";
7604 	  else
7605 	    p = "{rp\t%2, %0|p\t%0, %2}";
7606 #else
7607 	  if (STACK_TOP_P (operands[0]))
7608 	    /* As above for fmul/fadd, we can't store to st(0).  */
7609 	    p = "rp\t{%0, %2|%2, %0}";	/* st(1) = st(0) op st(1); pop */
7610 	  else
7611 	    p = "p\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0); pop */
7612 #endif
7613 	  break;
7614 	}
7615 
7616       if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
7617 	{
7618 #if SYSV386_COMPAT
7619 	  if (STACK_TOP_P (operands[0]))
7620 	    p = "{rp\t%0, %1|p\t%1, %0}";
7621 	  else
7622 	    p = "{p\t%1, %0|rp\t%0, %1}";
7623 #else
7624 	  if (STACK_TOP_P (operands[0]))
7625 	    p = "p\t{%0, %1|%1, %0}";	/* st(1) = st(1) op st(0); pop */
7626 	  else
7627 	    p = "rp\t{%1, %0|%0, %1}";	/* st(r2) = st(0) op st(r2); pop */
7628 #endif
7629 	  break;
7630 	}
7631 
7632       if (STACK_TOP_P (operands[0]))
7633 	{
7634 	  if (STACK_TOP_P (operands[1]))
7635 	    p = "\t{%y2, %0|%0, %y2}";	/* st(0) = st(0) op st(r2) */
7636 	  else
7637 	    p = "r\t{%y1, %0|%0, %y1}";	/* st(0) = st(r1) op st(0) */
7638 	  break;
7639 	}
7640       else if (STACK_TOP_P (operands[1]))
7641 	{
7642 #if SYSV386_COMPAT
7643 	  p = "{\t%1, %0|r\t%0, %1}";
7644 #else
7645 	  p = "r\t{%1, %0|%0, %1}";	/* st(r2) = st(0) op st(r2) */
7646 #endif
7647 	}
7648       else
7649 	{
7650 #if SYSV386_COMPAT
7651 	  p = "{r\t%2, %0|\t%0, %2}";
7652 #else
7653 	  p = "\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0) */
7654 #endif
7655 	}
7656       break;
7657 
7658     default:
7659       abort ();
7660     }
7661 
7662   strcat (buf, p);
7663   return buf;
7664 }
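
/* Hedged example: for st(0) = st(0) + st(2) (PLUS, no SSE operands, no
   REG_DEAD notes) the code above returns "fadd\t{%y2, %0|%0, %y2}",
   i.e. "fadd %st(2), %st" in AT&T syntax.  */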
7665 
7666 /* Output code to initialize control word copies used by
7667    trunc?f?i patterns.  NORMAL is set to the current control word, while ROUND_DOWN
7668    is set to a control word that rounds toward zero (truncation).  */
7669 void
7670 emit_i387_cw_initialization (normal, round_down)
7671      rtx normal, round_down;
7672 {
7673   rtx reg = gen_reg_rtx (HImode);
7674 
7675   emit_insn (gen_x86_fnstcw_1 (normal));
7676   emit_move_insn (reg, normal);
7677   if (!TARGET_PARTIAL_REG_STALL && !optimize_size
7678       && !TARGET_64BIT)
7679     emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
7680   else
7681     emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0xc00)));
7682   emit_move_insn (round_down, reg);
7683 }
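
/* Hedged sketch of the effect: fnstcw stores the live control word into
   NORMAL; the copy is OR-ed with 0xc00, setting both rounding-control
   bits (round toward zero); the result is stored into ROUND_DOWN for the
   truncating patterns to fldcw around their fist instructions.  */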
7684 
7685 /* Output code for INSN to convert a float to a signed int.  OPERANDS
7686    are the insn operands.  The output may be [HSD]Imode and the input
7687    operand may be [SDX]Fmode.  */
7688 
7689 const char *
7690 output_fix_trunc (insn, operands)
7691      rtx insn;
7692      rtx *operands;
7693 {
7694   int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
7695   int dimode_p = GET_MODE (operands[0]) == DImode;
7696 
7697   /* Jump through a hoop or two for DImode, since the hardware has no
7698      non-popping instruction.  We used to do this a different way, but
7699      that was somewhat fragile and broke with post-reload splitters.  */
7700   if (dimode_p && !stack_top_dies)
7701     output_asm_insn ("fld\t%y1", operands);
7702 
7703   if (!STACK_TOP_P (operands[1]))
7704     abort ();
7705 
7706   if (GET_CODE (operands[0]) != MEM)
7707     abort ();
7708 
7709   output_asm_insn ("fldcw\t%3", operands);
7710   if (stack_top_dies || dimode_p)
7711     output_asm_insn ("fistp%z0\t%0", operands);
7712   else
7713     output_asm_insn ("fist%z0\t%0", operands);
7714   output_asm_insn ("fldcw\t%2", operands);
7715 
7716   return "";
7717 }
7718 
7719 /* Output code for INSN to compare OPERANDS.  EFLAGS_P is 1 when fcomi
7720    should be used and 2 when fnstsw should be used.  UNORDERED_P is true
7721    when fucom should be used.  */
7722 
7723 const char *
7724 output_fp_compare (insn, operands, eflags_p, unordered_p)
7725      rtx insn;
7726      rtx *operands;
7727      int eflags_p, unordered_p;
7728 {
7729   int stack_top_dies;
7730   rtx cmp_op0 = operands[0];
7731   rtx cmp_op1 = operands[1];
7732   int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]);
7733 
7734   if (eflags_p == 2)
7735     {
7736       cmp_op0 = cmp_op1;
7737       cmp_op1 = operands[2];
7738     }
7739   if (is_sse)
7740     {
7741       if (GET_MODE (operands[0]) == SFmode)
7742 	if (unordered_p)
7743 	  return "ucomiss\t{%1, %0|%0, %1}";
7744 	else
7745 	  return "comiss\t{%1, %0|%0, %1}";
7746       else
7747 	if (unordered_p)
7748 	  return "ucomisd\t{%1, %0|%0, %1}";
7749 	else
7750 	  return "comisd\t{%1, %0|%0, %1}";
7751     }
7752 
7753   if (! STACK_TOP_P (cmp_op0))
7754     abort ();
7755 
7756   stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
7757 
7758   if (STACK_REG_P (cmp_op1)
7759       && stack_top_dies
7760       && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
7761       && REGNO (cmp_op1) != FIRST_STACK_REG)
7762     {
7763       /* If both the top of the 387 stack and the other operand, also a
7764 	 stack register, die, then this must be an
7765 	 `fcompp' float compare.  */
7766 
7767       if (eflags_p == 1)
7768 	{
7769 	  /* There is no double popping fcomi variant.  Fortunately,
7770 	     eflags is immune from the fstp's cc clobbering.  */
7771 	  if (unordered_p)
7772 	    output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
7773 	  else
7774 	    output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
7775 	  return "fstp\t%y0";
7776 	}
7777       else
7778 	{
7779 	  if (eflags_p == 2)
7780 	    {
7781 	      if (unordered_p)
7782 		return "fucompp\n\tfnstsw\t%0";
7783 	      else
7784 		return "fcompp\n\tfnstsw\t%0";
7785 	    }
7786 	  else
7787 	    {
7788 	      if (unordered_p)
7789 		return "fucompp";
7790 	      else
7791 		return "fcompp";
7792 	    }
7793 	}
7794     }
7795   else
7796     {
7797       /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies.  */
7798 
7799       static const char * const alt[24] =
7800       {
7801 	"fcom%z1\t%y1",
7802 	"fcomp%z1\t%y1",
7803 	"fucom%z1\t%y1",
7804 	"fucomp%z1\t%y1",
7805 
7806 	"ficom%z1\t%y1",
7807 	"ficomp%z1\t%y1",
7808 	NULL,
7809 	NULL,
7810 
7811 	"fcomi\t{%y1, %0|%0, %y1}",
7812 	"fcomip\t{%y1, %0|%0, %y1}",
7813 	"fucomi\t{%y1, %0|%0, %y1}",
7814 	"fucomip\t{%y1, %0|%0, %y1}",
7815 
7816 	NULL,
7817 	NULL,
7818 	NULL,
7819 	NULL,
7820 
7821 	"fcom%z2\t%y2\n\tfnstsw\t%0",
7822 	"fcomp%z2\t%y2\n\tfnstsw\t%0",
7823 	"fucom%z2\t%y2\n\tfnstsw\t%0",
7824 	"fucomp%z2\t%y2\n\tfnstsw\t%0",
7825 
7826 	"ficom%z2\t%y2\n\tfnstsw\t%0",
7827 	"ficomp%z2\t%y2\n\tfnstsw\t%0",
7828 	NULL,
7829 	NULL
7830       };
7831 
7832       int mask;
7833       const char *ret;
7834 
7835       mask  = eflags_p << 3;
7836       mask |= (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT) << 2;
7837       mask |= unordered_p << 1;
7838       mask |= stack_top_dies;
7839 
7840       if (mask >= 24)
7841 	abort ();
7842       ret = alt[mask];
7843       if (ret == NULL)
7844 	abort ();
7845 
7846       return ret;
7847     }
7848 }
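
/* Hedged example of the mask: eflags_p == 1 with a floating-point
   operand, an ordered compare and a dying stack top gives
   mask = (1 << 3) | 0 | 0 | 1 == 9, selecting
   "fcomip\t{%y1, %0|%0, %y1}" from the table above.  */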
7849 
7850 void
7851 ix86_output_addr_vec_elt (file, value)
7852      FILE *file;
7853      int value;
7854 {
7855   const char *directive = ASM_LONG;
7856 
7857   if (TARGET_64BIT)
7858     {
7859 #ifdef ASM_QUAD
7860       directive = ASM_QUAD;
7861 #else
7862       abort ();
7863 #endif
7864     }
7865 
7866   fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
7867 }
7868 
7869 void
7870 ix86_output_addr_diff_elt (file, value, rel)
7871      FILE *file;
7872      int value, rel;
7873 {
7874   if (TARGET_64BIT)
7875     fprintf (file, "%s%s%d-%s%d\n",
7876 	     ASM_LONG, LPREFIX, value, LPREFIX, rel);
7877   else if (HAVE_AS_GOTOFF_IN_DATA)
7878     fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
7879 #if TARGET_MACHO
7880   else if (TARGET_MACHO)
7881     fprintf (file, "%s%s%d-%s\n", ASM_LONG, LPREFIX, value,
7882 	     machopic_function_base_name () + 1);
7883 #endif
7884   else
7885     asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
7886 		 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
7887 }
7888 
7889 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
7890    for the target.  */
7891 
7892 void
7893 ix86_expand_clear (dest)
7894      rtx dest;
7895 {
7896   rtx tmp;
7897 
7898   /* We play register width games, which are only valid after reload.  */
7899   if (!reload_completed)
7900     abort ();
7901 
7902   /* Avoid HImode and its attendant prefix byte.  */
7903   if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
7904     dest = gen_rtx_REG (SImode, REGNO (dest));
7905 
7906   tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
7907 
7908   /* This predicate should match that for movsi_xor and movdi_xor_rex64.  */
7909   if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
7910     {
7911       rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
7912       tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
7913     }
7914 
7915   emit_insn (tmp);
7916 }
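
/* Hedged example: clearing %eax emits "xorl %eax, %eax" (with the flags
   clobber attached) unless TARGET_USE_MOV0 is set and we are not
   optimizing for size, in which case "movl $0, %eax" is used.  */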
7917 
7918 /* X is an unchanging MEM.  If it is a constant pool reference, return
7919    the constant pool rtx, else NULL.  */
7920 
7921 static rtx
7922 maybe_get_pool_constant (x)
7923      rtx x;
7924 {
7925   x = XEXP (x, 0);
7926 
7927   if (flag_pic && ! TARGET_64BIT)
7928     {
7929       if (GET_CODE (x) != PLUS)
7930 	return NULL_RTX;
7931       if (XEXP (x, 0) != pic_offset_table_rtx)
7932 	return NULL_RTX;
7933       x = XEXP (x, 1);
7934       if (GET_CODE (x) != CONST)
7935 	return NULL_RTX;
7936       x = XEXP (x, 0);
7937       if (GET_CODE (x) != UNSPEC)
7938 	return NULL_RTX;
7939       if (XINT (x, 1) != UNSPEC_GOTOFF)
7940 	return NULL_RTX;
7941       x = XVECEXP (x, 0, 0);
7942     }
7943 
7944   if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
7945     return get_pool_constant (x);
7946 
7947   return NULL_RTX;
7948 }
7949 
7950 void
7951 ix86_expand_move (mode, operands)
7952      enum machine_mode mode;
7953      rtx operands[];
7954 {
7955   int strict = (reload_in_progress || reload_completed);
7956   rtx insn, op0, op1, tmp;
7957 
7958   op0 = operands[0];
7959   op1 = operands[1];
7960 
7961   if (tls_symbolic_operand (op1, Pmode))
7962     {
7963       op1 = legitimize_address (op1, op1, VOIDmode);
7964       if (GET_CODE (op0) == MEM)
7965 	{
7966 	  tmp = gen_reg_rtx (mode);
7967 	  emit_insn (gen_rtx_SET (VOIDmode, tmp, op1));
7968 	  op1 = tmp;
7969 	}
7970     }
7971   else if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
7972     {
7973 #if TARGET_MACHO
7974       if (MACHOPIC_PURE)
7975 	{
7976 	  rtx temp = ((reload_in_progress
7977 		       || ((op0 && GET_CODE (op0) == REG)
7978 			   && mode == Pmode))
7979 		      ? op0 : gen_reg_rtx (Pmode));
7980 	  op1 = machopic_indirect_data_reference (op1, temp);
7981 	  op1 = machopic_legitimize_pic_address (op1, mode,
7982 						 temp == op1 ? 0 : temp);
7983 	}
7984       else
7985 	{
7986 	  if (MACHOPIC_INDIRECT)
7987 	    op1 = machopic_indirect_data_reference (op1, 0);
7988 	}
7989       if (op0 != op1)
7990 	{
7991 	  insn = gen_rtx_SET (VOIDmode, op0, op1);
7992 	  emit_insn (insn);
7993 	}
7994       return;
7995 #endif /* TARGET_MACHO */
7996       if (GET_CODE (op0) == MEM)
7997 	op1 = force_reg (Pmode, op1);
7998       else
7999 	{
8000 	  rtx temp = op0;
8001 	  if (GET_CODE (temp) != REG)
8002 	    temp = gen_reg_rtx (Pmode);
8003 	  temp = legitimize_pic_address (op1, temp);
8004 	  if (temp == op0)
8005 	    return;
8006 	  op1 = temp;
8007 	}
8008     }
8009   else
8010     {
8011       if (GET_CODE (op0) == MEM
8012 	  && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
8013 	      || !push_operand (op0, mode))
8014 	  && GET_CODE (op1) == MEM)
8015 	op1 = force_reg (mode, op1);
8016 
8017       if (push_operand (op0, mode)
8018 	  && ! general_no_elim_operand (op1, mode))
8019 	op1 = copy_to_mode_reg (mode, op1);
8020 
8021       /* Force large constants in 64-bit compilation into a register
8022 	 so that they get CSEed.  */
8023       if (TARGET_64BIT && mode == DImode
8024 	  && immediate_operand (op1, mode)
8025 	  && !x86_64_zero_extended_value (op1)
8026 	  && !register_operand (op0, mode)
8027 	  && optimize && !reload_completed && !reload_in_progress)
8028 	op1 = copy_to_mode_reg (mode, op1);
8029 
8030       if (FLOAT_MODE_P (mode))
8031 	{
8032 	  /* If we are loading a floating point constant to a register,
8033 	     force the value to memory now, since we'll get better code
8034 	     out the back end.  */
8035 
8036 	  if (strict)
8037 	    ;
8038 	  else if (GET_CODE (op1) == CONST_DOUBLE)
8039 	    {
8040 	      op1 = validize_mem (force_const_mem (mode, op1));
8041 	      if (!register_operand (op0, mode))
8042 		{
8043 		  rtx temp = gen_reg_rtx (mode);
8044 		  emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
8045 		  emit_move_insn (op0, temp);
8046 		  return;
8047 		}
8048 	    }
8049 	}
8050     }
8051 
8052   insn = gen_rtx_SET (VOIDmode, op0, op1);
8053 
8054   emit_insn (insn);
8055 }
8056 
8057 void
8058 ix86_expand_vector_move (mode, operands)
8059      enum machine_mode mode;
8060      rtx operands[];
8061 {
8062   /* Force constants other than zero into memory.  We do not know how
8063      the instructions used to build constants modify the upper 64 bits
8064      of the register; once we have that information, we may be able
8065      to handle some of them more efficiently.  */
8066   if ((reload_in_progress | reload_completed) == 0
8067       && register_operand (operands[0], mode)
8068       && CONSTANT_P (operands[1]) && operands[1] != CONST0_RTX (mode))
8069     {
8070       operands[1] = force_const_mem (mode, operands[1]);
8071       emit_move_insn (operands[0], operands[1]);
8072       return;
8073     }
8074 
8075   /* Make operand1 a register if it isn't already.  */
8076   if (!no_new_pseudos
8077       && !register_operand (operands[0], mode)
8078       && !register_operand (operands[1], mode))
8079     {
8080       rtx temp = force_reg (GET_MODE (operands[1]), operands[1]);
8081       emit_move_insn (operands[0], temp);
8082       return;
8083     }
8084 
8085   emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
8086 }
8087 
8088 /* Attempt to expand a binary operator.  Make the expansion closer to the
8089    actual machine than just general_operand, which will allow 3 separate
8090    memory references (one output, two input) in a single insn.  */
8091 
8092 void
8093 ix86_expand_binary_operator (code, mode, operands)
8094      enum rtx_code code;
8095      enum machine_mode mode;
8096      rtx operands[];
8097 {
8098   int matching_memory;
8099   rtx src1, src2, dst, op, clob;
8100 
8101   dst = operands[0];
8102   src1 = operands[1];
8103   src2 = operands[2];
8104 
8105   /* Recognize <var1> = <value> <op> <var1> for commutative operators */
8106   if (GET_RTX_CLASS (code) == 'c'
8107       && (rtx_equal_p (dst, src2)
8108 	  || immediate_operand (src1, mode)))
8109     {
8110       rtx temp = src1;
8111       src1 = src2;
8112       src2 = temp;
8113     }
8114 
8115   /* If the destination is memory, and we do not have matching source
8116      operands, do things in registers.  */
8117   matching_memory = 0;
8118   if (GET_CODE (dst) == MEM)
8119     {
8120       if (rtx_equal_p (dst, src1))
8121 	matching_memory = 1;
8122       else if (GET_RTX_CLASS (code) == 'c'
8123 	       && rtx_equal_p (dst, src2))
8124 	matching_memory = 2;
8125       else
8126 	dst = gen_reg_rtx (mode);
8127     }
8128 
8129   /* Both source operands cannot be in memory.  */
8130   if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
8131     {
8132       if (matching_memory != 2)
8133 	src2 = force_reg (mode, src2);
8134       else
8135 	src1 = force_reg (mode, src1);
8136     }
8137 
8138   /* If the operation is not commutative, source 1 cannot be a constant
8139      or non-matching memory.  */
8140   if ((CONSTANT_P (src1)
8141        || (!matching_memory && GET_CODE (src1) == MEM))
8142       && GET_RTX_CLASS (code) != 'c')
8143     src1 = force_reg (mode, src1);
8144 
8145   /* If optimizing, copy to regs to improve CSE */
8146   if (optimize && ! no_new_pseudos)
8147     {
8148       if (GET_CODE (dst) == MEM)
8149 	dst = gen_reg_rtx (mode);
8150       if (GET_CODE (src1) == MEM)
8151 	src1 = force_reg (mode, src1);
8152       if (GET_CODE (src2) == MEM)
8153 	src2 = force_reg (mode, src2);
8154     }
8155 
8156   /* Emit the instruction.  */
8157 
8158   op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
8159   if (reload_in_progress)
8160     {
8161       /* Reload doesn't know about the flags register, and doesn't know that
8162          it doesn't want to clobber it.  We can only do this with PLUS.  */
8163       if (code != PLUS)
8164 	abort ();
8165       emit_insn (op);
8166     }
8167   else
8168     {
8169       clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
8170       emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
8171     }
8172 
8173   /* Fix up the destination if needed.  */
8174   if (dst != operands[0])
8175     emit_move_insn (operands[0], dst);
8176 }
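
/* Hedged example: expanding (set (mem A) (plus (mem A) (mem B))) forces
   the second source into a register, since at most one of the three
   operands may be a (matching) memory reference; the PLUS is then
   emitted together with its flags clobber.  */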
8177 
8178 /* Return TRUE or FALSE depending on whether the binary operator meets the
8179    appropriate constraints.  */
8180 
8181 int
8182 ix86_binary_operator_ok (code, mode, operands)
8183      enum rtx_code code;
8184      enum machine_mode mode ATTRIBUTE_UNUSED;
8185      rtx operands[3];
8186 {
8187   /* Both source operands cannot be in memory.  */
8188   if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
8189     return 0;
8190   /* If the operation is not commutative, source 1 cannot be a constant.  */
8191   if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != 'c')
8192     return 0;
8193   /* If the destination is memory, we must have a matching source operand.  */
8194   if (GET_CODE (operands[0]) == MEM
8195       && ! (rtx_equal_p (operands[0], operands[1])
8196 	    || (GET_RTX_CLASS (code) == 'c'
8197 		&& rtx_equal_p (operands[0], operands[2]))))
8198     return 0;
8199   /* If the operation is not commutative and source 1 is memory, we must
8200      have a matching destination.  */
8201   if (GET_CODE (operands[1]) == MEM
8202       && GET_RTX_CLASS (code) != 'c'
8203       && ! rtx_equal_p (operands[0], operands[1]))
8204     return 0;
8205   return 1;
8206 }
8207 
8208 /* Attempt to expand a unary operator.  Make the expansion closer to the
8209    actual machine than just general_operand, which will allow 2 separate
8210    memory references (one output, one input) in a single insn.  */
8211 
8212 void
8213 ix86_expand_unary_operator (code, mode, operands)
8214      enum rtx_code code;
8215      enum machine_mode mode;
8216      rtx operands[];
8217 {
8218   int matching_memory;
8219   rtx src, dst, op, clob;
8220 
8221   dst = operands[0];
8222   src = operands[1];
8223 
8224   /* If the destination is memory, and we do not have matching source
8225      operands, do things in registers.  */
8226   matching_memory = 0;
8227   if (GET_CODE (dst) == MEM)
8228     {
8229       if (rtx_equal_p (dst, src))
8230 	matching_memory = 1;
8231       else
8232 	dst = gen_reg_rtx (mode);
8233     }
8234 
8235   /* When source operand is memory, destination must match.  */
8236   if (!matching_memory && GET_CODE (src) == MEM)
8237     src = force_reg (mode, src);
8238 
8239   /* If optimizing, copy to regs to improve CSE */
8240   if (optimize && ! no_new_pseudos)
8241     {
8242       if (GET_CODE (dst) == MEM)
8243 	dst = gen_reg_rtx (mode);
8244       if (GET_CODE (src) == MEM)
8245 	src = force_reg (mode, src);
8246     }
8247 
8248   /* Emit the instruction.  */
8249 
8250   op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
8251   if (reload_in_progress || code == NOT)
8252     {
8253       /* Reload doesn't know about the flags register, and doesn't know that
8254          it doesn't want to clobber it.  */
8255       if (code != NOT)
8256         abort ();
8257       emit_insn (op);
8258     }
8259   else
8260     {
8261       clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
8262       emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
8263     }
8264 
8265   /* Fix up the destination if needed.  */
8266   if (dst != operands[0])
8267     emit_move_insn (operands[0], dst);
8268 }
8269 
8270 /* Return TRUE or FALSE depending on whether the unary operator meets the
8271    appropriate constraints.  */
8272 
8273 int
8274 ix86_unary_operator_ok (code, mode, operands)
8275      enum rtx_code code ATTRIBUTE_UNUSED;
8276      enum machine_mode mode ATTRIBUTE_UNUSED;
8277      rtx operands[2] ATTRIBUTE_UNUSED;
8278 {
8279   /* If one of operands is memory, source and destination must match.  */
8280   if ((GET_CODE (operands[0]) == MEM
8281        || GET_CODE (operands[1]) == MEM)
8282       && ! rtx_equal_p (operands[0], operands[1]))
8283     return FALSE;
8284   return TRUE;
8285 }
8286 
8287 /* Return TRUE or FALSE depending on whether the first SET in INSN
8288    has source and destination with matching CC modes, and that the
8289    CC mode is at least as constrained as REQ_MODE.  */
8290 
8291 int
8292 ix86_match_ccmode (insn, req_mode)
8293      rtx insn;
8294      enum machine_mode req_mode;
8295 {
8296   rtx set;
8297   enum machine_mode set_mode;
8298 
8299   set = PATTERN (insn);
8300   if (GET_CODE (set) == PARALLEL)
8301     set = XVECEXP (set, 0, 0);
8302   if (GET_CODE (set) != SET)
8303     abort ();
8304   if (GET_CODE (SET_SRC (set)) != COMPARE)
8305     abort ();
8306 
8307   set_mode = GET_MODE (SET_DEST (set));
8308   switch (set_mode)
8309     {
8310     case CCNOmode:
8311       if (req_mode != CCNOmode
8312 	  && (req_mode != CCmode
8313 	      || XEXP (SET_SRC (set), 1) != const0_rtx))
8314 	return 0;
8315       break;
8316     case CCmode:
8317       if (req_mode == CCGCmode)
8318 	return 0;
8319       /* FALLTHRU */
8320     case CCGCmode:
8321       if (req_mode == CCGOCmode || req_mode == CCNOmode)
8322 	return 0;
8323       /* FALLTHRU */
8324     case CCGOCmode:
8325       if (req_mode == CCZmode)
8326 	return 0;
8327       /* FALLTHRU */
8328     case CCZmode:
8329       break;
8330 
8331     default:
8332       abort ();
8333     }
8334 
8335   return (GET_MODE (SET_SRC (set)) == set_mode);
8336 }
8337 
8338 /* Generate insn patterns to do an integer compare of OPERANDS.  */
8339 
8340 static rtx
8341 ix86_expand_int_compare (code, op0, op1)
8342      enum rtx_code code;
8343      rtx op0, op1;
8344 {
8345   enum machine_mode cmpmode;
8346   rtx tmp, flags;
8347 
8348   cmpmode = SELECT_CC_MODE (code, op0, op1);
8349   flags = gen_rtx_REG (cmpmode, FLAGS_REG);
8350 
8351   /* This is very simple, but making the interface the same as in the
8352      FP case makes the rest of the code easier.  */
8353   tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
8354   emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
8355 
8356   /* Return the test that should be put into the flags user, i.e.
8357      the bcc, scc, or cmov instruction.  */
8358   return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
8359 }
8360 
8361 /* Figure out whether to use ordered or unordered fp comparisons.
8362    Return the appropriate mode to use.  */
8363 
8364 enum machine_mode
8365 ix86_fp_compare_mode (code)
8366      enum rtx_code code ATTRIBUTE_UNUSED;
8367 {
8368   /* ??? In order to make all comparisons reversible, we do all comparisons
8369      non-trapping when compiling for IEEE.  Once gcc is able to distinguish
8370      between all forms of trapping and nontrapping comparisons, we can make
8371      inequality comparisons trapping again, since that results in better
8372      code when using FCOM based compares.  */
8373   return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
8374 }
8375 
8376 enum machine_mode
8377 ix86_cc_mode (code, op0, op1)
8378      enum rtx_code code;
8379      rtx op0, op1;
8380 {
8381   if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
8382     return ix86_fp_compare_mode (code);
8383   switch (code)
8384     {
8385       /* Only zero flag is needed.  */
8386     case EQ:			/* ZF=0 */
8387     case NE:			/* ZF!=0 */
8388       return CCZmode;
8389       /* Codes needing carry flag.  */
8390     case GEU:			/* CF=0 */
8391     case GTU:			/* CF=0 & ZF=0 */
8392     case LTU:			/* CF=1 */
8393     case LEU:			/* CF=1 | ZF=1 */
8394       return CCmode;
8395       /* Codes possibly doable only with sign flag when
8396          comparing against zero.  */
8397     case GE:			/* SF=OF   or   SF=0 */
8398     case LT:			/* SF<>OF  or   SF=1 */
8399       if (op1 == const0_rtx)
8400 	return CCGOCmode;
8401       else
8402 	/* For other cases Carry flag is not required.  */
8403 	return CCGCmode;
8404       /* Codes doable only with the sign flag when comparing
8405          against zero, but for which we lack a jump instruction,
8406          so we need to use relational tests against overflow,
8407          which thus needs to be zero.  */
8408     case GT:			/* ZF=0 & SF=OF */
8409     case LE:			/* ZF=1 | SF<>OF */
8410       if (op1 == const0_rtx)
8411 	return CCNOmode;
8412       else
8413 	return CCGCmode;
8414       /* The strcmp pattern does a (use flags), and combine may ask us for the
8415 	 proper mode.  */
8416     case USE:
8417       return CCmode;
8418     default:
8419       abort ();
8420     }
8421 }
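
/* Hedged examples: (EQ x y) yields CCZmode and (LTU x y) CCmode;
   (GT x (const_int 0)) yields CCNOmode, while (GT x y) with nonzero y
   needs the full CCGCmode.  */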
8422 
8423 /* Return the fixed registers used for condition codes.  */
8424 
8425 static bool
8426 ix86_fixed_condition_code_regs (p1, p2)
8427      unsigned int *p1;
8428      unsigned int *p2;
8429 {
8430   *p1 = FLAGS_REG;
8431   *p2 = FPSR_REG;
8432   return true;
8433 }
8434 
8435 /* If two condition code modes are compatible, return a condition code
8436    mode which is compatible with both.  Otherwise, return
8437    VOIDmode.  */
8438 
8439 static enum machine_mode
8440 ix86_cc_modes_compatible (m1, m2)
8441      enum machine_mode m1;
8442      enum machine_mode m2;
8443 {
8444   if (m1 == m2)
8445     return m1;
8446 
8447   if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
8448     return VOIDmode;
8449 
8450   if ((m1 == CCGCmode && m2 == CCGOCmode)
8451       || (m1 == CCGOCmode && m2 == CCGCmode))
8452     return CCGCmode;
8453 
8454   switch (m1)
8455     {
8456     default:
8457       abort ();
8458 
8459     case CCmode:
8460     case CCGCmode:
8461     case CCGOCmode:
8462     case CCNOmode:
8463     case CCZmode:
8464       switch (m2)
8465 	{
8466 	default:
8467 	  return VOIDmode;
8468 
8469 	case CCmode:
8470 	case CCGCmode:
8471 	case CCGOCmode:
8472 	case CCNOmode:
8473 	case CCZmode:
8474 	  return CCmode;
8475 	}
8476 
8477     case CCFPmode:
8478     case CCFPUmode:
8479       /* These are only compatible with themselves, which we already
8480 	 checked above.  */
8481       return VOIDmode;
8482     }
8483 }
8484 
8485 /* Return true if we should use an FCOMI instruction for this fp comparison.  */
8486 
8487 int
8488 ix86_use_fcomi_compare (code)
8489      enum rtx_code code ATTRIBUTE_UNUSED;
8490 {
8491   enum rtx_code swapped_code = swap_condition (code);
8492   return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
8493 	  || (ix86_fp_comparison_cost (swapped_code)
8494 	      == ix86_fp_comparison_fcomi_cost (swapped_code)));
8495 }
8496 
8497 /* Swap, force into registers, or otherwise massage the two operands
8498    to a fp comparison.  The operands are updated in place; the new
8499    comparison code is returned.  */
8500 
8501 static enum rtx_code
8502 ix86_prepare_fp_compare_args (code, pop0, pop1)
8503      enum rtx_code code;
8504      rtx *pop0, *pop1;
8505 {
8506   enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
8507   rtx op0 = *pop0, op1 = *pop1;
8508   enum machine_mode op_mode = GET_MODE (op0);
8509   int is_sse = SSE_REG_P (op0) | SSE_REG_P (op1);
8510 
8511   /* All of the unordered compare instructions only work on registers.
8512      The same is true of the XFmode compare instructions, and of the
8513      fcomi compare instructions.  */
8514 
8515   if (!is_sse
8516       && (fpcmp_mode == CCFPUmode
8517 	  || op_mode == XFmode
8518 	  || op_mode == TFmode
8519 	  || ix86_use_fcomi_compare (code)))
8520     {
8521       op0 = force_reg (op_mode, op0);
8522       op1 = force_reg (op_mode, op1);
8523     }
8524   else
8525     {
8526       /* %%% We only allow op1 in memory; op0 must be st(0).  So swap
8527 	 things around if they appear profitable, otherwise force op0
8528 	 into a register.  */
8529 
8530       if (standard_80387_constant_p (op0) == 0
8531 	  || (GET_CODE (op0) == MEM
8532 	      && ! (standard_80387_constant_p (op1) == 0
8533 		    || GET_CODE (op1) == MEM)))
8534 	{
8535 	  rtx tmp;
8536 	  tmp = op0, op0 = op1, op1 = tmp;
8537 	  code = swap_condition (code);
8538 	}
8539 
8540       if (GET_CODE (op0) != REG)
8541 	op0 = force_reg (op_mode, op0);
8542 
8543       if (CONSTANT_P (op1))
8544 	{
8545 	  if (standard_80387_constant_p (op1))
8546 	    op1 = force_reg (op_mode, op1);
8547 	  else
8548 	    op1 = validize_mem (force_const_mem (op_mode, op1));
8549 	}
8550     }
8551 
8552   /* Try to rearrange the comparison to make it cheaper.  */
8553   if (ix86_fp_comparison_cost (code)
8554       > ix86_fp_comparison_cost (swap_condition (code))
8555       && (GET_CODE (op1) == REG || !no_new_pseudos))
8556     {
8557       rtx tmp;
8558       tmp = op0, op0 = op1, op1 = tmp;
8559       code = swap_condition (code);
8560       if (GET_CODE (op0) != REG)
8561 	op0 = force_reg (op_mode, op0);
8562     }
8563 
8564   *pop0 = op0;
8565   *pop1 = op1;
8566   return code;
8567 }
8568 
8569 /* Convert comparison codes we use to represent FP comparison to integer
8570    code that will result in proper branch.  Return UNKNOWN if no such code
8571    is available.  */
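/* (After an FP compare, the flags behave like the result of an unsigned
   integer comparison - see the fcomi flag table in ix86_fp_comparison_codes
   below - hence e.g. GT maps to GTU and UNLT maps to LTU.)  */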
8572 static enum rtx_code
8573 ix86_fp_compare_code_to_integer (code)
8574      enum rtx_code code;
8575 {
8576   switch (code)
8577     {
8578     case GT:
8579       return GTU;
8580     case GE:
8581       return GEU;
8582     case ORDERED:
8583     case UNORDERED:
8584       return code;
8585       break;
8586     case UNEQ:
8587       return EQ;
8588       break;
8589     case UNLT:
8590       return LTU;
8591       break;
8592     case UNLE:
8593       return LEU;
8594       break;
8595     case LTGT:
8596       return NE;
8597       break;
8598     default:
8599       return UNKNOWN;
8600     }
8601 }
8602 
8603 /* Split comparison code CODE into comparisons we can do using branch
8604    instructions.  BYPASS_CODE is the comparison code for a branch that will
8605    branch around FIRST_CODE and SECOND_CODE.  If one of the branches
8606    is not required, its value is set to NIL.
8607    We never require more than two branches.  */
8608 static void
8609 ix86_fp_comparison_codes (code, bypass_code, first_code, second_code)
8610      enum rtx_code code, *bypass_code, *first_code, *second_code;
8611 {
8612   *first_code = code;
8613   *bypass_code = NIL;
8614   *second_code = NIL;
8615 
8616   /* The fcomi comparison sets flags as follows:
8617 
8618      cmp    ZF PF CF
8619      >      0  0  0
8620      <      0  0  1
8621      =      1  0  0
8622      un     1  1  1 */
8623 
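  /* For example, a plain LT cannot be tested with a single branch under
     IEEE math, since CF=1 both for < and for unordered; the switch below
     therefore splits it into an UNORDERED bypass branch plus an UNLT
     (CF=1) test.  */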
8624   switch (code)
8625     {
8626     case GT:			/* GTU - CF=0 & ZF=0 */
8627     case GE:			/* GEU - CF=0 */
8628     case ORDERED:		/* PF=0 */
8629     case UNORDERED:		/* PF=1 */
8630     case UNEQ:			/* EQ - ZF=1 */
8631     case UNLT:			/* LTU - CF=1 */
8632     case UNLE:			/* LEU - CF=1 | ZF=1 */
8633     case LTGT:			/* EQ - ZF=0 */
8634       break;
8635     case LT:			/* LTU - CF=1 - fails on unordered */
8636       *first_code = UNLT;
8637       *bypass_code = UNORDERED;
8638       break;
8639     case LE:			/* LEU - CF=1 | ZF=1 - fails on unordered */
8640       *first_code = UNLE;
8641       *bypass_code = UNORDERED;
8642       break;
8643     case EQ:			/* EQ - ZF=1 - fails on unordered */
8644       *first_code = UNEQ;
8645       *bypass_code = UNORDERED;
8646       break;
8647     case NE:			/* NE - ZF=0 - fails on unordered */
8648       *first_code = LTGT;
8649       *second_code = UNORDERED;
8650       break;
8651     case UNGE:			/* GEU - CF=0 - fails on unordered */
8652       *first_code = GE;
8653       *second_code = UNORDERED;
8654       break;
8655     case UNGT:			/* GTU - CF=0 & ZF=0 - fails on unordered */
8656       *first_code = GT;
8657       *second_code = UNORDERED;
8658       break;
8659     default:
8660       abort ();
8661     }
8662   if (!TARGET_IEEE_FP)
8663     {
8664       *second_code = NIL;
8665       *bypass_code = NIL;
8666     }
8667 }
8668 
8669 /* Return the cost of a comparison done with fcom + arithmetic operations on AX.
8670    All following functions use the number of instructions as the cost metric.
8671    In the future this should be tweaked to compute bytes for optimize_size and
8672    take into account the performance of various instructions on various CPUs.  */
8673 static int
8674 ix86_fp_comparison_arithmetics_cost (code)
8675      enum rtx_code code;
8676 {
8677   if (!TARGET_IEEE_FP)
8678     return 4;
8679   /* The cost of code output by ix86_expand_fp_compare.  */
8680   switch (code)
8681     {
8682     case UNLE:
8683     case UNLT:
8684     case LTGT:
8685     case GT:
8686     case GE:
8687     case UNORDERED:
8688     case ORDERED:
8689     case UNEQ:
8690       return 4;
8691       break;
8692     case LT:
8693     case NE:
8694     case EQ:
8695     case UNGE:
8696       return 5;
8697       break;
8698     case LE:
8699     case UNGT:
8700       return 6;
8701       break;
8702     default:
8703       abort ();
8704     }
8705 }
8706 
8707 /* Return cost of comparison done using fcomi operation.
8708    See ix86_fp_comparison_arithmetics_cost for the metrics.  */
8709 static int
8710 ix86_fp_comparison_fcomi_cost (code)
8711      enum rtx_code code;
8712 {
8713   enum rtx_code bypass_code, first_code, second_code;
8714   /* Return an arbitrarily high cost when the instruction is not supported - this
8715      prevents gcc from using it.  */
8716   if (!TARGET_CMOVE)
8717     return 1024;
8718   ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8719   return (bypass_code != NIL || second_code != NIL) + 2;
8720 }
8721 
8722 /* Return cost of comparison done using sahf operation.
8723    See ix86_fp_comparison_arithmetics_cost for the metrics.  */
8724 static int
8725 ix86_fp_comparison_sahf_cost (code)
8726      enum rtx_code code;
8727 {
8728   enum rtx_code bypass_code, first_code, second_code;
8729   /* Return an arbitrarily high cost when the instruction is not preferred - this
8730      prevents gcc from using it.  */
8731   if (!TARGET_USE_SAHF && !optimize_size)
8732     return 1024;
8733   ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8734   return (bypass_code != NIL || second_code != NIL) + 3;
8735 }
8736 
8737 /* Compute cost of the comparison done using any method.
8738    See ix86_fp_comparison_arithmetics_cost for the metrics.  */
8739 static int
8740 ix86_fp_comparison_cost (code)
8741      enum rtx_code code;
8742 {
8743   int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
8744   int min;
8745 
8746   fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
8747   sahf_cost = ix86_fp_comparison_sahf_cost (code);
8748 
8749   min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
8750   if (min > sahf_cost)
8751     min = sahf_cost;
8752   if (min > fcomi_cost)
8753     min = fcomi_cost;
8754   return min;
8755 }
8756 
8757 /* Generate insn patterns to do a floating point compare of OPERANDS.  */
8758 
8759 static rtx
8760 ix86_expand_fp_compare (code, op0, op1, scratch, second_test, bypass_test)
8761      enum rtx_code code;
8762      rtx op0, op1, scratch;
8763      rtx *second_test;
8764      rtx *bypass_test;
8765 {
8766   enum machine_mode fpcmp_mode, intcmp_mode;
8767   rtx tmp, tmp2;
8768   int cost = ix86_fp_comparison_cost (code);
8769   enum rtx_code bypass_code, first_code, second_code;
8770 
8771   fpcmp_mode = ix86_fp_compare_mode (code);
8772   code = ix86_prepare_fp_compare_args (code, &op0, &op1);
8773 
8774   if (second_test)
8775     *second_test = NULL_RTX;
8776   if (bypass_test)
8777     *bypass_test = NULL_RTX;
8778 
8779   ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8780 
8781   /* Do fcomi/sahf based test when profitable.  */
8782   if ((bypass_code == NIL || bypass_test)
8783       && (second_code == NIL || second_test)
8784       && ix86_fp_comparison_arithmetics_cost (code) > cost)
8785     {
8786       if (TARGET_CMOVE)
8787 	{
8788 	  tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8789 	  tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
8790 			     tmp);
8791 	  emit_insn (tmp);
8792 	}
8793       else
8794 	{
8795 	  tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8796 	  tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
8797 	  if (!scratch)
8798 	    scratch = gen_reg_rtx (HImode);
8799 	  emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
8800 	  emit_insn (gen_x86_sahf_1 (scratch));
8801 	}
8802 
8803       /* The FP codes work out to act like unsigned.  */
8804       intcmp_mode = fpcmp_mode;
8805       code = first_code;
8806       if (bypass_code != NIL)
8807 	*bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
8808 				       gen_rtx_REG (intcmp_mode, FLAGS_REG),
8809 				       const0_rtx);
8810       if (second_code != NIL)
8811 	*second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
8812 				       gen_rtx_REG (intcmp_mode, FLAGS_REG),
8813 				       const0_rtx);
8814     }
8815   else
8816     {
8817       /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first.  */
8818       tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8819       tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
8820       if (!scratch)
8821 	scratch = gen_reg_rtx (HImode);
8822       emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
8823 
8824       /* In the unordered case, we have to check C2 for NaNs, which
8825 	 doesn't happen to work out to anything nice combination-wise.
8826 	 So do some bit twiddling on the value we've got in AH to come
8827 	 up with an appropriate set of condition codes.  */
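      /* After fnstsw, AH holds bits 8-15 of the FPU status word: C0 shows
	 up as 0x01, C2 as 0x04 and C3 as 0x40, so the mask 0x45 used below
	 selects C3|C2|C0.  (When sahf is used instead, those bits land in
	 ZF, PF and CF respectively.)  */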
8828 
8829       intcmp_mode = CCNOmode;
8830       switch (code)
8831 	{
8832 	case GT:
8833 	case UNGT:
8834 	  if (code == GT || !TARGET_IEEE_FP)
8835 	    {
8836 	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
8837 	      code = EQ;
8838 	    }
8839 	  else
8840 	    {
8841 	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8842 	      emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
8843 	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
8844 	      intcmp_mode = CCmode;
8845 	      code = GEU;
8846 	    }
8847 	  break;
8848 	case LT:
8849 	case UNLT:
8850 	  if (code == LT && TARGET_IEEE_FP)
8851 	    {
8852 	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8853 	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
8854 	      intcmp_mode = CCmode;
8855 	      code = EQ;
8856 	    }
8857 	  else
8858 	    {
8859 	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
8860 	      code = NE;
8861 	    }
8862 	  break;
8863 	case GE:
8864 	case UNGE:
8865 	  if (code == GE || !TARGET_IEEE_FP)
8866 	    {
8867 	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
8868 	      code = EQ;
8869 	    }
8870 	  else
8871 	    {
8872 	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8873 	      emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
8874 					     GEN_INT (0x01)));
8875 	      code = NE;
8876 	    }
8877 	  break;
8878 	case LE:
8879 	case UNLE:
8880 	  if (code == LE && TARGET_IEEE_FP)
8881 	    {
8882 	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8883 	      emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
8884 	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
8885 	      intcmp_mode = CCmode;
8886 	      code = LTU;
8887 	    }
8888 	  else
8889 	    {
8890 	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
8891 	      code = NE;
8892 	    }
8893 	  break;
8894 	case EQ:
8895 	case UNEQ:
8896 	  if (code == EQ && TARGET_IEEE_FP)
8897 	    {
8898 	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8899 	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
8900 	      intcmp_mode = CCmode;
8901 	      code = EQ;
8902 	    }
8903 	  else
8904 	    {
8905 	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
8906 	      code = NE;
8907 	      break;
8908 	    }
8909 	  break;
8910 	case NE:
8911 	case LTGT:
8912 	  if (code == NE && TARGET_IEEE_FP)
8913 	    {
8914 	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8915 	      emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
8916 					     GEN_INT (0x40)));
8917 	      code = NE;
8918 	    }
8919 	  else
8920 	    {
8921 	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
8922 	      code = EQ;
8923 	    }
8924 	  break;
8925 
8926 	case UNORDERED:
8927 	  emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
8928 	  code = NE;
8929 	  break;
8930 	case ORDERED:
8931 	  emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
8932 	  code = EQ;
8933 	  break;
8934 
8935 	default:
8936 	  abort ();
8937 	}
8938     }
8939 
8940   /* Return the test that should be put into the flags user, i.e.
8941      the bcc, scc, or cmov instruction.  */
8942   return gen_rtx_fmt_ee (code, VOIDmode,
8943 			 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8944 			 const0_rtx);
8945 }
8946 
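/* Expand a comparison of ix86_compare_op0 and ix86_compare_op1 under CODE
   and return the flag test to be put into the flags user.  For FP
   comparisons, *SECOND_TEST and *BYPASS_TEST may be set to additional
   tests that must be honored (see ix86_expand_fp_compare).  */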
8947 rtx
8948 ix86_expand_compare (code, second_test, bypass_test)
8949      enum rtx_code code;
8950      rtx *second_test, *bypass_test;
8951 {
8952   rtx op0, op1, ret;
8953   op0 = ix86_compare_op0;
8954   op1 = ix86_compare_op1;
8955 
8956   if (second_test)
8957     *second_test = NULL_RTX;
8958   if (bypass_test)
8959     *bypass_test = NULL_RTX;
8960 
8961   if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
8962     ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
8963 				  second_test, bypass_test);
8964   else
8965     ret = ix86_expand_int_compare (code, op0, op1);
8966 
8967   return ret;
8968 }
8969 
8970 /* Return true if the CODE will result in a nontrivial jump sequence.  */
8971 bool
8972 ix86_fp_jump_nontrivial_p (code)
8973     enum rtx_code code;
8974 {
8975   enum rtx_code bypass_code, first_code, second_code;
8976   if (!TARGET_CMOVE)
8977     return true;
8978   ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8979   return bypass_code != NIL || second_code != NIL;
8980 }
8981 
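/* Expand a conditional branch testing ix86_compare_op0 against
   ix86_compare_op1 under CODE, jumping to LABEL when the condition
   holds.  */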
8982 void
8983 ix86_expand_branch (code, label)
8984      enum rtx_code code;
8985      rtx label;
8986 {
8987   rtx tmp;
8988 
8989   switch (GET_MODE (ix86_compare_op0))
8990     {
8991     case QImode:
8992     case HImode:
8993     case SImode:
8994       simple:
8995       tmp = ix86_expand_compare (code, NULL, NULL);
8996       tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
8997 				  gen_rtx_LABEL_REF (VOIDmode, label),
8998 				  pc_rtx);
8999       emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
9000       return;
9001 
9002     case SFmode:
9003     case DFmode:
9004     case XFmode:
9005     case TFmode:
9006       {
9007 	rtvec vec;
9008 	int use_fcomi;
9009 	enum rtx_code bypass_code, first_code, second_code;
9010 
9011 	code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
9012 					     &ix86_compare_op1);
9013 
9014 	ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9015 
9016 	/* Check whether we will use the natural sequence with one jump.  If
9017 	   so, we can expand the jump early.  Otherwise delay expansion by
9018 	   creating a compound insn so as not to confuse optimizers.  */
9019 	if (bypass_code == NIL && second_code == NIL
9020 	    && TARGET_CMOVE)
9021 	  {
9022 	    ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
9023 				  gen_rtx_LABEL_REF (VOIDmode, label),
9024 				  pc_rtx, NULL_RTX);
9025 	  }
9026 	else
9027 	  {
9028 	    tmp = gen_rtx_fmt_ee (code, VOIDmode,
9029 				  ix86_compare_op0, ix86_compare_op1);
9030 	    tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
9031 					gen_rtx_LABEL_REF (VOIDmode, label),
9032 					pc_rtx);
9033 	    tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
9034 
9035 	    use_fcomi = ix86_use_fcomi_compare (code);
9036 	    vec = rtvec_alloc (3 + !use_fcomi);
9037 	    RTVEC_ELT (vec, 0) = tmp;
9038 	    RTVEC_ELT (vec, 1)
9039 	      = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
9040 	    RTVEC_ELT (vec, 2)
9041 	      = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
9042 	    if (! use_fcomi)
9043 	      RTVEC_ELT (vec, 3)
9044 		= gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
9045 
9046 	    emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
9047 	  }
9048 	return;
9049       }
9050 
9051     case DImode:
9052       if (TARGET_64BIT)
9053 	goto simple;
9054       /* Expand DImode branch into multiple compare+branch.  */
9055       {
9056 	rtx lo[2], hi[2], label2;
9057 	enum rtx_code code1, code2, code3;
9058 
9059 	if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
9060 	  {
9061 	    tmp = ix86_compare_op0;
9062 	    ix86_compare_op0 = ix86_compare_op1;
9063 	    ix86_compare_op1 = tmp;
9064 	    code = swap_condition (code);
9065 	  }
9066 	split_di (&ix86_compare_op0, 1, lo+0, hi+0);
9067 	split_di (&ix86_compare_op1, 1, lo+1, hi+1);
9068 
9069 	/* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
9070 	   avoid two branches.  This costs one extra insn, so disable when
9071 	   optimizing for size.  */
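	/* Roughly (a hypothetical register assignment; the expanders below
	   pick the actual operands):
		xorl	hi1, hi0
		xorl	lo1, lo0
		orl	lo0, hi0
	   followed by a single jz/jnz on the result.  */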
9072 
9073 	if ((code == EQ || code == NE)
9074 	    && (!optimize_size
9075 	        || hi[1] == const0_rtx || lo[1] == const0_rtx))
9076 	  {
9077 	    rtx xor0, xor1;
9078 
9079 	    xor1 = hi[0];
9080 	    if (hi[1] != const0_rtx)
9081 	      xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
9082 				   NULL_RTX, 0, OPTAB_WIDEN);
9083 
9084 	    xor0 = lo[0];
9085 	    if (lo[1] != const0_rtx)
9086 	      xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
9087 				   NULL_RTX, 0, OPTAB_WIDEN);
9088 
9089 	    tmp = expand_binop (SImode, ior_optab, xor1, xor0,
9090 				NULL_RTX, 0, OPTAB_WIDEN);
9091 
9092 	    ix86_compare_op0 = tmp;
9093 	    ix86_compare_op1 = const0_rtx;
9094 	    ix86_expand_branch (code, label);
9095 	    return;
9096 	  }
9097 
9098 	/* Otherwise, if we are doing a less-than or greater-or-equal-than
9099 	   comparison, op1 is a constant, and the low word is zero, then we can just
9100 	   examine the high word.  */
9101 
9102 	if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
9103 	  switch (code)
9104 	    {
9105 	    case LT: case LTU: case GE: case GEU:
9106 	      ix86_compare_op0 = hi[0];
9107 	      ix86_compare_op1 = hi[1];
9108 	      ix86_expand_branch (code, label);
9109 	      return;
9110 	    default:
9111 	      break;
9112 	    }
9113 
9114 	/* Otherwise, we need two or three jumps.  */
9115 
9116 	label2 = gen_label_rtx ();
9117 
9118 	code1 = code;
9119 	code2 = swap_condition (code);
9120 	code3 = unsigned_condition (code);
9121 
9122 	switch (code)
9123 	  {
9124 	  case LT: case GT: case LTU: case GTU:
9125 	    break;
9126 
9127 	  case LE:   code1 = LT;  code2 = GT;  break;
9128 	  case GE:   code1 = GT;  code2 = LT;  break;
9129 	  case LEU:  code1 = LTU; code2 = GTU; break;
9130 	  case GEU:  code1 = GTU; code2 = LTU; break;
9131 
9132 	  case EQ:   code1 = NIL; code2 = NE;  break;
9133 	  case NE:   code2 = NIL; break;
9134 
9135 	  default:
9136 	    abort ();
9137 	  }
9138 
9139 	/*
9140 	 * a < b =>
9141 	 *    if (hi(a) < hi(b)) goto true;
9142 	 *    if (hi(a) > hi(b)) goto false;
9143 	 *    if (lo(a) < lo(b)) goto true;
9144 	 *  false:
9145 	 */
9146 
9147 	ix86_compare_op0 = hi[0];
9148 	ix86_compare_op1 = hi[1];
9149 
9150 	if (code1 != NIL)
9151 	  ix86_expand_branch (code1, label);
9152 	if (code2 != NIL)
9153 	  ix86_expand_branch (code2, label2);
9154 
9155 	ix86_compare_op0 = lo[0];
9156 	ix86_compare_op1 = lo[1];
9157 	ix86_expand_branch (code3, label);
9158 
9159 	if (code2 != NIL)
9160 	  emit_label (label2);
9161 	return;
9162       }
9163 
9164     default:
9165       abort ();
9166     }
9167 }
9168 
9169 /* Split branch based on floating point condition.  */
9170 void
9171 ix86_split_fp_branch (code, op1, op2, target1, target2, tmp)
9172      enum rtx_code code;
9173      rtx op1, op2, target1, target2, tmp;
9174 {
9175   rtx second, bypass;
9176   rtx label = NULL_RTX;
9177   rtx condition;
9178   int bypass_probability = -1, second_probability = -1, probability = -1;
9179   rtx i;
9180 
9181   if (target2 != pc_rtx)
9182     {
9183       rtx tmp = target2;
9184       code = reverse_condition_maybe_unordered (code);
9185       target2 = target1;
9186       target1 = tmp;
9187     }
9188 
9189   condition = ix86_expand_fp_compare (code, op1, op2,
9190 				      tmp, &second, &bypass);
9191 
9192   if (split_branch_probability >= 0)
9193     {
9194       /* Distribute the probabilities across the jumps.
9195 	 Assume that BYPASS and SECOND always test
9196 	 for UNORDERED.  */
9197       probability = split_branch_probability;
9198 
9199       /* A value of 1 is low enough that the probability does not
9200 	 need to be updated.  Later we may run some experiments and see
9201 	 if unordered values are more frequent in practice.  */
9202       if (bypass)
9203 	bypass_probability = 1;
9204       if (second)
9205 	second_probability = 1;
9206     }
9207   if (bypass != NULL_RTX)
9208     {
9209       label = gen_label_rtx ();
9210       i = emit_jump_insn (gen_rtx_SET
9211 			  (VOIDmode, pc_rtx,
9212 			   gen_rtx_IF_THEN_ELSE (VOIDmode,
9213 						 bypass,
9214 						 gen_rtx_LABEL_REF (VOIDmode,
9215 								    label),
9216 						 pc_rtx)));
9217       if (bypass_probability >= 0)
9218 	REG_NOTES (i)
9219 	  = gen_rtx_EXPR_LIST (REG_BR_PROB,
9220 			       GEN_INT (bypass_probability),
9221 			       REG_NOTES (i));
9222     }
9223   i = emit_jump_insn (gen_rtx_SET
9224 		      (VOIDmode, pc_rtx,
9225 		       gen_rtx_IF_THEN_ELSE (VOIDmode,
9226 					     condition, target1, target2)));
9227   if (probability >= 0)
9228     REG_NOTES (i)
9229       = gen_rtx_EXPR_LIST (REG_BR_PROB,
9230 			   GEN_INT (probability),
9231 			   REG_NOTES (i));
9232   if (second != NULL_RTX)
9233     {
9234       i = emit_jump_insn (gen_rtx_SET
9235 			  (VOIDmode, pc_rtx,
9236 			   gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
9237 						 target2)));
9238       if (second_probability >= 0)
9239 	REG_NOTES (i)
9240 	  = gen_rtx_EXPR_LIST (REG_BR_PROB,
9241 			       GEN_INT (second_probability),
9242 			       REG_NOTES (i));
9243     }
9244   if (label != NULL_RTX)
9245     emit_label (label);
9246 }
9247 
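/* Store the result of comparison CODE on ix86_compare_op0 and
   ix86_compare_op1 into the QImode register DEST.  Returns 1 when done,
   or 0 to make the expander FAIL (DImode comparisons on 32-bit
   targets).  */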
9248 int
9249 ix86_expand_setcc (code, dest)
9250      enum rtx_code code;
9251      rtx dest;
9252 {
9253   rtx ret, tmp, tmpreg;
9254   rtx second_test, bypass_test;
9255 
9256   if (GET_MODE (ix86_compare_op0) == DImode
9257       && !TARGET_64BIT)
9258     return 0; /* FAIL */
9259 
9260   if (GET_MODE (dest) != QImode)
9261     abort ();
9262 
9263   ret = ix86_expand_compare (code, &second_test, &bypass_test);
9264   PUT_MODE (ret, QImode);
9265 
9266   tmp = dest;
9267   tmpreg = dest;
9268 
9269   emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
9270   if (bypass_test || second_test)
9271     {
9272       rtx test = second_test;
9273       int bypass = 0;
9274       rtx tmp2 = gen_reg_rtx (QImode);
9275       if (bypass_test)
9276 	{
9277 	  if (second_test)
9278 	    abort ();
9279 	  test = bypass_test;
9280 	  bypass = 1;
9281 	  PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
9282 	}
9283       PUT_MODE (test, QImode);
9284       emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
9285 
9286       if (bypass)
9287 	emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
9288       else
9289 	emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
9290     }
9291 
9292   return 1; /* DONE */
9293 }
9294 
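/* Expand an integer conditional move, operands[0] = operands[1] (a
   comparison) ? operands[2] : operands[3], using sbb- and setcc-based
   sequences where profitable and cmov otherwise.  Returns 1 when all
   insns were emitted, or 0 to make the expander FAIL.  */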
9295 int
9296 ix86_expand_int_movcc (operands)
9297      rtx operands[];
9298 {
9299   enum rtx_code code = GET_CODE (operands[1]), compare_code;
9300   rtx compare_seq, compare_op;
9301   rtx second_test, bypass_test;
9302   enum machine_mode mode = GET_MODE (operands[0]);
9303 
9304   /* When the compare code is not LTU or GEU, we cannot use the sbbl case.
9305      In case the comparison is done with an immediate, we can convert it to
9306      LTU or GEU by altering the integer.  */
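  /* For example, the unsigned test x <= 4 (LEU) becomes x < 5 (LTU).  */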
9307 
9308   if ((code == LEU || code == GTU)
9309       && GET_CODE (ix86_compare_op1) == CONST_INT
9310       && mode != HImode
9311       && INTVAL (ix86_compare_op1) != -1
9312       /* For x86-64, the immediate field in the instruction is 32-bit
9313 	 signed, so we can't increment a DImode value above 0x7fffffff.  */
9314       && (!TARGET_64BIT
9315 	  || GET_MODE (ix86_compare_op0) != DImode
9316 	  || INTVAL (ix86_compare_op1) != 0x7fffffff)
9317       && GET_CODE (operands[2]) == CONST_INT
9318       && GET_CODE (operands[3]) == CONST_INT)
9319     {
9320       if (code == LEU)
9321 	code = LTU;
9322       else
9323 	code = GEU;
9324       ix86_compare_op1 = gen_int_mode (INTVAL (ix86_compare_op1) + 1,
9325 				       GET_MODE (ix86_compare_op0));
9326     }
9327 
9328   start_sequence ();
9329   compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9330   compare_seq = get_insns ();
9331   end_sequence ();
9332 
9333   compare_code = GET_CODE (compare_op);
9334 
9335   /* Don't attempt mode expansion here -- if we had to expand 5 or 6
9336      HImode insns, we'd be swallowed in word prefix ops.  */
9337 
9338   if (mode != HImode
9339       && (mode != DImode || TARGET_64BIT)
9340       && GET_CODE (operands[2]) == CONST_INT
9341       && GET_CODE (operands[3]) == CONST_INT)
9342     {
9343       rtx out = operands[0];
9344       HOST_WIDE_INT ct = INTVAL (operands[2]);
9345       HOST_WIDE_INT cf = INTVAL (operands[3]);
9346       HOST_WIDE_INT diff;
9347 
9348       if ((compare_code == LTU || compare_code == GEU)
9349 	  && !second_test && !bypass_test)
9350 	{
9351 	  /* Detect overlap between destination and compare sources.  */
9352 	  rtx tmp = out;
9353 
9354 	  /* To simplify rest of code, restrict to the GEU case.  */
9355 	  if (compare_code == LTU)
9356 	    {
9357 	      HOST_WIDE_INT tmp = ct;
9358 	      ct = cf;
9359 	      cf = tmp;
9360 	      compare_code = reverse_condition (compare_code);
9361 	      code = reverse_condition (code);
9362 	    }
9363 	  diff = ct - cf;
9364 
9365 	  if (reg_overlap_mentioned_p (out, ix86_compare_op0)
9366 	      || reg_overlap_mentioned_p (out, ix86_compare_op1))
9367 	    tmp = gen_reg_rtx (mode);
9368 
9369 	  emit_insn (compare_seq);
9370 	  if (mode == DImode)
9371 	    emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp));
9372 	  else
9373 	    emit_insn (gen_x86_movsicc_0_m1 (tmp));
9374 
9375 	  if (diff == 1)
9376 	    {
9377 	      /*
9378 	       * cmpl op0,op1
9379 	       * sbbl dest,dest
9380 	       * [addl dest, ct]
9381 	       *
9382 	       * Size 5 - 8.
9383 	       */
9384 	      if (ct)
9385 	       	tmp = expand_simple_binop (mode, PLUS,
9386 					   tmp, GEN_INT (ct),
9387 					   tmp, 1, OPTAB_DIRECT);
9388 	    }
9389 	  else if (cf == -1)
9390 	    {
9391 	      /*
9392 	       * cmpl op0,op1
9393 	       * sbbl dest,dest
9394 	       * orl $ct, dest
9395 	       *
9396 	       * Size 8.
9397 	       */
9398 	      tmp = expand_simple_binop (mode, IOR,
9399 					 tmp, GEN_INT (ct),
9400 					 tmp, 1, OPTAB_DIRECT);
9401 	    }
9402 	  else if (diff == -1 && ct)
9403 	    {
9404 	      /*
9405 	       * cmpl op0,op1
9406 	       * sbbl dest,dest
9407 	       * notl dest
9408 	       * [addl dest, cf]
9409 	       *
9410 	       * Size 8 - 11.
9411 	       */
9412 	      tmp = expand_simple_unop (mode, NOT, tmp, tmp, 1);
9413 	      if (cf)
9414 	       	tmp = expand_simple_binop (mode, PLUS,
9415 					   tmp, GEN_INT (cf),
9416 					   tmp, 1, OPTAB_DIRECT);
9417 	    }
9418 	  else
9419 	    {
9420 	      /*
9421 	       * cmpl op0,op1
9422 	       * sbbl dest,dest
9423 	       * [notl dest]
9424 	       * andl cf - ct, dest
9425 	       * [addl dest, ct]
9426 	       *
9427 	       * Size 8 - 11.
9428 	       */
9429 
9430 	      if (cf == 0)
9431 		{
9432 		  cf = ct;
9433 		  ct = 0;
9434 		  tmp = expand_simple_unop (mode, NOT, tmp, tmp, 1);
9435 		}
9436 
9437 	      tmp = expand_simple_binop (mode, AND,
9438 					 tmp,
9439 					 gen_int_mode (cf - ct, mode),
9440 					 tmp, 1, OPTAB_DIRECT);
9441 	      if (ct)
9442 	       	tmp = expand_simple_binop (mode, PLUS,
9443 					   tmp, GEN_INT (ct),
9444 					   tmp, 1, OPTAB_DIRECT);
9445 	    }
9446 
9447 	  if (tmp != out)
9448 	    emit_move_insn (out, tmp);
9449 
9450 	  return 1; /* DONE */
9451 	}
9452 
9453       diff = ct - cf;
9454       if (diff < 0)
9455 	{
9456 	  HOST_WIDE_INT tmp;
9457 	  tmp = ct, ct = cf, cf = tmp;
9458 	  diff = -diff;
9459 	  if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
9460 	    {
9461 	      /* We may be reversing an unordered compare to a normal compare, which
9462 		 is not valid in general (we may convert a non-trapping condition
9463 		 to a trapping one), however on i386 we currently emit all
9464 		 comparisons unordered.  */
9465 	      compare_code = reverse_condition_maybe_unordered (compare_code);
9466 	      code = reverse_condition_maybe_unordered (code);
9467 	    }
9468 	  else
9469 	    {
9470 	      compare_code = reverse_condition (compare_code);
9471 	      code = reverse_condition (code);
9472 	    }
9473 	}
9474 
9475       compare_code = NIL;
9476       if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
9477 	  && GET_CODE (ix86_compare_op1) == CONST_INT)
9478 	{
9479 	  if (ix86_compare_op1 == const0_rtx
9480 	      && (code == LT || code == GE))
9481 	    compare_code = code;
9482 	  else if (ix86_compare_op1 == constm1_rtx)
9483 	    {
9484 	      if (code == LE)
9485 		compare_code = LT;
9486 	      else if (code == GT)
9487 		compare_code = GE;
9488 	    }
9489 	}
9490 
9491       /* Optimize dest = (op0 < 0) ? -1 : cf.  */
9492       if (compare_code != NIL
9493 	  && GET_MODE (ix86_compare_op0) == GET_MODE (out)
9494 	  && (cf == -1 || ct == -1))
9495 	{
9496 	  /* If lea code below could be used, only optimize
9497 	     if it results in a 2 insn sequence.  */
9498 
9499 	  if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
9500 		 || diff == 3 || diff == 5 || diff == 9)
9501 	      || (compare_code == LT && ct == -1)
9502 	      || (compare_code == GE && cf == -1))
9503 	    {
9504 	      /*
9505 	       * notl op1	(if necessary)
9506 	       * sarl $31, op1
9507 	       * orl cf, op1
9508 	       */
9509 	      if (ct != -1)
9510 		{
9511 		  cf = ct;
9512 	  	  ct = -1;
9513 		  code = reverse_condition (code);
9514 		}
9515 
9516 	      out = emit_store_flag (out, code, ix86_compare_op0,
9517 				     ix86_compare_op1, VOIDmode, 0, -1);
9518 
9519 	      out = expand_simple_binop (mode, IOR,
9520 					 out, GEN_INT (cf),
9521 					 out, 1, OPTAB_DIRECT);
9522 	      if (out != operands[0])
9523 		emit_move_insn (operands[0], out);
9524 
9525 	      return 1; /* DONE */
9526 	    }
9527 	}
9528 
9529       if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
9530 	   || diff == 3 || diff == 5 || diff == 9)
9531 	  && (mode != DImode || x86_64_sign_extended_value (GEN_INT (cf))))
9532 	{
9533 	  /*
9534 	   * xorl dest,dest
9535 	   * cmpl op1,op2
9536 	   * setcc dest
9537 	   * lea cf(dest*(ct-cf)),dest
9538 	   *
9539 	   * Size 14.
9540 	   *
9541 	   * This also catches the degenerate setcc-only case.
9542 	   */
9543 
9544 	  rtx tmp;
9545 	  int nops;
9546 
9547 	  out = emit_store_flag (out, code, ix86_compare_op0,
9548 				 ix86_compare_op1, VOIDmode, 0, 1);
9549 
9550 	  nops = 0;
9551 	  /* On x86_64 the lea instruction operates on Pmode, so we need
9552 	     to get the arithmetic done in the proper mode to match.  */
9553 	  if (diff == 1)
9554 	    tmp = copy_rtx (out);
9555 	  else
9556 	    {
9557 	      rtx out1;
9558 	      out1 = copy_rtx (out);
9559 	      tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
9560 	      nops++;
9561 	      if (diff & 1)
9562 		{
9563 		  tmp = gen_rtx_PLUS (mode, tmp, out1);
9564 		  nops++;
9565 		}
9566 	    }
9567 	  if (cf != 0)
9568 	    {
9569 	      tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
9570 	      nops++;
9571 	    }
9572 	  if (tmp != out
9573 	      && (GET_CODE (tmp) != SUBREG || SUBREG_REG (tmp) != out))
9574 	    {
9575 	      if (nops == 1)
9576 		out = force_operand (tmp, copy_rtx (out));
9577 	      else
9578 		emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
9579 	    }
9580 	  if (out != operands[0])
9581 	    emit_move_insn (operands[0], copy_rtx (out));
9582 
9583 	  return 1; /* DONE */
9584 	}
9585 
9586       /*
9587        * General case:			Jumpful:
9588        *   xorl dest,dest		cmpl op1, op2
9589        *   cmpl op1, op2		movl ct, dest
9590        *   setcc dest			jcc 1f
9591        *   decl dest			movl cf, dest
9592        *   andl (cf-ct),dest		1:
9593        *   addl ct,dest
9594        *
9595        * Size 20.			Size 14.
9596        *
9597        * This is reasonably steep, but branch mispredict costs are
9598        * high on modern cpus, so consider failing only if optimizing
9599        * for space.
9600        *
9601        * %%% Parameterize branch_cost on the tuning architecture, then
9602        * use that.  The 80386 couldn't care less about mispredicts.
9603        */
9604 
9605       if (!optimize_size && !TARGET_CMOVE)
9606 	{
9607 	  if (cf == 0)
9608 	    {
9609 	      cf = ct;
9610 	      ct = 0;
9611 	      if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
9612 		/* We may be reversing an unordered compare to a normal compare,
9613 		   which is not valid in general (we may convert a non-trapping
9614 		   condition to a trapping one), however on i386 we currently
9615 		   emit all comparisons unordered.  */
9616 		code = reverse_condition_maybe_unordered (code);
9617 	      else
9618 		{
9619 		  code = reverse_condition (code);
9620 		  if (compare_code != NIL)
9621 		    compare_code = reverse_condition (compare_code);
9622 		}
9623 	    }
9624 
9625 	  if (compare_code != NIL)
9626 	    {
9627 	      /* notl op1	(if needed)
9628 		 sarl $31, op1
9629 		 andl (cf-ct), op1
9630 	 	 addl ct, op1
9631 
9632 		 For x < 0 (resp. x <= -1) there will be no notl,
9633 		 so if possible swap the constants to get rid of the
9634 		 complement.
9635 		 True/false will be -1/0 while code below (store flag
9636 		 followed by decrement) is 0/-1, so the constants need
9637 		 to be exchanged once more.  */
9638 
9639 	      if (compare_code == GE || !cf)
9640 		{
9641 	  	  code = reverse_condition (code);
9642 		  compare_code = LT;
9643 		}
9644 	      else
9645 		{
9646 		  HOST_WIDE_INT tmp = cf;
9647 	  	  cf = ct;
9648 		  ct = tmp;
9649 		}
9650 
9651 	      out = emit_store_flag (out, code, ix86_compare_op0,
9652 				     ix86_compare_op1, VOIDmode, 0, -1);
9653 	    }
9654 	  else
9655 	    {
9656 	      out = emit_store_flag (out, code, ix86_compare_op0,
9657 				     ix86_compare_op1, VOIDmode, 0, 1);
9658 
9659 	      out = expand_simple_binop (mode, PLUS, out, constm1_rtx,
9660 					 out, 1, OPTAB_DIRECT);
9661 	    }
9662 
9663 	  out = expand_simple_binop (mode, AND, out,
9664 				     gen_int_mode (cf - ct, mode),
9665 				     out, 1, OPTAB_DIRECT);
9666 	  if (ct)
9667 	    out = expand_simple_binop (mode, PLUS, out, GEN_INT (ct),
9668 				       out, 1, OPTAB_DIRECT);
9669 	  if (out != operands[0])
9670 	    emit_move_insn (operands[0], out);
9671 
9672 	  return 1; /* DONE */
9673 	}
9674     }
9675 
9676   if (!TARGET_CMOVE)
9677     {
9678       /* Try a few things more with specific constants and a variable.  */
9679 
9680       optab op;
9681       rtx var, orig_out, out, tmp;
9682 
9683       if (optimize_size)
9684 	return 0; /* FAIL */
9685 
9686       /* If one of the two operands is an interesting constant, load a
9687 	 constant with the above and mask it in with a logical operation.  */
9688 
9689       if (GET_CODE (operands[2]) == CONST_INT)
9690 	{
9691 	  var = operands[3];
9692 	  if (INTVAL (operands[2]) == 0)
9693 	    operands[3] = constm1_rtx, op = and_optab;
9694 	  else if (INTVAL (operands[2]) == -1)
9695 	    operands[3] = const0_rtx, op = ior_optab;
9696 	  else
9697 	    return 0; /* FAIL */
9698 	}
9699       else if (GET_CODE (operands[3]) == CONST_INT)
9700 	{
9701 	  var = operands[2];
9702 	  if (INTVAL (operands[3]) == 0)
9703 	    operands[2] = constm1_rtx, op = and_optab;
9704 	  else if (INTVAL (operands[3]) == -1)
9705 	    operands[2] = const0_rtx, op = ior_optab;
9706 	  else
9707 	    return 0; /* FAIL */
9708 	}
9709       else
9710         return 0; /* FAIL */
9711 
9712       orig_out = operands[0];
9713       tmp = gen_reg_rtx (mode);
9714       operands[0] = tmp;
9715 
9716       /* Recurse to get the constant loaded.  */
9717       if (ix86_expand_int_movcc (operands) == 0)
9718         return 0; /* FAIL */
9719 
9720       /* Mask in the interesting variable.  */
9721       out = expand_binop (mode, op, var, tmp, orig_out, 0,
9722 			  OPTAB_WIDEN);
9723       if (out != orig_out)
9724 	emit_move_insn (orig_out, out);
9725 
9726       return 1; /* DONE */
9727     }
9728 
9729   /*
9730    * For comparison with above,
9731    *
9732    * movl cf,dest
9733    * movl ct,tmp
9734    * cmpl op1,op2
9735    * cmovcc tmp,dest
9736    *
9737    * Size 15.
9738    */
9739 
9740   if (! nonimmediate_operand (operands[2], mode))
9741     operands[2] = force_reg (mode, operands[2]);
9742   if (! nonimmediate_operand (operands[3], mode))
9743     operands[3] = force_reg (mode, operands[3]);
9744 
9745   if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
9746     {
9747       rtx tmp = gen_reg_rtx (mode);
9748       emit_move_insn (tmp, operands[3]);
9749       operands[3] = tmp;
9750     }
9751   if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
9752     {
9753       rtx tmp = gen_reg_rtx (mode);
9754       emit_move_insn (tmp, operands[2]);
9755       operands[2] = tmp;
9756     }
9757   if (! register_operand (operands[2], VOIDmode)
9758       && ! register_operand (operands[3], VOIDmode))
9759     operands[2] = force_reg (mode, operands[2]);
9760 
9761   emit_insn (compare_seq);
9762   emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9763 			  gen_rtx_IF_THEN_ELSE (mode,
9764 						compare_op, operands[2],
9765 						operands[3])));
9766   if (bypass_test)
9767     emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9768 			    gen_rtx_IF_THEN_ELSE (mode,
9769 				  bypass_test,
9770 				  operands[3],
9771 				  operands[0])));
9772   if (second_test)
9773     emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9774 			    gen_rtx_IF_THEN_ELSE (mode,
9775 				  second_test,
9776 				  operands[2],
9777 				  operands[0])));
9778 
9779   return 1; /* DONE */
9780 }
9781 
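/* Expand a floating point conditional move, operands[0] = operands[1] (a
   comparison) ? operands[2] : operands[3], using SSE min/max or the SSE
   conditional move patterns when applicable and fcmov otherwise.
   Returns 1 when done.  */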
9782 int
9783 ix86_expand_fp_movcc (operands)
9784      rtx operands[];
9785 {
9786   enum rtx_code code;
9787   rtx tmp;
9788   rtx compare_op, second_test, bypass_test;
9789 
9790   /* For SF/DFmode conditional moves based on comparisons
9791      in the same mode, we may want to use SSE min/max instructions.  */
9792   if (((TARGET_SSE_MATH && GET_MODE (operands[0]) == SFmode)
9793        || (TARGET_SSE2 && TARGET_SSE_MATH && GET_MODE (operands[0]) == DFmode))
9794       && GET_MODE (ix86_compare_op0) == GET_MODE (operands[0])
9795       /* The SSE comparisons do not support the LTGT/UNEQ pair.  */
9796       && (!TARGET_IEEE_FP
9797 	  || (GET_CODE (operands[1]) != LTGT && GET_CODE (operands[1]) != UNEQ))
9798       /* We may be called from the post-reload splitter.  */
9799       && (!REG_P (operands[0])
9800 	  || SSE_REG_P (operands[0])
9801 	  || REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER))
9802     {
9803       rtx op0 = ix86_compare_op0, op1 = ix86_compare_op1;
9804       code = GET_CODE (operands[1]);
9805 
9806       /* See if we have (cross) match between comparison operands and
9807          conditional move operands.  */
9808       if (rtx_equal_p (operands[2], op1))
9809 	{
9810 	  rtx tmp = op0;
9811 	  op0 = op1;
9812 	  op1 = tmp;
9813 	  code = reverse_condition_maybe_unordered (code);
9814 	}
9815       if (rtx_equal_p (operands[2], op0) && rtx_equal_p (operands[3], op1))
9816 	{
9817 	  /* Check for min operation.  */
9818 	  if (code == LT)
9819 	    {
9820 	       operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
9821 	       if (memory_operand (op0, VOIDmode))
9822 		 op0 = force_reg (GET_MODE (operands[0]), op0);
9823 	       if (GET_MODE (operands[0]) == SFmode)
9824 		 emit_insn (gen_minsf3 (operands[0], op0, op1));
9825 	       else
9826 		 emit_insn (gen_mindf3 (operands[0], op0, op1));
9827 	       return 1;
9828 	    }
9829 	  /* Check for max operation.  */
9830 	  if (code == GT)
9831 	    {
9832 	       operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
9833 	       if (memory_operand (op0, VOIDmode))
9834 		 op0 = force_reg (GET_MODE (operands[0]), op0);
9835 	       if (GET_MODE (operands[0]) == SFmode)
9836 		 emit_insn (gen_maxsf3 (operands[0], op0, op1));
9837 	       else
9838 		 emit_insn (gen_maxdf3 (operands[0], op0, op1));
9839 	       return 1;
9840 	    }
9841 	}
9842       /* Massage the condition to be an sse_comparison_operator.  In case we are
9843 	 in non-IEEE mode, try to canonicalize the destination operand
9844 	 to be first in the comparison - this helps reload avoid extra
9845 	 moves.  */
9846       if (!sse_comparison_operator (operands[1], VOIDmode)
9847 	  || (rtx_equal_p (operands[0], ix86_compare_op1) && !TARGET_IEEE_FP))
9848 	{
9849 	  rtx tmp = ix86_compare_op0;
9850 	  ix86_compare_op0 = ix86_compare_op1;
9851 	  ix86_compare_op1 = tmp;
9852 	  operands[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands[1])),
9853 					VOIDmode, ix86_compare_op0,
9854 					ix86_compare_op1);
9855 	}
9856       /* Similarly, try to make the result the first operand of the conditional
9857 	 move.  We also don't support the NE comparison on SSE, so try to
9858 	 avoid it.  */
9859       if ((rtx_equal_p (operands[0], operands[3])
9860 	   && (!TARGET_IEEE_FP || GET_CODE (operands[1]) != EQ))
9861 	  || (GET_CODE (operands[1]) == NE && TARGET_IEEE_FP))
9862 	{
9863 	  rtx tmp = operands[2];
9864 	  operands[2] = operands[3];
9865 	  operands[3] = tmp;
9866 	  operands[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
9867 					  (GET_CODE (operands[1])),
9868 					VOIDmode, ix86_compare_op0,
9869 					ix86_compare_op1);
9870 	}
9871       if (GET_MODE (operands[0]) == SFmode)
9872 	emit_insn (gen_sse_movsfcc (operands[0], operands[1],
9873 				    operands[2], operands[3],
9874 				    ix86_compare_op0, ix86_compare_op1));
9875       else
9876 	emit_insn (gen_sse_movdfcc (operands[0], operands[1],
9877 				    operands[2], operands[3],
9878 				    ix86_compare_op0, ix86_compare_op1));
9879       return 1;
9880     }
9881 
9885   code = GET_CODE (operands[1]);
9886   compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9887 
9888   /* The floating point conditional move instructions don't directly
9889      support signed integer comparisons.  */
9890 
9891   if (!fcmov_comparison_operator (compare_op, VOIDmode))
9892     {
9893       if (second_test != NULL || bypass_test != NULL)
9894 	abort ();
9895       tmp = gen_reg_rtx (QImode);
9896       ix86_expand_setcc (code, tmp);
9897       code = NE;
9898       ix86_compare_op0 = tmp;
9899       ix86_compare_op1 = const0_rtx;
9900       compare_op = ix86_expand_compare (code,  &second_test, &bypass_test);
9901     }
9902   if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
9903     {
9904       tmp = gen_reg_rtx (GET_MODE (operands[0]));
9905       emit_move_insn (tmp, operands[3]);
9906       operands[3] = tmp;
9907     }
9908   if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
9909     {
9910       tmp = gen_reg_rtx (GET_MODE (operands[0]));
9911       emit_move_insn (tmp, operands[2]);
9912       operands[2] = tmp;
9913     }
9914 
9915   emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9916 			  gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
9917 				compare_op,
9918 				operands[2],
9919 				operands[3])));
9920   if (bypass_test)
9921     emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9922 			    gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
9923 				  bypass_test,
9924 				  operands[3],
9925 				  operands[0])));
9926   if (second_test)
9927     emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9928 			    gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
9929 				  second_test,
9930 				  operands[2],
9931 				  operands[0])));
9932 
9933   return 1;
9934 }
9935 
9936 /* Split operands 0 and 1 into SImode parts.  Similar to split_di, but
9937    works for floating point parameters and non-offsettable memories.
9938    For pushes, it returns just stack offsets; the values will be saved
9939    in the right order.  At most three parts are generated.  */
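/* (For example, on a 32-bit target DImode and DFmode split into two SImode
   parts and XFmode/TFmode into three, while on a 64-bit target XFmode and
   TFmode split into a DImode part plus an SImode part.)  */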
9940 
9941 static int
9942 ix86_split_to_parts (operand, parts, mode)
9943      rtx operand;
9944      rtx *parts;
9945      enum machine_mode mode;
9946 {
9947   int size;
9948 
9949   if (!TARGET_64BIT)
9950     size = mode == TFmode ? 3 : (GET_MODE_SIZE (mode) / 4);
9951   else
9952     size = (GET_MODE_SIZE (mode) + 4) / 8;
9953 
9954   if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
9955     abort ();
9956   if (size < 2 || size > 3)
9957     abort ();
9958 
9959   /* Optimize constant pool reference to immediates.  This is used by fp
9960      moves, which force all constants to memory to allow combining.  */
9961   if (GET_CODE (operand) == MEM && RTX_UNCHANGING_P (operand))
9962     {
9963       rtx tmp = maybe_get_pool_constant (operand);
9964       if (tmp)
9965 	operand = tmp;
9966     }
9967 
9968   if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
9969     {
9970       /* The only non-offsettable memories we handle are pushes.  */
9971       if (! push_operand (operand, VOIDmode))
9972 	abort ();
9973 
9974       operand = copy_rtx (operand);
9975       PUT_MODE (operand, Pmode);
9976       parts[0] = parts[1] = parts[2] = operand;
9977     }
9978   else if (!TARGET_64BIT)
9979     {
9980       if (mode == DImode)
9981 	split_di (&operand, 1, &parts[0], &parts[1]);
9982       else
9983 	{
9984 	  if (REG_P (operand))
9985 	    {
9986 	      if (!reload_completed)
9987 		abort ();
9988 	      parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
9989 	      parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
9990 	      if (size == 3)
9991 		parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
9992 	    }
9993 	  else if (offsettable_memref_p (operand))
9994 	    {
9995 	      operand = adjust_address (operand, SImode, 0);
9996 	      parts[0] = operand;
9997 	      parts[1] = adjust_address (operand, SImode, 4);
9998 	      if (size == 3)
9999 		parts[2] = adjust_address (operand, SImode, 8);
10000 	    }
10001 	  else if (GET_CODE (operand) == CONST_DOUBLE)
10002 	    {
10003 	      REAL_VALUE_TYPE r;
10004 	      long l[4];
10005 
10006 	      REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
10007 	      switch (mode)
10008 		{
10009 		case XFmode:
10010 		case TFmode:
10011 		  REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
10012 		  parts[2] = gen_int_mode (l[2], SImode);
10013 		  break;
10014 		case DFmode:
10015 		  REAL_VALUE_TO_TARGET_DOUBLE (r, l);
10016 		  break;
10017 		default:
10018 		  abort ();
10019 		}
10020 	      parts[1] = gen_int_mode (l[1], SImode);
10021 	      parts[0] = gen_int_mode (l[0], SImode);
10022 	    }
10023 	  else
10024 	    abort ();
10025 	}
10026     }
10027   else
10028     {
10029       if (mode == TImode)
10030 	split_ti (&operand, 1, &parts[0], &parts[1]);
10031       if (mode == XFmode || mode == TFmode)
10032 	{
10033 	  if (REG_P (operand))
10034 	    {
10035 	      if (!reload_completed)
10036 		abort ();
10037 	      parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
10038 	      parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
10039 	    }
10040 	  else if (offsettable_memref_p (operand))
10041 	    {
10042 	      operand = adjust_address (operand, DImode, 0);
10043 	      parts[0] = operand;
10044 	      parts[1] = adjust_address (operand, SImode, 8);
10045 	    }
10046 	  else if (GET_CODE (operand) == CONST_DOUBLE)
10047 	    {
10048 	      REAL_VALUE_TYPE r;
10049 	      long l[3];
10050 
10051 	      REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
10052 	      REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
10053 	      /* Do not use shift by 32 to avoid warning on 32bit systems.  */
10054 	      if (HOST_BITS_PER_WIDE_INT >= 64)
10055 	        parts[0]
10056 		  = gen_int_mode
10057 		      ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
10058 		       + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
10059 		       DImode);
10060 	      else
10061 	        parts[0] = immed_double_const (l[0], l[1], DImode);
10062 	      parts[1] = gen_int_mode (l[2], SImode);
10063 	    }
10064 	  else
10065 	    abort ();
10066 	}
10067     }
10068 
10069   return size;
10070 }
10071 
10072 /* Emit insns to perform a move or push of DI, DF, and XF values.
10073    All required insns are emitted here.  Operands 2-4 receive the
10074    destination values in the correct order; operands 5-7 receive the
10075    source values.  */
10076 
10077 void
10078 ix86_split_long_move (operands)
10079      rtx operands[];
10080 {
10081   rtx part[2][3];
10082   int nparts;
10083   int push = 0;
10084   int collisions = 0;
10085   enum machine_mode mode = GET_MODE (operands[0]);
10086 
10087   /* The DFmode expanders may ask us to move a double.
10088      For a 64bit target this is a single move.  By hiding this fact
10089      here we simplify the i386.md splitters.  */
10090   if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
10091     {
10092       /* Optimize constant pool reference to immediates.  This is used by
10093 	 fp moves, which force all constants to memory to allow combining.  */
10094 
10095       if (GET_CODE (operands[1]) == MEM
10096 	  && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
10097 	  && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
10098 	operands[1] = get_pool_constant (XEXP (operands[1], 0));
10099       if (push_operand (operands[0], VOIDmode))
10100 	{
10101 	  operands[0] = copy_rtx (operands[0]);
10102 	  PUT_MODE (operands[0], Pmode);
10103 	}
10104       else
10105         operands[0] = gen_lowpart (DImode, operands[0]);
10106       operands[1] = gen_lowpart (DImode, operands[1]);
10107       emit_move_insn (operands[0], operands[1]);
10108       return;
10109     }
10110 
10111   /* The only non-offsettable memory we handle is push.  */
10112   if (push_operand (operands[0], VOIDmode))
10113     push = 1;
10114   else if (GET_CODE (operands[0]) == MEM
10115 	   && ! offsettable_memref_p (operands[0]))
10116     abort ();
10117 
10118   nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
10119   ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
10120 
10121   /* When emitting a push, take care of source operands on the stack.  */
10122   if (push && GET_CODE (operands[1]) == MEM
10123       && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
10124     {
10125       if (nparts == 3)
10126 	part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
10127 				     XEXP (part[1][2], 0));
10128       part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
10129 				   XEXP (part[1][1], 0));
10130     }
10131 
10132   /* We need to do the copy in the right order in case an address register
10133      of the source overlaps the destination.  */
10134   if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
10135     {
10136       if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
10137 	collisions++;
10138       if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
10139 	collisions++;
10140       if (nparts == 3
10141 	  && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
10142 	collisions++;
10143 
10144       /* A collision in the middle part can be handled by reordering.  */
10145       if (collisions == 1 && nparts == 3
10146 	  && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
10147 	{
10148 	  rtx tmp;
10149 	  tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
10150 	  tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
10151 	}
10152 
10153       /* If there are more collisions, we can't handle it by reordering.
10154 	 Do an lea to the last part and use only one colliding move.  */
10155       else if (collisions > 1)
10156 	{
10157 	  rtx base;
10158 
10159 	  collisions = 1;
10160 
10161 	  base = part[0][nparts - 1];
10162 
10163 	  /* Handle the case when the last part isn't valid for lea.
10164 	     Happens in 64-bit mode storing the 12-byte XFmode.  */
10165 	  if (GET_MODE (base) != Pmode)
10166 	    base = gen_rtx_REG (Pmode, REGNO (base));
10167 
10168 	  emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
10169 	  part[1][0] = replace_equiv_address (part[1][0], base);
10170 	  part[1][1] = replace_equiv_address (part[1][1],
10171 				      plus_constant (base, UNITS_PER_WORD));
10172 	  if (nparts == 3)
10173 	    part[1][2] = replace_equiv_address (part[1][2],
10174 				      plus_constant (base, 8));
10175 	}
10176     }
10177 
10178   if (push)
10179     {
10180       if (!TARGET_64BIT)
10181 	{
10182 	  if (nparts == 3)
10183 	    {
10184 	      /* We use only the first 12 bytes of the TFmode value, but for
10185 		 pushing we are required to adjust the stack as if we were
10186 		 pushing a real 16-byte value.  */
10187 	      if (mode == TFmode && !TARGET_64BIT)
10188 		emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
10189 				       GEN_INT (-4)));
10190 	      emit_move_insn (part[0][2], part[1][2]);
10191 	    }
10192 	}
10193       else
10194 	{
10195 	  /* In 64bit mode we don't have a 32bit push available.  In case this is
10196 	     a register, it is OK - we will just use the larger counterpart.  We also
10197 	     retype memory - this comes from an attempt to avoid the REX prefix on
10198 	     moving the second half of a TFmode value.  */
10199 	  if (GET_MODE (part[1][1]) == SImode)
10200 	    {
10201 	      if (GET_CODE (part[1][1]) == MEM)
10202 		part[1][1] = adjust_address (part[1][1], DImode, 0);
10203 	      else if (REG_P (part[1][1]))
10204 		part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
10205 	      else
10206 		abort ();
10207 	      if (GET_MODE (part[1][0]) == SImode)
10208 		part[1][0] = part[1][1];
10209 	    }
10210 	}
10211       emit_move_insn (part[0][1], part[1][1]);
10212       emit_move_insn (part[0][0], part[1][0]);
10213       return;
10214     }
10215 
10216   /* Choose the correct order so we do not overwrite the source before it is copied.  */
10217   if ((REG_P (part[0][0])
10218        && REG_P (part[1][1])
10219        && (REGNO (part[0][0]) == REGNO (part[1][1])
10220 	   || (nparts == 3
10221 	       && REGNO (part[0][0]) == REGNO (part[1][2]))))
10222       || (collisions > 0
10223 	  && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
10224     {
10225       if (nparts == 3)
10226 	{
10227 	  operands[2] = part[0][2];
10228 	  operands[3] = part[0][1];
10229 	  operands[4] = part[0][0];
10230 	  operands[5] = part[1][2];
10231 	  operands[6] = part[1][1];
10232 	  operands[7] = part[1][0];
10233 	}
10234       else
10235 	{
10236 	  operands[2] = part[0][1];
10237 	  operands[3] = part[0][0];
10238 	  operands[5] = part[1][1];
10239 	  operands[6] = part[1][0];
10240 	}
10241     }
10242   else
10243     {
10244       if (nparts == 3)
10245 	{
10246 	  operands[2] = part[0][0];
10247 	  operands[3] = part[0][1];
10248 	  operands[4] = part[0][2];
10249 	  operands[5] = part[1][0];
10250 	  operands[6] = part[1][1];
10251 	  operands[7] = part[1][2];
10252 	}
10253       else
10254 	{
10255 	  operands[2] = part[0][0];
10256 	  operands[3] = part[0][1];
10257 	  operands[5] = part[1][0];
10258 	  operands[6] = part[1][1];
10259 	}
10260     }
10261   emit_move_insn (operands[2], operands[5]);
10262   emit_move_insn (operands[3], operands[6]);
10263   if (nparts == 3)
10264     emit_move_insn (operands[4], operands[7]);
10265 
10266   return;
10267 }
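
/* Illustrative sketch, not part of the original file: the ordering rule
   above, in plain C for the common two-word case; the function name is
   invented.  If the destination's low half lives in the same register as
   the source's high half, copying low-first would clobber the source, so
   the halves are copied in the opposite order.  (Collisions with the
   source address are handled separately by the lea fallback above.)  */
static void
move_two_halves (unsigned int *dst_lo, unsigned int *dst_hi,
		 unsigned int *src_lo, unsigned int *src_hi)
{
  if (dst_lo == src_hi)
    {
      /* Destination low aliases source high: copy the high half first.  */
      *dst_hi = *src_hi;
      *dst_lo = *src_lo;
    }
  else
    {
      /* The normal order is safe.  */
      *dst_lo = *src_lo;
      *dst_hi = *src_hi;
    }
}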
10268 
10269 void
10270 ix86_split_ashldi (operands, scratch)
10271      rtx *operands, scratch;
10272 {
10273   rtx low[2], high[2];
10274   int count;
10275 
10276   if (GET_CODE (operands[2]) == CONST_INT)
10277     {
10278       split_di (operands, 2, low, high);
10279       count = INTVAL (operands[2]) & 63;
10280 
10281       if (count >= 32)
10282 	{
10283 	  emit_move_insn (high[0], low[1]);
10284 	  emit_move_insn (low[0], const0_rtx);
10285 
10286 	  if (count > 32)
10287 	    emit_insn (gen_ashlsi3 (high[0], high[0], GEN_INT (count - 32)));
10288 	}
10289       else
10290 	{
10291 	  if (!rtx_equal_p (operands[0], operands[1]))
10292 	    emit_move_insn (operands[0], operands[1]);
10293 	  emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
10294 	  emit_insn (gen_ashlsi3 (low[0], low[0], GEN_INT (count)));
10295 	}
10296     }
10297   else
10298     {
10299       if (!rtx_equal_p (operands[0], operands[1]))
10300 	emit_move_insn (operands[0], operands[1]);
10301 
10302       split_di (operands, 1, low, high);
10303 
10304       emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
10305       emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
10306 
10307       if (TARGET_CMOVE && (! no_new_pseudos || scratch))
10308 	{
10309 	  if (! no_new_pseudos)
10310 	    scratch = force_reg (SImode, const0_rtx);
10311 	  else
10312 	    emit_move_insn (scratch, const0_rtx);
10313 
10314 	  emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2],
10315 					  scratch));
10316 	}
10317       else
10318 	emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
10319     }
10320 }
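
/* Illustrative sketch, not part of the original file: the constant-count
   case above, as plain C on a 32-bit target.  LO and HI stand for the two
   halves produced by split_di; the function name is invented.  */
static void
shl64_sketch (unsigned int *lo, unsigned int *hi, int count)
{
  count &= 63;
  if (count >= 32)
    {
      *hi = *lo << (count - 32);	/* mov, then shl when count > 32 */
      *lo = 0;
    }
  else if (count > 0)
    {
      /* shld fills the vacated low bits of HI from the top of LO.  */
      *hi = (*hi << count) | (*lo >> (32 - count));
      *lo <<= count;
    }
}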
10321 
10322 void
10323 ix86_split_ashrdi (operands, scratch)
10324      rtx *operands, scratch;
10325 {
10326   rtx low[2], high[2];
10327   int count;
10328 
10329   if (GET_CODE (operands[2]) == CONST_INT)
10330     {
10331       split_di (operands, 2, low, high);
10332       count = INTVAL (operands[2]) & 63;
10333 
10334       if (count >= 32)
10335 	{
10336 	  emit_move_insn (low[0], high[1]);
10337 
10338 	  if (! reload_completed)
10339 	    emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31)));
10340 	  else
10341 	    {
10342 	      emit_move_insn (high[0], low[0]);
10343 	      emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
10344 	    }
10345 
10346 	  if (count > 32)
10347 	    emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
10348 	}
10349       else
10350 	{
10351 	  if (!rtx_equal_p (operands[0], operands[1]))
10352 	    emit_move_insn (operands[0], operands[1]);
10353 	  emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
10354 	  emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
10355 	}
10356     }
10357   else
10358     {
10359       if (!rtx_equal_p (operands[0], operands[1]))
10360 	emit_move_insn (operands[0], operands[1]);
10361 
10362       split_di (operands, 1, low, high);
10363 
10364       emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
10365       emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));
10366 
10367       if (TARGET_CMOVE && (! no_new_pseudos || scratch))
10368 	{
10369 	  if (! no_new_pseudos)
10370 	    scratch = gen_reg_rtx (SImode);
10371 	  emit_move_insn (scratch, high[0]);
10372 	  emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
10373 	  emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
10374 					  scratch));
10375 	}
10376       else
10377 	emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
10378     }
10379 }
10380 
10381 void
10382 ix86_split_lshrdi (operands, scratch)
10383      rtx *operands, scratch;
10384 {
10385   rtx low[2], high[2];
10386   int count;
10387 
10388   if (GET_CODE (operands[2]) == CONST_INT)
10389     {
10390       split_di (operands, 2, low, high);
10391       count = INTVAL (operands[2]) & 63;
10392 
10393       if (count >= 32)
10394 	{
10395 	  emit_move_insn (low[0], high[1]);
10396 	  emit_move_insn (high[0], const0_rtx);
10397 
10398 	  if (count > 32)
10399 	    emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
10400 	}
10401       else
10402 	{
10403 	  if (!rtx_equal_p (operands[0], operands[1]))
10404 	    emit_move_insn (operands[0], operands[1]);
10405 	  emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
10406 	  emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
10407 	}
10408     }
10409   else
10410     {
10411       if (!rtx_equal_p (operands[0], operands[1]))
10412 	emit_move_insn (operands[0], operands[1]);
10413 
10414       split_di (operands, 1, low, high);
10415 
10416       emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
10417       emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));
10418 
10419       /* Heh.  By reversing the arguments, we can reuse this pattern.  */
10420       if (TARGET_CMOVE && (! no_new_pseudos || scratch))
10421 	{
10422 	  if (! no_new_pseudos)
10423 	    scratch = force_reg (SImode, const0_rtx);
10424 	  else
10425 	    emit_move_insn (scratch, const0_rtx);
10426 
10427 	  emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
10428 					  scratch));
10429 	}
10430       else
10431 	emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
10432     }
10433 }
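
/* Illustrative sketch, not part of the original file: the variable-count
   path shared by the three splitters above, shown in plain C for the
   logical right shift; the function name is invented.  x86 masks 32-bit
   shift counts to 5 bits, so the shrd/shr pair alone is correct only for
   counts 0..31; the x86_shift_adj patterns (a cmov pair, or a branch
   without TARGET_CMOVE) supply the fixup for counts 32..63.  For the
   arithmetic shift the fixup fills with copies of the sign bit rather
   than zero.  */
static unsigned long long
lshr64_sketch (unsigned int lo, unsigned int hi, unsigned int count)
{
  unsigned int c = count & 31;	/* what the hardware actually shifts by */

  if (c != 0)			/* a zero count is a no-op in hardware;
				   guarded here only so the C shifts stay
				   well defined */
    {
      lo = (lo >> c) | (hi << (32 - c));	/* shrd */
      hi >>= c;					/* shr */
    }
  if (count & 32)		/* the shift_adj fixup */
    {
      lo = hi;
      hi = 0;
    }
  return ((unsigned long long) hi << 32) | lo;
}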
10434 
10435 /* Helper function for the string operations below.  Test whether VARIABLE
10436    is aligned to VALUE bytes; if so, jump to the returned label.  */
10437 static rtx
10438 ix86_expand_aligntest (variable, value)
10439      rtx variable;
10440      int value;
10441 {
10442   rtx label = gen_label_rtx ();
10443   rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
10444   if (GET_MODE (variable) == DImode)
10445     emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
10446   else
10447     emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
10448   emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
10449 			   1, label);
10450   return label;
10451 }
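
/* Illustrative sketch, not part of the original file: a helper/caller
   pair expands to the following shape, shown in plain C for value == 1:

       if ((dest & 1) == 0)	-- test emitted by ix86_expand_aligntest
	 goto label;
       *dest++ = *src++;	-- one-byte fixup emitted by the caller
       count -= 1;
     label:;

   so the fixup runs only when the selected low bit of DEST is set.  */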
10452 
10453 /* Decrement COUNTREG by VALUE.  */
10454 static void
10455 ix86_adjust_counter (countreg, value)
10456      rtx countreg;
10457      HOST_WIDE_INT value;
10458 {
10459   if (GET_MODE (countreg) == DImode)
10460     emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
10461   else
10462     emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
10463 }
10464 
10465 /* Zero extend the possibly-SImode EXP to a Pmode register.  */
10466 rtx
10467 ix86_zero_extend_to_Pmode (exp)
10468    rtx exp;
10469 {
10470   rtx r;
10471   if (GET_MODE (exp) == VOIDmode)
10472     return force_reg (Pmode, exp);
10473   if (GET_MODE (exp) == Pmode)
10474     return copy_to_mode_reg (Pmode, exp);
10475   r = gen_reg_rtx (Pmode);
10476   emit_insn (gen_zero_extendsidi2 (r, exp));
10477   return r;
10478 }
10479 
10480 /* Expand string move (memcpy) operation.  Use i386 string operations when
10481    profitable.  expand_clrstr contains similar code.  */
10482 int
10483 ix86_expand_movstr (dst, src, count_exp, align_exp)
10484      rtx dst, src, count_exp, align_exp;
10485 {
10486   rtx srcreg, destreg, countreg;
10487   enum machine_mode counter_mode;
10488   HOST_WIDE_INT align = 0;
10489   unsigned HOST_WIDE_INT count = 0;
10490   rtx insns;
10491 
10492   start_sequence ();
10493 
10494   if (GET_CODE (align_exp) == CONST_INT)
10495     align = INTVAL (align_exp);
10496 
10497   /* This simple hack avoids all inlining code and simplifies code below.  */
10498   if (!TARGET_ALIGN_STRINGOPS)
10499     align = 64;
10500 
10501   if (GET_CODE (count_exp) == CONST_INT)
10502     count = INTVAL (count_exp);
10503 
10504   /* Figure out the proper mode for the counter: for 32-bit targets it is
10505      always SImode; for 64-bit targets use SImode when possible, else DImode.
10506      Set count to the number of bytes copied when known at compile time.  */
10507   if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
10508       || x86_64_zero_extended_value (count_exp))
10509     counter_mode = SImode;
10510   else
10511     counter_mode = DImode;
10512 
10513   if (counter_mode != SImode && counter_mode != DImode)
10514     abort ();
10515 
10516   destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
10517   srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
10518 
10519   emit_insn (gen_cld ());
10520 
10521   /* When optimizing for size, emit a simple rep ; movsb instruction for
10522      counts not divisible by 4.  */
10523 
10524   if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
10525     {
10526       countreg = ix86_zero_extend_to_Pmode (count_exp);
10527       if (TARGET_64BIT)
10528 	emit_insn (gen_rep_movqi_rex64 (destreg, srcreg, countreg,
10529 				        destreg, srcreg, countreg));
10530       else
10531 	emit_insn (gen_rep_movqi (destreg, srcreg, countreg,
10532 				  destreg, srcreg, countreg));
10533     }
10534 
10535   /* For constant aligned (or small unaligned) copies use rep movsl
10536      followed by code copying the rest.  For PentiumPro ensure 8 byte
10537      alignment to allow rep movsl acceleration.  */
10538 
10539   else if (count != 0
10540 	   && (align >= 8
10541 	       || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
10542 	       || optimize_size || count < (unsigned int) 64))
10543     {
10544       int size = TARGET_64BIT && !optimize_size ? 8 : 4;
10545       if (count & ~(size - 1))
10546 	{
10547 	  countreg = copy_to_mode_reg (counter_mode,
10548 				       GEN_INT ((count >> (size == 4 ? 2 : 3))
10549 						& (TARGET_64BIT ? -1 : 0x3fffffff)));
10550 	  countreg = ix86_zero_extend_to_Pmode (countreg);
10551 	  if (size == 4)
10552 	    {
10553 	      if (TARGET_64BIT)
10554 		emit_insn (gen_rep_movsi_rex64 (destreg, srcreg, countreg,
10555 					        destreg, srcreg, countreg));
10556 	      else
10557 		emit_insn (gen_rep_movsi (destreg, srcreg, countreg,
10558 					  destreg, srcreg, countreg));
10559 	    }
10560 	  else
10561 	    emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg,
10562 					    destreg, srcreg, countreg));
10563 	}
10564       if (size == 8 && (count & 0x04))
10565 	emit_insn (gen_strmovsi (destreg, srcreg));
10566       if (count & 0x02)
10567 	emit_insn (gen_strmovhi (destreg, srcreg));
10568       if (count & 0x01)
10569 	emit_insn (gen_strmovqi (destreg, srcreg));
10570     }
10571   /* The generic code based on the glibc implementation:
10572      - align destination to 4 bytes (8 byte alignment is used for PentiumPro
10573      allowing accelerated copying there)
10574      - copy the data using rep movsl
10575      - copy the rest.  */
10576   else
10577     {
10578       rtx countreg2;
10579       rtx label = NULL;
10580       int desired_alignment = (TARGET_PENTIUMPRO
10581 			       && (count == 0 || count >= (unsigned int) 260)
10582 			       ? 8 : UNITS_PER_WORD);
10583 
10584       /* In case we don't know anything about the alignment, default to the
10585          library version, since it is usually equally fast and results in
10586          shorter code.  */
10587       if (!TARGET_INLINE_ALL_STRINGOPS && align < UNITS_PER_WORD)
10588 	{
10589 	  end_sequence ();
10590 	  return 0;
10591 	}
10592 
10593       if (TARGET_SINGLE_STRINGOP)
10594 	emit_insn (gen_cld ());
10595 
10596       countreg2 = gen_reg_rtx (Pmode);
10597       countreg = copy_to_mode_reg (counter_mode, count_exp);
10598 
10599       /* We don't use loops to align destination and to copy parts smaller
10600          than 4 bytes, because gcc is able to optimize such code better (in
10601          case the destination or the count really is aligned, gcc is often
10602          able to predict the branches) and also it is friendlier to the
10603          hardware branch prediction.
10604 
10605          Using loops is beneficial for the generic case, because we can
10606          handle small counts using the loops.  Many CPUs (such as Athlon)
10607          have large REP prefix setup costs.
10608 
10609          This is quite costly.  Maybe we can revisit this decision later or
10610          add some customizability to this code.  */
10611 
10612       if (count == 0 && align < desired_alignment)
10613 	{
10614 	  label = gen_label_rtx ();
10615 	  emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
10616 				   LEU, 0, counter_mode, 1, label);
10617 	}
10618       if (align <= 1)
10619 	{
10620 	  rtx label = ix86_expand_aligntest (destreg, 1);
10621 	  emit_insn (gen_strmovqi (destreg, srcreg));
10622 	  ix86_adjust_counter (countreg, 1);
10623 	  emit_label (label);
10624 	  LABEL_NUSES (label) = 1;
10625 	}
10626       if (align <= 2)
10627 	{
10628 	  rtx label = ix86_expand_aligntest (destreg, 2);
10629 	  emit_insn (gen_strmovhi (destreg, srcreg));
10630 	  ix86_adjust_counter (countreg, 2);
10631 	  emit_label (label);
10632 	  LABEL_NUSES (label) = 1;
10633 	}
10634       if (align <= 4 && desired_alignment > 4)
10635 	{
10636 	  rtx label = ix86_expand_aligntest (destreg, 4);
10637 	  emit_insn (gen_strmovsi (destreg, srcreg));
10638 	  ix86_adjust_counter (countreg, 4);
10639 	  emit_label (label);
10640 	  LABEL_NUSES (label) = 1;
10641 	}
10642 
10643       if (label && desired_alignment > 4 && !TARGET_64BIT)
10644 	{
10645 	  emit_label (label);
10646 	  LABEL_NUSES (label) = 1;
10647 	  label = NULL_RTX;
10648 	}
10649       if (!TARGET_SINGLE_STRINGOP)
10650 	emit_insn (gen_cld ());
10651       if (TARGET_64BIT)
10652 	{
10653 	  emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
10654 				  GEN_INT (3)));
10655 	  emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg2,
10656 					  destreg, srcreg, countreg2));
10657 	}
10658       else
10659 	{
10660 	  emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
10661 	  emit_insn (gen_rep_movsi (destreg, srcreg, countreg2,
10662 				    destreg, srcreg, countreg2));
10663 	}
10664 
10665       if (label)
10666 	{
10667 	  emit_label (label);
10668 	  LABEL_NUSES (label) = 1;
10669 	}
10670       if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
10671 	emit_insn (gen_strmovsi (destreg, srcreg));
10672       if ((align <= 4 || count == 0) && TARGET_64BIT)
10673 	{
10674 	  rtx label = ix86_expand_aligntest (countreg, 4);
10675 	  emit_insn (gen_strmovsi (destreg, srcreg));
10676 	  emit_label (label);
10677 	  LABEL_NUSES (label) = 1;
10678 	}
10679       if (align > 2 && count != 0 && (count & 2))
10680 	emit_insn (gen_strmovhi (destreg, srcreg));
10681       if (align <= 2 || count == 0)
10682 	{
10683 	  rtx label = ix86_expand_aligntest (countreg, 2);
10684 	  emit_insn (gen_strmovhi (destreg, srcreg));
10685 	  emit_label (label);
10686 	  LABEL_NUSES (label) = 1;
10687 	}
10688       if (align > 1 && count != 0 && (count & 1))
10689 	emit_insn (gen_strmovqi (destreg, srcreg));
10690       if (align <= 1 || count == 0)
10691 	{
10692 	  rtx label = ix86_expand_aligntest (countreg, 1);
10693 	  emit_insn (gen_strmovqi (destreg, srcreg));
10694 	  emit_label (label);
10695 	  LABEL_NUSES (label) = 1;
10696 	}
10697     }
10698 
10699   insns = get_insns ();
10700   end_sequence ();
10701 
10702   ix86_set_move_mem_attrs (insns, dst, src, destreg, srcreg);
10703   emit_insn (insns);
10704   return 1;
10705 }
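
/* Illustrative sketch, not part of the original file: the shape of the
   code the generic path above emits, as plain C for a 32-bit target; the
   function name is invented.  It assumes COUNT is large enough to survive
   the alignment prologue, which the real code guarantees by branching
   around the prologue for small counts.  */
static void
inline_memcpy_sketch (unsigned char *dst, const unsigned char *src,
		      unsigned int count)
{
  unsigned int n;

  /* Align the destination, one guarded step per alignment bit (the
     ix86_expand_aligntest/strmov pairs above).  */
  if ((unsigned long) dst & 1)
    *dst++ = *src++, count--;
  if ((unsigned long) dst & 2)
    {
      *(unsigned short *) dst = *(const unsigned short *) src;
      dst += 2, src += 2, count -= 2;
    }

  /* Bulk copy: rep ; movsl copies count / 4 longwords.  */
  for (n = count >> 2; n != 0; n--)
    {
      *(unsigned int *) dst = *(const unsigned int *) src;
      dst += 4, src += 4;
    }

  /* Tail: at most one two-byte and one one-byte move.  */
  if (count & 2)
    {
      *(unsigned short *) dst = *(const unsigned short *) src;
      dst += 2, src += 2;
    }
  if (count & 1)
    *dst = *src;
}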
10706 
10707 /* Expand string clear operation (bzero).  Use i386 string operations when
10708    profitable.  expand_movstr contains similar code.  */
10709 int
10710 ix86_expand_clrstr (src, count_exp, align_exp)
10711      rtx src, count_exp, align_exp;
10712 {
10713   rtx destreg, zeroreg, countreg;
10714   enum machine_mode counter_mode;
10715   HOST_WIDE_INT align = 0;
10716   unsigned HOST_WIDE_INT count = 0;
10717 
10718   if (GET_CODE (align_exp) == CONST_INT)
10719     align = INTVAL (align_exp);
10720 
10721   /* This simple hack avoids all inlining code and simplifies code below.  */
10722   if (!TARGET_ALIGN_STRINGOPS)
10723     align = 32;
10724 
10725   if (GET_CODE (count_exp) == CONST_INT)
10726     count = INTVAL (count_exp);
10727   /* Figure out the proper mode for the counter: for 32-bit targets it is
10728      always SImode; for 64-bit targets use SImode when possible, else DImode.
10729      Set count to the number of bytes copied when known at compile time.  */
10730   if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
10731       || x86_64_zero_extended_value (count_exp))
10732     counter_mode = SImode;
10733   else
10734     counter_mode = DImode;
10735 
10736   destreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
10737 
10738   emit_insn (gen_cld ());
10739 
10740   /* When optimizing for size, emit a simple rep ; stosb instruction for
10741      counts not divisible by 4.  */
10742 
10743   if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
10744     {
10745       countreg = ix86_zero_extend_to_Pmode (count_exp);
10746       zeroreg = copy_to_mode_reg (QImode, const0_rtx);
10747       if (TARGET_64BIT)
10748 	emit_insn (gen_rep_stosqi_rex64 (destreg, countreg, zeroreg,
10749 				         destreg, countreg));
10750       else
10751 	emit_insn (gen_rep_stosqi (destreg, countreg, zeroreg,
10752 				   destreg, countreg));
10753     }
10754   else if (count != 0
10755 	   && (align >= 8
10756 	       || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
10757 	       || optimize_size || count < (unsigned int) 64))
10758     {
10759       int size = TARGET_64BIT && !optimize_size ? 8 : 4;
10760       zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
10761       if (count & ~(size - 1))
10762 	{
10763 	  countreg = copy_to_mode_reg (counter_mode,
10764 				       GEN_INT ((count >> (size == 4 ? 2 : 3))
10765 						& (TARGET_64BIT ? -1 : 0x3fffffff)));
10766 	  countreg = ix86_zero_extend_to_Pmode (countreg);
10767 	  if (size == 4)
10768 	    {
10769 	      if (TARGET_64BIT)
10770 		emit_insn (gen_rep_stossi_rex64 (destreg, countreg, zeroreg,
10771 					         destreg, countreg));
10772 	      else
10773 		emit_insn (gen_rep_stossi (destreg, countreg, zeroreg,
10774 					   destreg, countreg));
10775 	    }
10776 	  else
10777 	    emit_insn (gen_rep_stosdi_rex64 (destreg, countreg, zeroreg,
10778 					     destreg, countreg));
10779 	}
10780       if (size == 8 && (count & 0x04))
10781 	emit_insn (gen_strsetsi (destreg,
10782 				 gen_rtx_SUBREG (SImode, zeroreg, 0)));
10783       if (count & 0x02)
10784 	emit_insn (gen_strsethi (destreg,
10785 				 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10786       if (count & 0x01)
10787 	emit_insn (gen_strsetqi (destreg,
10788 				 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10789     }
10790   else
10791     {
10792       rtx countreg2;
10793       rtx label = NULL;
10794       /* Compute desired alignment of the string operation.  */
10795       int desired_alignment = (TARGET_PENTIUMPRO
10796 			       && (count == 0 || count >= (unsigned int) 260)
10797 			       ? 8 : UNITS_PER_WORD);
10798 
10799       /* In case we don't know anything about the alignment, default to the
10800          library version, since it is usually equally fast and results in
10801          shorter code.  */
10802       if (!TARGET_INLINE_ALL_STRINGOPS && align < UNITS_PER_WORD)
10803 	return 0;
10804 
10805       if (TARGET_SINGLE_STRINGOP)
10806 	emit_insn (gen_cld ());
10807 
10808       countreg2 = gen_reg_rtx (Pmode);
10809       countreg = copy_to_mode_reg (counter_mode, count_exp);
10810       zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
10811 
10812       if (count == 0 && align < desired_alignment)
10813 	{
10814 	  label = gen_label_rtx ();
10815 	  emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
10816 				   LEU, 0, counter_mode, 1, label);
10817 	}
10818       if (align <= 1)
10819 	{
10820 	  rtx label = ix86_expand_aligntest (destreg, 1);
10821 	  emit_insn (gen_strsetqi (destreg,
10822 				   gen_rtx_SUBREG (QImode, zeroreg, 0)));
10823 	  ix86_adjust_counter (countreg, 1);
10824 	  emit_label (label);
10825 	  LABEL_NUSES (label) = 1;
10826 	}
10827       if (align <= 2)
10828 	{
10829 	  rtx label = ix86_expand_aligntest (destreg, 2);
10830 	  emit_insn (gen_strsethi (destreg,
10831 				   gen_rtx_SUBREG (HImode, zeroreg, 0)));
10832 	  ix86_adjust_counter (countreg, 2);
10833 	  emit_label (label);
10834 	  LABEL_NUSES (label) = 1;
10835 	}
10836       if (align <= 4 && desired_alignment > 4)
10837 	{
10838 	  rtx label = ix86_expand_aligntest (destreg, 4);
10839 	  emit_insn (gen_strsetsi (destreg, (TARGET_64BIT
10840 					     ? gen_rtx_SUBREG (SImode, zeroreg, 0)
10841 					     : zeroreg)));
10842 	  ix86_adjust_counter (countreg, 4);
10843 	  emit_label (label);
10844 	  LABEL_NUSES (label) = 1;
10845 	}
10846 
10847       if (label && desired_alignment > 4 && !TARGET_64BIT)
10848 	{
10849 	  emit_label (label);
10850 	  LABEL_NUSES (label) = 1;
10851 	  label = NULL_RTX;
10852 	}
10853 
10854       if (!TARGET_SINGLE_STRINGOP)
10855 	emit_insn (gen_cld ());
10856       if (TARGET_64BIT)
10857 	{
10858 	  emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
10859 				  GEN_INT (3)));
10860 	  emit_insn (gen_rep_stosdi_rex64 (destreg, countreg2, zeroreg,
10861 					   destreg, countreg2));
10862 	}
10863       else
10864 	{
10865 	  emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
10866 	  emit_insn (gen_rep_stossi (destreg, countreg2, zeroreg,
10867 				     destreg, countreg2));
10868 	}
10869       if (label)
10870 	{
10871 	  emit_label (label);
10872 	  LABEL_NUSES (label) = 1;
10873 	}
10874 
10875       if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
10876 	emit_insn (gen_strsetsi (destreg,
10877 				 gen_rtx_SUBREG (SImode, zeroreg, 0)));
10878       if (TARGET_64BIT && (align <= 4 || count == 0))
10879 	{
10880 	  rtx label = ix86_expand_aligntest (countreg, 4);
10881 	  emit_insn (gen_strsetsi (destreg,
10882 				   gen_rtx_SUBREG (SImode, zeroreg, 0)));
10883 	  emit_label (label);
10884 	  LABEL_NUSES (label) = 1;
10885 	}
10886       if (align > 2 && count != 0 && (count & 2))
10887 	emit_insn (gen_strsethi (destreg,
10888 				 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10889       if (align <= 2 || count == 0)
10890 	{
10891 	  rtx label = ix86_expand_aligntest (countreg, 2);
10892 	  emit_insn (gen_strsethi (destreg,
10893 				   gen_rtx_SUBREG (HImode, zeroreg, 0)));
10894 	  emit_label (label);
10895 	  LABEL_NUSES (label) = 1;
10896 	}
10897       if (align > 1 && count != 0 && (count & 1))
10898 	emit_insn (gen_strsetqi (destreg,
10899 				 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10900       if (align <= 1 || count == 0)
10901 	{
10902 	  rtx label = ix86_expand_aligntest (countreg, 1);
10903 	  emit_insn (gen_strsetqi (destreg,
10904 				   gen_rtx_SUBREG (QImode, zeroreg, 0)));
10905 	  emit_label (label);
10906 	  LABEL_NUSES (label) = 1;
10907 	}
10908     }
10909   return 1;
10910 }
10911 /* Expand strlen.  */
10912 int
10913 ix86_expand_strlen (out, src, eoschar, align)
10914      rtx out, src, eoschar, align;
10915 {
10916   rtx addr, scratch1, scratch2, scratch3, scratch4;
10917 
10918   /* The generic case of the strlen expander is long.  Avoid its
10919      expansion unless TARGET_INLINE_ALL_STRINGOPS.  */
10920 
10921   if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
10922       && !TARGET_INLINE_ALL_STRINGOPS
10923       && !optimize_size
10924       && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
10925     return 0;
10926 
10927   addr = force_reg (Pmode, XEXP (src, 0));
10928   scratch1 = gen_reg_rtx (Pmode);
10929 
10930   if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
10931       && !optimize_size)
10932     {
10933       /* Well it seems that some optimizer does not combine a call like
10934          foo(strlen(bar), strlen(bar));
10935          when the move and the subtraction are done here.  It does calculate
10936          the length just once when these instructions are done inside of
10937          output_strlen_unroll().  But I think since &bar[strlen(bar)] is
10938          often used and I use one fewer register for the lifetime of
10939          output_strlen_unroll() this is better.  */
10940 
10941       emit_move_insn (out, addr);
10942 
10943       ix86_expand_strlensi_unroll_1 (out, align);
10944 
10945       /* strlensi_unroll_1 returns the address of the zero at the end of
10946          the string, like memchr(), so compute the length by subtracting
10947          the start address.  */
10948       if (TARGET_64BIT)
10949 	emit_insn (gen_subdi3 (out, out, addr));
10950       else
10951 	emit_insn (gen_subsi3 (out, out, addr));
10952     }
10953   else
10954     {
10955       scratch2 = gen_reg_rtx (Pmode);
10956       scratch3 = gen_reg_rtx (Pmode);
10957       scratch4 = force_reg (Pmode, constm1_rtx);
10958 
10959       emit_move_insn (scratch3, addr);
10960       eoschar = force_reg (QImode, eoschar);
10961 
10962       emit_insn (gen_cld ());
10963       if (TARGET_64BIT)
10964 	{
10965 	  emit_insn (gen_strlenqi_rex_1 (scratch1, scratch3, eoschar,
10966 					 align, scratch4, scratch3));
10967 	  emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
10968 	  emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
10969 	}
10970       else
10971 	{
10972 	  emit_insn (gen_strlenqi_1 (scratch1, scratch3, eoschar,
10973 				     align, scratch4, scratch3));
10974 	  emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
10975 	  emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
10976 	}
10977     }
10978   return 1;
10979 }
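
/* Illustrative note, not part of the original file: the arithmetic behind
   the scasb path above.  repnz ; scasb starts with the count register at
   -1 and decrements it once per byte examined, including the terminator,
   so afterwards

       ecx = -1 - (len + 1) = ~(len + 1)

   and therefore len = ~ecx - 1, which is exactly the one_cmpl followed
   by the add of -1 emitted for both targets.  */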
10980 
10981 /* Expand the appropriate insns for doing strlen if not just doing
10982    repnz; scasb
10983 
10984    out = result, initialized with the start address
10985    align_rtx = alignment of the address.
10986    scratch = scratch register, initialized with the start address when
10987 	not aligned, otherwise undefined
10988 
10989    This is just the body.  It needs the initializations mentioned above and
10990    some address computing at the end.  These things are done in i386.md.  */
10991 
10992 static void
10993 ix86_expand_strlensi_unroll_1 (out, align_rtx)
10994      rtx out, align_rtx;
10995 {
10996   int align;
10997   rtx tmp;
10998   rtx align_2_label = NULL_RTX;
10999   rtx align_3_label = NULL_RTX;
11000   rtx align_4_label = gen_label_rtx ();
11001   rtx end_0_label = gen_label_rtx ();
11002   rtx mem;
11003   rtx tmpreg = gen_reg_rtx (SImode);
11004   rtx scratch = gen_reg_rtx (SImode);
11005 
11006   align = 0;
11007   if (GET_CODE (align_rtx) == CONST_INT)
11008     align = INTVAL (align_rtx);
11009 
11010   /* Loop to check 1..3 bytes for null to get an aligned pointer.  */
11011 
11012   /* Is there a known alignment and is it less than 4?  */
11013   if (align < 4)
11014     {
11015       rtx scratch1 = gen_reg_rtx (Pmode);
11016       emit_move_insn (scratch1, out);
11017       /* Is there a known alignment and is it not 2? */
11018       if (align != 2)
11019 	{
11020 	  align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
11021 	  align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
11022 
11023 	  /* Leave just the 3 lower bits.  */
11024 	  align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
11025 				    NULL_RTX, 0, OPTAB_WIDEN);
11026 
11027 	  emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
11028 				   Pmode, 1, align_4_label);
11029 	  emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), EQ, NULL,
11030 				   Pmode, 1, align_2_label);
11031 	  emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), GTU, NULL,
11032 				   Pmode, 1, align_3_label);
11033 	}
11034       else
11035         {
11036 	  /* Since the alignment is 2, we have to check 2 or 0 bytes;
11037 	     check whether it is aligned to 4 bytes.  */
11038 
11039 	  align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (2),
11040 				    NULL_RTX, 0, OPTAB_WIDEN);
11041 
11042 	  emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
11043 				   Pmode, 1, align_4_label);
11044         }
11045 
11046       mem = gen_rtx_MEM (QImode, out);
11047 
11048       /* Now compare the bytes.  */
11049 
11050       /* Compare the first n unaligned bytes on a byte-by-byte basis.  */
11051       emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
11052 			       QImode, 1, end_0_label);
11053 
11054       /* Increment the address.  */
11055       if (TARGET_64BIT)
11056 	emit_insn (gen_adddi3 (out, out, const1_rtx));
11057       else
11058 	emit_insn (gen_addsi3 (out, out, const1_rtx));
11059 
11060       /* Not needed with an alignment of 2 */
11061       if (align != 2)
11062 	{
11063 	  emit_label (align_2_label);
11064 
11065 	  emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
11066 				   end_0_label);
11067 
11068 	  if (TARGET_64BIT)
11069 	    emit_insn (gen_adddi3 (out, out, const1_rtx));
11070 	  else
11071 	    emit_insn (gen_addsi3 (out, out, const1_rtx));
11072 
11073 	  emit_label (align_3_label);
11074 	}
11075 
11076       emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
11077 			       end_0_label);
11078 
11079       if (TARGET_64BIT)
11080 	emit_insn (gen_adddi3 (out, out, const1_rtx));
11081       else
11082 	emit_insn (gen_addsi3 (out, out, const1_rtx));
11083     }
11084 
11085   /* Generate a loop to check 4 bytes at a time.  It is not a good idea to
11086      align this loop; that only enlarges the program and does not help
11087      speed it up.  */
11088   emit_label (align_4_label);
11089 
11090   mem = gen_rtx_MEM (SImode, out);
11091   emit_move_insn (scratch, mem);
11092   if (TARGET_64BIT)
11093     emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
11094   else
11095     emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
11096 
11097   /* This formula yields a nonzero result iff one of the bytes is zero.
11098      This saves three branches inside the loop and many cycles.  */
11099 
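  /* Worked example (illustrative, not from the original file): for
     scratch = 0x12340078 the insns below compute

	tmpreg  = scratch + (-0x01010101)  = 0x1132ff77
	scratch = ~scratch                 = 0xedcbff87
	tmpreg &= scratch                  = 0x0102ff07
	tmpreg &= 0x80808080               = 0x00008000

     leaving 0x80 exactly at the position of the zero byte and zero in
     every position whose byte was nonzero.  */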
11100   emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
11101   emit_insn (gen_one_cmplsi2 (scratch, scratch));
11102   emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
11103   emit_insn (gen_andsi3 (tmpreg, tmpreg,
11104 			 gen_int_mode (0x80808080, SImode)));
11105   emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
11106 			   align_4_label);
11107 
11108   if (TARGET_CMOVE)
11109     {
11110        rtx reg = gen_reg_rtx (SImode);
11111        rtx reg2 = gen_reg_rtx (Pmode);
11112        emit_move_insn (reg, tmpreg);
11113        emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
11114 
11115        /* If zero is not in the first two bytes, move two bytes forward.  */
11116        emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
11117        tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11118        tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
11119        emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
11120 			       gen_rtx_IF_THEN_ELSE (SImode, tmp,
11121 						     reg,
11122 						     tmpreg)));
11123        /* Emit lea manually to avoid clobbering of flags.  */
11124        emit_insn (gen_rtx_SET (SImode, reg2,
11125 			       gen_rtx_PLUS (Pmode, out, GEN_INT (2))));
11126 
11127        tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11128        tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
11129        emit_insn (gen_rtx_SET (VOIDmode, out,
11130 			       gen_rtx_IF_THEN_ELSE (Pmode, tmp,
11131 						     reg2,
11132 						     out)));
11133 
11134     }
11135   else
11136     {
11137        rtx end_2_label = gen_label_rtx ();
11138        /* Is zero in the first two bytes? */
11139 
11140        emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
11141        tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11142        tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
11143        tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
11144                             gen_rtx_LABEL_REF (VOIDmode, end_2_label),
11145                             pc_rtx);
11146        tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
11147        JUMP_LABEL (tmp) = end_2_label;
11148 
11149        /* Not in the first two.  Move two bytes forward.  */
11150        emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
11151        if (TARGET_64BIT)
11152 	 emit_insn (gen_adddi3 (out, out, GEN_INT (2)));
11153        else
11154 	 emit_insn (gen_addsi3 (out, out, GEN_INT (2)));
11155 
11156        emit_label (end_2_label);
11157 
11158     }
11159 
11160   /* Avoid branch in fixing the byte.  */
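  /* Illustrative note, not part of the original file: here OUT points
     four bytes past the start of the byte pair containing the zero, and
     bit 7 of TMPREG's low byte is set iff the zero is the pair's lower
     byte.  Adding that byte to itself moves bit 7 into the carry flag,
     and the subtract-with-borrow of 3 backs OUT up by 4 or 3 bytes,
     leaving it pointing exactly at the terminating byte.  */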
11161   tmpreg = gen_lowpart (QImode, tmpreg);
11162   emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
11163   if (TARGET_64BIT)
11164     emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3)));
11165   else
11166     emit_insn (gen_subsi3_carry (out, out, GEN_INT (3)));
11167 
11168   emit_label (end_0_label);
11169 }
11170 
11171 void
11172 ix86_expand_call (retval, fnaddr, callarg1, callarg2, pop)
11173      rtx retval, fnaddr, callarg1, callarg2, pop;
11174 {
11175   rtx use = NULL, call;
11176 
11177   if (pop == const0_rtx)
11178     pop = NULL;
11179   if (TARGET_64BIT && pop)
11180     abort ();
11181 
11182 #if TARGET_MACHO
11183   if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
11184     fnaddr = machopic_indirect_call_target (fnaddr);
11185 #else
11186   /* Static functions and indirect calls don't need the pic register.  */
11187   if (! TARGET_64BIT && flag_pic
11188       && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
11189       && ! SYMBOL_REF_FLAG (XEXP (fnaddr, 0)))
11190     use_reg (&use, pic_offset_table_rtx);
11191 
11192   if (TARGET_64BIT && INTVAL (callarg2) >= 0)
11193     {
11194       rtx al = gen_rtx_REG (QImode, 0);
11195       emit_move_insn (al, callarg2);
11196       use_reg (&use, al);
11197     }
11198 #endif /* TARGET_MACHO */
11199 
11200   if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
11201     {
11202       fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
11203       fnaddr = gen_rtx_MEM (QImode, fnaddr);
11204     }
11205 
11206   call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
11207   if (retval)
11208     call = gen_rtx_SET (VOIDmode, retval, call);
11209   if (pop)
11210     {
11211       pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
11212       pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
11213       call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
11214     }
11215 
11216   call = emit_call_insn (call);
11217   if (use)
11218     CALL_INSN_FUNCTION_USAGE (call) = use;
11219 }
11220 
11221 
11222 /* Clear stack slot assignments remembered from previous functions.
11223    This is called from INIT_EXPANDERS once before RTL is emitted for each
11224    function.  */
11225 
11226 static struct machine_function *
11227 ix86_init_machine_status ()
11228 {
11229   return ggc_alloc_cleared (sizeof (struct machine_function));
11230 }
11231 
11232 /* Return a MEM corresponding to a stack slot with mode MODE.
11233    Allocate a new slot if necessary.
11234 
11235    The RTL for a function can have several slots available: N is
11236    which slot to use.  */
11237 
11238 rtx
11239 assign_386_stack_local (mode, n)
11240      enum machine_mode mode;
11241      int n;
11242 {
11243   if (n < 0 || n >= MAX_386_STACK_LOCALS)
11244     abort ();
11245 
11246   if (ix86_stack_locals[(int) mode][n] == NULL_RTX)
11247     ix86_stack_locals[(int) mode][n]
11248       = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
11249 
11250   return ix86_stack_locals[(int) mode][n];
11251 }
11252 
11253 /* Construct the SYMBOL_REF for the tls_get_addr function.  */
11254 
11255 static GTY(()) rtx ix86_tls_symbol;
11256 rtx
11257 ix86_tls_get_addr ()
11258 {
11259 
11260   if (!ix86_tls_symbol)
11261     {
11262       ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
11263 					    (TARGET_GNU_TLS && !TARGET_64BIT)
11264 					    ? "___tls_get_addr"
11265 					    : "__tls_get_addr");
11266     }
11267 
11268   return ix86_tls_symbol;
11269 }
11270 
11271 /* Calculate the length of the memory address in the instruction
11272    encoding.  Does not include the one-byte modrm, opcode, or prefix.  */
11273 
11274 static int
11275 memory_address_length (addr)
11276      rtx addr;
11277 {
11278   struct ix86_address parts;
11279   rtx base, index, disp;
11280   int len;
11281 
11282   if (GET_CODE (addr) == PRE_DEC
11283       || GET_CODE (addr) == POST_INC
11284       || GET_CODE (addr) == PRE_MODIFY
11285       || GET_CODE (addr) == POST_MODIFY)
11286     return 0;
11287 
11288   if (! ix86_decompose_address (addr, &parts))
11289     abort ();
11290 
11291   base = parts.base;
11292   index = parts.index;
11293   disp = parts.disp;
11294   len = 0;
11295 
11296   /* Rule of thumb:
11297        - esp as the base always wants an index,
11298        - ebp as the base always wants a displacement.  */
11299 
11300   /* Register Indirect.  */
11301   if (base && !index && !disp)
11302     {
11303       /* esp (for its index) and ebp (for its displacement) need
11304 	 the two-byte modrm form.  */
11305       if (addr == stack_pointer_rtx
11306 	  || addr == arg_pointer_rtx
11307 	  || addr == frame_pointer_rtx
11308 	  || addr == hard_frame_pointer_rtx)
11309 	len = 1;
11310     }
11311 
11312   /* Direct Addressing.  */
11313   else if (disp && !base && !index)
11314     len = 4;
11315 
11316   else
11317     {
11318       /* Find the length of the displacement constant.  */
11319       if (disp)
11320 	{
11321 	  if (GET_CODE (disp) == CONST_INT
11322 	      && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K')
11323 	      && base)
11324 	    len = 1;
11325 	  else
11326 	    len = 4;
11327 	}
11328       /* ebp always wants a displacement.  */
11329       else if (base == hard_frame_pointer_rtx)
11330         len = 1;
11331 
11332       /* An index requires the two-byte modrm form...  */
11333       if (index
11334 	  /* ...like esp, which always wants an index.  */
11335 	  || base == stack_pointer_rtx
11336 	  || base == arg_pointer_rtx
11337 	  || base == frame_pointer_rtx)
11338 	len += 1;
11339     }
11340 
11341   return len;
11342 }
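
/* Illustrative examples, not part of the original file: the lengths
   computed for some common 32-bit addressing forms (bytes beyond the
   opcode and the one-byte modrm):

       (%eax)            0    plain register indirect
       (%esp)            1    needs the SIB byte
       8(%ebp)           1    disp8
       foo               4    disp32 absolute
       8(%eax,%ebx,4)    2    SIB byte plus disp8  */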
11343 
11344 /* Compute default value for "length_immediate" attribute.  When SHORTFORM
11345    is set, expect that the insn has an 8-bit immediate alternative.  */
11346 int
11347 ix86_attr_length_immediate_default (insn, shortform)
11348      rtx insn;
11349      int shortform;
11350 {
11351   int len = 0;
11352   int i;
11353   extract_insn_cached (insn);
11354   for (i = recog_data.n_operands - 1; i >= 0; --i)
11355     if (CONSTANT_P (recog_data.operand[i]))
11356       {
11357 	if (len)
11358 	  abort ();
11359 	if (shortform
11360 	    && GET_CODE (recog_data.operand[i]) == CONST_INT
11361 	    && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
11362 	  len = 1;
11363 	else
11364 	  {
11365 	    switch (get_attr_mode (insn))
11366 	      {
11367 		case MODE_QI:
11368 		  len+=1;
11369 		  break;
11370 		case MODE_HI:
11371 		  len+=2;
11372 		  break;
11373 		case MODE_SI:
11374 		  len+=4;
11375 		  break;
11376 		/* Immediates for DImode insns are encoded as 32-bit sign-extended values.  */
11377 		case MODE_DI:
11378 		  len+=4;
11379 		  break;
11380 		default:
11381 		  fatal_insn ("unknown insn mode", insn);
11382 	      }
11383 	  }
11384       }
11385   return len;
11386 }
11387 /* Compute default value for "length_address" attribute.  */
11388 int
11389 ix86_attr_length_address_default (insn)
11390      rtx insn;
11391 {
11392   int i;
11393 
11394   if (get_attr_type (insn) == TYPE_LEA)
11395     {
11396       rtx set = PATTERN (insn);
11397       if (GET_CODE (set) == SET)
11398 	;
11399       else if (GET_CODE (set) == PARALLEL
11400 	       && GET_CODE (XVECEXP (set, 0, 0)) == SET)
11401 	set = XVECEXP (set, 0, 0);
11402       else
11403 	{
11404 #ifdef ENABLE_CHECKING
11405 	  abort ();
11406 #endif
11407 	  return 0;
11408 	}
11409 
11410       return memory_address_length (SET_SRC (set));
11411     }
11412 
11413   extract_insn_cached (insn);
11414   for (i = recog_data.n_operands - 1; i >= 0; --i)
11415     if (GET_CODE (recog_data.operand[i]) == MEM)
11416       {
11417 	return memory_address_length (XEXP (recog_data.operand[i], 0));
11419       }
11420   return 0;
11421 }
11422 
11423 /* Return the maximum number of instructions a cpu can issue.  */
11424 
11425 static int
11426 ix86_issue_rate ()
11427 {
11428   switch (ix86_cpu)
11429     {
11430     case PROCESSOR_PENTIUM:
11431     case PROCESSOR_K6:
11432       return 2;
11433 
11434     case PROCESSOR_PENTIUMPRO:
11435     case PROCESSOR_PENTIUM4:
11436     case PROCESSOR_ATHLON:
11437       return 3;
11438 
11439     default:
11440       return 1;
11441     }
11442 }
11443 
11444 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
11445    by DEP_INSN and nothing else set by DEP_INSN.  */
11446 
11447 static int
11448 ix86_flags_dependant (insn, dep_insn, insn_type)
11449      rtx insn, dep_insn;
11450      enum attr_type insn_type;
11451 {
11452   rtx set, set2;
11453 
11454   /* Simplify the test for uninteresting insns.  */
11455   if (insn_type != TYPE_SETCC
11456       && insn_type != TYPE_ICMOV
11457       && insn_type != TYPE_FCMOV
11458       && insn_type != TYPE_IBR)
11459     return 0;
11460 
11461   if ((set = single_set (dep_insn)) != 0)
11462     {
11463       set = SET_DEST (set);
11464       set2 = NULL_RTX;
11465     }
11466   else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
11467 	   && XVECLEN (PATTERN (dep_insn), 0) == 2
11468 	   && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
11469 	   && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
11470     {
11471       set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
11472       set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
11473     }
11474   else
11475     return 0;
11476 
11477   if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
11478     return 0;
11479 
11480   /* This test is true if the dependent insn reads the flags but
11481      not any other potentially set register.  */
11482   if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
11483     return 0;
11484 
11485   if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
11486     return 0;
11487 
11488   return 1;
11489 }
11490 
11491 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
11492    address with operands set by DEP_INSN.  */
11493 
11494 static int
11495 ix86_agi_dependant (insn, dep_insn, insn_type)
11496      rtx insn, dep_insn;
11497      enum attr_type insn_type;
11498 {
11499   rtx addr;
11500 
11501   if (insn_type == TYPE_LEA
11502       && TARGET_PENTIUM)
11503     {
11504       addr = PATTERN (insn);
11505       if (GET_CODE (addr) == SET)
11506 	;
11507       else if (GET_CODE (addr) == PARALLEL
11508 	       && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
11509 	addr = XVECEXP (addr, 0, 0);
11510       else
11511 	abort ();
11512       addr = SET_SRC (addr);
11513     }
11514   else
11515     {
11516       int i;
11517       extract_insn_cached (insn);
11518       for (i = recog_data.n_operands - 1; i >= 0; --i)
11519 	if (GET_CODE (recog_data.operand[i]) == MEM)
11520 	  {
11521 	    addr = XEXP (recog_data.operand[i], 0);
11522 	    goto found;
11523 	  }
11524       return 0;
11525     found:;
11526     }
11527 
11528   return modified_in_p (addr, dep_insn);
11529 }
11530 
11531 static int
11532 ix86_adjust_cost (insn, link, dep_insn, cost)
11533      rtx insn, link, dep_insn;
11534      int cost;
11535 {
11536   enum attr_type insn_type, dep_insn_type;
11537   enum attr_memory memory, dep_memory;
11538   rtx set, set2;
11539   int dep_insn_code_number;
11540 
11541   /* Anti- and output dependencies have zero cost on all CPUs.  */
11542   if (REG_NOTE_KIND (link) != 0)
11543     return 0;
11544 
11545   dep_insn_code_number = recog_memoized (dep_insn);
11546 
11547   /* If we can't recognize the insns, we can't really do anything.  */
11548   if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
11549     return cost;
11550 
11551   insn_type = get_attr_type (insn);
11552   dep_insn_type = get_attr_type (dep_insn);
11553 
11554   switch (ix86_cpu)
11555     {
11556     case PROCESSOR_PENTIUM:
11557       /* Address Generation Interlock adds a cycle of latency.  */
11558       if (ix86_agi_dependant (insn, dep_insn, insn_type))
11559 	cost += 1;
11560 
11561       /* ??? Compares pair with jump/setcc.  */
11562       if (ix86_flags_dependant (insn, dep_insn, insn_type))
11563 	cost = 0;
11564 
11565       /* Floating point stores require the value to be ready one cycle earlier.  */
11566       if (insn_type == TYPE_FMOV
11567 	  && get_attr_memory (insn) == MEMORY_STORE
11568 	  && !ix86_agi_dependant (insn, dep_insn, insn_type))
11569 	cost += 1;
11570       break;
11571 
11572     case PROCESSOR_PENTIUMPRO:
11573       memory = get_attr_memory (insn);
11574       dep_memory = get_attr_memory (dep_insn);
11575 
11576       /* Since we can't represent delayed latencies of load+operation,
11577 	 increase the cost here for non-imov insns.  */
11578       if (dep_insn_type != TYPE_IMOV
11579           && dep_insn_type != TYPE_FMOV
11580           && (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH))
11581 	cost += 1;
11582 
11583       /* INT->FP conversion is expensive.  */
11584       if (get_attr_fp_int_src (dep_insn))
11585 	cost += 5;
11586 
11587       /* There is one cycle extra latency between an FP op and a store.  */
11588       if (insn_type == TYPE_FMOV
11589 	  && (set = single_set (dep_insn)) != NULL_RTX
11590 	  && (set2 = single_set (insn)) != NULL_RTX
11591 	  && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
11592 	  && GET_CODE (SET_DEST (set2)) == MEM)
11593 	cost += 1;
11594 
11595       /* Show ability of reorder buffer to hide latency of load by executing
11596 	 in parallel with previous instruction in case
11597 	 previous instruction is not needed to compute the address.  */
11598       if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
11599 	  && !ix86_agi_dependant (insn, dep_insn, insn_type))
11600  	{
11601 	  /* Claim moves to take one cycle, as the core can issue one load
11602 	     at a time and the next load can start a cycle later.  */
11603 	  if (dep_insn_type == TYPE_IMOV
11604 	      || dep_insn_type == TYPE_FMOV)
11605 	    cost = 1;
11606 	  else if (cost > 1)
11607 	    cost--;
11608 	}
11609       break;
11610 
11611     case PROCESSOR_K6:
11612       memory = get_attr_memory (insn);
11613       dep_memory = get_attr_memory (dep_insn);
11614       /* The esp dependency is resolved before the instruction is really
11615          finished.  */
11616       if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
11617 	  && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
11618 	return 1;
11619 
11620       /* Since we can't represent delayed latencies of load+operation,
11621 	 increase the cost here for non-imov insns.  */
11622       if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
11623 	cost += (dep_insn_type != TYPE_IMOV) ? 2 : 1;
11624 
11625       /* INT->FP conversion is expensive.  */
11626       if (get_attr_fp_int_src (dep_insn))
11627 	cost += 5;
11628 
11629       /* Show ability of reorder buffer to hide latency of load by executing
11630 	 in parallel with previous instruction in case
11631 	 previous instruction is not needed to compute the address.  */
11632       if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
11633 	  && !ix86_agi_dependant (insn, dep_insn, insn_type))
11634  	{
11635 	  /* Claim moves to take one cycle, as the core can issue one load
11636 	     at a time and the next load can start a cycle later.  */
11637 	  if (dep_insn_type == TYPE_IMOV
11638 	      || dep_insn_type == TYPE_FMOV)
11639 	    cost = 1;
11640 	  else if (cost > 2)
11641 	    cost -= 2;
11642 	  else
11643 	    cost = 1;
11644 	}
11645       break;
11646 
11647     case PROCESSOR_ATHLON:
11648       memory = get_attr_memory (insn);
11649       dep_memory = get_attr_memory (dep_insn);
11650 
11651       if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
11652 	{
11653 	  if (dep_insn_type == TYPE_IMOV || dep_insn_type == TYPE_FMOV)
11654 	    cost += 2;
11655 	  else
11656 	    cost += 3;
11657         }
11658       /* Show ability of reorder buffer to hide latency of load by executing
11659 	 in parallel with previous instruction in case
11660 	 previous instruction is not needed to compute the address.  */
11661       if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
11662 	  && !ix86_agi_dependant (insn, dep_insn, insn_type))
11663  	{
11664 	  /* Claim moves to take one cycle, as the core can issue one load
11665 	     at a time and the next load can start a cycle later.  */
11666 	  if (dep_insn_type == TYPE_IMOV
11667 	      || dep_insn_type == TYPE_FMOV)
11668 	    cost = 0;
11669 	  else if (cost >= 3)
11670 	    cost -= 3;
11671 	  else
11672 	    cost = 0;
11673 	}
      break;
11674 
11675     default:
11676       break;
11677     }
11678 
11679   return cost;
11680 }
11681 
11682 static union
11683 {
11684   struct ppro_sched_data
11685   {
11686     rtx decode[3];
11687     int issued_this_cycle;
11688   } ppro;
11689 } ix86_sched_data;
11690 
11691 static enum attr_ppro_uops
11692 ix86_safe_ppro_uops (insn)
11693      rtx insn;
11694 {
11695   if (recog_memoized (insn) >= 0)
11696     return get_attr_ppro_uops (insn);
11697   else
11698     return PPRO_UOPS_MANY;
11699 }
11700 
11701 static void
11702 ix86_dump_ppro_packet (dump)
11703      FILE *dump;
11704 {
11705   if (ix86_sched_data.ppro.decode[0])
11706     {
11707       fprintf (dump, "PPRO packet: %d",
11708 	       INSN_UID (ix86_sched_data.ppro.decode[0]));
11709       if (ix86_sched_data.ppro.decode[1])
11710 	fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[1]));
11711       if (ix86_sched_data.ppro.decode[2])
11712 	fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[2]));
11713       fputc ('\n', dump);
11714     }
11715 }
11716 
11717 /* We're beginning a new block.  Initialize data structures as necessary.  */
11718 
11719 static void
11720 ix86_sched_init (dump, sched_verbose, veclen)
11721      FILE *dump ATTRIBUTE_UNUSED;
11722      int sched_verbose ATTRIBUTE_UNUSED;
11723      int veclen ATTRIBUTE_UNUSED;
11724 {
11725   memset (&ix86_sched_data, 0, sizeof (ix86_sched_data));
11726 }
11727 
11728 /* Shift INSN to SLOT, and shift everything else down.  */
11729 
11730 static void
11731 ix86_reorder_insn (insnp, slot)
11732      rtx *insnp, *slot;
11733 {
11734   if (insnp != slot)
11735     {
11736       rtx insn = *insnp;
11737       do
11738 	insnp[0] = insnp[1];
11739       while (++insnp != slot);
11740       *insnp = insn;
11741     }
11742 }
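
/* Illustrative note, not part of the original file: this is a rotate, not
   a swap.  For ready = {A, B, C, D} with D the current head of the queue,
   ix86_reorder_insn (&ready[1], &ready[3]) yields {A, C, D, B}, promoting
   B to the head while preserving the relative order of the rest.  */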
11743 
11744 static void
11745 ix86_sched_reorder_ppro (ready, e_ready)
11746      rtx *ready;
11747      rtx *e_ready;
11748 {
11749   rtx decode[3];
11750   enum attr_ppro_uops cur_uops;
11751   int issued_this_cycle;
11752   rtx *insnp;
11753   int i;
11754 
11755   /* At this point .ppro.decode contains the state of the three
11756      decoders from last "cycle".  That is, those insns that were
11757      actually independent.  But here we're scheduling for the
11758      decoder, and we may find things that are decodable in the
11759      same cycle.  */
11760 
11761   memcpy (decode, ix86_sched_data.ppro.decode, sizeof (decode));
11762   issued_this_cycle = 0;
11763 
11764   insnp = e_ready;
11765   cur_uops = ix86_safe_ppro_uops (*insnp);
11766 
11767   /* If the decoders are empty, and we've a complex insn at the
11768      head of the priority queue, let it issue without complaint.  */
11769   if (decode[0] == NULL)
11770     {
11771       if (cur_uops == PPRO_UOPS_MANY)
11772 	{
11773 	  decode[0] = *insnp;
11774 	  goto ppro_done;
11775 	}
11776 
11777       /* Otherwise, search for a 2-4 uop insn to issue.  */
11778       while (cur_uops != PPRO_UOPS_FEW)
11779 	{
11780 	  if (insnp == ready)
11781 	    break;
11782 	  cur_uops = ix86_safe_ppro_uops (*--insnp);
11783 	}
11784 
11785       /* If so, move it to the head of the line.  */
11786       if (cur_uops == PPRO_UOPS_FEW)
11787 	ix86_reorder_insn (insnp, e_ready);
11788 
11789       /* Issue the head of the queue.  */
11790       issued_this_cycle = 1;
11791       decode[0] = *e_ready--;
11792     }
11793 
11794   /* Look for simple insns to fill in the other two slots.  */
11795   for (i = 1; i < 3; ++i)
11796     if (decode[i] == NULL)
11797       {
11798 	if (ready > e_ready)
11799 	  goto ppro_done;
11800 
11801 	insnp = e_ready;
11802 	cur_uops = ix86_safe_ppro_uops (*insnp);
11803 	while (cur_uops != PPRO_UOPS_ONE)
11804 	  {
11805 	    if (insnp == ready)
11806 	      break;
11807 	    cur_uops = ix86_safe_ppro_uops (*--insnp);
11808 	  }
11809 
11810 	/* Found one.  Move it to the head of the queue and issue it.  */
11811 	if (cur_uops == PPRO_UOPS_ONE)
11812 	  {
11813 	    ix86_reorder_insn (insnp, e_ready);
11814 	    decode[i] = *e_ready--;
11815 	    issued_this_cycle++;
11816 	    continue;
11817 	  }
11818 
11819 	/* ??? Didn't find one.  Ideally, here we would do a lazy split
11820 	   of 2-uop insns, issue one and queue the other.  */
11821       }
11822 
11823  ppro_done:
11824   if (issued_this_cycle == 0)
11825     issued_this_cycle = 1;
11826   ix86_sched_data.ppro.issued_this_cycle = issued_this_cycle;
11827 }
11828 
11829 /* We are about to begin issuing insns for this clock cycle.
11830    Override the default sort algorithm to better slot instructions.  */
11831 static int
11832 ix86_sched_reorder (dump, sched_verbose, ready, n_readyp, clock_var)
11833      FILE *dump ATTRIBUTE_UNUSED;
11834      int sched_verbose ATTRIBUTE_UNUSED;
11835      rtx *ready;
11836      int *n_readyp;
11837      int clock_var ATTRIBUTE_UNUSED;
11838 {
11839   int n_ready = *n_readyp;
11840   rtx *e_ready = ready + n_ready - 1;
11841 
11842   /* Make sure to go ahead and initialize key items in
11843      ix86_sched_data if we are not going to bother trying to
11844      reorder the ready queue.  */
11845   if (n_ready < 2)
11846     {
11847       ix86_sched_data.ppro.issued_this_cycle = 1;
11848       goto out;
11849     }
11850 
11851   switch (ix86_cpu)
11852     {
11853     default:
11854       break;
11855 
11856     case PROCESSOR_PENTIUMPRO:
11857       ix86_sched_reorder_ppro (ready, e_ready);
11858       break;
11859     }
11860 
11861 out:
11862   return ix86_issue_rate ();
11863 }
11864 
11865 /* We are about to issue INSN.  Return the number of insns left on the
11866    ready queue that can be issued this cycle.  */
11867 
11868 static int
11869 ix86_variable_issue (dump, sched_verbose, insn, can_issue_more)
11870      FILE *dump;
11871      int sched_verbose;
11872      rtx insn;
11873      int can_issue_more;
11874 {
11875   int i;
11876   switch (ix86_cpu)
11877     {
11878     default:
11879       return can_issue_more - 1;
11880 
11881     case PROCESSOR_PENTIUMPRO:
11882       {
11883 	enum attr_ppro_uops uops = ix86_safe_ppro_uops (insn);
11884 
11885 	if (uops == PPRO_UOPS_MANY)
11886 	  {
11887 	    if (sched_verbose)
11888 	      ix86_dump_ppro_packet (dump);
11889 	    ix86_sched_data.ppro.decode[0] = insn;
11890 	    ix86_sched_data.ppro.decode[1] = NULL;
11891 	    ix86_sched_data.ppro.decode[2] = NULL;
11892 	    if (sched_verbose)
11893 	      ix86_dump_ppro_packet (dump);
11894 	    ix86_sched_data.ppro.decode[0] = NULL;
11895 	  }
11896 	else if (uops == PPRO_UOPS_FEW)
11897 	  {
11898 	    if (sched_verbose)
11899 	      ix86_dump_ppro_packet (dump);
11900 	    ix86_sched_data.ppro.decode[0] = insn;
11901 	    ix86_sched_data.ppro.decode[1] = NULL;
11902 	    ix86_sched_data.ppro.decode[2] = NULL;
11903 	  }
11904 	else
11905 	  {
11906 	    for (i = 0; i < 3; ++i)
11907 	      if (ix86_sched_data.ppro.decode[i] == NULL)
11908 		{
11909 		  ix86_sched_data.ppro.decode[i] = insn;
11910 		  break;
11911 		}
11912 	    if (i == 3)
11913 	      abort ();
11914 	    if (i == 2)
11915 	      {
11916 	        if (sched_verbose)
11917 	          ix86_dump_ppro_packet (dump);
11918 		ix86_sched_data.ppro.decode[0] = NULL;
11919 		ix86_sched_data.ppro.decode[1] = NULL;
11920 		ix86_sched_data.ppro.decode[2] = NULL;
11921 	      }
11922 	  }
11923       }
11924       return --ix86_sched_data.ppro.issued_this_cycle;
11925     }
11926 }
11927 
11928 static int
11929 ia32_use_dfa_pipeline_interface ()
11930 {
11931   if (ix86_cpu == PROCESSOR_PENTIUM)
11932     return 1;
11933   return 0;
11934 }
11935 
11936 /* How many alternative schedules to try.  This should be as wide as the
11937    scheduling freedom in the DFA, but no wider.  Making this value too
11938    large results in extra work for the scheduler.  */
11939 
11940 static int
11941 ia32_multipass_dfa_lookahead ()
11942 {
11943   if (ix86_cpu == PROCESSOR_PENTIUM)
11944     return 2;
11945   else
11946     return 0;
11947 }
11948 
11949 
11950 /* Walk through INSNS and look for MEM references whose address is DSTREG or
11951    SRCREG and set the memory attributes to those of DSTREF and SRCREF, as
11952    appropriate.  */
11953 
11954 void
11955 ix86_set_move_mem_attrs (insns, dstref, srcref, dstreg, srcreg)
11956      rtx insns;
11957      rtx dstref, srcref, dstreg, srcreg;
11958 {
11959   rtx insn;
11960 
11961   for (insn = insns; insn != 0 ; insn = NEXT_INSN (insn))
11962     if (INSN_P (insn))
11963       ix86_set_move_mem_attrs_1 (PATTERN (insn), dstref, srcref,
11964 				 dstreg, srcreg);
11965 }
11966 
11967 /* Subroutine of above to actually do the updating by recursively walking
11968    the rtx.  */
11969 
11970 static void
11971 ix86_set_move_mem_attrs_1 (x, dstref, srcref, dstreg, srcreg)
11972      rtx x;
11973      rtx dstref, srcref, dstreg, srcreg;
11974 {
11975   enum rtx_code code = GET_CODE (x);
11976   const char *format_ptr = GET_RTX_FORMAT (code);
11977   int i, j;
11978 
11979   if (code == MEM && XEXP (x, 0) == dstreg)
11980     MEM_COPY_ATTRIBUTES (x, dstref);
11981   else if (code == MEM && XEXP (x, 0) == srcreg)
11982     MEM_COPY_ATTRIBUTES (x, srcref);
11983 
11984   for (i = 0; i < GET_RTX_LENGTH (code); i++, format_ptr++)
11985     {
11986       if (*format_ptr == 'e')
11987 	ix86_set_move_mem_attrs_1 (XEXP (x, i), dstref, srcref,
11988 				   dstreg, srcreg);
11989       else if (*format_ptr == 'E')
11990 	for (j = XVECLEN (x, i) - 1; j >= 0; j--)
11991 	  ix86_set_move_mem_attrs_1 (XVECEXP (x, i, j), dstref, srcref,
11992 				     dstreg, srcreg);
11993     }
11994 }
11995 
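/* Editor's illustrative sketch (hypothetical types, not the GCC rtx
   API): the 'e'/'E' format walk above is the standard recursive
   traversal of an expression tree.  The same shape over a toy node:

   struct sketch_node
   {
     int n_kids;
     struct sketch_node **kids;
   };

   static void
   sketch_walk (struct sketch_node *x, void (*visit) (struct sketch_node *))
   {
     int i;

     visit (x);			    -- act on this node...
     for (i = 0; i < x->n_kids; i++)
       sketch_walk (x->kids[i], visit);  -- ...then recurse into children
   }  */
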
11996 /* Compute the alignment given to a constant that is being placed in memory.
11997    EXP is the constant and ALIGN is the alignment that the object would
11998    ordinarily have.
11999    The value of this function is used instead of that alignment to align
12000    the object.  */
12001 
12002 int
12003 ix86_constant_alignment (exp, align)
12004      tree exp;
12005      int align;
12006 {
12007   if (TREE_CODE (exp) == REAL_CST)
12008     {
12009       if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
12010 	return 64;
12011       else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
12012 	return 128;
12013     }
12014   else if (TREE_CODE (exp) == STRING_CST && TREE_STRING_LENGTH (exp) >= 31
12015 	   && align < 256)
12016     return 256;
12017 
12018   return align;
12019 }
12020 
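/* Editor's worked examples, following directly from the rules above:

     ix86_constant_alignment (<double constant>, 32)         == 64
     ix86_constant_alignment (<40-char string constant>, 8)  == 256

   i.e. a DFmode constant is padded out to 64-bit alignment, and a
   string constant of 31 or more characters to 256 bits, presumably so
   that block and vector operations can use wide aligned accesses.  */
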
12021 /* Compute the alignment for a static variable.
12022    TYPE is the data type, and ALIGN is the alignment that
12023    the object would ordinarily have.  The value of this function is used
12024    instead of that alignment to align the object.  */
12025 
12026 int
12027 ix86_data_alignment (type, align)
12028      tree type;
12029      int align;
12030 {
12031   if (AGGREGATE_TYPE_P (type)
12032        && TYPE_SIZE (type)
12033        && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12034        && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
12035 	   || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
12036     return 256;
12037 
12038   /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
12039      to a 16-byte boundary.  */
12040   if (TARGET_64BIT)
12041     {
12042       if (AGGREGATE_TYPE_P (type)
12043 	   && TYPE_SIZE (type)
12044 	   && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12045 	   && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
12046 	       || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
12047 	return 128;
12048     }
12049 
12050   if (TREE_CODE (type) == ARRAY_TYPE)
12051     {
12052       if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
12053 	return 64;
12054       if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
12055 	return 128;
12056     }
12057   else if (TREE_CODE (type) == COMPLEX_TYPE)
12058     {
12059 
12060       if (TYPE_MODE (type) == DCmode && align < 64)
12061 	return 64;
12062       if (TYPE_MODE (type) == XCmode && align < 128)
12063 	return 128;
12064     }
12065   else if ((TREE_CODE (type) == RECORD_TYPE
12066 	    || TREE_CODE (type) == UNION_TYPE
12067 	    || TREE_CODE (type) == QUAL_UNION_TYPE)
12068 	   && TYPE_FIELDS (type))
12069     {
12070       if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
12071 	return 64;
12072       if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
12073 	return 128;
12074     }
12075   else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
12076 	   || TREE_CODE (type) == INTEGER_TYPE)
12077     {
12078       if (TYPE_MODE (type) == DFmode && align < 64)
12079 	return 64;
12080       if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
12081 	return 128;
12082     }
12083 
12084   return align;
12085 }
12086 
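/* Editor's worked example: by the first clause above, a static
   char buf[40] (TYPE_SIZE = 320 bits >= 256) whose default alignment
   is below 256 bits is aligned to 256 bits (32 bytes); on TARGET_64BIT
   the second clause further guarantees at least 128-bit alignment for
   any aggregate of 16 bytes or more.  */
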
12087 /* Compute the alignment for a local variable.
12088    TYPE is the data type, and ALIGN is the alignment that
12089    the object would ordinarily have.  The value of this function is used
12090    instead of that alignment to align the object.  */
12091 
12092 int
12093 ix86_local_alignment (type, align)
12094      tree type;
12095      int align;
12096 {
12097   /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
12098      to a 16-byte boundary.  */
12099   if (TARGET_64BIT)
12100     {
12101       if (AGGREGATE_TYPE_P (type)
12102 	   && TYPE_SIZE (type)
12103 	   && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12104 	   && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
12105 	       || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
12106 	return 128;
12107     }
12108   if (TREE_CODE (type) == ARRAY_TYPE)
12109     {
12110       if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
12111 	return 64;
12112       if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
12113 	return 128;
12114     }
12115   else if (TREE_CODE (type) == COMPLEX_TYPE)
12116     {
12117       if (TYPE_MODE (type) == DCmode && align < 64)
12118 	return 64;
12119       if (TYPE_MODE (type) == XCmode && align < 128)
12120 	return 128;
12121     }
12122   else if ((TREE_CODE (type) == RECORD_TYPE
12123 	    || TREE_CODE (type) == UNION_TYPE
12124 	    || TREE_CODE (type) == QUAL_UNION_TYPE)
12125 	   && TYPE_FIELDS (type))
12126     {
12127       if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
12128 	return 64;
12129       if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
12130 	return 128;
12131     }
12132   else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
12133 	   || TREE_CODE (type) == INTEGER_TYPE)
12134     {
12135 
12136       if (TYPE_MODE (type) == DFmode && align < 64)
12137 	return 64;
12138       if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
12139 	return 128;
12140     }
12141   return align;
12142 }
12143 
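/* Editor's worked example: under TARGET_64BIT, a local char buf[32]
   (TYPE_SIZE = 256 bits) passes the size test above and so receives
   128-bit (16 byte) stack alignment, matching the x86-64 ABI rule for
   large local arrays.  */
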
12144 /* Emit RTL insns to initialize the variable parts of a trampoline.
12145    FNADDR is an RTX for the address of the function's pure code.
12146    CXT is an RTX for the static chain value for the function.  */
12147 void
12148 x86_initialize_trampoline (tramp, fnaddr, cxt)
12149      rtx tramp, fnaddr, cxt;
12150 {
12151   if (!TARGET_64BIT)
12152     {
12153       /* Compute offset from the end of the jmp to the target function.  */
12154       rtx disp = expand_binop (SImode, sub_optab, fnaddr,
12155 			       plus_constant (tramp, 10),
12156 			       NULL_RTX, 1, OPTAB_DIRECT);
12157       emit_move_insn (gen_rtx_MEM (QImode, tramp),
12158 		      gen_int_mode (0xb9, QImode));
12159       emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
12160       emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
12161 		      gen_int_mode (0xe9, QImode));
12162       emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
12163     }
12164   else
12165     {
12166       int offset = 0;
12167       /* Try to load the address using the shorter movl instead of movabs.
12168          We may want to support movq for kernel mode, but the kernel does
12169          not use trampolines at the moment.  */
12170       if (x86_64_zero_extended_value (fnaddr))
12171 	{
12172 	  fnaddr = copy_to_mode_reg (DImode, fnaddr);
12173 	  emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12174 			  gen_int_mode (0xbb41, HImode));
12175 	  emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
12176 			  gen_lowpart (SImode, fnaddr));
12177 	  offset += 6;
12178 	}
12179       else
12180 	{
12181 	  emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12182 			  gen_int_mode (0xbb49, HImode));
12183 	  emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
12184 			  fnaddr);
12185 	  offset += 10;
12186 	}
12187       /* Load static chain using movabs to r10.  */
12188       emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12189 		      gen_int_mode (0xba49, HImode));
12190       emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
12191 		      cxt);
12192       offset += 10;
12193       /* Jump to r11.  */
12194       emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12195 		      gen_int_mode (0xff49, HImode));
12196       emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
12197 		      gen_int_mode (0xe3, QImode));
12198       offset += 3;
12199       if (offset > TRAMPOLINE_SIZE)
12200 	abort ();
12201     }
12202 
12203 #ifdef TRANSFER_FROM_TRAMPOLINE
12204   emit_library_call (gen_rtx (SYMBOL_REF, Pmode, "__enable_execute_stack"),
12205 		     LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
12206 #endif
12207 }
12208 
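/* Editor's note: the byte layout of the trampolines emitted above,
   decoded from the little-endian gen_int_mode constants (the mnemonics
   are the editor's reading, not part of the source):

   32-bit, 10 bytes:
      0: b9 <cxt:4>           mov  $cxt, %ecx      -- static chain
      5: e9 <disp:4>          jmp  fnaddr          -- rel32 from tramp+10

   64-bit, fnaddr zero-extends, 19 bytes:
      0: 41 bb <fnaddr:4>     mov  $fnaddr, %r11d
      6: 49 ba <cxt:8>        movabs $cxt, %r10    -- static chain
     16: 49 ff e3             jmp  *%r11

   64-bit, full form, 23 bytes:
      0: 49 bb <fnaddr:8>     movabs $fnaddr, %r11
     10: 49 ba <cxt:8>        movabs $cxt, %r10
     20: 49 ff e3             jmp  *%r11  */
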
12209 #define def_builtin(MASK, NAME, TYPE, CODE)			\
12210 do {								\
12211   if ((MASK) & target_flags					\
12212       && (!((MASK) & MASK_64BIT) || TARGET_64BIT))		\
12213     builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD,	\
12214 		      NULL, NULL_TREE);				\
12215 } while (0)
12216 
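/* Editor's note: a typical use of the macro above.  A call such as

     def_builtin (MASK_SSE, "__builtin_ia32_addps",
		  v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ADDPS);

   registers the builtin only when the SSE bit is set in target_flags
   (and, for entries carrying MASK_64BIT, only on TARGET_64BIT).  Most
   registrations below are driven from the bdesc_* tables rather than
   written out one call at a time.  */
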
12217 struct builtin_description
12218 {
12219   const unsigned int mask;
12220   const enum insn_code icode;
12221   const char *const name;
12222   const enum ix86_builtins code;
12223   const enum rtx_code comparison;
12224   const unsigned int flag;
12225 };
12226 
12227 static const struct builtin_description bdesc_comi[] =
12228 {
12229   { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
12230   { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
12231   { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
12232   { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
12233   { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
12234   { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
12235   { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
12236   { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
12237   { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
12238   { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
12239   { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
12240   { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
12241   { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
12242   { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
12243   { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
12244   { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
12245   { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
12246   { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
12247   { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
12248   { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
12249   { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
12250   { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
12251   { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
12252   { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
12253 };
12254 
12255 static const struct builtin_description bdesc_2arg[] =
12256 {
12257   /* SSE */
12258   { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
12259   { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
12260   { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
12261   { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
12262   { MASK_SSE, CODE_FOR_vmaddv4sf3,  "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
12263   { MASK_SSE, CODE_FOR_vmsubv4sf3,  "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
12264   { MASK_SSE, CODE_FOR_vmmulv4sf3,  "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
12265   { MASK_SSE, CODE_FOR_vmdivv4sf3,  "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
12266 
12267   { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
12268   { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
12269   { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
12270   { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 },
12271   { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 },
12272   { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
12273   { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 },
12274   { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 },
12275   { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 },
12276   { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 },
12277   { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 },
12278   { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 },
12279   { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
12280   { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
12281   { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
12282   { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
12283   { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 },
12284   { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 },
12285   { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 },
12286   { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
12287 
12288   { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
12289   { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
12290   { MASK_SSE, CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
12291   { MASK_SSE, CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
12292 
12293   { MASK_SSE, CODE_FOR_sse_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
12294   { MASK_SSE, CODE_FOR_sse_nandv4sf3,  "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
12295   { MASK_SSE, CODE_FOR_sse_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
12296   { MASK_SSE, CODE_FOR_sse_xorv4sf3,  "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
12297 
12298   { MASK_SSE, CODE_FOR_sse_movss,  "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
12299   { MASK_SSE, CODE_FOR_sse_movhlps,  "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
12300   { MASK_SSE, CODE_FOR_sse_movlhps,  "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
12301   { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
12302   { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
12303 
12304   /* MMX */
12305   { MASK_MMX, CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
12306   { MASK_MMX, CODE_FOR_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
12307   { MASK_MMX, CODE_FOR_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
12308   { MASK_MMX, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, 0, 0 },
12309   { MASK_MMX, CODE_FOR_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
12310   { MASK_MMX, CODE_FOR_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
12311   { MASK_MMX, CODE_FOR_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
12312   { MASK_MMX, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, 0, 0 },
12313 
12314   { MASK_MMX, CODE_FOR_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
12315   { MASK_MMX, CODE_FOR_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
12316   { MASK_MMX, CODE_FOR_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
12317   { MASK_MMX, CODE_FOR_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
12318   { MASK_MMX, CODE_FOR_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
12319   { MASK_MMX, CODE_FOR_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
12320   { MASK_MMX, CODE_FOR_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
12321   { MASK_MMX, CODE_FOR_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
12322 
12323   { MASK_MMX, CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
12324   { MASK_MMX, CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
12325   { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
12326 
12327   { MASK_MMX, CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
12328   { MASK_MMX, CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
12329   { MASK_MMX, CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
12330   { MASK_MMX, CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
12331 
12332   { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
12333   { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
12334 
12335   { MASK_MMX, CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
12336   { MASK_MMX, CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
12337   { MASK_MMX, CODE_FOR_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
12338   { MASK_MMX, CODE_FOR_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
12339   { MASK_MMX, CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
12340   { MASK_MMX, CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
12341 
12342   { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
12343   { MASK_SSE | MASK_3DNOW_A, CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
12344   { MASK_SSE | MASK_3DNOW_A, CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
12345   { MASK_SSE | MASK_3DNOW_A, CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
12346 
12347   { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
12348   { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
12349   { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
12350   { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
12351   { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
12352   { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
12353 
12354   /* Special.  */
12355   { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
12356   { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
12357   { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
12358 
12359   { MASK_SSE, CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
12360   { MASK_SSE, CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
12361   { MASK_SSE | MASK_64BIT, CODE_FOR_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 },
12362 
12363   { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
12364   { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
12365   { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
12366   { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
12367   { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
12368   { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
12369 
12370   { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
12371   { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
12372   { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
12373   { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
12374   { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
12375   { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
12376 
12377   { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
12378   { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
12379   { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
12380   { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
12381 
12382   { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
12383   { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
12384 
12385   /* SSE2 */
12386   { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
12387   { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
12388   { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
12389   { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
12390   { MASK_SSE2, CODE_FOR_vmaddv2df3,  "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
12391   { MASK_SSE2, CODE_FOR_vmsubv2df3,  "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
12392   { MASK_SSE2, CODE_FOR_vmmulv2df3,  "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
12393   { MASK_SSE2, CODE_FOR_vmdivv2df3,  "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
12394 
12395   { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
12396   { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
12397   { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
12398   { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, 1 },
12399   { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, 1 },
12400   { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
12401   { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, EQ, 0 },
12402   { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, LT, 0 },
12403   { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, LE, 0 },
12404   { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, LT, 1 },
12405   { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, LE, 1 },
12406   { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, UNORDERED, 0 },
12407   { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
12408   { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
12409   { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
12410   { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
12411   { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, EQ, 0 },
12412   { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, LT, 0 },
12413   { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, LE, 0 },
12414   { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, UNORDERED, 0 },
12415 
12416   { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
12417   { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
12418   { MASK_SSE2, CODE_FOR_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
12419   { MASK_SSE2, CODE_FOR_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
12420 
12421   { MASK_SSE2, CODE_FOR_sse2_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
12422   { MASK_SSE2, CODE_FOR_sse2_nandv2df3,  "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
12423   { MASK_SSE2, CODE_FOR_sse2_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
12424   { MASK_SSE2, CODE_FOR_sse2_xorv2df3,  "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
12425 
12426   { MASK_SSE2, CODE_FOR_sse2_movsd,  "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
12427   { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
12428   { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
12429 
12430   /* SSE2 MMX */
12431   { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
12432   { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
12433   { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
12434   { MASK_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
12435   { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
12436   { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
12437   { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
12438   { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
12439 
12440   { MASK_MMX, CODE_FOR_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
12441   { MASK_MMX, CODE_FOR_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
12442   { MASK_MMX, CODE_FOR_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
12443   { MASK_MMX, CODE_FOR_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
12444   { MASK_MMX, CODE_FOR_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
12445   { MASK_MMX, CODE_FOR_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
12446   { MASK_MMX, CODE_FOR_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
12447   { MASK_MMX, CODE_FOR_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
12448 
12449   { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
12450   { MASK_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
12451   { MASK_SSE2, CODE_FOR_sse2_umulsidi3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, 0, 0 },
12452   { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, 0, 0 },
12453 
12454   { MASK_SSE2, CODE_FOR_sse2_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
12455   { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
12456   { MASK_SSE2, CODE_FOR_sse2_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
12457   { MASK_SSE2, CODE_FOR_sse2_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
12458 
12459   { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
12460   { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
12461 
12462   { MASK_SSE2, CODE_FOR_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
12463   { MASK_SSE2, CODE_FOR_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
12464   { MASK_SSE2, CODE_FOR_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
12465   { MASK_SSE2, CODE_FOR_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
12466   { MASK_SSE2, CODE_FOR_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
12467   { MASK_SSE2, CODE_FOR_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
12468 
12469   { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
12470   { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
12471   { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
12472   { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
12473 
12474   { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
12475   { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
12476   { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
12477   { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
12478   { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
12479   { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
12480   { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
12481   { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },
12482 
12483   { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
12484   { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
12485   { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
12486 
12487   { MASK_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
12488   { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
12489 
12490   { MASK_SSE2, CODE_FOR_ashlv8hi3_ti, 0, IX86_BUILTIN_PSLLW128, 0, 0 },
12491   { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
12492   { MASK_SSE2, CODE_FOR_ashlv4si3_ti, 0, IX86_BUILTIN_PSLLD128, 0, 0 },
12493   { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
12494   { MASK_SSE2, CODE_FOR_ashlv2di3_ti, 0, IX86_BUILTIN_PSLLQ128, 0, 0 },
12495   { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
12496 
12497   { MASK_SSE2, CODE_FOR_lshrv8hi3_ti, 0, IX86_BUILTIN_PSRLW128, 0, 0 },
12498   { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
12499   { MASK_SSE2, CODE_FOR_lshrv4si3_ti, 0, IX86_BUILTIN_PSRLD128, 0, 0 },
12500   { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
12501   { MASK_SSE2, CODE_FOR_lshrv2di3_ti, 0, IX86_BUILTIN_PSRLQ128, 0, 0 },
12502   { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
12503 
12504   { MASK_SSE2, CODE_FOR_ashrv8hi3_ti, 0, IX86_BUILTIN_PSRAW128, 0, 0 },
12505   { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
12506   { MASK_SSE2, CODE_FOR_ashrv4si3_ti, 0, IX86_BUILTIN_PSRAD128, 0, 0 },
12507   { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
12508 
12509   { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
12510 
12511   { MASK_SSE2, CODE_FOR_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
12512   { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 },
12513   { MASK_SSE2, CODE_FOR_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
12514   { MASK_SSE2, CODE_FOR_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 },
12515 
12516   /* SSE3 MMX */
12517   { MASK_SSE3, CODE_FOR_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, 0, 0 },
12518   { MASK_SSE3, CODE_FOR_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, 0, 0 },
12519   { MASK_SSE3, CODE_FOR_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, 0, 0 },
12520   { MASK_SSE3, CODE_FOR_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, 0, 0 },
12521   { MASK_SSE3, CODE_FOR_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, 0, 0 },
12522   { MASK_SSE3, CODE_FOR_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, 0, 0 }
12523 };
12524 
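/* Editor's note on the comparison/flag fields above: entries such as
   cmpgtps and cmpgeps reuse the LT/LE codes with flag = 1, which the
   SSE compare expander apparently uses to swap the operands (GT/GE
   being LT/LE with the operands reversed); flag is 0 where the
   comparison code is used as-is.  */
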
12525 static const struct builtin_description bdesc_1arg[] =
12526 {
12527   { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
12528   { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
12529 
12530   { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
12531   { MASK_SSE, CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
12532   { MASK_SSE, CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
12533 
12534   { MASK_SSE, CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
12535   { MASK_SSE, CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
12536   { MASK_SSE | MASK_64BIT, CODE_FOR_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 },
12537   { MASK_SSE, CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
12538   { MASK_SSE, CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
12539   { MASK_SSE | MASK_64BIT, CODE_FOR_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 },
12540 
12541   { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
12542   { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
12543   { MASK_SSE2, CODE_FOR_sse2_movq2dq, 0, IX86_BUILTIN_MOVQ2DQ, 0, 0 },
12544   { MASK_SSE2, CODE_FOR_sse2_movdq2q, 0, IX86_BUILTIN_MOVDQ2Q, 0, 0 },
12545 
12546   { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
12547 
12548   { MASK_SSE2, CODE_FOR_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
12549   { MASK_SSE2, CODE_FOR_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
12550 
12551   { MASK_SSE2, CODE_FOR_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
12552   { MASK_SSE2, CODE_FOR_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
12553   { MASK_SSE2, CODE_FOR_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
12554   { MASK_SSE2, CODE_FOR_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
12555   { MASK_SSE2, CODE_FOR_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
12556 
12557   { MASK_SSE2, CODE_FOR_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
12558 
12559   { MASK_SSE2, CODE_FOR_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
12560   { MASK_SSE2, CODE_FOR_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
12561   { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 },
12562   { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 },
12563 
12564   { MASK_SSE2, CODE_FOR_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
12565   { MASK_SSE2, CODE_FOR_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
12566   { MASK_SSE2, CODE_FOR_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },
12567 
12568   { MASK_SSE2, CODE_FOR_sse2_movq, 0, IX86_BUILTIN_MOVQ, 0, 0 },
12569 
12570   /* SSE3 */
12571   { MASK_SSE3, CODE_FOR_movshdup, 0, IX86_BUILTIN_MOVSHDUP, 0, 0 },
12572   { MASK_SSE3, CODE_FOR_movsldup, 0, IX86_BUILTIN_MOVSLDUP, 0, 0 },
12573   { MASK_SSE3, CODE_FOR_movddup,  0, IX86_BUILTIN_MOVDDUP, 0, 0 }
12574 };
12575 
12576 void
12577 ix86_init_builtins ()
12578 {
12579   if (TARGET_MMX)
12580     ix86_init_mmx_sse_builtins ();
12581 }
12582 
12583 /* Set up all the MMX/SSE builtins.  This is not called if TARGET_MMX
12584    is zero.  Otherwise, if TARGET_SSE is not set, only expand the MMX
12585    builtins.  */
12586 static void
12587 ix86_init_mmx_sse_builtins ()
12588 {
12589   const struct builtin_description * d;
12590   size_t i;
12591 
12592   tree pchar_type_node = build_pointer_type (char_type_node);
12593   tree pcchar_type_node = build_pointer_type (
12594 			     build_type_variant (char_type_node, 1, 0));
12595   tree pfloat_type_node = build_pointer_type (float_type_node);
12596   tree pcfloat_type_node = build_pointer_type (
12597 			     build_type_variant (float_type_node, 1, 0));
12598   tree pv2si_type_node = build_pointer_type (V2SI_type_node);
12599   tree pv2di_type_node = build_pointer_type (V2DI_type_node);
12600   tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
12601 
12602   /* Comparisons.  */
12603   tree int_ftype_v4sf_v4sf
12604     = build_function_type_list (integer_type_node,
12605 				V4SF_type_node, V4SF_type_node, NULL_TREE);
12606   tree v4si_ftype_v4sf_v4sf
12607     = build_function_type_list (V4SI_type_node,
12608 				V4SF_type_node, V4SF_type_node, NULL_TREE);
12609   /* MMX/SSE/integer conversions.  */
12610   tree int_ftype_v4sf
12611     = build_function_type_list (integer_type_node,
12612 				V4SF_type_node, NULL_TREE);
12613   tree int64_ftype_v4sf
12614     = build_function_type_list (long_long_integer_type_node,
12615 				V4SF_type_node, NULL_TREE);
12616   tree int_ftype_v8qi
12617     = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
12618   tree v4sf_ftype_v4sf_int
12619     = build_function_type_list (V4SF_type_node,
12620 				V4SF_type_node, integer_type_node, NULL_TREE);
12621   tree v4sf_ftype_v4sf_int64
12622     = build_function_type_list (V4SF_type_node,
12623 				V4SF_type_node, long_long_integer_type_node,
12624 				NULL_TREE);
12625   tree v4sf_ftype_v4sf_v2si
12626     = build_function_type_list (V4SF_type_node,
12627 				V4SF_type_node, V2SI_type_node, NULL_TREE);
12628   tree int_ftype_v4hi_int
12629     = build_function_type_list (integer_type_node,
12630 				V4HI_type_node, integer_type_node, NULL_TREE);
12631   tree v4hi_ftype_v4hi_int_int
12632     = build_function_type_list (V4HI_type_node, V4HI_type_node,
12633 				integer_type_node, integer_type_node,
12634 				NULL_TREE);
12635   /* Miscellaneous.  */
12636   tree v8qi_ftype_v4hi_v4hi
12637     = build_function_type_list (V8QI_type_node,
12638 				V4HI_type_node, V4HI_type_node, NULL_TREE);
12639   tree v4hi_ftype_v2si_v2si
12640     = build_function_type_list (V4HI_type_node,
12641 				V2SI_type_node, V2SI_type_node, NULL_TREE);
12642   tree v4sf_ftype_v4sf_v4sf_int
12643     = build_function_type_list (V4SF_type_node,
12644 				V4SF_type_node, V4SF_type_node,
12645 				integer_type_node, NULL_TREE);
12646   tree v2si_ftype_v4hi_v4hi
12647     = build_function_type_list (V2SI_type_node,
12648 				V4HI_type_node, V4HI_type_node, NULL_TREE);
12649   tree v4hi_ftype_v4hi_int
12650     = build_function_type_list (V4HI_type_node,
12651 				V4HI_type_node, integer_type_node, NULL_TREE);
12652   tree v4hi_ftype_v4hi_di
12653     = build_function_type_list (V4HI_type_node,
12654 				V4HI_type_node, long_long_unsigned_type_node,
12655 				NULL_TREE);
12656   tree v2si_ftype_v2si_di
12657     = build_function_type_list (V2SI_type_node,
12658 				V2SI_type_node, long_long_unsigned_type_node,
12659 				NULL_TREE);
12660   tree void_ftype_void
12661     = build_function_type (void_type_node, void_list_node);
12662   tree void_ftype_unsigned
12663     = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
12664   tree void_ftype_unsigned_unsigned
12665     = build_function_type_list (void_type_node, unsigned_type_node,
12666 				unsigned_type_node, NULL_TREE);
12667   tree void_ftype_pcvoid_unsigned_unsigned
12668     = build_function_type_list (void_type_node, const_ptr_type_node,
12669 				unsigned_type_node, unsigned_type_node,
12670 				NULL_TREE);
12671   tree unsigned_ftype_void
12672     = build_function_type (unsigned_type_node, void_list_node);
12673   tree di_ftype_void
12674     = build_function_type (long_long_unsigned_type_node, void_list_node);
12675   tree v4sf_ftype_void
12676     = build_function_type (V4SF_type_node, void_list_node);
12677   tree v2si_ftype_v4sf
12678     = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
12679   /* Loads/stores.  */
12680   tree void_ftype_v8qi_v8qi_pchar
12681     = build_function_type_list (void_type_node,
12682 				V8QI_type_node, V8QI_type_node,
12683 				pchar_type_node, NULL_TREE);
12684   tree v4sf_ftype_pcfloat
12685     = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
12686   /* @@@ the type is bogus */
12687   tree v4sf_ftype_v4sf_pv2si
12688     = build_function_type_list (V4SF_type_node,
12689 				V4SF_type_node, pv2si_type_node, NULL_TREE);
12690   tree void_ftype_pv2si_v4sf
12691     = build_function_type_list (void_type_node,
12692 				pv2si_type_node, V4SF_type_node, NULL_TREE);
12693   tree void_ftype_pfloat_v4sf
12694     = build_function_type_list (void_type_node,
12695 				pfloat_type_node, V4SF_type_node, NULL_TREE);
12696   tree void_ftype_pdi_di
12697     = build_function_type_list (void_type_node,
12698 				pdi_type_node, long_long_unsigned_type_node,
12699 				NULL_TREE);
12700   tree void_ftype_pv2di_v2di
12701     = build_function_type_list (void_type_node,
12702 				pv2di_type_node, V2DI_type_node, NULL_TREE);
12703   /* Normal vector unops.  */
12704   tree v4sf_ftype_v4sf
12705     = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
12706 
12707   /* Normal vector binops.  */
12708   tree v4sf_ftype_v4sf_v4sf
12709     = build_function_type_list (V4SF_type_node,
12710 				V4SF_type_node, V4SF_type_node, NULL_TREE);
12711   tree v8qi_ftype_v8qi_v8qi
12712     = build_function_type_list (V8QI_type_node,
12713 				V8QI_type_node, V8QI_type_node, NULL_TREE);
12714   tree v4hi_ftype_v4hi_v4hi
12715     = build_function_type_list (V4HI_type_node,
12716 				V4HI_type_node, V4HI_type_node, NULL_TREE);
12717   tree v2si_ftype_v2si_v2si
12718     = build_function_type_list (V2SI_type_node,
12719 				V2SI_type_node, V2SI_type_node, NULL_TREE);
12720   tree di_ftype_di_di
12721     = build_function_type_list (long_long_unsigned_type_node,
12722 				long_long_unsigned_type_node,
12723 				long_long_unsigned_type_node, NULL_TREE);
12724 
12725   tree v2si_ftype_v2sf
12726     = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
12727   tree v2sf_ftype_v2si
12728     = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
12729   tree v2si_ftype_v2si
12730     = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
12731   tree v2sf_ftype_v2sf
12732     = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
12733   tree v2sf_ftype_v2sf_v2sf
12734     = build_function_type_list (V2SF_type_node,
12735 				V2SF_type_node, V2SF_type_node, NULL_TREE);
12736   tree v2si_ftype_v2sf_v2sf
12737     = build_function_type_list (V2SI_type_node,
12738 				V2SF_type_node, V2SF_type_node, NULL_TREE);
12739   tree pint_type_node    = build_pointer_type (integer_type_node);
12740   tree pcint_type_node = build_pointer_type (
12741 			     build_type_variant (integer_type_node, 1, 0));
12742   tree pdouble_type_node = build_pointer_type (double_type_node);
12743   tree pcdouble_type_node = build_pointer_type (
12744 				build_type_variant (double_type_node, 1, 0));
12745   tree int_ftype_v2df_v2df
12746     = build_function_type_list (integer_type_node,
12747 				V2DF_type_node, V2DF_type_node, NULL_TREE);
12748 
12749   tree ti_ftype_void
12750     = build_function_type (intTI_type_node, void_list_node);
12751   tree v2di_ftype_void
12752     = build_function_type (V2DI_type_node, void_list_node);
12753   tree ti_ftype_ti_ti
12754     = build_function_type_list (intTI_type_node,
12755 				intTI_type_node, intTI_type_node, NULL_TREE);
12756   tree void_ftype_pcvoid
12757     = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
12758   tree v2di_ftype_di
12759     = build_function_type_list (V2DI_type_node,
12760 				long_long_unsigned_type_node, NULL_TREE);
12761   tree di_ftype_v2di
12762     = build_function_type_list (long_long_unsigned_type_node,
12763 				V2DI_type_node, NULL_TREE);
12764   tree v4sf_ftype_v4si
12765     = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
12766   tree v4si_ftype_v4sf
12767     = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
12768   tree v2df_ftype_v4si
12769     = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
12770   tree v4si_ftype_v2df
12771     = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
12772   tree v2si_ftype_v2df
12773     = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
12774   tree v4sf_ftype_v2df
12775     = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
12776   tree v2df_ftype_v2si
12777     = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
12778   tree v2df_ftype_v4sf
12779     = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
12780   tree int_ftype_v2df
12781     = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
12782   tree int64_ftype_v2df
12783     = build_function_type_list (long_long_integer_type_node,
12784 		    		V2DF_type_node, NULL_TREE);
12785   tree v2df_ftype_v2df_int
12786     = build_function_type_list (V2DF_type_node,
12787 				V2DF_type_node, integer_type_node, NULL_TREE);
12788   tree v2df_ftype_v2df_int64
12789     = build_function_type_list (V2DF_type_node,
12790 				V2DF_type_node, long_long_integer_type_node,
12791 				NULL_TREE);
12792   tree v4sf_ftype_v4sf_v2df
12793     = build_function_type_list (V4SF_type_node,
12794 				V4SF_type_node, V2DF_type_node, NULL_TREE);
12795   tree v2df_ftype_v2df_v4sf
12796     = build_function_type_list (V2DF_type_node,
12797 				V2DF_type_node, V4SF_type_node, NULL_TREE);
12798   tree v2df_ftype_v2df_v2df_int
12799     = build_function_type_list (V2DF_type_node,
12800 				V2DF_type_node, V2DF_type_node,
12801 				integer_type_node,
12802 				NULL_TREE);
12803   tree v2df_ftype_v2df_pv2si
12804     = build_function_type_list (V2DF_type_node,
12805 				V2DF_type_node, pv2si_type_node, NULL_TREE);
12806   tree void_ftype_pv2si_v2df
12807     = build_function_type_list (void_type_node,
12808 				pv2si_type_node, V2DF_type_node, NULL_TREE);
12809   tree void_ftype_pdouble_v2df
12810     = build_function_type_list (void_type_node,
12811 				pdouble_type_node, V2DF_type_node, NULL_TREE);
12812   tree void_ftype_pint_int
12813     = build_function_type_list (void_type_node,
12814 				pint_type_node, integer_type_node, NULL_TREE);
12815   tree void_ftype_v16qi_v16qi_pchar
12816     = build_function_type_list (void_type_node,
12817 				V16QI_type_node, V16QI_type_node,
12818 				pchar_type_node, NULL_TREE);
12819   tree v2df_ftype_pcdouble
12820     = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
12821   tree v2df_ftype_v2df_v2df
12822     = build_function_type_list (V2DF_type_node,
12823 				V2DF_type_node, V2DF_type_node, NULL_TREE);
12824   tree v16qi_ftype_v16qi_v16qi
12825     = build_function_type_list (V16QI_type_node,
12826 				V16QI_type_node, V16QI_type_node, NULL_TREE);
12827   tree v8hi_ftype_v8hi_v8hi
12828     = build_function_type_list (V8HI_type_node,
12829 				V8HI_type_node, V8HI_type_node, NULL_TREE);
12830   tree v4si_ftype_v4si_v4si
12831     = build_function_type_list (V4SI_type_node,
12832 				V4SI_type_node, V4SI_type_node, NULL_TREE);
12833   tree v2di_ftype_v2di_v2di
12834     = build_function_type_list (V2DI_type_node,
12835 				V2DI_type_node, V2DI_type_node, NULL_TREE);
12836   tree v2di_ftype_v2df_v2df
12837     = build_function_type_list (V2DI_type_node,
12838 				V2DF_type_node, V2DF_type_node, NULL_TREE);
12839   tree v2df_ftype_v2df
12840     = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
12841   tree v2df_ftype_double
12842     = build_function_type_list (V2DF_type_node, double_type_node, NULL_TREE);
12843   tree v2df_ftype_double_double
12844     = build_function_type_list (V2DF_type_node,
12845 				double_type_node, double_type_node, NULL_TREE);
12846   tree int_ftype_v8hi_int
12847     = build_function_type_list (integer_type_node,
12848 				V8HI_type_node, integer_type_node, NULL_TREE);
12849   tree v8hi_ftype_v8hi_int_int
12850     = build_function_type_list (V8HI_type_node,
12851 				V8HI_type_node, integer_type_node,
12852 				integer_type_node, NULL_TREE);
12853   tree v2di_ftype_v2di_int
12854     = build_function_type_list (V2DI_type_node,
12855 				V2DI_type_node, integer_type_node, NULL_TREE);
12856   tree v4si_ftype_v4si_int
12857     = build_function_type_list (V4SI_type_node,
12858 				V4SI_type_node, integer_type_node, NULL_TREE);
12859   tree v8hi_ftype_v8hi_int
12860     = build_function_type_list (V8HI_type_node,
12861 				V8HI_type_node, integer_type_node, NULL_TREE);
12862   tree v8hi_ftype_v8hi_v2di
12863     = build_function_type_list (V8HI_type_node,
12864 				V8HI_type_node, V2DI_type_node, NULL_TREE);
12865   tree v4si_ftype_v4si_v2di
12866     = build_function_type_list (V4SI_type_node,
12867 				V4SI_type_node, V2DI_type_node, NULL_TREE);
12868   tree v4si_ftype_v8hi_v8hi
12869     = build_function_type_list (V4SI_type_node,
12870 				V8HI_type_node, V8HI_type_node, NULL_TREE);
12871   tree di_ftype_v8qi_v8qi
12872     = build_function_type_list (long_long_unsigned_type_node,
12873 				V8QI_type_node, V8QI_type_node, NULL_TREE);
12874   tree v2di_ftype_v16qi_v16qi
12875     = build_function_type_list (V2DI_type_node,
12876 				V16QI_type_node, V16QI_type_node, NULL_TREE);
12877   tree int_ftype_v16qi
12878     = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
12879   tree v16qi_ftype_pcchar
12880     = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
12881   tree void_ftype_pchar_v16qi
12882     = build_function_type_list (void_type_node,
12883 			        pchar_type_node, V16QI_type_node, NULL_TREE);
12884   tree v4si_ftype_pcint
12885     = build_function_type_list (V4SI_type_node, pcint_type_node, NULL_TREE);
12886   tree void_ftype_pcint_v4si
12887     = build_function_type_list (void_type_node,
12888 			        pcint_type_node, V4SI_type_node, NULL_TREE);
12889   tree v2di_ftype_v2di
12890     = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
12891 
12892   /* Add all builtins that are more or less simple operations on two
12893      operands.  */
12894   for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
12895     {
12896       /* Use one of the operands; the target can have a different mode for
12897 	 mask-generating compares.  */
12898       enum machine_mode mode;
12899       tree type;
12900 
12901       if (d->name == 0)
12902 	continue;
12903       mode = insn_data[d->icode].operand[1].mode;
12904 
12905       switch (mode)
12906 	{
12907 	case V16QImode:
12908 	  type = v16qi_ftype_v16qi_v16qi;
12909 	  break;
12910 	case V8HImode:
12911 	  type = v8hi_ftype_v8hi_v8hi;
12912 	  break;
12913 	case V4SImode:
12914 	  type = v4si_ftype_v4si_v4si;
12915 	  break;
12916 	case V2DImode:
12917 	  type = v2di_ftype_v2di_v2di;
12918 	  break;
12919 	case V2DFmode:
12920 	  type = v2df_ftype_v2df_v2df;
12921 	  break;
12922 	case TImode:
12923 	  type = ti_ftype_ti_ti;
12924 	  break;
12925 	case V4SFmode:
12926 	  type = v4sf_ftype_v4sf_v4sf;
12927 	  break;
12928 	case V8QImode:
12929 	  type = v8qi_ftype_v8qi_v8qi;
12930 	  break;
12931 	case V4HImode:
12932 	  type = v4hi_ftype_v4hi_v4hi;
12933 	  break;
12934 	case V2SImode:
12935 	  type = v2si_ftype_v2si_v2si;
12936 	  break;
12937 	case DImode:
12938 	  type = di_ftype_di_di;
12939 	  break;
12940 
12941 	default:
12942 	  abort ();
12943 	}
12944 
12945       /* Override for comparisons.  */
12946       if (d->icode == CODE_FOR_maskcmpv4sf3
12947 	  || d->icode == CODE_FOR_maskncmpv4sf3
12948 	  || d->icode == CODE_FOR_vmmaskcmpv4sf3
12949 	  || d->icode == CODE_FOR_vmmaskncmpv4sf3)
12950 	type = v4si_ftype_v4sf_v4sf;
12951 
12952       if (d->icode == CODE_FOR_maskcmpv2df3
12953 	  || d->icode == CODE_FOR_maskncmpv2df3
12954 	  || d->icode == CODE_FOR_vmmaskcmpv2df3
12955 	  || d->icode == CODE_FOR_vmmaskncmpv2df3)
12956 	type = v2di_ftype_v2df_v2df;
12957 
12958       def_builtin (d->mask, d->name, type, d->code);
12959     }
12960 
12961   /* Add the remaining MMX insns with somewhat more complicated types.  */
12962   def_builtin (MASK_MMX, "__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO);
12963   def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
12964   def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
12965   def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
12966   def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
12967 
12968   def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
12969   def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
12970   def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
12971 
12972   def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
12973   def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
12974 
12975   def_builtin (MASK_MMX, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
12976   def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
12977 
12978   /* comi/ucomi insns.  */
12979   for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
12980     if (d->mask == MASK_SSE2)
12981       def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
12982     else
12983       def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
12984 
12985   def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
12986   def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
12987   def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
12988 
12989   def_builtin (MASK_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
12990   def_builtin (MASK_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
12991   def_builtin (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
12992   def_builtin (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
12993   def_builtin (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
12994   def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
12995   def_builtin (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
12996   def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
12997   def_builtin (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
12998   def_builtin (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
12999   def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);
13000 
13001   def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
13002   def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);
13003 
13004   def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
13005 
13006   def_builtin (MASK_SSE, "__builtin_ia32_loadaps", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADAPS);
13007   def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
13008   def_builtin (MASK_SSE, "__builtin_ia32_loadss", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADSS);
13009   def_builtin (MASK_SSE, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS);
13010   def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
13011   def_builtin (MASK_SSE, "__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS);
13012 
13013   def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
13014   def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
13015   def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
13016   def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
13017 
13018   def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
13019   def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
13020   def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
13021   def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
13022 
13023   def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
13024 
13025   def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
13026 
13027   def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
13028   def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
13029   def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
13030   def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
13031   def_builtin (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
13032   def_builtin (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
13033 
13034   def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
13035 
13036   /* Original 3DNow!  */
13037   def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
13038   def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
13039   def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
13040   def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
13041   def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
13042   def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
13043   def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
13044   def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
13045   def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
13046   def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
13047   def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
13048   def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
13049   def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
13050   def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
13051   def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
13052   def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
13053   def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
13054   def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
13055   def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
13056   def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
13057 
13058   /* 3DNow! extension as used in the Athlon CPU.  */
13059   def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
13060   def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
13061   def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
13062   def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
13063   def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
13064   def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
13065 
13066   def_builtin (MASK_SSE, "__builtin_ia32_setzerops", v4sf_ftype_void, IX86_BUILTIN_SSE_ZERO);
13067 
13068   /* SSE2 */
13069   def_builtin (MASK_SSE2, "__builtin_ia32_pextrw128", int_ftype_v8hi_int, IX86_BUILTIN_PEXTRW128);
13070   def_builtin (MASK_SSE2, "__builtin_ia32_pinsrw128", v8hi_ftype_v8hi_int_int, IX86_BUILTIN_PINSRW128);
13071 
13072   def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
13073   def_builtin (MASK_SSE2, "__builtin_ia32_movq2dq", v2di_ftype_di, IX86_BUILTIN_MOVQ2DQ);
13074   def_builtin (MASK_SSE2, "__builtin_ia32_movdq2q", di_ftype_v2di, IX86_BUILTIN_MOVDQ2Q);
13075 
13076   def_builtin (MASK_SSE2, "__builtin_ia32_loadapd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADAPD);
13077   def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
13078   def_builtin (MASK_SSE2, "__builtin_ia32_loadsd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADSD);
13079   def_builtin (MASK_SSE2, "__builtin_ia32_storeapd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREAPD);
13080   def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
13081   def_builtin (MASK_SSE2, "__builtin_ia32_storesd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORESD);
13082 
13083   def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADHPD);
13084   def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADLPD);
13085   def_builtin (MASK_SSE2, "__builtin_ia32_storehpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STOREHPD);
13086   def_builtin (MASK_SSE2, "__builtin_ia32_storelpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STORELPD);
13087 
13088   def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
13089   def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
13090   def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
13091   def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
13092   def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
13093 
13094   def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
13095   def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
13096   def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
13097   def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
13098 
13099   def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
13100   def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
13101 
13102   def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
13103 
13104   def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
13105   def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
13106 
13107   def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
13108   def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
13109   def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
13110   def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
13111   def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
13112 
13113   def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
13114 
13115   def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
13116   def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
13117   def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
13118   def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);
13119 
13120   def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
13121   def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
13122   def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
13123 
13124   def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
13125   def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
13126   def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
13127   def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
13128 
13129   def_builtin (MASK_SSE2, "__builtin_ia32_setpd1", v2df_ftype_double, IX86_BUILTIN_SETPD1);
13130   def_builtin (MASK_SSE2, "__builtin_ia32_setpd", v2df_ftype_double_double, IX86_BUILTIN_SETPD);
13131   def_builtin (MASK_SSE2, "__builtin_ia32_setzeropd", ti_ftype_void, IX86_BUILTIN_CLRPD);
13132   def_builtin (MASK_SSE2, "__builtin_ia32_loadpd1", v2df_ftype_pcdouble, IX86_BUILTIN_LOADPD1);
13133   def_builtin (MASK_SSE2, "__builtin_ia32_loadrpd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADRPD);
13134   def_builtin (MASK_SSE2, "__builtin_ia32_storepd1", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREPD1);
13135   def_builtin (MASK_SSE2, "__builtin_ia32_storerpd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORERPD);
13136 
13137   def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
13138   def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
13139   def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
13140 
13141   def_builtin (MASK_SSE2, "__builtin_ia32_loaddqa", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQA);
13142   def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
13143   def_builtin (MASK_SSE2, "__builtin_ia32_loadd", v4si_ftype_pcint, IX86_BUILTIN_LOADD);
13144   def_builtin (MASK_SSE2, "__builtin_ia32_storedqa", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQA);
13145   def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
13146   def_builtin (MASK_SSE2, "__builtin_ia32_stored", void_ftype_pcint_v4si, IX86_BUILTIN_STORED);
13147   def_builtin (MASK_SSE2, "__builtin_ia32_movq", v2di_ftype_v2di, IX86_BUILTIN_MOVQ);
13148 
13149   def_builtin (MASK_SSE, "__builtin_ia32_setzero128", v2di_ftype_void, IX86_BUILTIN_CLRTI);
13150 
13151   def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
13152   def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
13153   def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
13154 
13155   def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
13156   def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
13157   def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
13158 
13159   def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
13160   def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);
13161 
13162   def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
13163   def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
13164   def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
13165   def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
13166 
13167   def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
13168   def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
13169   def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
13170   def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
13171 
13172   def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
13173   def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
13174 
13175   def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
13176 
13177   /* Prescott New Instructions.  */
13178   def_builtin (MASK_SSE3, "__builtin_ia32_monitor",
13179 	       void_ftype_pcvoid_unsigned_unsigned,
13180 	       IX86_BUILTIN_MONITOR);
13181   def_builtin (MASK_SSE3, "__builtin_ia32_mwait",
13182 	       void_ftype_unsigned_unsigned,
13183 	       IX86_BUILTIN_MWAIT);
13184   def_builtin (MASK_SSE3, "__builtin_ia32_movshdup",
13185 	       v4sf_ftype_v4sf,
13186 	       IX86_BUILTIN_MOVSHDUP);
13187   def_builtin (MASK_SSE3, "__builtin_ia32_movsldup",
13188 	       v4sf_ftype_v4sf,
13189 	       IX86_BUILTIN_MOVSLDUP);
13190   def_builtin (MASK_SSE3, "__builtin_ia32_lddqu",
13191 	       v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);
13192   def_builtin (MASK_SSE3, "__builtin_ia32_loadddup",
13193 	       v2df_ftype_pcdouble, IX86_BUILTIN_LOADDDUP);
13194   def_builtin (MASK_SSE3, "__builtin_ia32_movddup",
13195 	       v2df_ftype_v2df, IX86_BUILTIN_MOVDDUP);
13196 }
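
/* A minimal sketch (not compiled here) of how the builtins registered
   above are used: with -msse2, user code may call them directly, and
   the argument types must match the _ftype_ nodes built in this
   function.  The names in the snippet are hypothetical.

     typedef int __v4si __attribute__ ((__vector_size__ (16)));

     __v4si
     shift_lanes_left (__v4si x)
     {
       return __builtin_ia32_pslldi128 (x, 5);
     }
*/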
13197 
13198 /* Errors in the source file can cause expand_expr to return const0_rtx
13199    where we expect a vector.  To avoid crashing, use one of the vector
13200    clear instructions.  */
13201 static rtx
13202 safe_vector_operand (x, mode)
13203      rtx x;
13204      enum machine_mode mode;
13205 {
13206   if (x != const0_rtx)
13207     return x;
13208   x = gen_reg_rtx (mode);
13209 
13210   if (VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode))
13211     emit_insn (gen_mmx_clrdi (mode == DImode ? x
13212 			      : gen_rtx_SUBREG (DImode, x, 0)));
13213   else
13214     emit_insn (gen_sse_clrv4sf (mode == V4SFmode ? x
13215 				: gen_rtx_SUBREG (V4SFmode, x, 0)));
13216   return x;
13217 }
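
/* Sketch of what the substitution above produces: for an MMX-sized
   mode the fresh register is cleared with pxor via gen_mmx_clrdi, and
   for an SSE-sized mode with xorps via gen_sse_clrv4sf, so the
   expanders below always receive a valid zeroed vector rather than
   const0_rtx.  */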
13218 
13219 /* Subroutine of ix86_expand_builtin to take care of binop insns.  */
13220 
13221 static rtx
13222 ix86_expand_binop_builtin (icode, arglist, target)
13223      enum insn_code icode;
13224      tree arglist;
13225      rtx target;
13226 {
13227   rtx pat;
13228   tree arg0 = TREE_VALUE (arglist);
13229   tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13230   rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13231   rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13232   enum machine_mode tmode = insn_data[icode].operand[0].mode;
13233   enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13234   enum machine_mode mode1 = insn_data[icode].operand[2].mode;
13235 
13236   if (VECTOR_MODE_P (mode0))
13237     op0 = safe_vector_operand (op0, mode0);
13238   if (VECTOR_MODE_P (mode1))
13239     op1 = safe_vector_operand (op1, mode1);
13240 
13241   if (! target
13242       || GET_MODE (target) != tmode
13243       || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13244     target = gen_reg_rtx (tmode);
13245 
13246   if (GET_MODE (op1) == SImode && mode1 == TImode)
13247     {
13248       rtx x = gen_reg_rtx (V4SImode);
13249       emit_insn (gen_sse2_loadd (x, op1));
13250       op1 = gen_lowpart (TImode, x);
13251     }
13252 
13253   /* In case the insn wants input operands in modes different from
13254      the result, abort.  */
13255   if (GET_MODE (op0) != mode0 || GET_MODE (op1) != mode1)
13256     abort ();
13257 
13258   if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13259     op0 = copy_to_mode_reg (mode0, op0);
13260   if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13261     op1 = copy_to_mode_reg (mode1, op1);
13262 
13263   /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
13264      yet at most one of the two may be a memory operand.  This is normally
13265      enforced by expanders, but we didn't bother to create one here.  */
13266   if (GET_CODE (op0) == MEM && GET_CODE (op1) == MEM)
13267     op0 = copy_to_mode_reg (mode0, op0);
13268 
13269   pat = GEN_FCN (icode) (target, op0, op1);
13270   if (! pat)
13271     return 0;
13272   emit_insn (pat);
13273   return target;
13274 }
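
/* Worked example (a sketch, reconstructed from bdesc_2arg): a call to
   __builtin_ia32_paddw reaches this routine with
   icode == CODE_FOR_addv4hi3 and emits roughly

     (set (reg:V4HI target) (plus:V4HI (reg:V4HI op0) (reg:V4HI op1)))

   once the operand predicates have forced both inputs into registers
   where required.  */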
13275 
13276 /* Subroutine of ix86_expand_builtin to take care of stores.  */
13277 
13278 static rtx
13279 ix86_expand_store_builtin (icode, arglist)
13280      enum insn_code icode;
13281      tree arglist;
13282 {
13283   rtx pat;
13284   tree arg0 = TREE_VALUE (arglist);
13285   tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13286   rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13287   rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13288   enum machine_mode mode0 = insn_data[icode].operand[0].mode;
13289   enum machine_mode mode1 = insn_data[icode].operand[1].mode;
13290 
13291   if (VECTOR_MODE_P (mode1))
13292     op1 = safe_vector_operand (op1, mode1);
13293 
13294   op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13295   op1 = copy_to_mode_reg (mode1, op1);
13296 
13297   pat = GEN_FCN (icode) (op0, op1);
13298   if (pat)
13299     emit_insn (pat);
13300   return 0;
13301 }
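
/* Sketch: __builtin_ia32_storeaps (p, v) arrives here with
   icode == CODE_FOR_sse_movaps; op0 is wrapped as
   (mem:V4SF (reg addr)) and the emitted insn is the
   register-to-memory form of movaps.  Stores produce no value,
   hence the constant 0 return.  */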
13302 
13303 /* Subroutine of ix86_expand_builtin to take care of unop insns.  */
13304 
13305 static rtx
13306 ix86_expand_unop_builtin (icode, arglist, target, do_load)
13307      enum insn_code icode;
13308      tree arglist;
13309      rtx target;
13310      int do_load;
13311 {
13312   rtx pat;
13313   tree arg0 = TREE_VALUE (arglist);
13314   rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13315   enum machine_mode tmode = insn_data[icode].operand[0].mode;
13316   enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13317 
13318   if (! target
13319       || GET_MODE (target) != tmode
13320       || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13321     target = gen_reg_rtx (tmode);
13322   if (do_load)
13323     op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13324   else
13325     {
13326       if (VECTOR_MODE_P (mode0))
13327 	op0 = safe_vector_operand (op0, mode0);
13328 
13329       if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13330 	op0 = copy_to_mode_reg (mode0, op0);
13331     }
13332 
13333   pat = GEN_FCN (icode) (target, op0);
13334   if (! pat)
13335     return 0;
13336   emit_insn (pat);
13337   return target;
13338 }
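
/* Sketch: __builtin_ia32_loadups lands here with do_load == 1, so the
   pointer argument is wrapped as (mem:V4SF ...) and CODE_FOR_sse_movups
   expands it as an unaligned load; with do_load == 0 the argument is
   used directly as a register operand, as for the 3DNow! unops.  */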
13339 
13340 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
13341    sqrtss, rsqrtss, rcpss.  */
13342 
13343 static rtx
13344 ix86_expand_unop1_builtin (icode, arglist, target)
13345      enum insn_code icode;
13346      tree arglist;
13347      rtx target;
13348 {
13349   rtx pat;
13350   tree arg0 = TREE_VALUE (arglist);
13351   rtx op1, op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13352   enum machine_mode tmode = insn_data[icode].operand[0].mode;
13353   enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13354 
13355   if (! target
13356       || GET_MODE (target) != tmode
13357       || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13358     target = gen_reg_rtx (tmode);
13359 
13360   if (VECTOR_MODE_P (mode0))
13361     op0 = safe_vector_operand (op0, mode0);
13362 
13363   if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13364     op0 = copy_to_mode_reg (mode0, op0);
13365 
13366   op1 = op0;
13367   if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
13368     op1 = copy_to_mode_reg (mode0, op1);
13369 
13370   pat = GEN_FCN (icode) (target, op0, op1);
13371   if (! pat)
13372     return 0;
13373   emit_insn (pat);
13374   return target;
13375 }
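
/* The duplicated operand above matters: the vm* patterns such as
   CODE_FOR_vmsqrtv4sf2 compute the operation in the low element only
   and take the remaining elements from the second input, so passing
   op0 twice yields the usual sqrtss/rsqrtss/rcpss semantics in which
   the upper three elements pass through unchanged.  */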
13376 
13377 /* Subroutine of ix86_expand_builtin to take care of comparison insns.  */
13378 
13379 static rtx
13380 ix86_expand_sse_compare (d, arglist, target)
13381      const struct builtin_description *d;
13382      tree arglist;
13383      rtx target;
13384 {
13385   rtx pat;
13386   tree arg0 = TREE_VALUE (arglist);
13387   tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13388   rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13389   rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13390   rtx op2;
13391   enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
13392   enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
13393   enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
13394   enum rtx_code comparison = d->comparison;
13395 
13396   if (VECTOR_MODE_P (mode0))
13397     op0 = safe_vector_operand (op0, mode0);
13398   if (VECTOR_MODE_P (mode1))
13399     op1 = safe_vector_operand (op1, mode1);
13400 
13401   /* Swap operands if we have a comparison that isn't available in
13402      hardware.  */
13403   if (d->flag)
13404     {
13405       rtx tmp = gen_reg_rtx (mode1);
13406       emit_move_insn (tmp, op1);
13407       op1 = op0;
13408       op0 = tmp;
13409     }
13410 
13411   if (! target
13412       || GET_MODE (target) != tmode
13413       || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
13414     target = gen_reg_rtx (tmode);
13415 
13416   if (! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
13417     op0 = copy_to_mode_reg (mode0, op0);
13418   if (! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
13419     op1 = copy_to_mode_reg (mode1, op1);
13420 
13421   op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
13422   pat = GEN_FCN (d->icode) (target, op0, op1, op2);
13423   if (! pat)
13424     return 0;
13425   emit_insn (pat);
13426   return target;
13427 }
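
/* Sketch: SSE provides eq/lt/le/unord (and their negations) directly,
   so a builtin such as __builtin_ia32_cmpgtps is listed in bdesc_2arg
   with d->flag set and the LT comparison; the swap above turns
   a > b into b < a before the mask-generating compare is emitted.  */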
13428 
13429 /* Subroutine of ix86_expand_builtin to take care of comi insns.  */
13430 
13431 static rtx
13432 ix86_expand_sse_comi (d, arglist, target)
13433      const struct builtin_description *d;
13434      tree arglist;
13435      rtx target;
13436 {
13437   rtx pat;
13438   tree arg0 = TREE_VALUE (arglist);
13439   tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13440   rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13441   rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13442   rtx op2;
13443   enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
13444   enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
13445   enum rtx_code comparison = d->comparison;
13446 
13447   if (VECTOR_MODE_P (mode0))
13448     op0 = safe_vector_operand (op0, mode0);
13449   if (VECTOR_MODE_P (mode1))
13450     op1 = safe_vector_operand (op1, mode1);
13451 
13452   /* Swap operands if we have a comparison that isn't available in
13453      hardware.  */
13454   if (d->flag)
13455     {
13456       rtx tmp = op1;
13457       op1 = op0;
13458       op0 = tmp;
13459     }
13460 
13461   target = gen_reg_rtx (SImode);
13462   emit_move_insn (target, const0_rtx);
13463   target = gen_rtx_SUBREG (QImode, target, 0);
13464 
13465   if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
13466     op0 = copy_to_mode_reg (mode0, op0);
13467   if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
13468     op1 = copy_to_mode_reg (mode1, op1);
13469 
13470   op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
13471   pat = GEN_FCN (d->icode) (op0, op1);
13472   if (! pat)
13473     return 0;
13474   emit_insn (pat);
13475   emit_insn (gen_rtx_SET (VOIDmode,
13476 			  gen_rtx_STRICT_LOW_PART (VOIDmode, target),
13477 			  gen_rtx_fmt_ee (comparison, QImode,
13478 					  SET_DEST (pat),
13479 					  const0_rtx)));
13480 
13481   return SUBREG_REG (target);
13482 }
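
/* Rough shape of the output for, e.g., __builtin_ia32_comilt
   (a sketch; AT&T syntax, register allocation elided):

     comiss  %xmm1, %xmm0
     setb    %al

   i.e. a flag-setting compare followed by a setcc into the QImode
   low part of the zero-initialized SImode result built above.  */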
13483 
13484 /* Expand an expression EXP that calls a built-in function,
13485    with result going to TARGET if that's convenient
13486    (and in mode MODE if that's convenient).
13487    SUBTARGET may be used as the target for computing one of EXP's operands.
13488    IGNORE is nonzero if the value is to be ignored.  */
13489 
13490 rtx
13491 ix86_expand_builtin (exp, target, subtarget, mode, ignore)
13492      tree exp;
13493      rtx target;
13494      rtx subtarget ATTRIBUTE_UNUSED;
13495      enum machine_mode mode ATTRIBUTE_UNUSED;
13496      int ignore ATTRIBUTE_UNUSED;
13497 {
13498   const struct builtin_description *d;
13499   size_t i;
13500   enum insn_code icode;
13501   tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
13502   tree arglist = TREE_OPERAND (exp, 1);
13503   tree arg0, arg1, arg2;
13504   rtx op0, op1, op2, pat;
13505   enum machine_mode tmode, mode0, mode1, mode2;
13506   unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
13507 
13508   switch (fcode)
13509     {
13510     case IX86_BUILTIN_EMMS:
13511       emit_insn (gen_emms ());
13512       return 0;
13513 
13514     case IX86_BUILTIN_SFENCE:
13515       emit_insn (gen_sfence ());
13516       return 0;
13517 
13518     case IX86_BUILTIN_PEXTRW:
13519     case IX86_BUILTIN_PEXTRW128:
13520       icode = (fcode == IX86_BUILTIN_PEXTRW
13521 	       ? CODE_FOR_mmx_pextrw
13522 	       : CODE_FOR_sse2_pextrw);
13523       arg0 = TREE_VALUE (arglist);
13524       arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13525       op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13526       op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13527       tmode = insn_data[icode].operand[0].mode;
13528       mode0 = insn_data[icode].operand[1].mode;
13529       mode1 = insn_data[icode].operand[2].mode;
13530 
13531       if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13532 	op0 = copy_to_mode_reg (mode0, op0);
13533       if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13534 	{
13535 	  /* @@@ better error message */
13536 	  error ("selector must be an immediate");
13537 	  return gen_reg_rtx (tmode);
13538 	}
13539       if (target == 0
13540 	  || GET_MODE (target) != tmode
13541 	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13542 	target = gen_reg_rtx (tmode);
13543       pat = GEN_FCN (icode) (target, op0, op1);
13544       if (! pat)
13545 	return 0;
13546       emit_insn (pat);
13547       return target;
13548 
13549     case IX86_BUILTIN_PINSRW:
13550     case IX86_BUILTIN_PINSRW128:
13551       icode = (fcode == IX86_BUILTIN_PINSRW
13552 	       ? CODE_FOR_mmx_pinsrw
13553 	       : CODE_FOR_sse2_pinsrw);
13554       arg0 = TREE_VALUE (arglist);
13555       arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13556       arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
13557       op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13558       op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13559       op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
13560       tmode = insn_data[icode].operand[0].mode;
13561       mode0 = insn_data[icode].operand[1].mode;
13562       mode1 = insn_data[icode].operand[2].mode;
13563       mode2 = insn_data[icode].operand[3].mode;
13564 
13565       if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13566 	op0 = copy_to_mode_reg (mode0, op0);
13567       if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13568 	op1 = copy_to_mode_reg (mode1, op1);
13569       if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
13570 	{
13571 	  /* @@@ better error message */
13572 	  error ("selector must be an immediate");
13573 	  return const0_rtx;
13574 	}
13575       if (target == 0
13576 	  || GET_MODE (target) != tmode
13577 	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13578 	target = gen_reg_rtx (tmode);
13579       pat = GEN_FCN (icode) (target, op0, op1, op2);
13580       if (! pat)
13581 	return 0;
13582       emit_insn (pat);
13583       return target;
13584 
13585     case IX86_BUILTIN_MASKMOVQ:
13586     case IX86_BUILTIN_MASKMOVDQU:
13587       icode = (fcode == IX86_BUILTIN_MASKMOVQ
13588 	       ? (TARGET_64BIT ? CODE_FOR_mmx_maskmovq_rex : CODE_FOR_mmx_maskmovq)
13589 	       : (TARGET_64BIT ? CODE_FOR_sse2_maskmovdqu_rex64
13590 		  : CODE_FOR_sse2_maskmovdqu));
13591       /* Note the arg order is different from the operand order.  */
13592       arg1 = TREE_VALUE (arglist);
13593       arg2 = TREE_VALUE (TREE_CHAIN (arglist));
13594       arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
13595       op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13596       op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13597       op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
13598       mode0 = insn_data[icode].operand[0].mode;
13599       mode1 = insn_data[icode].operand[1].mode;
13600       mode2 = insn_data[icode].operand[2].mode;
13601 
13602       if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
13603 	op0 = copy_to_mode_reg (mode0, op0);
13604       if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
13605 	op1 = copy_to_mode_reg (mode1, op1);
13606       if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
13607 	op2 = copy_to_mode_reg (mode2, op2);
13608       pat = GEN_FCN (icode) (op0, op1, op2);
13609       if (! pat)
13610 	return 0;
13611       emit_insn (pat);
13612       return 0;
13613 
13614     case IX86_BUILTIN_SQRTSS:
13615       return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2, arglist, target);
13616     case IX86_BUILTIN_RSQRTSS:
13617       return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2, arglist, target);
13618     case IX86_BUILTIN_RCPSS:
13619       return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target);
13620 
13621     case IX86_BUILTIN_LOADAPS:
13622       return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);
13623 
13624     case IX86_BUILTIN_LOADUPS:
13625       return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
13626 
13627     case IX86_BUILTIN_STOREAPS:
13628       return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist);
13629 
13630     case IX86_BUILTIN_STOREUPS:
13631       return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
13632 
13633     case IX86_BUILTIN_LOADSS:
13634       return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1);
13635 
13636     case IX86_BUILTIN_STORESS:
13637       return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist);
13638 
13639     case IX86_BUILTIN_LOADHPS:
13640     case IX86_BUILTIN_LOADLPS:
13641     case IX86_BUILTIN_LOADHPD:
13642     case IX86_BUILTIN_LOADLPD:
13643       icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_movhps
13644 	       : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_movlps
13645 	       : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_movhpd
13646 	       : CODE_FOR_sse2_movlpd);
13647       arg0 = TREE_VALUE (arglist);
13648       arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13649       op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13650       op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13651       tmode = insn_data[icode].operand[0].mode;
13652       mode0 = insn_data[icode].operand[1].mode;
13653       mode1 = insn_data[icode].operand[2].mode;
13654 
13655       if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13656 	op0 = copy_to_mode_reg (mode0, op0);
13657       op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
13658       if (target == 0
13659 	  || GET_MODE (target) != tmode
13660 	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13661 	target = gen_reg_rtx (tmode);
13662       pat = GEN_FCN (icode) (target, op0, op1);
13663       if (! pat)
13664 	return 0;
13665       emit_insn (pat);
13666       return target;
13667 
13668     case IX86_BUILTIN_STOREHPS:
13669     case IX86_BUILTIN_STORELPS:
13670     case IX86_BUILTIN_STOREHPD:
13671     case IX86_BUILTIN_STORELPD:
13672       icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_movhps
13673 	       : fcode == IX86_BUILTIN_STORELPS ? CODE_FOR_sse_movlps
13674 	       : fcode == IX86_BUILTIN_STOREHPD ? CODE_FOR_sse2_movhpd
13675 	       : CODE_FOR_sse2_movlpd);
13676       arg0 = TREE_VALUE (arglist);
13677       arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13678       op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13679       op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13680       mode0 = insn_data[icode].operand[1].mode;
13681       mode1 = insn_data[icode].operand[2].mode;
13682 
13683       op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13684       if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13685 	op1 = copy_to_mode_reg (mode1, op1);
13686 
13687       pat = GEN_FCN (icode) (op0, op0, op1);
13688       if (! pat)
13689 	return 0;
13690       emit_insn (pat);
13691       return 0;
13692 
13693     case IX86_BUILTIN_MOVNTPS:
13694       return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
13695     case IX86_BUILTIN_MOVNTQ:
13696       return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);
13697 
13698     case IX86_BUILTIN_LDMXCSR:
13699       op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
13700       target = assign_386_stack_local (SImode, 0);
13701       emit_move_insn (target, op0);
13702       emit_insn (gen_ldmxcsr (target));
13703       return 0;
13704 
13705     case IX86_BUILTIN_STMXCSR:
13706       target = assign_386_stack_local (SImode, 0);
13707       emit_insn (gen_stmxcsr (target));
13708       return copy_to_mode_reg (SImode, target);
13709 
13710     case IX86_BUILTIN_SHUFPS:
13711     case IX86_BUILTIN_SHUFPD:
13712       icode = (fcode == IX86_BUILTIN_SHUFPS
13713 	       ? CODE_FOR_sse_shufps
13714 	       : CODE_FOR_sse2_shufpd);
13715       arg0 = TREE_VALUE (arglist);
13716       arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13717       arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
13718       op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13719       op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13720       op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
13721       tmode = insn_data[icode].operand[0].mode;
13722       mode0 = insn_data[icode].operand[1].mode;
13723       mode1 = insn_data[icode].operand[2].mode;
13724       mode2 = insn_data[icode].operand[3].mode;
13725 
13726       if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13727 	op0 = copy_to_mode_reg (mode0, op0);
13728       if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13729 	op1 = copy_to_mode_reg (mode1, op1);
13730       if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
13731 	{
13732 	  /* @@@ better error message */
13733 	  error ("mask must be an immediate");
13734 	  return gen_reg_rtx (tmode);
13735 	}
13736       if (target == 0
13737 	  || GET_MODE (target) != tmode
13738 	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13739 	target = gen_reg_rtx (tmode);
13740       pat = GEN_FCN (icode) (target, op0, op1, op2);
13741       if (! pat)
13742 	return 0;
13743       emit_insn (pat);
13744       return target;
13745 
13746     case IX86_BUILTIN_PSHUFW:
13747     case IX86_BUILTIN_PSHUFD:
13748     case IX86_BUILTIN_PSHUFHW:
13749     case IX86_BUILTIN_PSHUFLW:
13750       icode = (  fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
13751 	       : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
13752 	       : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
13753 	       : CODE_FOR_mmx_pshufw);
13754       arg0 = TREE_VALUE (arglist);
13755       arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13756       op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13757       op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13758       tmode = insn_data[icode].operand[0].mode;
13759       mode1 = insn_data[icode].operand[1].mode;
13760       mode2 = insn_data[icode].operand[2].mode;
13761 
13762       if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
13763 	op0 = copy_to_mode_reg (mode1, op0);
13764       if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
13765 	{
13766 	  /* @@@ better error message */
13767 	  error ("mask must be an immediate");
13768 	  return const0_rtx;
13769 	}
13770       if (target == 0
13771 	  || GET_MODE (target) != tmode
13772 	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13773 	target = gen_reg_rtx (tmode);
13774       pat = GEN_FCN (icode) (target, op0, op1);
13775       if (! pat)
13776 	return 0;
13777       emit_insn (pat);
13778       return target;
13779 
13780     case IX86_BUILTIN_PSLLDQI128:
13781     case IX86_BUILTIN_PSRLDQI128:
13782       icode = (  fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
13783 	       : CODE_FOR_sse2_lshrti3);
13784       arg0 = TREE_VALUE (arglist);
13785       arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13786       op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13787       op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13788       tmode = insn_data[icode].operand[0].mode;
13789       mode1 = insn_data[icode].operand[1].mode;
13790       mode2 = insn_data[icode].operand[2].mode;
13791 
13792       if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
13793 	{
13794 	  op0 = copy_to_reg (op0);
13795 	  op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
13796 	}
13797       if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
13798 	{
13799 	  error ("shift must be an immediate");
13800 	  return const0_rtx;
13801 	}
13802       target = gen_reg_rtx (V2DImode);
13803       pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0), op0, op1);
13804       if (! pat)
13805 	return 0;
13806       emit_insn (pat);
13807       return target;
13808 
13809     case IX86_BUILTIN_FEMMS:
13810       emit_insn (gen_femms ());
13811       return NULL_RTX;
13812 
13813     case IX86_BUILTIN_PAVGUSB:
13814       return ix86_expand_binop_builtin (CODE_FOR_pavgusb, arglist, target);
13815 
13816     case IX86_BUILTIN_PF2ID:
13817       return ix86_expand_unop_builtin (CODE_FOR_pf2id, arglist, target, 0);
13818 
13819     case IX86_BUILTIN_PFACC:
13820       return ix86_expand_binop_builtin (CODE_FOR_pfacc, arglist, target);
13821 
13822     case IX86_BUILTIN_PFADD:
13823       return ix86_expand_binop_builtin (CODE_FOR_addv2sf3, arglist, target);
13824 
13825     case IX86_BUILTIN_PFCMPEQ:
13826       return ix86_expand_binop_builtin (CODE_FOR_eqv2sf3, arglist, target);
13827 
13828     case IX86_BUILTIN_PFCMPGE:
13829       return ix86_expand_binop_builtin (CODE_FOR_gev2sf3, arglist, target);
13830 
13831     case IX86_BUILTIN_PFCMPGT:
13832       return ix86_expand_binop_builtin (CODE_FOR_gtv2sf3, arglist, target);
13833 
13834     case IX86_BUILTIN_PFMAX:
13835       return ix86_expand_binop_builtin (CODE_FOR_pfmaxv2sf3, arglist, target);
13836 
13837     case IX86_BUILTIN_PFMIN:
13838       return ix86_expand_binop_builtin (CODE_FOR_pfminv2sf3, arglist, target);
13839 
13840     case IX86_BUILTIN_PFMUL:
13841       return ix86_expand_binop_builtin (CODE_FOR_mulv2sf3, arglist, target);
13842 
13843     case IX86_BUILTIN_PFRCP:
13844       return ix86_expand_unop_builtin (CODE_FOR_pfrcpv2sf2, arglist, target, 0);
13845 
13846     case IX86_BUILTIN_PFRCPIT1:
13847       return ix86_expand_binop_builtin (CODE_FOR_pfrcpit1v2sf3, arglist, target);
13848 
13849     case IX86_BUILTIN_PFRCPIT2:
13850       return ix86_expand_binop_builtin (CODE_FOR_pfrcpit2v2sf3, arglist, target);
13851 
13852     case IX86_BUILTIN_PFRSQIT1:
13853       return ix86_expand_binop_builtin (CODE_FOR_pfrsqit1v2sf3, arglist, target);
13854 
13855     case IX86_BUILTIN_PFRSQRT:
13856       return ix86_expand_unop_builtin (CODE_FOR_pfrsqrtv2sf2, arglist, target, 0);
13857 
13858     case IX86_BUILTIN_PFSUB:
13859       return ix86_expand_binop_builtin (CODE_FOR_subv2sf3, arglist, target);
13860 
13861     case IX86_BUILTIN_PFSUBR:
13862       return ix86_expand_binop_builtin (CODE_FOR_subrv2sf3, arglist, target);
13863 
13864     case IX86_BUILTIN_PI2FD:
13865       return ix86_expand_unop_builtin (CODE_FOR_floatv2si2, arglist, target, 0);
13866 
13867     case IX86_BUILTIN_PMULHRW:
13868       return ix86_expand_binop_builtin (CODE_FOR_pmulhrwv4hi3, arglist, target);
13869 
13870     case IX86_BUILTIN_PF2IW:
13871       return ix86_expand_unop_builtin (CODE_FOR_pf2iw, arglist, target, 0);
13872 
13873     case IX86_BUILTIN_PFNACC:
13874       return ix86_expand_binop_builtin (CODE_FOR_pfnacc, arglist, target);
13875 
13876     case IX86_BUILTIN_PFPNACC:
13877       return ix86_expand_binop_builtin (CODE_FOR_pfpnacc, arglist, target);
13878 
13879     case IX86_BUILTIN_PI2FW:
13880       return ix86_expand_unop_builtin (CODE_FOR_pi2fw, arglist, target, 0);
13881 
13882     case IX86_BUILTIN_PSWAPDSI:
13883       return ix86_expand_unop_builtin (CODE_FOR_pswapdv2si2, arglist, target, 0);
13884 
13885     case IX86_BUILTIN_PSWAPDSF:
13886       return ix86_expand_unop_builtin (CODE_FOR_pswapdv2sf2, arglist, target, 0);
13887 
13888     case IX86_BUILTIN_SSE_ZERO:
13889       target = gen_reg_rtx (V4SFmode);
13890       emit_insn (gen_sse_clrv4sf (target));
13891       return target;
13892 
13893     case IX86_BUILTIN_MMX_ZERO:
13894       target = gen_reg_rtx (DImode);
13895       emit_insn (gen_mmx_clrdi (target));
13896       return target;
13897 
13898     case IX86_BUILTIN_CLRTI:
13899       target = gen_reg_rtx (V2DImode);
13900       emit_insn (gen_sse2_clrti (simplify_gen_subreg (TImode, target, V2DImode, 0)));
13901       return target;
13902 
13903 
13904     case IX86_BUILTIN_SQRTSD:
13905       return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv2df2, arglist, target);
13906     case IX86_BUILTIN_LOADAPD:
13907       return ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist, target, 1);
13908     case IX86_BUILTIN_LOADUPD:
13909       return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);
13910 
13911     case IX86_BUILTIN_STOREAPD:
13912       return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
13913     case IX86_BUILTIN_STOREUPD:
13914       return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);
13915 
13916     case IX86_BUILTIN_LOADSD:
13917       return ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist, target, 1);
13918 
13919     case IX86_BUILTIN_STORESD:
13920       return ix86_expand_store_builtin (CODE_FOR_sse2_storesd, arglist);
13921 
13922     case IX86_BUILTIN_SETPD1:
13923       target = assign_386_stack_local (DFmode, 0);
13924       arg0 = TREE_VALUE (arglist);
13925       emit_move_insn (adjust_address (target, DFmode, 0),
13926 		      expand_expr (arg0, NULL_RTX, VOIDmode, 0));
13927       op0 = gen_reg_rtx (V2DFmode);
13928       emit_insn (gen_sse2_loadsd (op0, adjust_address (target, V2DFmode, 0)));
13929       emit_insn (gen_sse2_shufpd (op0, op0, op0, GEN_INT (0)));
13930       return op0;
13931 
13932     case IX86_BUILTIN_SETPD:
13933       target = assign_386_stack_local (V2DFmode, 0);
13934       arg0 = TREE_VALUE (arglist);
13935       arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13936       emit_move_insn (adjust_address (target, DFmode, 0),
13937 		      expand_expr (arg0, NULL_RTX, VOIDmode, 0));
13938       emit_move_insn (adjust_address (target, DFmode, 8),
13939 		      expand_expr (arg1, NULL_RTX, VOIDmode, 0));
13940       op0 = gen_reg_rtx (V2DFmode);
13941       emit_insn (gen_sse2_movapd (op0, target));
13942       return op0;
13943 
13944     case IX86_BUILTIN_LOADRPD:
13945       target = ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist,
13946 					 gen_reg_rtx (V2DFmode), 1);
13947       emit_insn (gen_sse2_shufpd (target, target, target, GEN_INT (1)));
13948       return target;
13949 
13950     case IX86_BUILTIN_LOADPD1:
13951       target = ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist,
13952 					 gen_reg_rtx (V2DFmode), 1);
13953       emit_insn (gen_sse2_shufpd (target, target, target, const0_rtx));
13954       return target;
13955 
13956     case IX86_BUILTIN_STOREPD1:
13957       return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
13958     case IX86_BUILTIN_STORERPD:
13959       return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
13960 
13961     case IX86_BUILTIN_CLRPD:
13962       target = gen_reg_rtx (V2DFmode);
13963       emit_insn (gen_sse_clrv2df (target));
13964       return target;
13965 
13966     case IX86_BUILTIN_MFENCE:
13967       emit_insn (gen_sse2_mfence ());
13968       return 0;
13969     case IX86_BUILTIN_LFENCE:
13970       emit_insn (gen_sse2_lfence ());
13971       return 0;
13972 
13973     case IX86_BUILTIN_CLFLUSH:
13974       arg0 = TREE_VALUE (arglist);
13975       op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13976       icode = CODE_FOR_sse2_clflush;
13977       if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
13978 	op0 = copy_to_mode_reg (Pmode, op0);
13979 
13980       emit_insn (gen_sse2_clflush (op0));
13981       return 0;
13982 
13983     case IX86_BUILTIN_MOVNTPD:
13984       return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
13985     case IX86_BUILTIN_MOVNTDQ:
13986       return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
13987     case IX86_BUILTIN_MOVNTI:
13988       return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);
13989 
13990     case IX86_BUILTIN_LOADDQA:
13991       return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqa, arglist, target, 1);
13992     case IX86_BUILTIN_LOADDQU:
13993       return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, arglist, target, 1);
13994     case IX86_BUILTIN_LOADD:
13995       return ix86_expand_unop_builtin (CODE_FOR_sse2_loadd, arglist, target, 1);
13996 
13997     case IX86_BUILTIN_STOREDQA:
13998       return ix86_expand_store_builtin (CODE_FOR_sse2_movdqa, arglist);
13999     case IX86_BUILTIN_STOREDQU:
14000       return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, arglist);
14001     case IX86_BUILTIN_STORED:
14002       return ix86_expand_store_builtin (CODE_FOR_sse2_stored, arglist);
14003 
14004     case IX86_BUILTIN_MONITOR:
14005       arg0 = TREE_VALUE (arglist);
14006       arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14007       arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14008       op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14009       op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14010       op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14011       if (!REG_P (op0))
14012 	op0 = copy_to_mode_reg (SImode, op0);
14013       if (!REG_P (op1))
14014 	op1 = copy_to_mode_reg (SImode, op1);
14015       if (!REG_P (op2))
14016 	op2 = copy_to_mode_reg (SImode, op2);
14017       emit_insn (gen_monitor (op0, op1, op2));
14018       return 0;
14019 
14020     case IX86_BUILTIN_MWAIT:
14021       arg0 = TREE_VALUE (arglist);
14022       arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14023       op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14024       op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14025       if (!REG_P (op0))
14026 	op0 = copy_to_mode_reg (SImode, op0);
14027       if (!REG_P (op1))
14028 	op1 = copy_to_mode_reg (SImode, op1);
14029       emit_insn (gen_mwait (op0, op1));
14030       return 0;
14031 
14032     case IX86_BUILTIN_LOADDDUP:
14033       return ix86_expand_unop_builtin (CODE_FOR_loadddup, arglist, target, 1);
14034 
14035     case IX86_BUILTIN_LDDQU:
14036       return ix86_expand_unop_builtin (CODE_FOR_lddqu, arglist, target,
14037 				       1);
14038 
14039     default:
14040       break;
14041     }
14042 
14043   for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
14044     if (d->code == fcode)
14045       {
14046 	/* Compares are treated specially.  */
14047 	if (d->icode == CODE_FOR_maskcmpv4sf3
14048 	    || d->icode == CODE_FOR_vmmaskcmpv4sf3
14049 	    || d->icode == CODE_FOR_maskncmpv4sf3
14050 	    || d->icode == CODE_FOR_vmmaskncmpv4sf3
14051 	    || d->icode == CODE_FOR_maskcmpv2df3
14052 	    || d->icode == CODE_FOR_vmmaskcmpv2df3
14053 	    || d->icode == CODE_FOR_maskncmpv2df3
14054 	    || d->icode == CODE_FOR_vmmaskncmpv2df3)
14055 	  return ix86_expand_sse_compare (d, arglist, target);
14056 
14057 	return ix86_expand_binop_builtin (d->icode, arglist, target);
14058       }
14059 
14060   for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
14061     if (d->code == fcode)
14062       return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
14063 
14064   for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
14065     if (d->code == fcode)
14066       return ix86_expand_sse_comi (d, arglist, target);
14067 
14068   /* @@@ Should really do something sensible here.  */
14069   return 0;
14070 }
14071 
14072 /* Store OPERAND to memory after reload has completed.  This means
14073    that we can't easily use assign_stack_local.  */
14074 rtx
14075 ix86_force_to_memory (mode, operand)
14076      enum machine_mode mode;
14077      rtx operand;
14078 {
14079   rtx result;
14080   if (!reload_completed)
14081     abort ();
14082   if (TARGET_64BIT && TARGET_RED_ZONE)
14083     {
14084       result = gen_rtx_MEM (mode,
14085 			    gen_rtx_PLUS (Pmode,
14086 					  stack_pointer_rtx,
14087 					  GEN_INT (-RED_ZONE_SIZE)));
14088       emit_move_insn (result, operand);
14089     }
14090   else if (TARGET_64BIT && !TARGET_RED_ZONE)
14091     {
14092       switch (mode)
14093 	{
14094 	case HImode:
14095 	case SImode:
14096 	  operand = gen_lowpart (DImode, operand);
14097 	  /* FALLTHRU */
14098 	case DImode:
14099 	  emit_insn (
14100 		      gen_rtx_SET (VOIDmode,
14101 				   gen_rtx_MEM (DImode,
14102 						gen_rtx_PRE_DEC (DImode,
14103 							stack_pointer_rtx)),
14104 				   operand));
14105 	  break;
14106 	default:
14107 	  abort ();
14108 	}
14109       result = gen_rtx_MEM (mode, stack_pointer_rtx);
14110     }
14111   else
14112     {
14113       switch (mode)
14114 	{
14115 	case DImode:
14116 	  {
14117 	    rtx operands[2];
14118 	    split_di (&operand, 1, operands, operands + 1);
14119 	    emit_insn (
14120 			gen_rtx_SET (VOIDmode,
14121 				     gen_rtx_MEM (SImode,
14122 						  gen_rtx_PRE_DEC (Pmode,
14123 							stack_pointer_rtx)),
14124 				     operands[1]));
14125 	    emit_insn (
14126 			gen_rtx_SET (VOIDmode,
14127 				     gen_rtx_MEM (SImode,
14128 						  gen_rtx_PRE_DEC (Pmode,
14129 							stack_pointer_rtx)),
14130 				     operands[0]));
14131 	  }
14132 	  break;
14133 	case HImode:
14134 	  /* It is better to store HImode values as SImode.  */
14135 	  if (!TARGET_PARTIAL_REG_STALL)
14136 	    operand = gen_lowpart (SImode, operand);
14137 	  /* FALLTHRU */
14138 	case SImode:
14139 	  emit_insn (
14140 		      gen_rtx_SET (VOIDmode,
14141 				   gen_rtx_MEM (GET_MODE (operand),
14142 						gen_rtx_PRE_DEC (SImode,
14143 							stack_pointer_rtx)),
14144 				   operand));
14145 	  break;
14146 	default:
14147 	  abort ();
14148 	}
14149       result = gen_rtx_MEM (mode, stack_pointer_rtx);
14150     }
14151   return result;
14152 }
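
/* Sketch: on a 32-bit target a DImode operand is split and pushed as
   two SImode halves, roughly

     pushl  <high half>
     pushl  <low half>

   with (mem:DI (reg:SI esp)) returned, while a 64-bit target with a
   red zone simply stores below the stack pointer without adjusting
   it.  */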
14153 
14154 /* Free the operand from memory.  */
14155 void
14156 ix86_free_from_memory (mode)
14157      enum machine_mode mode;
14158 {
14159   if (!TARGET_64BIT || !TARGET_RED_ZONE)
14160     {
14161       int size;
14162 
14163       if (mode == DImode || TARGET_64BIT)
14164 	size = 8;
14165       else if (mode == HImode && TARGET_PARTIAL_REG_STALL)
14166 	size = 2;
14167       else
14168 	size = 4;
14169       /* Use LEA to deallocate stack space.  In peephole2 it will be converted
14170          to a pop or add instruction if registers are available.  */
14171       emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
14172 			      gen_rtx_PLUS (Pmode, stack_pointer_rtx,
14173 					    GEN_INT (size))));
14174     }
14175 }
14176 
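/* A minimal usage sketch (not from this file; OPERAND and the mode are
   hypothetical): callers pair the two helpers, using the same mode for
   the store and the matching deallocation:

	rtx mem = ix86_force_to_memory (DImode, operand);
	... emit insns that use MEM in place of OPERAND ...
	ix86_free_from_memory (DImode);

   Passing a different mode to ix86_free_from_memory would misadjust the
   stack pointer on the paths that actually push; on the TARGET_RED_ZONE
   path both helpers correctly leave the stack pointer alone.  */
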
14177 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
14178    QImode must go into class Q_REGS.
14179    Narrow ALL_REGS to GENERAL_REGS.  This allows movsf and
14180    movdf to do mem-to-mem moves through integer regs.  */
14181 enum reg_class
14182 ix86_preferred_reload_class (x, class)
14183      rtx x;
14184      enum reg_class class;
14185 {
14186   if (GET_CODE (x) == CONST_VECTOR && x != CONST0_RTX (GET_MODE (x)))
14187     return NO_REGS;
14188   if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
14189     {
14190       /* SSE can't load any constant directly yet.  */
14191       if (SSE_CLASS_P (class))
14192 	return NO_REGS;
14193       /* Floats can load 0 and 1.  */
14194       if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x))
14195 	{
14196 	  /* Limit class to non-SSE.  Use GENERAL_REGS if possible.  */
14197 	  if (MAYBE_SSE_CLASS_P (class))
14198 	    return (reg_class_subset_p (class, GENERAL_REGS)
14199 		    ? GENERAL_REGS : FLOAT_REGS);
14200 	  else
14201 	    return class;
14202 	}
14203       /* General regs can load everything.  */
14204       if (reg_class_subset_p (class, GENERAL_REGS))
14205 	return GENERAL_REGS;
14206       /* In case we haven't resolved FLOAT or SSE yet, give up.  */
14207       if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
14208 	return NO_REGS;
14209     }
14210   if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
14211     return NO_REGS;
14212   if (GET_MODE (x) == QImode && ! reg_class_subset_p (class, Q_REGS))
14213     return Q_REGS;
14214   return class;
14215 }
14216 
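/* Two illustrative queries for the function above (inputs hypothetical):
   reloading a QImode value into GENERAL_REGS narrows the answer to
   Q_REGS, so the value can only land in %al/%bl/%cl/%dl; reloading a
   CONST_DOUBLE that is not a standard 80387 constant into FLOAT_REGS
   yields NO_REGS, forcing the constant into the constant pool first.  */
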
14217 /* If we are copying between general and FP registers, we need a memory
14218    location. The same is true for SSE and MMX registers.
14219 
14220    The macro can't work reliably when one of the CLASSES is a class
14221    containing registers from multiple units (SSE, MMX, integer).  We avoid
14222    this by never combining those units in a single alternative in the
14223    machine description.  Ensure that this constraint holds to avoid surprises.
14224 
14225    When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
14226    enforce these sanity checks.  */
14227 int
14228 ix86_secondary_memory_needed (class1, class2, mode, strict)
14229      enum reg_class class1, class2;
14230      enum machine_mode mode;
14231      int strict;
14232 {
14233   if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
14234       || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
14235       || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
14236       || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
14237       || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
14238       || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
14239     {
14240       if (strict)
14241 	abort ();
14242       else
14243 	return 1;
14244     }
14245   return (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)
14246 	  || (SSE_CLASS_P (class1) != SSE_CLASS_P (class2)
14247 	      && (mode) != SImode)
14248 	  || (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
14249 	      && (mode) != SImode));
14250 }
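/* Example decisions made by the function above (classes hypothetical):
   a DImode move between MMX_REGS and GENERAL_REGS answers nonzero, so
   reload goes through a stack slot; an SImode move between SSE_REGS and
   GENERAL_REGS answers zero, since the (mode) != SImode tests exempt
   single-word moves the hardware can perform directly.  */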
14251 /* Return the cost of moving data from a register in class CLASS1 to
14252    one in class CLASS2.
14253 
14254    It is not required that the cost always equal 2 when FROM is the same as TO;
14255    on some machines it is expensive to move between registers if they are not
14256    general registers.  */
14257 int
ix86_register_move_cost(mode,class1,class2)14258 ix86_register_move_cost (mode, class1, class2)
14259      enum machine_mode mode;
14260      enum reg_class class1, class2;
14261 {
14262   /* In case we require secondary memory, compute the cost of the store
14263      followed by the load.  To avoid bad register allocation choices, this
14264      needs to be *at least* as high as the symmetric MEMORY_MOVE_COST.  */
14265 
14266   if (ix86_secondary_memory_needed (class1, class2, mode, 0))
14267     {
14268       int cost = 1;
14269 
14270       cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
14271 		   MEMORY_MOVE_COST (mode, class1, 1));
14272       cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
14273 		   MEMORY_MOVE_COST (mode, class2, 1));
14274 
14275       /* In case of copying from a general purpose register we may emit
14276          multiple stores followed by a single load, causing a memory size
14277          mismatch stall.  Count this as an arbitrarily high cost of 20.  */
14278       if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
14279 	cost += 20;
14280 
14281       /* In the case of FP/MMX moves, the registers actually overlap, and we
14282 	 have to switch modes in order to treat them differently.  */
14283       if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
14284           || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
14285 	cost += 20;
14286 
14287       return cost;
14288     }
14289 
14290   /* Moves between the SSE/MMX units and the integer unit are expensive.  */
14291   if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
14292       || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
14293     return ix86_cost->mmxsse_to_integer;
14294   if (MAYBE_FLOAT_CLASS_P (class1))
14295     return ix86_cost->fp_move;
14296   if (MAYBE_SSE_CLASS_P (class1))
14297     return ix86_cost->sse_move;
14298   if (MAYBE_MMX_CLASS_P (class1))
14299     return ix86_cost->mmx_move;
14300   return 2;
14301 }
14302 
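/* Worked example (the numbers come from the active ix86_cost table, so
   they vary per CPU): an SFmode move between FLOAT_REGS and SSE_REGS
   needs secondary memory, so it costs 1 plus the larger of the fp
   load/store costs plus the larger of the SSE load/store costs; a move
   within GENERAL_REGS skips every special case and costs 2.  */
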
14303 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE.  */
14304 int
14305 ix86_hard_regno_mode_ok (regno, mode)
14306      int regno;
14307      enum machine_mode mode;
14308 {
14309   /* Flags and only flags can hold CCmode values.  */
14310   if (CC_REGNO_P (regno))
14311     return GET_MODE_CLASS (mode) == MODE_CC;
14312   if (GET_MODE_CLASS (mode) == MODE_CC
14313       || GET_MODE_CLASS (mode) == MODE_RANDOM
14314       || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
14315     return 0;
14316   if (FP_REGNO_P (regno))
14317     return VALID_FP_MODE_P (mode);
14318   if (SSE_REGNO_P (regno))
14319     return (TARGET_SSE ? VALID_SSE_REG_MODE (mode) : 0);
14320   if (MMX_REGNO_P (regno))
14321     return (TARGET_MMX
14322 	    ? VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode) : 0);
14323   /* We handle both integers and floats in the general purpose registers.
14324      In the future we should be able to handle vector modes as well.  */
14325   if (!VALID_INT_MODE_P (mode) && !VALID_FP_MODE_P (mode))
14326     return 0;
14327   /* Take care with QImode values - they can live in non-QI regs, but then
14328      they do cause partial register stalls.  */
14329   if (regno < 4 || mode != QImode || TARGET_64BIT)
14330     return 1;
14331   return reload_in_progress || reload_completed || !TARGET_PARTIAL_REG_STALL;
14332 }
14333 
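/* Illustrative queries: QImode in %eax (regno 0) is always OK because
   regno < 4; QImode in %esi is OK only during or after reload, or when
   TARGET_PARTIAL_REG_STALL is off; any mode in an SSE register is
   rejected outright when TARGET_SSE is disabled.  */
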
14334 /* Return the cost of moving data of mode M between a
14335    register and memory.  A value of 2 is the default; this cost is
14336    relative to those in `REGISTER_MOVE_COST'.
14337 
14338    If moving between registers and memory is more expensive than
14339    between two registers, you should define this macro to express the
14340    relative cost.
14341 
14342    Also model the increased cost of moving QImode registers in
14343    non-Q_REGS classes.
14344  */
14345 int
14346 ix86_memory_move_cost (mode, class, in)
14347      enum machine_mode mode;
14348      enum reg_class class;
14349      int in;
14350 {
14351   if (FLOAT_CLASS_P (class))
14352     {
14353       int index;
14354       switch (mode)
14355 	{
14356 	  case SFmode:
14357 	    index = 0;
14358 	    break;
14359 	  case DFmode:
14360 	    index = 1;
14361 	    break;
14362 	  case XFmode:
14363 	  case TFmode:
14364 	    index = 2;
14365 	    break;
14366 	  default:
14367 	    return 100;
14368 	}
14369       return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
14370     }
14371   if (SSE_CLASS_P (class))
14372     {
14373       int index;
14374       switch (GET_MODE_SIZE (mode))
14375 	{
14376 	  case 4:
14377 	    index = 0;
14378 	    break;
14379 	  case 8:
14380 	    index = 1;
14381 	    break;
14382 	  case 16:
14383 	    index = 2;
14384 	    break;
14385 	  default:
14386 	    return 100;
14387 	}
14388       return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
14389     }
14390   if (MMX_CLASS_P (class))
14391     {
14392       int index;
14393       switch (GET_MODE_SIZE (mode))
14394 	{
14395 	  case 4:
14396 	    index = 0;
14397 	    break;
14398 	  case 8:
14399 	    index = 1;
14400 	    break;
14401 	  default:
14402 	    return 100;
14403 	}
14404       return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
14405     }
14406   switch (GET_MODE_SIZE (mode))
14407     {
14408       case 1:
14409 	if (in)
14410 	  return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
14411 		  : ix86_cost->movzbl_load);
14412 	else
14413 	  return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
14414 		  : ix86_cost->int_store[0] + 4);
14415 	break;
14416       case 2:
14417 	return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
14418       default:
14419 	/* Compute the number of 32-bit moves needed.  TFmode is moved as XFmode.  */
14420 	if (mode == TFmode)
14421 	  mode = XFmode;
14422 	return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
14423 		* ((int) GET_MODE_SIZE (mode)
14424 		   + UNITS_PER_WORD - 1) / UNITS_PER_WORD);
14425     }
14426 }
14427 
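/* Worked example for the default (multi-word) case above: a DImode
   value on a 32-bit target needs (8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD
   = 2 word-sized moves, so the cost is twice int_load[2] (or
   int_store[2]); TFmode is first rewritten to XFmode as noted.  */
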
14428 #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
14429 static void
14430 ix86_svr3_asm_out_constructor (symbol, priority)
14431      rtx symbol;
14432      int priority ATTRIBUTE_UNUSED;
14433 {
14434   init_section ();
14435   fputs ("\tpushl $", asm_out_file);
14436   assemble_name (asm_out_file, XSTR (symbol, 0));
14437   fputc ('\n', asm_out_file);
14438 }
14439 #endif
14440 
14441 #if TARGET_MACHO
14442 
14443 static int current_machopic_label_num;
14444 
14445 /* Given a symbol name and its associated stub, write out the
14446    definition of the stub.  */
14447 
14448 void
14449 machopic_output_stub (file, symb, stub)
14450      FILE *file;
14451      const char *symb, *stub;
14452 {
14453   unsigned int length;
14454   char *binder_name, *symbol_name, lazy_ptr_name[32];
14455   int label = ++current_machopic_label_num;
14456 
14457   /* Lose our funky encoding stuff so it doesn't contaminate the stub.  */
14458   symb = (*targetm.strip_name_encoding) (symb);
14459 
14460   length = strlen (stub);
14461   binder_name = alloca (length + 32);
14462   GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
14463 
14464   length = strlen (symb);
14465   symbol_name = alloca (length + 32);
14466   GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
14467 
14468   sprintf (lazy_ptr_name, "L%d$lz", label);
14469 
14470   if (MACHOPIC_PURE)
14471     machopic_picsymbol_stub_section ();
14472   else
14473     machopic_symbol_stub_section ();
14474 
14475   fprintf (file, "%s:\n", stub);
14476   fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
14477 
14478   if (MACHOPIC_PURE)
14479     {
14480       fprintf (file, "\tcall LPC$%d\nLPC$%d:\tpopl %%eax\n", label, label);
14481       fprintf (file, "\tmovl %s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
14482       fprintf (file, "\tjmp %%edx\n");
14483     }
14484   else
14485     fprintf (file, "\tjmp *%s\n", lazy_ptr_name);
14486 
14487   fprintf (file, "%s:\n", binder_name);
14488 
14489   if (MACHOPIC_PURE)
14490     {
14491       fprintf (file, "\tlea %s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
14492       fprintf (file, "\tpushl %%eax\n");
14493     }
14494   else
14495     fprintf (file, "\tpushl $%s\n", lazy_ptr_name);
14496 
14497   fprintf (file, "\tjmp dyld_stub_binding_helper\n");
14498 
14499   machopic_lazy_symbol_ptr_section ();
14500   fprintf (file, "%s:\n", lazy_ptr_name);
14501   fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
14502   fprintf (file, "\t.long %s\n", binder_name);
14503 }
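
/* For reference, the !MACHOPIC_PURE path above emits assembly of
   roughly this shape (the label names below are made up; the real stub,
   binder and lazy-pointer names come from the GEN_*_NAME macros and the
   label counter):

	L_foo$stub:
		.indirect_symbol _foo
		jmp *L1$lz
	L_foo$stub_binder:
		pushl $L1$lz
		jmp dyld_stub_binding_helper
	L1$lz:
		.indirect_symbol _foo
		.long L_foo$stub_binder  */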
14504 #endif /* TARGET_MACHO */
14505 
14506 /* Order the registers for register allocator.  */
14507 
14508 void
14509 x86_order_regs_for_local_alloc ()
14510 {
14511    int pos = 0;
14512    int i;
14513 
14514    /* First allocate the local general purpose registers.  */
14515    for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
14516      if (GENERAL_REGNO_P (i) && call_used_regs[i])
14517 	reg_alloc_order [pos++] = i;
14518 
14519    /* Global general purpose registers.  */
14520    for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
14521      if (GENERAL_REGNO_P (i) && !call_used_regs[i])
14522 	reg_alloc_order [pos++] = i;
14523 
14524    /* x87 registers come first in case we are doing FP math
14525       using them.  */
14526    if (!TARGET_SSE_MATH)
14527      for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
14528        reg_alloc_order [pos++] = i;
14529 
14530    /* SSE registers.  */
14531    for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
14532      reg_alloc_order [pos++] = i;
14533    for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
14534      reg_alloc_order [pos++] = i;
14535 
14536    /* x87 registers.  */
14537    if (TARGET_SSE_MATH)
14538      for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
14539        reg_alloc_order [pos++] = i;
14540 
14541    for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
14542      reg_alloc_order [pos++] = i;
14543 
14544    /* Initialize the rest of the array, as some registers are never
14545       allocated at all.  */
14546    while (pos < FIRST_PSEUDO_REGISTER)
14547      reg_alloc_order [pos++] = 0;
14548 }
14549 
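/* The order produced above for the default (!TARGET_SSE_MATH) case is:
   call-clobbered general regs, call-saved general regs, x87 stack regs,
   SSE regs (including the REX extensions), then MMX regs, with the tail
   of the array zero-filled for registers that are never allocated.  */
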
14550 /* Returns an expression indicating where the this parameter is
14551    located on entry to FUNCTION.  */
14552 
14553 static rtx
14554 x86_this_parameter (function)
14555      tree function;
14556 {
14557   tree type = TREE_TYPE (function);
14558 
14559   if (TARGET_64BIT)
14560     {
14561       int n = aggregate_value_p (TREE_TYPE (type)) != 0;
14562       return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
14563     }
14564 
14565   if (ix86_fntype_regparm (type) > 0)
14566     {
14567       tree parm;
14568 
14569       parm = TYPE_ARG_TYPES (type);
14570       /* Figure out whether or not the function has a variable number of
14571 	 arguments.  */
14572       for (; parm; parm = TREE_CHAIN (parm))
14573 	if (TREE_VALUE (parm) == void_type_node)
14574 	  break;
14575       /* If not, the this parameter is in %eax.  */
14576       if (parm)
14577 	return gen_rtx_REG (SImode, 0);
14578     }
14579 
14580   if (aggregate_value_p (TREE_TYPE (type)))
14581     return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
14582   else
14583     return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
14584 }
14585 
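/* Examples of the returned rtx (a sketch; the exact location depends on
   the ABI in effect): with a nonzero regparm count and a fixed argument
   list, `this' arrives in %eax; otherwise it sits on the stack at
   4(%esp), or at 8(%esp) when a hidden aggregate-return pointer is
   passed first.  64-bit targets use %rdi, or %rsi behind an aggregate
   return.  */
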
14586 /* Determine whether x86_output_mi_thunk can succeed.  */
14587 
14588 static bool
14589 x86_can_output_mi_thunk (thunk, delta, vcall_offset, function)
14590      tree thunk ATTRIBUTE_UNUSED;
14591      HOST_WIDE_INT delta ATTRIBUTE_UNUSED;
14592      HOST_WIDE_INT vcall_offset;
14593      tree function;
14594 {
14595   /* 64-bit can handle anything.  */
14596   if (TARGET_64BIT)
14597     return true;
14598 
14599   /* For 32-bit, everything's fine if we have one free register.  */
14600   if (ix86_fntype_regparm (TREE_TYPE (function)) < 3)
14601     return true;
14602 
14603   /* Need a free register for vcall_offset.  */
14604   if (vcall_offset)
14605     return false;
14606 
14607   /* Need a free register for GOT references.  */
14608   if (flag_pic && !(*targetm.binds_local_p) (function))
14609     return false;
14610 
14611   /* Otherwise ok.  */
14612   return true;
14613 }
14614 
14615 /* Output the assembler code for a thunk function.  THUNK_DECL is the
14616    declaration for the thunk function itself, FUNCTION is the decl for
14617    the target function.  DELTA is an immediate constant offset to be
14618    added to THIS.  If VCALL_OFFSET is non-zero, the word at
14619    *(*this + vcall_offset) should be added to THIS.  */
14620 
14621 static void
14622 x86_output_mi_thunk (file, thunk, delta, vcall_offset, function)
14623      FILE *file ATTRIBUTE_UNUSED;
14624      tree thunk ATTRIBUTE_UNUSED;
14625      HOST_WIDE_INT delta;
14626      HOST_WIDE_INT vcall_offset;
14627      tree function;
14628 {
14629   rtx xops[3];
14630   rtx this = x86_this_parameter (function);
14631   rtx this_reg, tmp;
14632 
14633   /* If VCALL_OFFSET, we'll need THIS in a register.  Might as well
14634      pull it in now and let DELTA benefit.  */
14635   if (REG_P (this))
14636     this_reg = this;
14637   else if (vcall_offset)
14638     {
14639       /* Put the this parameter into %eax.  */
14640       xops[0] = this;
14641       xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
14642       output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
14643     }
14644   else
14645     this_reg = NULL_RTX;
14646 
14647   /* Adjust the this parameter by a fixed constant.  */
14648   if (delta)
14649     {
14650       xops[0] = GEN_INT (delta);
14651       xops[1] = this_reg ? this_reg : this;
14652       if (TARGET_64BIT)
14653 	{
14654 	  if (!x86_64_general_operand (xops[0], DImode))
14655 	    {
14656 	      tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
14657 	      xops[1] = tmp;
14658 	      output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
14659 	      xops[0] = tmp;
14660 	      xops[1] = this;
14661 	    }
14662 	  output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
14663 	}
14664       else
14665 	output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
14666     }
14667 
14668   /* Adjust the this parameter by a value stored in the vtable.  */
14669   if (vcall_offset)
14670     {
14671       if (TARGET_64BIT)
14672 	tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
14673       else
14674 	tmp = gen_rtx_REG (SImode, 2 /* ECX */);
14675 
14676       xops[0] = gen_rtx_MEM (Pmode, this_reg);
14677       xops[1] = tmp;
14678       if (TARGET_64BIT)
14679 	output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
14680       else
14681 	output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
14682 
14683       /* Adjust the this parameter.  */
14684       xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
14685       if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
14686 	{
14687 	  rtx tmp2 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
14688 	  xops[0] = GEN_INT (vcall_offset);
14689 	  xops[1] = tmp2;
14690 	  output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
14691 	  xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
14692 	}
14693       xops[1] = this_reg;
14694       if (TARGET_64BIT)
14695 	output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
14696       else
14697 	output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
14698     }
14699 
14700   /* If necessary, drop THIS back to its stack slot.  */
14701   if (this_reg && this_reg != this)
14702     {
14703       xops[0] = this_reg;
14704       xops[1] = this;
14705       output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
14706     }
14707 
14708   xops[0] = DECL_RTL (function);
14709   if (TARGET_64BIT)
14710     {
14711       if (!flag_pic || (*targetm.binds_local_p) (function))
14712 	output_asm_insn ("jmp\t%P0", xops);
14713       else
14714 	{
14715 	  tmp = XEXP (xops[0], 0);
14716 	  tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, tmp), UNSPEC_GOTPCREL);
14717 	  tmp = gen_rtx_CONST (Pmode, tmp);
14718 	  tmp = gen_rtx_MEM (QImode, tmp);
14719 	  xops[0] = tmp;
14720 	  output_asm_insn ("jmp\t%A0", xops);
14721 	}
14722     }
14723   else
14724     {
14725       if (!flag_pic || (*targetm.binds_local_p) (function))
14726 	output_asm_insn ("jmp\t%P0", xops);
14727       else
14728 	{
14729 	  tmp = gen_rtx_REG (SImode, 2 /* ECX */);
14730 	  output_set_got (tmp);
14731 
14732 	  xops[1] = tmp;
14733 	  output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
14734 	  output_asm_insn ("jmp\t{*}%1", xops);
14735 	}
14736     }
14737 }
14738 
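/* For the common 32-bit case (stack-based `this', nonzero DELTA, no
   VCALL_OFFSET, non-PIC) the function above prints something like this
   (a DELTA of 16 chosen arbitrarily):

	addl	$16, 4(%esp)
	jmp	target_function  */
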
14739 int
14740 x86_field_alignment (field, computed)
14741      tree field;
14742      int computed;
14743 {
14744   enum machine_mode mode;
14745   tree type = TREE_TYPE (field);
14746 
14747   if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
14748     return computed;
14749   mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
14750 		    ? get_inner_array_type (type) : type);
14751   if (mode == DFmode || mode == DCmode
14752       || GET_MODE_CLASS (mode) == MODE_INT
14753       || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
14754     return MIN (32, computed);
14755   return computed;
14756 }
14757 
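/* Example: without -malign-double, a DFmode (double) structure member
   on ia32 is capped at 32-bit alignment here, preserving the
   traditional layout; modes outside the classes listed keep whatever
   alignment was already computed.  */
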
14758 /* Output assembler code to FILE to increment profiler label # LABELNO
14759    for profiling a function entry.  */
14760 void
14761 x86_function_profiler (file, labelno)
14762      FILE *file;
14763      int labelno;
14764 {
14765   if (TARGET_64BIT)
14766     if (flag_pic)
14767       {
14768 #ifndef NO_PROFILE_COUNTERS
14769 	fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
14770 #endif
14771 	fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
14772       }
14773     else
14774       {
14775 #ifndef NO_PROFILE_COUNTERS
14776 	fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno);
14777 #endif
14778 	fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
14779       }
14780   else if (flag_pic)
14781     {
14782 #ifndef NO_PROFILE_COUNTERS
14783       fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
14784 	       LPREFIX, labelno, PROFILE_COUNT_REGISTER);
14785 #endif
14786       fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
14787     }
14788   else
14789     {
14790 #ifndef NO_PROFILE_COUNTERS
14791       fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
14792 	       PROFILE_COUNT_REGISTER);
14793 #endif
14794       fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
14795     }
14796 }
14797 
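/* Sample of what the 32-bit non-PIC branch prints (the label prefix,
   the counter register and the mcount symbol are all configuration
   macros, so each line varies by target):

	movl	$.LP0,%edx
	call	mcount  */
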
14798 /* Implement machine specific optimizations.
14799    At the moment we implement a single transformation: AMD Athlon works
14800    faster when RET is neither the destination of a conditional jump nor
14801    directly preceded by another jump instruction.  We avoid the penalty by
14802    inserting a NOP just before the RET instruction in such cases.  */
14803 void
14804 x86_machine_dependent_reorg (first)
14805      rtx first ATTRIBUTE_UNUSED;
14806 {
14807   edge e;
14808 
14809   if (!TARGET_ATHLON || !optimize || optimize_size)
14810     return;
14811   for (e = EXIT_BLOCK_PTR->pred; e; e = e->pred_next)
14812   {
14813     basic_block bb = e->src;
14814     rtx ret = bb->end;
14815     rtx prev;
14816     bool insert = false;
14817 
14818     if (!returnjump_p (ret) || !maybe_hot_bb_p (bb))
14819       continue;
14820     prev = prev_nonnote_insn (ret);
14821     if (prev && GET_CODE (prev) == CODE_LABEL)
14822       {
14823 	edge e;
14824 	for (e = bb->pred; e; e = e->pred_next)
14825 	  if (EDGE_FREQUENCY (e) && e->src->index > 0
14826 	      && !(e->flags & EDGE_FALLTHRU))
14827 	    insert = 1;
14828       }
14829     if (!insert)
14830       {
14831 	prev = prev_real_insn (ret);
14832 	if (prev && GET_CODE (prev) == JUMP_INSN
14833 	    && any_condjump_p (prev))
14834 	  insert = 1;
14835       }
14836     if (insert)
14837       emit_insn_before (gen_nop (), ret);
14838   }
14839 }
14840 
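/* Example of the transformation: a hot return reached as

	jne	.L5
	ret

   is rewritten by the loop above to

	jne	.L5
	nop
	ret

   sidestepping the Athlon penalty described in the comment.  */
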
14841 /* Return true if we do not know how to pass TYPE solely in registers.  */
14842 bool
14843 ix86_must_pass_in_stack (mode, type)
14844 	enum machine_mode mode;
14845 	tree type;
14846 {
14847    if (default_must_pass_in_stack (mode, type))
14848      return true;
14849    return (!TARGET_64BIT && type && mode == TImode);
14850 }
14851 
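/* Example: a TImode argument (e.g. one declared with
   __attribute__ ((mode (TI)))) is forced onto the stack on 32-bit
   targets by the second test; everything else defers to
   default_must_pass_in_stack.  */
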
14852 #include "gt-i386.h"
14853