1 /* Subroutines used for code generation on IA-32.
2    Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3    2002, 2003, 2004 Free Software Foundation, Inc.
4 
5 This file is part of GCC.
6 
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
11 
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 GNU General Public License for more details.
16 
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING.  If not, write to
19 the Free Software Foundation, 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA.  */
21 
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "tm_p.h"
29 #include "regs.h"
30 #include "hard-reg-set.h"
31 #include "real.h"
32 #include "insn-config.h"
33 #include "conditions.h"
34 #include "output.h"
35 #include "insn-attr.h"
36 #include "flags.h"
37 #include "except.h"
38 #include "function.h"
39 #include "recog.h"
40 #include "expr.h"
41 #include "optabs.h"
42 #include "toplev.h"
43 #include "basic-block.h"
44 #include "ggc.h"
45 #include "target.h"
46 #include "target-def.h"
47 #include "langhooks.h"
48 #include "cgraph.h"
49 
50 #ifndef CHECK_STACK_LIMIT
51 #define CHECK_STACK_LIMIT (-1)
52 #endif
53 
54 /* Return index of given mode in mult and division cost tables.  */
55 #define MODE_INDEX(mode)					\
56   ((mode) == QImode ? 0						\
57    : (mode) == HImode ? 1					\
58    : (mode) == SImode ? 2					\
59    : (mode) == DImode ? 3					\
60    : 4)
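/* Illustrative use (field names assumed from struct processor_costs in
   i386.h): the five-entry arrays below are indexed per mode, e.g.
   ix86_cost->mult_init[MODE_INDEX (SImode)] or
   ix86_cost->divide[MODE_INDEX (mode)]; index 4 is the catch-all slot
   for modes other than QI/HI/SI/DImode.  */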
61 
62 /* Processor costs (relative to an add) */
63 static const
64 struct processor_costs size_cost = {	/* costs for tuning for size */
65   2,					/* cost of an add instruction */
66   3,					/* cost of a lea instruction */
67   2,					/* variable shift costs */
68   3,					/* constant shift costs */
69   {3, 3, 3, 3, 5},			/* cost of starting a multiply */
70   0,					/* cost of multiply per each bit set */
71   {3, 3, 3, 3, 5},			/* cost of a divide/mod */
72   3,					/* cost of movsx */
73   3,					/* cost of movzx */
74   0,					/* "large" insn */
75   2,					/* MOVE_RATIO */
76   2,					/* cost for loading QImode using movzbl */
77   {2, 2, 2},				/* cost of loading integer registers
78 					   in QImode, HImode and SImode.
79 					   Relative to reg-reg move (2).  */
80   {2, 2, 2},				/* cost of storing integer registers */
81   2,					/* cost of reg,reg fld/fst */
82   {2, 2, 2},				/* cost of loading fp registers
83 					   in SFmode, DFmode and XFmode */
84   {2, 2, 2},				/* cost of storing fp registers */
85   3,					/* cost of moving MMX register */
86   {3, 3},				/* cost of loading MMX registers
87 					   in SImode and DImode */
88   {3, 3},				/* cost of storing MMX registers
89 					   in SImode and DImode */
90   3,					/* cost of moving SSE register */
91   {3, 3, 3},				/* cost of loading SSE registers
92 					   in SImode, DImode and TImode */
93   {3, 3, 3},				/* cost of storing SSE registers
94 					   in SImode, DImode and TImode */
95   3,					/* MMX or SSE register to integer */
96   0,					/* size of prefetch block */
97   0,					/* number of parallel prefetches */
98   1,					/* Branch cost */
99   2,					/* cost of FADD and FSUB insns.  */
100   2,					/* cost of FMUL instruction.  */
101   2,					/* cost of FDIV instruction.  */
102   2,					/* cost of FABS instruction.  */
103   2,					/* cost of FCHS instruction.  */
104   2,					/* cost of FSQRT instruction.  */
105 };
106 
107 /* Processor costs (relative to an add) */
108 static const
109 struct processor_costs i386_cost = {	/* 386 specific costs */
110   1,					/* cost of an add instruction */
111   1,					/* cost of a lea instruction */
112   3,					/* variable shift costs */
113   2,					/* constant shift costs */
114   {6, 6, 6, 6, 6},			/* cost of starting a multiply */
115   1,					/* cost of multiply per each bit set */
116   {23, 23, 23, 23, 23},			/* cost of a divide/mod */
117   3,					/* cost of movsx */
118   2,					/* cost of movzx */
119   15,					/* "large" insn */
120   3,					/* MOVE_RATIO */
121   4,					/* cost for loading QImode using movzbl */
122   {2, 4, 2},				/* cost of loading integer registers
123 					   in QImode, HImode and SImode.
124 					   Relative to reg-reg move (2).  */
125   {2, 4, 2},				/* cost of storing integer registers */
126   2,					/* cost of reg,reg fld/fst */
127   {8, 8, 8},				/* cost of loading fp registers
128 					   in SFmode, DFmode and XFmode */
129   {8, 8, 8},				/* cost of storing fp registers */
130   2,					/* cost of moving MMX register */
131   {4, 8},				/* cost of loading MMX registers
132 					   in SImode and DImode */
133   {4, 8},				/* cost of storing MMX registers
134 					   in SImode and DImode */
135   2,					/* cost of moving SSE register */
136   {4, 8, 16},				/* cost of loading SSE registers
137 					   in SImode, DImode and TImode */
138   {4, 8, 16},				/* cost of storing SSE registers
139 					   in SImode, DImode and TImode */
140   3,					/* MMX or SSE register to integer */
141   0,					/* size of prefetch block */
142   0,					/* number of parallel prefetches */
143   1,					/* Branch cost */
144   23,					/* cost of FADD and FSUB insns.  */
145   27,					/* cost of FMUL instruction.  */
146   88,					/* cost of FDIV instruction.  */
147   22,					/* cost of FABS instruction.  */
148   24,					/* cost of FCHS instruction.  */
149   122,					/* cost of FSQRT instruction.  */
150 };
151 
152 static const
153 struct processor_costs i486_cost = {	/* 486 specific costs */
154   1,					/* cost of an add instruction */
155   1,					/* cost of a lea instruction */
156   3,					/* variable shift costs */
157   2,					/* constant shift costs */
158   {12, 12, 12, 12, 12},			/* cost of starting a multiply */
159   1,					/* cost of multiply per each bit set */
160   {40, 40, 40, 40, 40},			/* cost of a divide/mod */
161   3,					/* cost of movsx */
162   2,					/* cost of movzx */
163   15,					/* "large" insn */
164   3,					/* MOVE_RATIO */
165   4,					/* cost for loading QImode using movzbl */
166   {2, 4, 2},				/* cost of loading integer registers
167 					   in QImode, HImode and SImode.
168 					   Relative to reg-reg move (2).  */
169   {2, 4, 2},				/* cost of storing integer registers */
170   2,					/* cost of reg,reg fld/fst */
171   {8, 8, 8},				/* cost of loading fp registers
172 					   in SFmode, DFmode and XFmode */
173   {8, 8, 8},				/* cost of storing fp registers */
174   2,					/* cost of moving MMX register */
175   {4, 8},				/* cost of loading MMX registers
176 					   in SImode and DImode */
177   {4, 8},				/* cost of storing MMX registers
178 					   in SImode and DImode */
179   2,					/* cost of moving SSE register */
180   {4, 8, 16},				/* cost of loading SSE registers
181 					   in SImode, DImode and TImode */
182   {4, 8, 16},				/* cost of storing SSE registers
183 					   in SImode, DImode and TImode */
184   3,					/* MMX or SSE register to integer */
185   0,					/* size of prefetch block */
186   0,					/* number of parallel prefetches */
187   1,					/* Branch cost */
188   8,					/* cost of FADD and FSUB insns.  */
189   16,					/* cost of FMUL instruction.  */
190   73,					/* cost of FDIV instruction.  */
191   3,					/* cost of FABS instruction.  */
192   3,					/* cost of FCHS instruction.  */
193   83,					/* cost of FSQRT instruction.  */
194 };
195 
196 static const
197 struct processor_costs pentium_cost = {
198   1,					/* cost of an add instruction */
199   1,					/* cost of a lea instruction */
200   4,					/* variable shift costs */
201   1,					/* constant shift costs */
202   {11, 11, 11, 11, 11},			/* cost of starting a multiply */
203   0,					/* cost of multiply per each bit set */
204   {25, 25, 25, 25, 25},			/* cost of a divide/mod */
205   3,					/* cost of movsx */
206   2,					/* cost of movzx */
207   8,					/* "large" insn */
208   6,					/* MOVE_RATIO */
209   6,					/* cost for loading QImode using movzbl */
210   {2, 4, 2},				/* cost of loading integer registers
211 					   in QImode, HImode and SImode.
212 					   Relative to reg-reg move (2).  */
213   {2, 4, 2},				/* cost of storing integer registers */
214   2,					/* cost of reg,reg fld/fst */
215   {2, 2, 6},				/* cost of loading fp registers
216 					   in SFmode, DFmode and XFmode */
217   {4, 4, 6},				/* cost of storing fp registers */
218   8,					/* cost of moving MMX register */
219   {8, 8},				/* cost of loading MMX registers
220 					   in SImode and DImode */
221   {8, 8},				/* cost of storing MMX registers
222 					   in SImode and DImode */
223   2,					/* cost of moving SSE register */
224   {4, 8, 16},				/* cost of loading SSE registers
225 					   in SImode, DImode and TImode */
226   {4, 8, 16},				/* cost of storing SSE registers
227 					   in SImode, DImode and TImode */
228   3,					/* MMX or SSE register to integer */
229   0,					/* size of prefetch block */
230   0,					/* number of parallel prefetches */
231   2,					/* Branch cost */
232   3,					/* cost of FADD and FSUB insns.  */
233   3,					/* cost of FMUL instruction.  */
234   39,					/* cost of FDIV instruction.  */
235   1,					/* cost of FABS instruction.  */
236   1,					/* cost of FCHS instruction.  */
237   70,					/* cost of FSQRT instruction.  */
238 };
239 
240 static const
241 struct processor_costs pentiumpro_cost = {
242   1,					/* cost of an add instruction */
243   1,					/* cost of a lea instruction */
244   1,					/* variable shift costs */
245   1,					/* constant shift costs */
246   {4, 4, 4, 4, 4},			/* cost of starting a multiply */
247   0,					/* cost of multiply per each bit set */
248   {17, 17, 17, 17, 17},			/* cost of a divide/mod */
249   1,					/* cost of movsx */
250   1,					/* cost of movzx */
251   8,					/* "large" insn */
252   6,					/* MOVE_RATIO */
253   2,					/* cost for loading QImode using movzbl */
254   {4, 4, 4},				/* cost of loading integer registers
255 					   in QImode, HImode and SImode.
256 					   Relative to reg-reg move (2).  */
257   {2, 2, 2},				/* cost of storing integer registers */
258   2,					/* cost of reg,reg fld/fst */
259   {2, 2, 6},				/* cost of loading fp registers
260 					   in SFmode, DFmode and XFmode */
261   {4, 4, 6},				/* cost of storing fp registers */
262   2,					/* cost of moving MMX register */
263   {2, 2},				/* cost of loading MMX registers
264 					   in SImode and DImode */
265   {2, 2},				/* cost of storing MMX registers
266 					   in SImode and DImode */
267   2,					/* cost of moving SSE register */
268   {2, 2, 8},				/* cost of loading SSE registers
269 					   in SImode, DImode and TImode */
270   {2, 2, 8},				/* cost of storing SSE registers
271 					   in SImode, DImode and TImode */
272   3,					/* MMX or SSE register to integer */
273   32,					/* size of prefetch block */
274   6,					/* number of parallel prefetches */
275   2,					/* Branch cost */
276   3,					/* cost of FADD and FSUB insns.  */
277   5,					/* cost of FMUL instruction.  */
278   56,					/* cost of FDIV instruction.  */
279   2,					/* cost of FABS instruction.  */
280   2,					/* cost of FCHS instruction.  */
281   56,					/* cost of FSQRT instruction.  */
282 };
283 
284 static const
285 struct processor_costs k6_cost = {
286   1,					/* cost of an add instruction */
287   2,					/* cost of a lea instruction */
288   1,					/* variable shift costs */
289   1,					/* constant shift costs */
290   {3, 3, 3, 3, 3},			/* cost of starting a multiply */
291   0,					/* cost of multiply per each bit set */
292   {18, 18, 18, 18, 18},			/* cost of a divide/mod */
293   2,					/* cost of movsx */
294   2,					/* cost of movzx */
295   8,					/* "large" insn */
296   4,					/* MOVE_RATIO */
297   3,					/* cost for loading QImode using movzbl */
298   {4, 5, 4},				/* cost of loading integer registers
299 					   in QImode, HImode and SImode.
300 					   Relative to reg-reg move (2).  */
301   {2, 3, 2},				/* cost of storing integer registers */
302   4,					/* cost of reg,reg fld/fst */
303   {6, 6, 6},				/* cost of loading fp registers
304 					   in SFmode, DFmode and XFmode */
305   {4, 4, 4},				/* cost of storing fp registers */
306   2,					/* cost of moving MMX register */
307   {2, 2},				/* cost of loading MMX registers
308 					   in SImode and DImode */
309   {2, 2},				/* cost of storing MMX registers
310 					   in SImode and DImode */
311   2,					/* cost of moving SSE register */
312   {2, 2, 8},				/* cost of loading SSE registers
313 					   in SImode, DImode and TImode */
314   {2, 2, 8},				/* cost of storing SSE registers
315 					   in SImode, DImode and TImode */
316   6,					/* MMX or SSE register to integer */
317   32,					/* size of prefetch block */
318   1,					/* number of parallel prefetches */
319   1,					/* Branch cost */
320   2,					/* cost of FADD and FSUB insns.  */
321   2,					/* cost of FMUL instruction.  */
322   56,					/* cost of FDIV instruction.  */
323   2,					/* cost of FABS instruction.  */
324   2,					/* cost of FCHS instruction.  */
325   56,					/* cost of FSQRT instruction.  */
326 };
327 
328 static const
329 struct processor_costs athlon_cost = {
330   1,					/* cost of an add instruction */
331   2,					/* cost of a lea instruction */
332   1,					/* variable shift costs */
333   1,					/* constant shift costs */
334   {5, 5, 5, 5, 5},			/* cost of starting a multiply */
335   0,					/* cost of multiply per each bit set */
336   {18, 26, 42, 74, 74},			/* cost of a divide/mod */
337   1,					/* cost of movsx */
338   1,					/* cost of movzx */
339   8,					/* "large" insn */
340   9,					/* MOVE_RATIO */
341   4,					/* cost for loading QImode using movzbl */
342   {3, 4, 3},				/* cost of loading integer registers
343 					   in QImode, HImode and SImode.
344 					   Relative to reg-reg move (2).  */
345   {3, 4, 3},				/* cost of storing integer registers */
346   4,					/* cost of reg,reg fld/fst */
347   {4, 4, 12},				/* cost of loading fp registers
348 					   in SFmode, DFmode and XFmode */
349   {6, 6, 8},				/* cost of storing fp registers */
350   2,					/* cost of moving MMX register */
351   {4, 4},				/* cost of loading MMX registers
352 					   in SImode and DImode */
353   {4, 4},				/* cost of storing MMX registers
354 					   in SImode and DImode */
355   2,					/* cost of moving SSE register */
356   {4, 4, 6},				/* cost of loading SSE registers
357 					   in SImode, DImode and TImode */
358   {4, 4, 5},				/* cost of storing SSE registers
359 					   in SImode, DImode and TImode */
360   5,					/* MMX or SSE register to integer */
361   64,					/* size of prefetch block */
362   6,					/* number of parallel prefetches */
363   2,					/* Branch cost */
364   4,					/* cost of FADD and FSUB insns.  */
365   4,					/* cost of FMUL instruction.  */
366   24,					/* cost of FDIV instruction.  */
367   2,					/* cost of FABS instruction.  */
368   2,					/* cost of FCHS instruction.  */
369   35,					/* cost of FSQRT instruction.  */
370 };
371 
372 static const
373 struct processor_costs k8_cost = {
374   1,					/* cost of an add instruction */
375   2,					/* cost of a lea instruction */
376   1,					/* variable shift costs */
377   1,					/* constant shift costs */
378   {3, 4, 3, 4, 5},			/* cost of starting a multiply */
379   0,					/* cost of multiply per each bit set */
380   {18, 26, 42, 74, 74},			/* cost of a divide/mod */
381   1,					/* cost of movsx */
382   1,					/* cost of movzx */
383   8,					/* "large" insn */
384   9,					/* MOVE_RATIO */
385   4,					/* cost for loading QImode using movzbl */
386   {3, 4, 3},				/* cost of loading integer registers
387 					   in QImode, HImode and SImode.
388 					   Relative to reg-reg move (2).  */
389   {3, 4, 3},				/* cost of storing integer registers */
390   4,					/* cost of reg,reg fld/fst */
391   {4, 4, 12},				/* cost of loading fp registers
392 					   in SFmode, DFmode and XFmode */
393   {6, 6, 8},				/* cost of storing fp registers */
394   2,					/* cost of moving MMX register */
395   {3, 3},				/* cost of loading MMX registers
396 					   in SImode and DImode */
397   {4, 4},				/* cost of storing MMX registers
398 					   in SImode and DImode */
399   2,					/* cost of moving SSE register */
400   {4, 3, 6},				/* cost of loading SSE registers
401 					   in SImode, DImode and TImode */
402   {4, 4, 5},				/* cost of storing SSE registers
403 					   in SImode, DImode and TImode */
404   5,					/* MMX or SSE register to integer */
405   64,					/* size of prefetch block */
406   6,					/* number of parallel prefetches */
407   2,					/* Branch cost */
408   4,					/* cost of FADD and FSUB insns.  */
409   4,					/* cost of FMUL instruction.  */
410   19,					/* cost of FDIV instruction.  */
411   2,					/* cost of FABS instruction.  */
412   2,					/* cost of FCHS instruction.  */
413   35,					/* cost of FSQRT instruction.  */
414 };
415 
416 static const
417 struct processor_costs pentium4_cost = {
418   1,					/* cost of an add instruction */
419   1,					/* cost of a lea instruction */
420   4,					/* variable shift costs */
421   4,					/* constant shift costs */
422   {15, 15, 15, 15, 15},			/* cost of starting a multiply */
423   0,					/* cost of multiply per each bit set */
424   {56, 56, 56, 56, 56},			/* cost of a divide/mod */
425   1,					/* cost of movsx */
426   1,					/* cost of movzx */
427   16,					/* "large" insn */
428   6,					/* MOVE_RATIO */
429   2,					/* cost for loading QImode using movzbl */
430   {4, 5, 4},				/* cost of loading integer registers
431 					   in QImode, HImode and SImode.
432 					   Relative to reg-reg move (2).  */
433   {2, 3, 2},				/* cost of storing integer registers */
434   2,					/* cost of reg,reg fld/fst */
435   {2, 2, 6},				/* cost of loading fp registers
436 					   in SFmode, DFmode and XFmode */
437   {4, 4, 6},				/* cost of storing fp registers */
438   2,					/* cost of moving MMX register */
439   {2, 2},				/* cost of loading MMX registers
440 					   in SImode and DImode */
441   {2, 2},				/* cost of storing MMX registers
442 					   in SImode and DImode */
443   12,					/* cost of moving SSE register */
444   {12, 12, 12},				/* cost of loading SSE registers
445 					   in SImode, DImode and TImode */
446   {2, 2, 8},				/* cost of storing SSE registers
447 					   in SImode, DImode and TImode */
448   10,					/* MMX or SSE register to integer */
449   64,					/* size of prefetch block */
450   6,					/* number of parallel prefetches */
451   2,					/* Branch cost */
452   5,					/* cost of FADD and FSUB insns.  */
453   7,					/* cost of FMUL instruction.  */
454   43,					/* cost of FDIV instruction.  */
455   2,					/* cost of FABS instruction.  */
456   2,					/* cost of FCHS instruction.  */
457   43,					/* cost of FSQRT instruction.  */
458 };
459 
460 const struct processor_costs *ix86_cost = &pentium_cost;
461 
462 /* Processor feature/optimization bitmasks.  */
463 #define m_386 (1<<PROCESSOR_I386)
464 #define m_486 (1<<PROCESSOR_I486)
465 #define m_PENT (1<<PROCESSOR_PENTIUM)
466 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
467 #define m_K6  (1<<PROCESSOR_K6)
468 #define m_ATHLON  (1<<PROCESSOR_ATHLON)
469 #define m_PENT4  (1<<PROCESSOR_PENTIUM4)
470 #define m_K8  (1<<PROCESSOR_K8)
471 #define m_ATHLON_K8  (m_K8 | m_ATHLON)
472 
473 const int x86_use_leave = m_386 | m_K6 | m_ATHLON_K8;
474 const int x86_push_memory = m_386 | m_K6 | m_ATHLON_K8 | m_PENT4;
475 const int x86_zero_extend_with_and = m_486 | m_PENT;
476 const int x86_movx = m_ATHLON_K8 | m_PPRO | m_PENT4 /* m_386 | m_K6 */;
477 const int x86_double_with_add = ~m_386;
478 const int x86_use_bit_test = m_386;
479 const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8 | m_K6;
480 const int x86_cmove = m_PPRO | m_ATHLON_K8 | m_PENT4;
481 const int x86_3dnow_a = m_ATHLON_K8;
482 const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON_K8 | m_PENT4;
483 const int x86_branch_hints = m_PENT4;
484 const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4;
485 const int x86_partial_reg_stall = m_PPRO;
486 const int x86_use_loop = m_K6;
487 const int x86_use_fiop = ~(m_PPRO | m_ATHLON_K8 | m_PENT);
488 const int x86_use_mov0 = m_K6;
489 const int x86_use_cltd = ~(m_PENT | m_K6);
490 const int x86_read_modify_write = ~m_PENT;
491 const int x86_read_modify = ~(m_PENT | m_PPRO);
492 const int x86_split_long_moves = m_PPRO;
493 const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON_K8;
494 const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
495 const int x86_single_stringop = m_386 | m_PENT4;
496 const int x86_qimode_math = ~(0);
497 const int x86_promote_qi_regs = 0;
498 const int x86_himode_math = ~(m_PPRO);
499 const int x86_promote_hi_regs = m_PPRO;
500 const int x86_sub_esp_4 = m_ATHLON_K8 | m_PPRO | m_PENT4;
501 const int x86_sub_esp_8 = m_ATHLON_K8 | m_PPRO | m_386 | m_486 | m_PENT4;
502 const int x86_add_esp_4 = m_ATHLON_K8 | m_K6 | m_PENT4;
503 const int x86_add_esp_8 = m_ATHLON_K8 | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4;
504 const int x86_integer_DFmode_moves = ~(m_ATHLON_K8 | m_PENT4 | m_PPRO);
505 const int x86_partial_reg_dependency = m_ATHLON_K8 | m_PENT4;
506 const int x86_memory_mismatch_stall = m_ATHLON_K8 | m_PENT4;
507 const int x86_accumulate_outgoing_args = m_ATHLON_K8 | m_PENT4 | m_PPRO;
508 const int x86_prologue_using_move = m_ATHLON_K8 | m_PENT4 | m_PPRO;
509 const int x86_epilogue_using_move = m_ATHLON_K8 | m_PENT4 | m_PPRO;
510 const int x86_decompose_lea = m_PENT4;
511 const int x86_shift1 = ~m_486;
512 const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON_K8 | m_PENT4;
513 const int x86_sse_partial_reg_dependency = m_PENT4 | m_PPRO;
514 /* Set for machines where the type and dependencies are resolved on SSE register
515    parts instead of whole registers, so we may maintain just the lower part of
516    scalar values in the proper format, leaving the upper part undefined.  */
517 const int x86_sse_partial_regs = m_ATHLON_K8;
518 /* The Athlon optimizes the partial-register FPS special case, thus avoiding
519    the need for extra instructions beforehand.  */
520 const int x86_sse_partial_regs_for_cvtsd2ss = 0;
521 const int x86_sse_typeless_stores = m_ATHLON_K8;
522 const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4;
523 const int x86_use_ffreep = m_ATHLON_K8;
524 const int x86_rep_movl_optimal = m_386 | m_PENT | m_PPRO | m_K6;
525 const int x86_inter_unit_moves = ~(m_ATHLON_K8);
526 const int x86_ext_80387_constants = m_K6 | m_ATHLON | m_PENT4 | m_PPRO;
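/* Each x86_* mask above is tested against the active tuning in i386.h,
   conceptually as (x86_use_leave & (1 << ix86_tune)) != 0; this is a
   sketch of the pattern only -- the exact TARGET_* macro names live in
   i386.h.  */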
527 
528 /* If the average insn count for a single function invocation is
529    lower than this constant, emit fast (but longer) prologue and
530    epilogue code.  */
531 #define FAST_PROLOGUE_INSN_COUNT 20
532 
533 /* Names for the 8-bit (low), 8-bit (high), and 16-bit registers, respectively.  */
534 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
535 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
536 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
537 
538 /* Array of the smallest class containing reg number REGNO, indexed by
539    REGNO.  Used by REGNO_REG_CLASS in i386.h.  */
540 
541 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
542 {
543   /* ax, dx, cx, bx */
544   AREG, DREG, CREG, BREG,
545   /* si, di, bp, sp */
546   SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
547   /* FP registers */
548   FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
549   FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
550   /* arg pointer */
551   NON_Q_REGS,
552   /* flags, fpsr, dirflag, frame */
553   NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
554   SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
555   SSE_REGS, SSE_REGS,
556   MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
557   MMX_REGS, MMX_REGS,
558   NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
559   NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
560   SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
561   SSE_REGS, SSE_REGS,
562 };
563 
564 /* The "default" register map used in 32bit mode.  */
565 
566 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
567 {
568   0, 2, 1, 3, 6, 7, 4, 5,		/* general regs */
569   12, 13, 14, 15, 16, 17, 18, 19,	/* fp regs */
570   -1, -1, -1, -1, -1,			/* arg, flags, fpsr, dir, frame */
571   21, 22, 23, 24, 25, 26, 27, 28,	/* SSE */
572   29, 30, 31, 32, 33, 34, 35, 36,       /* MMX */
573   -1, -1, -1, -1, -1, -1, -1, -1,	/* extended integer registers */
574   -1, -1, -1, -1, -1, -1, -1, -1,	/* extended SSE registers */
575 };
576 
577 static int const x86_64_int_parameter_registers[6] =
578 {
579   5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
580   FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
581 };
582 
583 static int const x86_64_int_return_registers[4] =
584 {
585   0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
586 };
587 
588 /* The "default" register map used in 64bit mode.  */
589 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
590 {
591   0, 1, 2, 3, 4, 5, 6, 7,		/* general regs */
592   33, 34, 35, 36, 37, 38, 39, 40,	/* fp regs */
593   -1, -1, -1, -1, -1,			/* arg, flags, fpsr, dir, frame */
594   17, 18, 19, 20, 21, 22, 23, 24,	/* SSE */
595   41, 42, 43, 44, 45, 46, 47, 48,       /* MMX */
596   8, 9, 10, 11, 12, 13, 14, 15,	/* extended integer registers */
597   25, 26, 27, 28, 29, 30, 31, 32,	/* extended SSE registers */
598 };
599 
600 /* Define the register numbers to be used in Dwarf debugging information.
601    The SVR4 reference port C compiler uses the following register numbers
602    in its Dwarf output code:
603 	0 for %eax (gcc regno = 0)
604 	1 for %ecx (gcc regno = 2)
605 	2 for %edx (gcc regno = 1)
606 	3 for %ebx (gcc regno = 3)
607 	4 for %esp (gcc regno = 7)
608 	5 for %ebp (gcc regno = 6)
609 	6 for %esi (gcc regno = 4)
610 	7 for %edi (gcc regno = 5)
611    The following three DWARF register numbers are never generated by
612    the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
613    believes these numbers have these meanings.
614 	8  for %eip    (no gcc equivalent)
615 	9  for %eflags (gcc regno = 17)
616 	10 for %trapno (no gcc equivalent)
617    It is not at all clear how we should number the FP stack registers
618    for the x86 architecture.  If the version of SDB on x86/svr4 were
619    a bit less brain dead with respect to floating-point then we would
620    have a precedent to follow with respect to DWARF register numbers
621    for x86 FP registers, but the SDB on x86/svr4 is so completely
622    broken with respect to FP registers that it is hardly worth thinking
623    of it as something to strive for compatibility with.
624    The version of x86/svr4 SDB I have at the moment does (partially)
625    seem to believe that DWARF register number 11 is associated with
626    the x86 register %st(0), but that's about all.  Higher DWARF
627    register numbers don't seem to be associated with anything in
628    particular, and even for DWARF regno 11, SDB only seems to under-
629    stand that it should say that a variable lives in %st(0) (when
630    asked via an `=' command) if we said it was in DWARF regno 11,
631    but SDB still prints garbage when asked for the value of the
632    variable in question (via a `/' command).
633    (Also note that the labels SDB prints for various FP stack regs
634    when doing an `x' command are all wrong.)
635    Note that these problems generally don't affect the native SVR4
636    C compiler because it doesn't allow the use of -O with -g and
637    because when it is *not* optimizing, it allocates a memory
638    location for each floating-point variable, and the memory
639    location is what gets described in the DWARF AT_location
640    attribute for the variable in question.
641    Regardless of the severe mental illness of the x86/svr4 SDB, we
642    do something sensible here and we use the following DWARF
643    register numbers.  Note that these are all stack-top-relative
644    numbers.
645 	11 for %st(0) (gcc regno = 8)
646 	12 for %st(1) (gcc regno = 9)
647 	13 for %st(2) (gcc regno = 10)
648 	14 for %st(3) (gcc regno = 11)
649 	15 for %st(4) (gcc regno = 12)
650 	16 for %st(5) (gcc regno = 13)
651 	17 for %st(6) (gcc regno = 14)
652 	18 for %st(7) (gcc regno = 15)
653 */
654 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
655 {
656   0, 2, 1, 3, 6, 7, 5, 4,		/* general regs */
657   11, 12, 13, 14, 15, 16, 17, 18,	/* fp regs */
658   -1, 9, -1, -1, -1,			/* arg, flags, fpsr, dir, frame */
659   21, 22, 23, 24, 25, 26, 27, 28,	/* SSE registers */
660   29, 30, 31, 32, 33, 34, 35, 36,	/* MMX registers */
661   -1, -1, -1, -1, -1, -1, -1, -1,	/* extended integer registers */
662   -1, -1, -1, -1, -1, -1, -1, -1,	/* extended SSE registers */
663 };
664 
665 /* Test and compare insns in i386.md store the information needed to
666    generate branch and scc insns here.  */
667 
668 rtx ix86_compare_op0 = NULL_RTX;
669 rtx ix86_compare_op1 = NULL_RTX;
670 
671 #define MAX_386_STACK_LOCALS 3
672 /* Size of the register save area.  */
673 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
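/* Worked example: with the 64-bit ABI values REGPARM_MAX == 6,
   UNITS_PER_WORD == 8 and SSE_REGPARM_MAX == 8 (see i386.h), this is
   6*8 + 8*16 = 176 bytes, the size of the va_list register save area.  */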
674 
675 /* Define the structure for the machine field in struct function.  */
676 
677 struct stack_local_entry GTY(())
678 {
679   unsigned short mode;
680   unsigned short n;
681   rtx rtl;
682   struct stack_local_entry *next;
683 };
684 
685 /* Structure describing stack frame layout.
686    Stack grows downward:
687 
688    [arguments]
689 					      <- ARG_POINTER
690    saved pc
691 
692    saved frame pointer if frame_pointer_needed
693 					      <- HARD_FRAME_POINTER
694    [saved regs]
695 
696    [padding1]          \
697 		        )
698    [va_arg registers]  (
699 		        > to_allocate	      <- FRAME_POINTER
700    [frame]	       (
701 		        )
702    [padding2]	       /
703   */
704 struct ix86_frame
705 {
706   int nregs;
707   int padding1;
708   int va_arg_size;
709   HOST_WIDE_INT frame;
710   int padding2;
711   int outgoing_arguments_size;
712   int red_zone_size;
713 
714   HOST_WIDE_INT to_allocate;
715   /* The offsets relative to ARG_POINTER.  */
716   HOST_WIDE_INT frame_pointer_offset;
717   HOST_WIDE_INT hard_frame_pointer_offset;
718   HOST_WIDE_INT stack_pointer_offset;
719 
720   /* When save_regs_using_mov is set, emit prologue using
721      move instead of push instructions.  */
722   bool save_regs_using_mov;
723 };
724 
725 /* Used to enable/disable debugging features.  */
726 const char *ix86_debug_arg_string, *ix86_debug_addr_string;
727 /* Code model option as passed by user.  */
728 const char *ix86_cmodel_string;
729 /* Parsed value.  */
730 enum cmodel ix86_cmodel;
731 /* Asm dialect.  */
732 const char *ix86_asm_string;
733 enum asm_dialect ix86_asm_dialect = ASM_ATT;
734 /* TLS dialect.  */
735 const char *ix86_tls_dialect_string;
736 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
737 
738 /* Which unit we are generating floating point math for.  */
739 enum fpmath_unit ix86_fpmath;
740 
741 /* Which CPU we are scheduling for.  */
742 enum processor_type ix86_tune;
743 /* Which instruction set architecture to use.  */
744 enum processor_type ix86_arch;
745 
746 /* Strings to hold which cpu and instruction set architecture  to use.  */
747 const char *ix86_tune_string;		/* for -mtune=<xxx> */
748 const char *ix86_arch_string;		/* for -march=<xxx> */
749 const char *ix86_fpmath_string;		/* for -mfpmath=<xxx> */
750 
751 /* # of registers to use to pass arguments.  */
752 const char *ix86_regparm_string;
753 
754 /* True if the SSE prefetch instruction is not a NOOP.  */
755 int x86_prefetch_sse;
756 
757 /* ix86_regparm_string as a number */
758 int ix86_regparm;
759 
760 /* Alignment to use for loops and jumps:  */
761 
762 /* Power of two alignment for loops.  */
763 const char *ix86_align_loops_string;
764 
765 /* Power of two alignment for non-loop jumps.  */
766 const char *ix86_align_jumps_string;
767 
768 /* Power of two alignment for stack boundary in bytes.  */
769 const char *ix86_preferred_stack_boundary_string;
770 
771 /* Preferred alignment for stack boundary in bits.  */
772 int ix86_preferred_stack_boundary;
773 
774 /* Values 1-5: see jump.c */
775 int ix86_branch_cost;
776 const char *ix86_branch_cost_string;
777 
778 /* Power of two alignment for functions.  */
779 const char *ix86_align_funcs_string;
780 
781 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL.  */
782 static char internal_label_prefix[16];
783 static int internal_label_prefix_len;
784 
785 static int local_symbolic_operand (rtx, enum machine_mode);
786 static int tls_symbolic_operand_1 (rtx, enum tls_model);
787 static void output_pic_addr_const (FILE *, rtx, int);
788 static void put_condition_code (enum rtx_code, enum machine_mode,
789 				int, int, FILE *);
790 static const char *get_some_local_dynamic_name (void);
791 static int get_some_local_dynamic_name_1 (rtx *, void *);
792 static rtx maybe_get_pool_constant (rtx);
793 static rtx ix86_expand_int_compare (enum rtx_code, rtx, rtx);
794 static enum rtx_code ix86_prepare_fp_compare_args (enum rtx_code, rtx *,
795 						   rtx *);
796 static bool ix86_fixed_condition_code_regs (unsigned int *, unsigned int *);
797 static enum machine_mode ix86_cc_modes_compatible (enum machine_mode,
798 						   enum machine_mode);
799 static rtx get_thread_pointer (int);
800 static rtx legitimize_tls_address (rtx, enum tls_model, int);
801 static void get_pc_thunk_name (char [32], unsigned int);
802 static rtx gen_push (rtx);
803 static int memory_address_length (rtx addr);
804 static int ix86_flags_dependant (rtx, rtx, enum attr_type);
805 static int ix86_agi_dependant (rtx, rtx, enum attr_type);
806 static enum attr_ppro_uops ix86_safe_ppro_uops (rtx);
807 static void ix86_dump_ppro_packet (FILE *);
808 static void ix86_reorder_insn (rtx *, rtx *);
809 static struct machine_function * ix86_init_machine_status (void);
810 static int ix86_split_to_parts (rtx, rtx *, enum machine_mode);
811 static int ix86_nsaved_regs (void);
812 static void ix86_emit_save_regs (void);
813 static void ix86_emit_save_regs_using_mov (rtx, HOST_WIDE_INT);
814 static void ix86_emit_restore_regs_using_mov (rtx, HOST_WIDE_INT, int);
815 static void ix86_output_function_epilogue (FILE *, HOST_WIDE_INT);
816 static void ix86_sched_reorder_ppro (rtx *, rtx *);
817 static HOST_WIDE_INT ix86_GOT_alias_set (void);
818 static void ix86_adjust_counter (rtx, HOST_WIDE_INT);
819 static rtx ix86_expand_aligntest (rtx, int);
820 static void ix86_expand_strlensi_unroll_1 (rtx, rtx, rtx);
821 static int ix86_issue_rate (void);
822 static int ix86_adjust_cost (rtx, rtx, rtx, int);
823 static void ix86_sched_init (FILE *, int, int);
824 static int ix86_sched_reorder (FILE *, int, rtx *, int *, int);
825 static int ix86_variable_issue (FILE *, int, rtx, int);
826 static int ia32_use_dfa_pipeline_interface (void);
827 static int ia32_multipass_dfa_lookahead (void);
828 static void ix86_init_mmx_sse_builtins (void);
829 static rtx x86_this_parameter (tree);
830 static void x86_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
831 				 HOST_WIDE_INT, tree);
832 static bool x86_can_output_mi_thunk (tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
833 static void x86_file_start (void);
834 static void ix86_reorg (void);
835 static bool ix86_expand_carry_flag_compare (enum rtx_code, rtx, rtx, rtx*);
836 static tree ix86_build_builtin_va_list (void);
837 
838 struct ix86_address
839 {
840   rtx base, index, disp;
841   HOST_WIDE_INT scale;
842   enum ix86_address_seg { SEG_DEFAULT, SEG_FS, SEG_GS } seg;
843 };
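/* Illustration only: the 32-bit address 16(%ebx,%ecx,4) decomposes into
   base = %ebx, index = %ecx, scale = 4, disp = 16, seg = SEG_DEFAULT;
   a TLS access through %gs would instead set seg = SEG_GS.  */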
844 
845 static int ix86_decompose_address (rtx, struct ix86_address *);
846 static int ix86_address_cost (rtx);
847 static bool ix86_cannot_force_const_mem (rtx);
848 static rtx ix86_delegitimize_address (rtx);
849 
850 struct builtin_description;
851 static rtx ix86_expand_sse_comi (const struct builtin_description *,
852 				 tree, rtx);
853 static rtx ix86_expand_sse_compare (const struct builtin_description *,
854 				    tree, rtx);
855 static rtx ix86_expand_unop1_builtin (enum insn_code, tree, rtx);
856 static rtx ix86_expand_unop_builtin (enum insn_code, tree, rtx, int);
857 static rtx ix86_expand_binop_builtin (enum insn_code, tree, rtx);
858 static rtx ix86_expand_store_builtin (enum insn_code, tree);
859 static rtx safe_vector_operand (rtx, enum machine_mode);
860 static enum rtx_code ix86_fp_compare_code_to_integer (enum rtx_code);
861 static void ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *,
862 				      enum rtx_code *, enum rtx_code *);
863 static rtx ix86_expand_fp_compare (enum rtx_code, rtx, rtx, rtx, rtx *, rtx *);
864 static int ix86_fp_comparison_arithmetics_cost (enum rtx_code code);
865 static int ix86_fp_comparison_fcomi_cost (enum rtx_code code);
866 static int ix86_fp_comparison_sahf_cost (enum rtx_code code);
867 static int ix86_fp_comparison_cost (enum rtx_code code);
868 static unsigned int ix86_select_alt_pic_regnum (void);
869 static int ix86_save_reg (unsigned int, int);
870 static void ix86_compute_frame_layout (struct ix86_frame *);
871 static int ix86_comp_type_attributes (tree, tree);
872 static int ix86_function_regparm (tree, tree);
873 const struct attribute_spec ix86_attribute_table[];
874 static bool ix86_function_ok_for_sibcall (tree, tree);
875 static tree ix86_handle_cdecl_attribute (tree *, tree, tree, int, bool *);
876 static tree ix86_handle_regparm_attribute (tree *, tree, tree, int, bool *);
877 static int ix86_value_regno (enum machine_mode);
878 static bool contains_128bit_aligned_vector_p (tree);
879 static bool ix86_ms_bitfield_layout_p (tree);
880 static tree ix86_handle_struct_attribute (tree *, tree, tree, int, bool *);
881 static int extended_reg_mentioned_1 (rtx *, void *);
882 static bool ix86_rtx_costs (rtx, int, int, int *);
883 static int min_insn_size (rtx);
884 static void k8_avoid_jump_misspredicts (void);
885 
886 #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
887 static void ix86_svr3_asm_out_constructor (rtx, int);
888 #endif
889 
890 /* Register class used for passing a given 64-bit part of an argument.
891    These represent classes as documented by the psABI, with the exception
892    of the SSESF and SSEDF classes, which are basically the SSE class; GCC
893    just uses an SFmode or DFmode move instead of DImode to avoid
894    reformatting penalties.
895 
896    Similarly, we play games with INTEGERSI_CLASS to use cheaper SImode moves
897    whenever possible (the upper half then contains only padding).  */
898 enum x86_64_reg_class
899   {
900     X86_64_NO_CLASS,
901     X86_64_INTEGER_CLASS,
902     X86_64_INTEGERSI_CLASS,
903     X86_64_SSE_CLASS,
904     X86_64_SSESF_CLASS,
905     X86_64_SSEDF_CLASS,
906     X86_64_SSEUP_CLASS,
907     X86_64_X87_CLASS,
908     X86_64_X87UP_CLASS,
909     X86_64_MEMORY_CLASS
910   };
911 static const char * const x86_64_reg_class_name[] =
912    {"no", "integer", "integerSI", "sse", "sseSF", "sseDF", "sseup", "x87", "x87up", "no"};
913 
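/* Rough illustration (per the x86-64 psABI): struct { double d; int i; }
   spans two eightbytes classified as X86_64_SSEDF_CLASS and
   X86_64_INTEGERSI_CLASS, so it is passed in one SSE register and the
   low half of one integer register.  */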
914 #define MAX_CLASSES 4
915 static int classify_argument (enum machine_mode, tree,
916 			      enum x86_64_reg_class [MAX_CLASSES], int);
917 static int examine_argument (enum machine_mode, tree, int, int *, int *);
918 static rtx construct_container (enum machine_mode, tree, int, int, int,
919 				const int *, int);
920 static enum x86_64_reg_class merge_classes (enum x86_64_reg_class,
921 					    enum x86_64_reg_class);
922 
923 /* Table of constants used by fldpi, fldln2, etc....  */
924 static REAL_VALUE_TYPE ext_80387_constants_table [5];
925 static bool ext_80387_constants_init = 0;
926 static void init_ext_80387_constants (void);
927 
928 /* Initialize the GCC target structure.  */
929 #undef TARGET_ATTRIBUTE_TABLE
930 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
931 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
932 #  undef TARGET_MERGE_DECL_ATTRIBUTES
933 #  define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
934 #endif
935 
936 #undef TARGET_COMP_TYPE_ATTRIBUTES
937 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
938 
939 #undef TARGET_INIT_BUILTINS
940 #define TARGET_INIT_BUILTINS ix86_init_builtins
941 
942 #undef TARGET_EXPAND_BUILTIN
943 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
944 
945 #undef TARGET_ASM_FUNCTION_EPILOGUE
946 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
947 
948 #undef TARGET_ASM_OPEN_PAREN
949 #define TARGET_ASM_OPEN_PAREN ""
950 #undef TARGET_ASM_CLOSE_PAREN
951 #define TARGET_ASM_CLOSE_PAREN ""
952 
953 #undef TARGET_ASM_ALIGNED_HI_OP
954 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
955 #undef TARGET_ASM_ALIGNED_SI_OP
956 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
957 #ifdef ASM_QUAD
958 #undef TARGET_ASM_ALIGNED_DI_OP
959 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
960 #endif
961 
962 #undef TARGET_ASM_UNALIGNED_HI_OP
963 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
964 #undef TARGET_ASM_UNALIGNED_SI_OP
965 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
966 #undef TARGET_ASM_UNALIGNED_DI_OP
967 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
968 
969 #undef TARGET_SCHED_ADJUST_COST
970 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
971 #undef TARGET_SCHED_ISSUE_RATE
972 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
973 #undef TARGET_SCHED_VARIABLE_ISSUE
974 #define TARGET_SCHED_VARIABLE_ISSUE ix86_variable_issue
975 #undef TARGET_SCHED_INIT
976 #define TARGET_SCHED_INIT ix86_sched_init
977 #undef TARGET_SCHED_REORDER
978 #define TARGET_SCHED_REORDER ix86_sched_reorder
979 #undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
980 #define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE \
981   ia32_use_dfa_pipeline_interface
982 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
983 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
984   ia32_multipass_dfa_lookahead
985 
986 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
987 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
988 
989 #ifdef HAVE_AS_TLS
990 #undef TARGET_HAVE_TLS
991 #define TARGET_HAVE_TLS true
992 #endif
993 #undef TARGET_CANNOT_FORCE_CONST_MEM
994 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
995 
996 #undef TARGET_DELEGITIMIZE_ADDRESS
997 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
998 
999 #undef TARGET_MS_BITFIELD_LAYOUT_P
1000 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
1001 
1002 #undef TARGET_ASM_OUTPUT_MI_THUNK
1003 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
1004 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1005 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
1006 
1007 #undef TARGET_ASM_FILE_START
1008 #define TARGET_ASM_FILE_START x86_file_start
1009 
1010 #undef TARGET_RTX_COSTS
1011 #define TARGET_RTX_COSTS ix86_rtx_costs
1012 #undef TARGET_ADDRESS_COST
1013 #define TARGET_ADDRESS_COST ix86_address_cost
1014 
1015 #undef TARGET_FIXED_CONDITION_CODE_REGS
1016 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
1017 #undef TARGET_CC_MODES_COMPATIBLE
1018 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
1019 
1020 #undef TARGET_MACHINE_DEPENDENT_REORG
1021 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
1022 
1023 #undef TARGET_BUILD_BUILTIN_VA_LIST
1024 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
1025 
1026 struct gcc_target targetm = TARGET_INITIALIZER;
1027 
1028 /* The svr4 ABI for the i386 says that records and unions are returned
1029    in memory.  */
1030 #ifndef DEFAULT_PCC_STRUCT_RETURN
1031 #define DEFAULT_PCC_STRUCT_RETURN 1
1032 #endif
1033 
1034 /* Sometimes certain combinations of command options do not make
1035    sense on a particular target machine.  You can define a macro
1036    `OVERRIDE_OPTIONS' to take account of this.  This macro, if
1037    defined, is executed once just after all the command options have
1038    been parsed.
1039 
1040    Don't use this macro to turn on various extra optimizations for
1041    `-O'.  That is what `OPTIMIZATION_OPTIONS' is for.  */
1042 
1043 void
1044 override_options (void)
1045 {
1046   int i;
1047   /* Comes from final.c -- no real reason to change it.  */
1048 #define MAX_CODE_ALIGN 16
1049 
1050   static struct ptt
1051     {
1052       const struct processor_costs *cost;	/* Processor costs */
1053       const int target_enable;			/* Target flags to enable.  */
1054       const int target_disable;			/* Target flags to disable.  */
1055       const int align_loop;			/* Default alignments.  */
1056       const int align_loop_max_skip;
1057       const int align_jump;
1058       const int align_jump_max_skip;
1059       const int align_func;
1060     }
1061   const processor_target_table[PROCESSOR_max] =
1062     {
1063       {&i386_cost, 0, 0, 4, 3, 4, 3, 4},
1064       {&i486_cost, 0, 0, 16, 15, 16, 15, 16},
1065       {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
1066       {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
1067       {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
1068       {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
1069       {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
1070       {&k8_cost, 0, 0, 16, 7, 16, 7, 16}
1071     };
1072 
1073   static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
1074   static struct pta
1075     {
1076       const char *const name;		/* processor name or nickname.  */
1077       const enum processor_type processor;
1078       const enum pta_flags
1079 	{
1080 	  PTA_SSE = 1,
1081 	  PTA_SSE2 = 2,
1082 	  PTA_SSE3 = 4,
1083 	  PTA_MMX = 8,
1084 	  PTA_PREFETCH_SSE = 16,
1085 	  PTA_3DNOW = 32,
1086 	  PTA_3DNOW_A = 64,
1087 	  PTA_64BIT = 128
1088 	} flags;
1089     }
1090   const processor_alias_table[] =
1091     {
1092       {"i386", PROCESSOR_I386, 0},
1093       {"i486", PROCESSOR_I486, 0},
1094       {"i586", PROCESSOR_PENTIUM, 0},
1095       {"pentium", PROCESSOR_PENTIUM, 0},
1096       {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
1097       {"winchip-c6", PROCESSOR_I486, PTA_MMX},
1098       {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1099       {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1100       {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_PREFETCH_SSE | PTA_SSE},
1101       {"i686", PROCESSOR_PENTIUMPRO, 0},
1102       {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
1103       {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
1104       {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1105       {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1106       {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE | PTA_SSE2},
1107       {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1108 				       | PTA_MMX | PTA_PREFETCH_SSE},
1109       {"pentium4m", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1110 				        | PTA_MMX | PTA_PREFETCH_SSE},
1111       {"prescott", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2 | PTA_SSE3
1112 				        | PTA_MMX | PTA_PREFETCH_SSE},
1113       {"nocona", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_64BIT
1114 				     | PTA_MMX | PTA_PREFETCH_SSE},
1115       {"k6", PROCESSOR_K6, PTA_MMX},
1116       {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1117       {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1118       {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1119 				   | PTA_3DNOW_A},
1120       {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
1121 					 | PTA_3DNOW | PTA_3DNOW_A},
1122       {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1123 				    | PTA_3DNOW_A | PTA_SSE},
1124       {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1125 				      | PTA_3DNOW_A | PTA_SSE},
1126       {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1127 				      | PTA_3DNOW_A | PTA_SSE},
1128       {"x86-64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_64BIT
1129 			       | PTA_SSE | PTA_SSE2 },
1130       {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1131 				      | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1132       {"opteron", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1133 				      | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1134       {"athlon64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1135 				      | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1136       {"athlon-fx", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1137 				      | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1138     };
1139 
1140   int const pta_size = ARRAY_SIZE (processor_alias_table);
1141 
1142   /* Set the default values for switches whose default depends on TARGET_64BIT
1143      in case they weren't overridden by command-line options.  */
1144   if (TARGET_64BIT)
1145     {
1146       if (flag_omit_frame_pointer == 2)
1147 	flag_omit_frame_pointer = 1;
1148       if (flag_asynchronous_unwind_tables == 2)
1149 	flag_asynchronous_unwind_tables = 1;
1150       if (flag_pcc_struct_return == 2)
1151 	flag_pcc_struct_return = 0;
1152     }
1153   else
1154     {
1155       if (flag_omit_frame_pointer == 2)
1156 	flag_omit_frame_pointer = 0;
1157       if (flag_asynchronous_unwind_tables == 2)
1158 	flag_asynchronous_unwind_tables = 0;
1159       if (flag_pcc_struct_return == 2)
1160 	flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
1161     }
1162 
1163 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1164   SUBTARGET_OVERRIDE_OPTIONS;
1165 #endif
1166 
1167   if (!ix86_tune_string && ix86_arch_string)
1168     ix86_tune_string = ix86_arch_string;
1169   if (!ix86_tune_string)
1170     ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT];
1171   if (!ix86_arch_string)
1172     ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
1173 
1174   if (ix86_cmodel_string != 0)
1175     {
1176       if (!strcmp (ix86_cmodel_string, "small"))
1177 	ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1178       else if (flag_pic)
1179 	sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
1180       else if (!strcmp (ix86_cmodel_string, "32"))
1181 	ix86_cmodel = CM_32;
1182       else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
1183 	ix86_cmodel = CM_KERNEL;
1184       else if (!strcmp (ix86_cmodel_string, "medium") && !flag_pic)
1185 	ix86_cmodel = CM_MEDIUM;
1186       else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
1187 	ix86_cmodel = CM_LARGE;
1188       else
1189 	error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
1190     }
1191   else
1192     {
1193       ix86_cmodel = CM_32;
1194       if (TARGET_64BIT)
1195 	ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1196     }
1197   if (ix86_asm_string != 0)
1198     {
1199       if (!strcmp (ix86_asm_string, "intel"))
1200 	ix86_asm_dialect = ASM_INTEL;
1201       else if (!strcmp (ix86_asm_string, "att"))
1202 	ix86_asm_dialect = ASM_ATT;
1203       else
1204 	error ("bad value (%s) for -masm= switch", ix86_asm_string);
1205     }
1206   if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
1207     error ("code model `%s' not supported in the %s bit mode",
1208 	   ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
1209   if (ix86_cmodel == CM_LARGE)
1210     sorry ("code model `large' not supported yet");
1211   if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
1212     sorry ("%i-bit mode not compiled in",
1213 	   (target_flags & MASK_64BIT) ? 64 : 32);
1214 
1215   for (i = 0; i < pta_size; i++)
1216     if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
1217       {
1218 	ix86_arch = processor_alias_table[i].processor;
1219 	/* Default cpu tuning to the architecture.  */
1220 	ix86_tune = ix86_arch;
1221 	if (processor_alias_table[i].flags & PTA_MMX
1222 	    && !(target_flags_explicit & MASK_MMX))
1223 	  target_flags |= MASK_MMX;
1224 	if (processor_alias_table[i].flags & PTA_3DNOW
1225 	    && !(target_flags_explicit & MASK_3DNOW))
1226 	  target_flags |= MASK_3DNOW;
1227 	if (processor_alias_table[i].flags & PTA_3DNOW_A
1228 	    && !(target_flags_explicit & MASK_3DNOW_A))
1229 	  target_flags |= MASK_3DNOW_A;
1230 	if (processor_alias_table[i].flags & PTA_SSE
1231 	    && !(target_flags_explicit & MASK_SSE))
1232 	  target_flags |= MASK_SSE;
1233 	if (processor_alias_table[i].flags & PTA_SSE2
1234 	    && !(target_flags_explicit & MASK_SSE2))
1235 	  target_flags |= MASK_SSE2;
1236 	if (processor_alias_table[i].flags & PTA_SSE3
1237 	    && !(target_flags_explicit & MASK_SSE3))
1238 	  target_flags |= MASK_SSE3;
1239 	if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1240 	  x86_prefetch_sse = true;
1241 	if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1242 	  error ("CPU you selected does not support x86-64 instruction set");
1243 	break;
1244       }
1245 
1246   if (i == pta_size)
1247     error ("bad value (%s) for -march= switch", ix86_arch_string);
1248 
1249   for (i = 0; i < pta_size; i++)
1250     if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
1251       {
1252 	ix86_tune = processor_alias_table[i].processor;
1253 	if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1254 	  error ("CPU you selected does not support x86-64 instruction set");
1255 
1256 	/* Intel CPUs have always interpreted SSE prefetch instructions as
1257 	   NOPs; so, we can enable SSE prefetch instructions even when
1258 	   -mtune (rather than -march) points us to a processor that has them.
1259 	   However, the VIA C3 gives a SIGILL, so we only do that for i686 and
1260 	   higher processors.  */
1261 	if (TARGET_CMOVE && (processor_alias_table[i].flags & PTA_PREFETCH_SSE))
1262 	  x86_prefetch_sse = true;
1263 	break;
1264       }
1265   if (i == pta_size)
1266     error ("bad value (%s) for -mtune= switch", ix86_tune_string);
1267 
1268   if (optimize_size)
1269     ix86_cost = &size_cost;
1270   else
1271     ix86_cost = processor_target_table[ix86_tune].cost;
1272   target_flags |= processor_target_table[ix86_tune].target_enable;
1273   target_flags &= ~processor_target_table[ix86_tune].target_disable;
1274 
1275   /* Arrange to set up i386_stack_locals for all functions.  */
1276   init_machine_status = ix86_init_machine_status;
1277 
1278   /* Validate -mregparm= value.  */
1279   if (ix86_regparm_string)
1280     {
1281       i = atoi (ix86_regparm_string);
1282       if (i < 0 || i > REGPARM_MAX)
1283 	error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
1284       else
1285 	ix86_regparm = i;
1286     }
1287   else
1288    if (TARGET_64BIT)
1289      ix86_regparm = REGPARM_MAX;
1290 
1291   /* If the user has provided any of the -malign-* options,
1292      warn and use that value only if -falign-* is not set.
1293      Remove this code in GCC 3.2 or later.  */
1294   if (ix86_align_loops_string)
1295     {
1296       warning ("-malign-loops is obsolete, use -falign-loops");
1297       if (align_loops == 0)
1298 	{
1299 	  i = atoi (ix86_align_loops_string);
1300 	  if (i < 0 || i > MAX_CODE_ALIGN)
1301 	    error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1302 	  else
1303 	    align_loops = 1 << i;
1304 	}
1305     }
1306 
1307   if (ix86_align_jumps_string)
1308     {
1309       warning ("-malign-jumps is obsolete, use -falign-jumps");
1310       if (align_jumps == 0)
1311 	{
1312 	  i = atoi (ix86_align_jumps_string);
1313 	  if (i < 0 || i > MAX_CODE_ALIGN)
1314 	    error ("-malign-jumps=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1315 	  else
1316 	    align_jumps = 1 << i;
1317 	}
1318     }
1319 
1320   if (ix86_align_funcs_string)
1321     {
1322       warning ("-malign-functions is obsolete, use -falign-functions");
1323       if (align_functions == 0)
1324 	{
1325 	  i = atoi (ix86_align_funcs_string);
1326 	  if (i < 0 || i > MAX_CODE_ALIGN)
1327 	    error ("-malign-functions=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1328 	  else
1329 	    align_functions = 1 << i;
1330 	}
1331     }
1332 
1333   /* Default align_* from the processor table.  */
1334   if (align_loops == 0)
1335     {
1336       align_loops = processor_target_table[ix86_tune].align_loop;
1337       align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
1338     }
1339   if (align_jumps == 0)
1340     {
1341       align_jumps = processor_target_table[ix86_tune].align_jump;
1342       align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
1343     }
1344   if (align_functions == 0)
1345     {
1346       align_functions = processor_target_table[ix86_tune].align_func;
1347     }
1348 
1349   /* Validate -mpreferred-stack-boundary= value, or provide default.
1350      The default of 128 bits is for Pentium III's SSE __m128, but we
1351      don't want additional code to keep the stack aligned when
1352      optimizing for code size.  */
1353   ix86_preferred_stack_boundary = (optimize_size
1354 				   ? TARGET_64BIT ? 128 : 32
1355 				   : 128);
1356   if (ix86_preferred_stack_boundary_string)
1357     {
1358       i = atoi (ix86_preferred_stack_boundary_string);
1359       if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
1360 	error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
1361 	       TARGET_64BIT ? 4 : 2);
1362       else
1363 	ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
1364     }
1365 
1366   /* Validate -mbranch-cost= value, or provide default.  */
1367   ix86_branch_cost = processor_target_table[ix86_tune].cost->branch_cost;
1368   if (ix86_branch_cost_string)
1369     {
1370       i = atoi (ix86_branch_cost_string);
1371       if (i < 0 || i > 5)
1372 	error ("-mbranch-cost=%d is not between 0 and 5", i);
1373       else
1374 	ix86_branch_cost = i;
1375     }
1376 
1377   if (ix86_tls_dialect_string)
1378     {
1379       if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
1380 	ix86_tls_dialect = TLS_DIALECT_GNU;
1381       else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
1382 	ix86_tls_dialect = TLS_DIALECT_SUN;
1383       else
1384 	error ("bad value (%s) for -mtls-dialect= switch",
1385 	       ix86_tls_dialect_string);
1386     }
1387 
1388   /* Keep nonleaf frame pointers.  */
1389   if (TARGET_OMIT_LEAF_FRAME_POINTER)
1390     flag_omit_frame_pointer = 1;
1391 
1392   /* If we're doing fast math, we don't care about comparison order
1393      wrt NaNs.  This lets us use a shorter comparison sequence.  */
1394   if (flag_unsafe_math_optimizations)
1395     target_flags &= ~MASK_IEEE_FP;
1396 
1397   /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
1398      since the insns won't need emulation.  */
1399   if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
1400     target_flags &= ~MASK_NO_FANCY_MATH_387;
1401 
1402   /* Turn on SSE2 builtins for -msse3.  */
1403   if (TARGET_SSE3)
1404     target_flags |= MASK_SSE2;
1405 
1406   /* Turn on SSE builtins for -msse2.  */
1407   if (TARGET_SSE2)
1408     target_flags |= MASK_SSE;
1409 
1410   if (TARGET_64BIT)
1411     {
1412       if (TARGET_ALIGN_DOUBLE)
1413 	error ("-malign-double makes no sense in the 64bit mode");
1414       if (TARGET_RTD)
1415 	error ("-mrtd calling convention not supported in the 64bit mode");
1416       /* Enable by default the SSE and MMX builtins.  */
1417       target_flags |= (MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE);
1418       ix86_fpmath = FPMATH_SSE;
1419      }
1420   else
1421     {
1422       ix86_fpmath = FPMATH_387;
1423       /* The i386 ABI does not specify a red zone.  It still makes sense to use
1424          one when the programmer takes care to keep the stack from being destroyed.  */
1425       if (!(target_flags_explicit & MASK_NO_RED_ZONE))
1426         target_flags |= MASK_NO_RED_ZONE;
1427     }
1428 
1429   if (ix86_fpmath_string != 0)
1430     {
1431       if (! strcmp (ix86_fpmath_string, "387"))
1432 	ix86_fpmath = FPMATH_387;
1433       else if (! strcmp (ix86_fpmath_string, "sse"))
1434 	{
1435 	  if (!TARGET_SSE)
1436 	    {
1437 	      warning ("SSE instruction set disabled, using 387 arithmetics");
1438 	      ix86_fpmath = FPMATH_387;
1439 	    }
1440 	  else
1441 	    ix86_fpmath = FPMATH_SSE;
1442 	}
1443       else if (! strcmp (ix86_fpmath_string, "387,sse")
1444 	       || ! strcmp (ix86_fpmath_string, "sse,387"))
1445 	{
1446 	  if (!TARGET_SSE)
1447 	    {
1448 	      warning ("SSE instruction set disabled, using 387 arithmetics");
1449 	      ix86_fpmath = FPMATH_387;
1450 	    }
1451 	  else if (!TARGET_80387)
1452 	    {
1453 	      warning ("387 instruction set disabled, using SSE arithmetics");
1454 	      ix86_fpmath = FPMATH_SSE;
1455 	    }
1456 	  else
1457 	    ix86_fpmath = FPMATH_SSE | FPMATH_387;
1458 	}
1459       else
1460 	error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
1461     }
1462 
1463   /* It makes no sense to ask for just SSE builtins, so MMX is also turned
1464      on by -msse.  */
1465   if (TARGET_SSE)
1466     {
1467       target_flags |= MASK_MMX;
1468       x86_prefetch_sse = true;
1469     }
1470 
1471   /* If the CPU has 3DNow! it also has MMX, so MMX is also turned on by -m3dnow.  */
1472   if (TARGET_3DNOW)
1473     {
1474       target_flags |= MASK_MMX;
1475       /* If we are targeting the Athlon architecture, enable the 3Dnow/MMX
1476 	 extensions it adds.  */
1477       if (x86_3dnow_a & (1 << ix86_arch))
1478 	target_flags |= MASK_3DNOW_A;
1479     }
1480   if ((x86_accumulate_outgoing_args & TUNEMASK)
1481       && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
1482       && !optimize_size)
1483     target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
1484 
1485   /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix.  */
1486   {
1487     char *p;
1488     ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
1489     p = strchr (internal_label_prefix, 'X');
1490     internal_label_prefix_len = p - internal_label_prefix;
1491     *p = '\0';
1492   }
1493 }
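
/* Example (illustrative only; the command lines below are hypothetical):
   how the option strings validated above map to internal values.

     gcc -m32 -mpreferred-stack-boundary=4 ...
	 => ix86_preferred_stack_boundary = (1 << 4) * BITS_PER_UNIT = 128 bits
     gcc -m32 -mpreferred-stack-boundary=2 ...
	 => 32-bit stack alignment, the minimum accepted for ia32
     gcc -mbranch-cost=3 ...
	 => ix86_branch_cost = 3 (otherwise taken from the processor cost table)  */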
1494 
1495 void
1496 optimization_options (int level, int size ATTRIBUTE_UNUSED)
1497 {
1498   /* For -O2 and beyond, turn off -fschedule-insns by default.  It tends to
1499      make the problem with not enough registers even worse.  */
1500 #ifdef INSN_SCHEDULING
1501   if (level > 1)
1502     flag_schedule_insns = 0;
1503 #endif
1504 
1505   /* The default values of these switches depend on TARGET_64BIT,
1506      which is not known at this moment.  Mark these values with 2 and
1507      let the user override them.  If there is no command line option
1508      specifying them, we will set the defaults in override_options.  */
1509   if (optimize >= 1)
1510     flag_omit_frame_pointer = 2;
1511   flag_pcc_struct_return = 2;
1512   flag_asynchronous_unwind_tables = 2;
1513 }
1514 
1515 /* Table of valid machine attributes.  */
1516 const struct attribute_spec ix86_attribute_table[] =
1517 {
1518   /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
1519   /* Stdcall attribute says callee is responsible for popping arguments
1520      if they are not variable.  */
1521   { "stdcall",   0, 0, false, true,  true,  ix86_handle_cdecl_attribute },
1522   /* Fastcall attribute says callee is responsible for popping arguments
1523      if they are not variable.  */
1524   { "fastcall",  0, 0, false, true,  true,  ix86_handle_cdecl_attribute },
1525   /* Cdecl attribute says the callee is a normal C declaration */
1526   { "cdecl",     0, 0, false, true,  true,  ix86_handle_cdecl_attribute },
1527   /* Regparm attribute specifies how many integer arguments are to be
1528      passed in registers.  */
1529   { "regparm",   1, 1, false, true,  true,  ix86_handle_regparm_attribute },
1530 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
1531   { "dllimport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1532   { "dllexport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1533   { "shared",    0, 0, true,  false, false, ix86_handle_shared_attribute },
1534 #endif
1535   { "ms_struct", 0, 0, false, false,  false, ix86_handle_struct_attribute },
1536   { "gcc_struct", 0, 0, false, false,  false, ix86_handle_struct_attribute },
1537   { NULL,        0, 0, false, false, false, NULL }
1538 };
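
/* Example (illustrative only; the declarations below are hypothetical user
   code, not part of this file): how these attributes are requested.

     int __attribute__((stdcall))    f_std  (int a, int b);   callee pops args
     int __attribute__((fastcall))   f_fast (int a, int b);   args in ECX, EDX
     int __attribute__((cdecl))      f_c    (int a, int b);   caller pops args
     int __attribute__((regparm(3))) f_reg  (int a, int b, int c);
						args in EAX, EDX, ECX  */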
1539 
1540 /* Decide whether we can make a sibling call to a function.  DECL is the
1541    declaration of the function being targeted by the call and EXP is the
1542    CALL_EXPR representing the call.  */
1543 
1544 static bool
1545 ix86_function_ok_for_sibcall (tree decl, tree exp)
1546 {
1547   /* If we are generating position-independent code, we cannot sibcall
1548      optimize any indirect call, or a direct call to a global function,
1549      as the PLT requires %ebx be live.  */
1550   if (!TARGET_64BIT && flag_pic && (!decl || TREE_PUBLIC (decl)))
1551     return false;
1552 
1553   /* If we are returning floats on the 80387 register stack, we cannot
1554      make a sibcall from a function that doesn't return a float to a
1555      function that does or, conversely, from a function that does return
1556      a float to a function that doesn't; the necessary stack adjustment
1557      would not be executed.  */
1558   if (STACK_REG_P (ix86_function_value (TREE_TYPE (exp)))
1559       != STACK_REG_P (ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)))))
1560     return false;
1561 
1562   /* If this call is indirect, we'll need to be able to use a call-clobbered
1563      register for the address of the target function.  Make sure that all
1564      such registers are not used for passing parameters.  */
1565   if (!decl && !TARGET_64BIT)
1566     {
1567       tree type;
1568 
1569       /* We're looking at the CALL_EXPR, we need the type of the function.  */
1570       type = TREE_OPERAND (exp, 0);		/* pointer expression */
1571       type = TREE_TYPE (type);			/* pointer type */
1572       type = TREE_TYPE (type);			/* function type */
1573 
1574       if (ix86_function_regparm (type, NULL) >= 3)
1575 	{
1576 	  /* ??? Need to count the actual number of registers to be used,
1577 	     not the possible number of registers.  Fix later.  */
1578 	  return false;
1579 	}
1580     }
1581 
1582   /* Otherwise okay.  That also includes certain types of indirect calls.  */
1583   return true;
1584 }
1585 
1586 /* Handle a "cdecl", "stdcall", or "fastcall" attribute;
1587    arguments as in struct attribute_spec.handler.  */
1588 static tree
1589 ix86_handle_cdecl_attribute (tree *node, tree name,
1590 			     tree args ATTRIBUTE_UNUSED,
1591 			     int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
1592 {
1593   if (TREE_CODE (*node) != FUNCTION_TYPE
1594       && TREE_CODE (*node) != METHOD_TYPE
1595       && TREE_CODE (*node) != FIELD_DECL
1596       && TREE_CODE (*node) != TYPE_DECL)
1597     {
1598       warning ("`%s' attribute only applies to functions",
1599 	       IDENTIFIER_POINTER (name));
1600       *no_add_attrs = true;
1601     }
1602   else
1603     {
1604       if (is_attribute_p ("fastcall", name))
1605         {
1606           if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
1607             {
1608               error ("fastcall and stdcall attributes are not compatible");
1609             }
1610            else if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
1611             {
1612               error ("fastcall and regparm attributes are not compatible");
1613             }
1614         }
1615       else if (is_attribute_p ("stdcall", name))
1616         {
1617           if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1618             {
1619               error ("fastcall and stdcall attributes are not compatible");
1620             }
1621         }
1622     }
1623 
1624   if (TARGET_64BIT)
1625     {
1626       warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
1627       *no_add_attrs = true;
1628     }
1629 
1630   return NULL_TREE;
1631 }
1632 
1633 /* Handle a "regparm" attribute;
1634    arguments as in struct attribute_spec.handler.  */
1635 static tree
1636 ix86_handle_regparm_attribute (tree *node, tree name, tree args,
1637 			       int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
1638 {
1639   if (TREE_CODE (*node) != FUNCTION_TYPE
1640       && TREE_CODE (*node) != METHOD_TYPE
1641       && TREE_CODE (*node) != FIELD_DECL
1642       && TREE_CODE (*node) != TYPE_DECL)
1643     {
1644       warning ("`%s' attribute only applies to functions",
1645 	       IDENTIFIER_POINTER (name));
1646       *no_add_attrs = true;
1647     }
1648   else
1649     {
1650       tree cst;
1651 
1652       cst = TREE_VALUE (args);
1653       if (TREE_CODE (cst) != INTEGER_CST)
1654 	{
1655 	  warning ("`%s' attribute requires an integer constant argument",
1656 		   IDENTIFIER_POINTER (name));
1657 	  *no_add_attrs = true;
1658 	}
1659       else if (compare_tree_int (cst, REGPARM_MAX) > 0)
1660 	{
1661 	  warning ("argument to `%s' attribute larger than %d",
1662 		   IDENTIFIER_POINTER (name), REGPARM_MAX);
1663 	  *no_add_attrs = true;
1664 	}
1665 
1666       if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1667 	{
1668 	  error ("fastcall and regparm attributes are not compatible");
1669 	}
1670     }
1671 
1672   return NULL_TREE;
1673 }
1674 
1675 /* Return 0 if the attributes for two types are incompatible, 1 if they
1676    are compatible, and 2 if they are nearly compatible (which causes a
1677    warning to be generated).  */
1678 
1679 static int
1680 ix86_comp_type_attributes (tree type1, tree type2)
1681 {
1682   /* Check for mismatch of non-default calling convention.  */
1683   const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
1684 
1685   if (TREE_CODE (type1) != FUNCTION_TYPE)
1686     return 1;
1687 
1688   /*  Check for mismatched fastcall types */
1689   if (!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
1690       != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
1691     return 0;
1692 
1693   /* Check for mismatched return types (cdecl vs stdcall).  */
1694   if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
1695       != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
1696     return 0;
1697   if (ix86_function_regparm (type1, NULL)
1698       != ix86_function_regparm (type2, NULL))
1699     return 0;
1700   return 1;
1701 }
1702 
1703 /* Return the regparm value for a function with the indicated TYPE and DECL.
1704    DECL may be NULL when calling function indirectly
1705    or considering a libcall.  */
1706 
1707 static int
1708 ix86_function_regparm (tree type, tree decl)
1709 {
1710   tree attr;
1711   int regparm = ix86_regparm;
1712   bool user_convention = false;
1713 
1714   if (!TARGET_64BIT)
1715     {
1716       attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
1717       if (attr)
1718 	{
1719 	  regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1720 	  user_convention = true;
1721 	}
1722 
1723       if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
1724 	{
1725 	  regparm = 2;
1726 	  user_convention = true;
1727 	}
1728 
1729       /* Use register calling convention for local functions when possible.  */
1730       if (!TARGET_64BIT && !user_convention && decl
1731 	  && flag_unit_at_a_time && !profile_flag)
1732 	{
1733 	  struct cgraph_local_info *i = cgraph_local_info (decl);
1734 	  if (i && i->local)
1735 	    {
1736 	      /* We can't use regparm(3) for nested functions as these use
1737 		 static chain pointer in third argument.  */
1738 	      if (DECL_CONTEXT (decl) && !DECL_NO_STATIC_CHAIN (decl))
1739 		regparm = 2;
1740 	      else
1741 		regparm = 3;
1742 	    }
1743 	}
1744     }
1745   return regparm;
1746 }
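
/* Example (illustrative only; the declarations are hypothetical): register
   counts ix86_function_regparm can compute for ia32.

     __attribute__((regparm(2))) int f (int, int);    regparm = 2 (attribute)
     __attribute__((fastcall))   int g (int, int);    regparm = 2 (fastcall)
     static int h (int, int, int);		       regparm = 3 when h is
	 known local (unit-at-a-time, not profiled) and needs no static chain  */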
1747 
1748 /* Return true if EAX is live at the start of the function.  Used by
1749    ix86_expand_prologue to determine if we need special help before
1750    calling allocate_stack_worker.  */
1751 
1752 static bool
1753 ix86_eax_live_at_start_p (void)
1754 {
1755   /* Cheat.  Don't bother working forward from ix86_function_regparm
1756      to the function type to whether an actual argument is located in
1757      eax.  Instead just look at cfg info, which is still close enough
1758      to correct at this point.  This gives false positives for broken
1759      functions that might use uninitialized data that happens to be
1760      allocated in eax, but who cares?  */
1761   return REGNO_REG_SET_P (ENTRY_BLOCK_PTR->global_live_at_end, 0);
1762 }
1763 
1764 /* Value is the number of bytes of arguments automatically
1765    popped when returning from a subroutine call.
1766    FUNDECL is the declaration node of the function (as a tree),
1767    FUNTYPE is the data type of the function (as a tree),
1768    or for a library call it is an identifier node for the subroutine name.
1769    SIZE is the number of bytes of arguments passed on the stack.
1770 
1771    On the 80386, the RTD insn may be used to pop them if the number
1772      of args is fixed, but if the number is variable then the caller
1773      must pop them all.  RTD can't be used for library calls now
1774      because the library is compiled with the Unix compiler.
1775    Use of RTD is a selectable option, since it is incompatible with
1776    standard Unix calling sequences.  If the option is not selected,
1777    the caller must always pop the args.
1778 
1779    The attribute stdcall is equivalent to RTD on a per module basis.  */
1780 
1781 int
1782 ix86_return_pops_args (tree fundecl, tree funtype, int size)
1783 {
1784   int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
1785 
1786   /* Cdecl functions override -mrtd, and never pop the stack.  */
1787   if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
1788 
1789     /* Stdcall and fastcall functions will pop the stack if not
1790        variable args.  */
1791     if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
1792         || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
1793       rtd = 1;
1794 
1795     if (rtd
1796         && (TYPE_ARG_TYPES (funtype) == NULL_TREE
1797 	    || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
1798 		== void_type_node)))
1799       return size;
1800   }
1801 
1802   /* Lose any fake structure return argument if it is passed on the stack.  */
1803   if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
1804       && !TARGET_64BIT)
1805     {
1806       int nregs = ix86_function_regparm (funtype, fundecl);
1807 
1808       if (!nregs)
1809 	return GET_MODE_SIZE (Pmode);
1810     }
1811 
1812   return 0;
1813 }
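
/* Example (illustrative only; the declarations are hypothetical): values
   ix86_return_pops_args computes for ia32 without -mrtd.

     int __attribute__((stdcall)) f (int a, int b);    pops 8 bytes ("ret 8")
     int __attribute__((stdcall)) g (int a, ...);      pops 0 (variable args)
     int __attribute__((cdecl))   h (int a, int b);    pops 0 (caller cleans up)  */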
1814 
1815 /* Argument support functions.  */
1816 
1817 /* Return true when register may be used to pass function parameters.  */
1818 bool
1819 ix86_function_arg_regno_p (int regno)
1820 {
1821   int i;
1822   if (!TARGET_64BIT)
1823     return (regno < REGPARM_MAX
1824 	    || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
1825   if (SSE_REGNO_P (regno) && TARGET_SSE)
1826     return true;
1827   /* RAX is used as hidden argument to va_arg functions.  */
1828   if (!regno)
1829     return true;
1830   for (i = 0; i < REGPARM_MAX; i++)
1831     if (regno == x86_64_int_parameter_registers[i])
1832       return true;
1833   return false;
1834 }
1835 
1836 /* Initialize a variable CUM of type CUMULATIVE_ARGS
1837    for a call to a function whose data type is FNTYPE.
1838    For a library call, FNTYPE is 0.  */
1839 
1840 void
1841 init_cumulative_args (CUMULATIVE_ARGS *cum,  /* Argument info to initialize */
1842 		      tree fntype,	/* tree ptr for function decl */
1843 		      rtx libname,	/* SYMBOL_REF of library name or 0 */
1844 		      tree fndecl)
1845 {
1846   static CUMULATIVE_ARGS zero_cum;
1847   tree param, next_param;
1848 
1849   if (TARGET_DEBUG_ARG)
1850     {
1851       fprintf (stderr, "\ninit_cumulative_args (");
1852       if (fntype)
1853 	fprintf (stderr, "fntype code = %s, ret code = %s",
1854 		 tree_code_name[(int) TREE_CODE (fntype)],
1855 		 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
1856       else
1857 	fprintf (stderr, "no fntype");
1858 
1859       if (libname)
1860 	fprintf (stderr, ", libname = %s", XSTR (libname, 0));
1861     }
1862 
1863   *cum = zero_cum;
1864 
1865   /* Set up the number of registers to use for passing arguments.  */
1866   if (fntype)
1867     cum->nregs = ix86_function_regparm (fntype, fndecl);
1868   else
1869     cum->nregs = ix86_regparm;
1870   cum->sse_nregs = SSE_REGPARM_MAX;
1871   cum->mmx_nregs = MMX_REGPARM_MAX;
1872   cum->warn_sse = true;
1873   cum->warn_mmx = true;
1874   cum->maybe_vaarg = false;
1875 
1876   /* Use ecx and edx registers if function has fastcall attribute */
1877   if (fntype && !TARGET_64BIT)
1878     {
1879       if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
1880 	{
1881 	  cum->nregs = 2;
1882 	  cum->fastcall = 1;
1883 	}
1884     }
1885 
1886 
1887   /* Determine if this function has variable arguments.  This is
1888      indicated by the last argument being 'void_type_node' if there
1889      are no variable arguments.  If there are variable arguments, then
1890      we won't pass anything in registers.  */
1891 
1892   if (cum->nregs || !TARGET_MMX || !TARGET_SSE)
1893     {
1894       for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
1895 	   param != 0; param = next_param)
1896 	{
1897 	  next_param = TREE_CHAIN (param);
1898 	  if (next_param == 0 && TREE_VALUE (param) != void_type_node)
1899 	    {
1900 	      if (!TARGET_64BIT)
1901 		{
1902 		  cum->nregs = 0;
1903 		  cum->sse_nregs = 0;
1904 		  cum->mmx_nregs = 0;
1905 		  cum->warn_sse = 0;
1906 		  cum->warn_mmx = 0;
1907 		  cum->fastcall = 0;
1908 		}
1909 	      cum->maybe_vaarg = true;
1910 	    }
1911 	}
1912     }
1913   if ((!fntype && !libname)
1914       || (fntype && !TYPE_ARG_TYPES (fntype)))
1915     cum->maybe_vaarg = 1;
1916 
1917   if (TARGET_DEBUG_ARG)
1918     fprintf (stderr, ", nregs=%d )\n", cum->nregs);
1919 
1920   return;
1921 }
1922 
1923 /* x86-64 register passing implementation.  See x86-64 ABI for details.  Goal
1924    of this code is to classify each 8bytes of incoming argument by the register
1925    class and assign registers accordingly.  */
1926 
1927 /* Return the union class of CLASS1 and CLASS2.
1928    See the x86-64 PS ABI for details.  */
1929 
1930 static enum x86_64_reg_class
1931 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
1932 {
1933   /* Rule #1: If both classes are equal, this is the resulting class.  */
1934   if (class1 == class2)
1935     return class1;
1936 
1937   /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
1938      the other class.  */
1939   if (class1 == X86_64_NO_CLASS)
1940     return class2;
1941   if (class2 == X86_64_NO_CLASS)
1942     return class1;
1943 
1944   /* Rule #3: If one of the classes is MEMORY, the result is MEMORY.  */
1945   if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
1946     return X86_64_MEMORY_CLASS;
1947 
1948   /* Rule #4: If one of the classes is INTEGER, the result is INTEGER.  */
1949   if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
1950       || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
1951     return X86_64_INTEGERSI_CLASS;
1952   if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
1953       || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
1954     return X86_64_INTEGER_CLASS;
1955 
1956   /* Rule #5: If one of the classes is X87 or X87UP class, MEMORY is used.  */
1957   if (class1 == X86_64_X87_CLASS || class1 == X86_64_X87UP_CLASS
1958       || class2 == X86_64_X87_CLASS || class2 == X86_64_X87UP_CLASS)
1959     return X86_64_MEMORY_CLASS;
1960 
1961   /* Rule #6: Otherwise class SSE is used.  */
1962   return X86_64_SSE_CLASS;
1963 }
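
/* Example (illustrative only): applying the merge rules above.

     merge_classes (X86_64_NO_CLASS,        X86_64_SSE_CLASS)   -> SSE       (rule #2)
     merge_classes (X86_64_INTEGERSI_CLASS, X86_64_SSESF_CLASS) -> INTEGERSI (rule #4)
     merge_classes (X86_64_SSE_CLASS,       X86_64_X87_CLASS)   -> MEMORY    (rule #5)
     merge_classes (X86_64_SSEDF_CLASS,     X86_64_SSESF_CLASS) -> SSE       (rule #6)  */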
1964 
1965 /* Classify the argument of type TYPE and mode MODE.
1966    CLASSES will be filled by the register class used to pass each word
1967    of the operand.  The number of words is returned.  In case the parameter
1968    should be passed in memory, 0 is returned. As a special case for zero
1969    sized containers, classes[0] will be NO_CLASS and 1 is returned.
1970 
1971    BIT_OFFSET is used internally for handling records; it specifies the
1972    offset in bits modulo 256 to avoid overflow cases.
1973 
1974    See the x86-64 PS ABI for details.
1975 */
1976 
1977 static int
1978 classify_argument (enum machine_mode mode, tree type,
1979 		   enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
1980 {
1981   HOST_WIDE_INT bytes =
1982     (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1983   int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1984 
1985   /* Variable sized entities are always passed/returned in memory.  */
1986   if (bytes < 0)
1987     return 0;
1988 
1989   if (mode != VOIDmode
1990       && MUST_PASS_IN_STACK (mode, type))
1991     return 0;
1992 
1993   if (type && AGGREGATE_TYPE_P (type))
1994     {
1995       int i;
1996       tree field;
1997       enum x86_64_reg_class subclasses[MAX_CLASSES];
1998 
1999       /* On x86-64 we pass structures larger than 16 bytes on the stack.  */
2000       if (bytes > 16)
2001 	return 0;
2002 
2003       for (i = 0; i < words; i++)
2004 	classes[i] = X86_64_NO_CLASS;
2005 
2006       /* Zero sized arrays or structures are NO_CLASS.  We return 0 to
2007 	 signal the memory class, so handle this as a special case.  */
2008       if (!words)
2009 	{
2010 	  classes[0] = X86_64_NO_CLASS;
2011 	  return 1;
2012 	}
2013 
2014       /* Classify each field of record and merge classes.  */
2015       if (TREE_CODE (type) == RECORD_TYPE)
2016 	{
2017 	  /* For classes first merge in the field of the subclasses.  */
2018 	  if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
2019 	    {
2020 	      tree bases = TYPE_BINFO_BASETYPES (type);
2021 	      int n_bases = TREE_VEC_LENGTH (bases);
2022 	      int i;
2023 
2024 	      for (i = 0; i < n_bases; ++i)
2025 		{
2026 		   tree binfo = TREE_VEC_ELT (bases, i);
2027 		   int num;
2028 		   int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
2029 		   tree type = BINFO_TYPE (binfo);
2030 
2031 		   num = classify_argument (TYPE_MODE (type),
2032 					    type, subclasses,
2033 					    (offset + bit_offset) % 256);
2034 		   if (!num)
2035 		     return 0;
2036 		   for (i = 0; i < num; i++)
2037 		     {
2038 		       int pos = (offset + (bit_offset % 64)) / 8 / 8;
2039 		       classes[i + pos] =
2040 			 merge_classes (subclasses[i], classes[i + pos]);
2041 		     }
2042 		}
2043 	    }
2044 	  /* And now merge the fields of structure.  */
2045 	  for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2046 	    {
2047 	      if (TREE_CODE (field) == FIELD_DECL)
2048 		{
2049 		  int num;
2050 
2051 		  /* Bitfields are always classified as integer.  Handle them
2052 		     early, since later code would consider them to be
2053 		     misaligned integers.  */
2054 		  if (DECL_BIT_FIELD (field))
2055 		    {
2056 		      for (i = int_bit_position (field) / 8 / 8;
2057 			   i < (int_bit_position (field)
2058 			        + tree_low_cst (DECL_SIZE (field), 0)
2059 				+ 63) / 8 / 8; i++)
2060 			classes[i] =
2061 			  merge_classes (X86_64_INTEGER_CLASS,
2062 					 classes[i]);
2063 		    }
2064 		  else
2065 		    {
2066 		      num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2067 					       TREE_TYPE (field), subclasses,
2068 					       (int_bit_position (field)
2069 						+ bit_offset) % 256);
2070 		      if (!num)
2071 			return 0;
2072 		      for (i = 0; i < num; i++)
2073 			{
2074 			  int pos =
2075 			    (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
2076 			  classes[i + pos] =
2077 			    merge_classes (subclasses[i], classes[i + pos]);
2078 			}
2079 		    }
2080 		}
2081 	    }
2082 	}
2083       /* Arrays are handled as small records.  */
2084       else if (TREE_CODE (type) == ARRAY_TYPE)
2085 	{
2086 	  int num;
2087 	  num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
2088 				   TREE_TYPE (type), subclasses, bit_offset);
2089 	  if (!num)
2090 	    return 0;
2091 
2092 	  /* The partial classes are now full classes.  */
2093 	  if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
2094 	    subclasses[0] = X86_64_SSE_CLASS;
2095 	  if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
2096 	    subclasses[0] = X86_64_INTEGER_CLASS;
2097 
2098 	  for (i = 0; i < words; i++)
2099 	    classes[i] = subclasses[i % num];
2100 	}
2101       /* Unions are similar to RECORD_TYPE but offset is always 0.  */
2102       else if (TREE_CODE (type) == UNION_TYPE
2103 	       || TREE_CODE (type) == QUAL_UNION_TYPE)
2104 	{
2105 	  /* For classes first merge in the field of the subclasses.  */
2106 	  if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
2107 	    {
2108 	      tree bases = TYPE_BINFO_BASETYPES (type);
2109 	      int n_bases = TREE_VEC_LENGTH (bases);
2110 	      int i;
2111 
2112 	      for (i = 0; i < n_bases; ++i)
2113 		{
2114 		   tree binfo = TREE_VEC_ELT (bases, i);
2115 		   int num;
2116 		   int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
2117 		   tree type = BINFO_TYPE (binfo);
2118 
2119 		   num = classify_argument (TYPE_MODE (type),
2120 					    type, subclasses,
2121 					    (offset + (bit_offset % 64)) % 256);
2122 		   if (!num)
2123 		     return 0;
2124 		   for (i = 0; i < num; i++)
2125 		     {
2126 		       int pos = (offset + (bit_offset % 64)) / 8 / 8;
2127 		       classes[i + pos] =
2128 			 merge_classes (subclasses[i], classes[i + pos]);
2129 		     }
2130 		}
2131 	    }
2132 	  for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2133 	    {
2134 	      if (TREE_CODE (field) == FIELD_DECL)
2135 		{
2136 		  int num;
2137 		  num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2138 					   TREE_TYPE (field), subclasses,
2139 					   bit_offset);
2140 		  if (!num)
2141 		    return 0;
2142 		  for (i = 0; i < num; i++)
2143 		    classes[i] = merge_classes (subclasses[i], classes[i]);
2144 		}
2145 	    }
2146 	}
2147       else if (TREE_CODE (type) == SET_TYPE)
2148 	{
2149 	  if (bytes <= 4)
2150 	    {
2151 	      classes[0] = X86_64_INTEGERSI_CLASS;
2152 	      return 1;
2153 	    }
2154 	  else if (bytes <= 8)
2155 	    {
2156 	      classes[0] = X86_64_INTEGER_CLASS;
2157 	      return 1;
2158 	    }
2159 	  else if (bytes <= 12)
2160 	    {
2161 	      classes[0] = X86_64_INTEGER_CLASS;
2162 	      classes[1] = X86_64_INTEGERSI_CLASS;
2163 	      return 2;
2164 	    }
2165 	  else
2166 	    {
2167 	      classes[0] = X86_64_INTEGER_CLASS;
2168 	      classes[1] = X86_64_INTEGER_CLASS;
2169 	      return 2;
2170 	    }
2171 	}
2172       else
2173 	abort ();
2174 
2175       /* Final merger cleanup.  */
2176       for (i = 0; i < words; i++)
2177 	{
2178 	  /* If one class is MEMORY, everything should be passed in
2179 	     memory.  */
2180 	  if (classes[i] == X86_64_MEMORY_CLASS)
2181 	    return 0;
2182 
2183 	  /* The X86_64_SSEUP_CLASS should be always preceded by
2184 	     X86_64_SSE_CLASS.  */
2185 	  if (classes[i] == X86_64_SSEUP_CLASS
2186 	      && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
2187 	    classes[i] = X86_64_SSE_CLASS;
2188 
2189 	  /*  X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS.  */
2190 	  if (classes[i] == X86_64_X87UP_CLASS
2191 	      && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
2192 	    classes[i] = X86_64_SSE_CLASS;
2193 	}
2194       return words;
2195     }
2196 
2197   /* Compute the alignment needed.  We align all types to their natural
2198      boundaries, with the exception of XFmode, which is aligned to 128 bits.  */
2199   if (mode != VOIDmode && mode != BLKmode)
2200     {
2201       int mode_alignment = GET_MODE_BITSIZE (mode);
2202 
2203       if (mode == XFmode)
2204 	mode_alignment = 128;
2205       else if (mode == XCmode)
2206 	mode_alignment = 256;
2207       if (COMPLEX_MODE_P (mode))
2208 	mode_alignment /= 2;
2209       /* Misaligned fields are always returned in memory.  */
2210       if (bit_offset % mode_alignment)
2211 	return 0;
2212     }
2213 
2214   /* Classification of atomic types.  */
2215   switch (mode)
2216     {
2217     case DImode:
2218     case SImode:
2219     case HImode:
2220     case QImode:
2221     case CSImode:
2222     case CHImode:
2223     case CQImode:
2224       if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
2225 	classes[0] = X86_64_INTEGERSI_CLASS;
2226       else
2227 	classes[0] = X86_64_INTEGER_CLASS;
2228       return 1;
2229     case CDImode:
2230     case TImode:
2231       classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2232       return 2;
2233     case CTImode:
2234       classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2235       classes[2] = classes[3] = X86_64_INTEGER_CLASS;
2236       return 4;
2237     case SFmode:
2238       if (!(bit_offset % 64))
2239 	classes[0] = X86_64_SSESF_CLASS;
2240       else
2241 	classes[0] = X86_64_SSE_CLASS;
2242       return 1;
2243     case DFmode:
2244       classes[0] = X86_64_SSEDF_CLASS;
2245       return 1;
2246     case XFmode:
2247       classes[0] = X86_64_X87_CLASS;
2248       classes[1] = X86_64_X87UP_CLASS;
2249       return 2;
2250     case TFmode:
2251     case TCmode:
2252       return 0;
2253     case XCmode:
2254       classes[0] = X86_64_X87_CLASS;
2255       classes[1] = X86_64_X87UP_CLASS;
2256       classes[2] = X86_64_X87_CLASS;
2257       classes[3] = X86_64_X87UP_CLASS;
2258       return 4;
2259     case DCmode:
2260       classes[0] = X86_64_SSEDF_CLASS;
2261       classes[1] = X86_64_SSEDF_CLASS;
2262       return 2;
2263     case SCmode:
2264       classes[0] = X86_64_SSE_CLASS;
2265       return 1;
2266     case V4SFmode:
2267     case V4SImode:
2268     case V16QImode:
2269     case V8HImode:
2270     case V2DFmode:
2271     case V2DImode:
2272       classes[0] = X86_64_SSE_CLASS;
2273       classes[1] = X86_64_SSEUP_CLASS;
2274       return 2;
2275     case V2SFmode:
2276     case V2SImode:
2277     case V4HImode:
2278     case V8QImode:
2279       return 0;
2280     case BLKmode:
2281     case VOIDmode:
2282       return 0;
2283     default:
2284       abort ();
2285     }
2286 }
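
/* Example (illustrative only; the struct is hypothetical): classifying a
   small aggregate with the routine above.

     struct s { double d; int i; };		16 bytes, two eightbytes

     eightbyte 0 (d, DFmode, bit offset 0)   -> X86_64_SSEDF_CLASS
     eightbyte 1 (i, SImode, bit offset 64)  -> X86_64_INTEGER_CLASS

   classify_argument returns 2; the container built from these classes passes
   D in an SSE register and I in a general purpose register.  */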
2287 
2288 /* Examine the argument and set the number of registers required in each
2289    class.  Return 0 iff the parameter should be passed in memory.  */
2290 static int
2291 examine_argument (enum machine_mode mode, tree type, int in_return,
2292 		  int *int_nregs, int *sse_nregs)
2293 {
2294   enum x86_64_reg_class class[MAX_CLASSES];
2295   int n = classify_argument (mode, type, class, 0);
2296 
2297   *int_nregs = 0;
2298   *sse_nregs = 0;
2299   if (!n)
2300     return 0;
2301   for (n--; n >= 0; n--)
2302     switch (class[n])
2303       {
2304       case X86_64_INTEGER_CLASS:
2305       case X86_64_INTEGERSI_CLASS:
2306 	(*int_nregs)++;
2307 	break;
2308       case X86_64_SSE_CLASS:
2309       case X86_64_SSESF_CLASS:
2310       case X86_64_SSEDF_CLASS:
2311 	(*sse_nregs)++;
2312 	break;
2313       case X86_64_NO_CLASS:
2314       case X86_64_SSEUP_CLASS:
2315 	break;
2316       case X86_64_X87_CLASS:
2317       case X86_64_X87UP_CLASS:
2318 	if (!in_return)
2319 	  return 0;
2320 	break;
2321       case X86_64_MEMORY_CLASS:
2322 	abort ();
2323       }
2324   return 1;
2325 }
2326 /* Construct container for the argument used by GCC interface.  See
2327    FUNCTION_ARG for the detailed description.  */
2328 static rtx
2329 construct_container (enum machine_mode mode, tree type, int in_return,
2330 		     int nintregs, int nsseregs, const int * intreg,
2331 		     int sse_regno)
2332 {
2333   enum machine_mode tmpmode;
2334   int bytes =
2335     (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2336   enum x86_64_reg_class class[MAX_CLASSES];
2337   int n;
2338   int i;
2339   int nexps = 0;
2340   int needed_sseregs, needed_intregs;
2341   rtx exp[MAX_CLASSES];
2342   rtx ret;
2343 
2344   n = classify_argument (mode, type, class, 0);
2345   if (TARGET_DEBUG_ARG)
2346     {
2347       if (!n)
2348 	fprintf (stderr, "Memory class\n");
2349       else
2350 	{
2351 	  fprintf (stderr, "Classes:");
2352 	  for (i = 0; i < n; i++)
2353 	    {
2354 	      fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
2355 	    }
2356 	   fprintf (stderr, "\n");
2357 	}
2358     }
2359   if (!n)
2360     return NULL;
2361   if (!examine_argument (mode, type, in_return, &needed_intregs, &needed_sseregs))
2362     return NULL;
2363   if (needed_intregs > nintregs || needed_sseregs > nsseregs)
2364     return NULL;
2365 
2366   /* First construct simple cases.  Avoid SCmode, since we want to use
2367      a single register to pass this type.  */
2368   if (n == 1 && mode != SCmode)
2369     switch (class[0])
2370       {
2371       case X86_64_INTEGER_CLASS:
2372       case X86_64_INTEGERSI_CLASS:
2373 	return gen_rtx_REG (mode, intreg[0]);
2374       case X86_64_SSE_CLASS:
2375       case X86_64_SSESF_CLASS:
2376       case X86_64_SSEDF_CLASS:
2377 	return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2378       case X86_64_X87_CLASS:
2379 	return gen_rtx_REG (mode, FIRST_STACK_REG);
2380       case X86_64_NO_CLASS:
2381 	/* Zero sized array, struct or class.  */
2382 	return NULL;
2383       default:
2384 	abort ();
2385       }
2386   if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS
2387       && mode != BLKmode)
2388     return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2389   if (n == 2
2390       && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
2391     return gen_rtx_REG (XFmode, FIRST_STACK_REG);
2392   if (n == 2 && class[0] == X86_64_INTEGER_CLASS
2393       && class[1] == X86_64_INTEGER_CLASS
2394       && (mode == CDImode || mode == TImode || mode == TFmode)
2395       && intreg[0] + 1 == intreg[1])
2396     return gen_rtx_REG (mode, intreg[0]);
2397   if (n == 4
2398       && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS
2399       && class[2] == X86_64_X87_CLASS && class[3] == X86_64_X87UP_CLASS
2400       && mode != BLKmode)
2401     return gen_rtx_REG (XCmode, FIRST_STACK_REG);
2402 
2403   /* Otherwise figure out the entries of the PARALLEL.  */
2404   for (i = 0; i < n; i++)
2405     {
2406       switch (class[i])
2407         {
2408 	  case X86_64_NO_CLASS:
2409 	    break;
2410 	  case X86_64_INTEGER_CLASS:
2411 	  case X86_64_INTEGERSI_CLASS:
2412 	    /* Merge TImodes on aligned occasions here too.  */
2413 	    if (i * 8 + 8 > bytes)
2414 	      tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
2415 	    else if (class[i] == X86_64_INTEGERSI_CLASS)
2416 	      tmpmode = SImode;
2417 	    else
2418 	      tmpmode = DImode;
2419 	    /* We've requested 24 bits we don't have a mode for.  Use DImode.  */
2420 	    if (tmpmode == BLKmode)
2421 	      tmpmode = DImode;
2422 	    exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2423 					       gen_rtx_REG (tmpmode, *intreg),
2424 					       GEN_INT (i*8));
2425 	    intreg++;
2426 	    break;
2427 	  case X86_64_SSESF_CLASS:
2428 	    exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2429 					       gen_rtx_REG (SFmode,
2430 							    SSE_REGNO (sse_regno)),
2431 					       GEN_INT (i*8));
2432 	    sse_regno++;
2433 	    break;
2434 	  case X86_64_SSEDF_CLASS:
2435 	    exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2436 					       gen_rtx_REG (DFmode,
2437 							    SSE_REGNO (sse_regno)),
2438 					       GEN_INT (i*8));
2439 	    sse_regno++;
2440 	    break;
2441 	  case X86_64_SSE_CLASS:
2442 	    if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
2443 	      tmpmode = TImode;
2444 	    else
2445 	      tmpmode = DImode;
2446 	    exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2447 					       gen_rtx_REG (tmpmode,
2448 							    SSE_REGNO (sse_regno)),
2449 					       GEN_INT (i*8));
2450 	    if (tmpmode == TImode)
2451 	      i++;
2452 	    sse_regno++;
2453 	    break;
2454 	  default:
2455 	    abort ();
2456 	}
2457     }
2458   ret =  gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
2459   for (i = 0; i < nexps; i++)
2460     XVECEXP (ret, 0, i) = exp [i];
2461   return ret;
2462 }
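
/* Example (illustrative only): the PARALLEL built for the struct used in the
   example after classify_argument, assuming it is the first argument of a
   64-bit call (so the integer register is DI and the SSE register is XMM0):

     (parallel [(expr_list (reg:DF xmm0) (const_int 0))
		(expr_list (reg:DI di)   (const_int 8))])  */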
2463 
2464 /* Update the data in CUM to advance over an argument
2465    of mode MODE and data type TYPE.
2466    (TYPE is null for libcalls where that information may not be available.)  */
2467 
2468 void
2469 function_arg_advance (CUMULATIVE_ARGS *cum,	/* current arg information */
2470 		      enum machine_mode mode,	/* current arg mode */
2471 		      tree type,	/* type of the argument or 0 if lib support */
2472 		      int named)	/* whether or not the argument was named */
2473 {
2474   int bytes =
2475     (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2476   int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2477 
2478   if (TARGET_DEBUG_ARG)
2479     fprintf (stderr,
2480 	     "function_adv (sz=%d, wds=%2d, nregs=%d, ssenregs=%d, mode=%s, named=%d)\n\n",
2481 	     words, cum->words, cum->nregs, cum->sse_nregs, GET_MODE_NAME (mode), named);
2482   if (TARGET_64BIT)
2483     {
2484       int int_nregs, sse_nregs;
2485       if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
2486 	cum->words += words;
2487       else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
2488 	{
2489 	  cum->nregs -= int_nregs;
2490 	  cum->sse_nregs -= sse_nregs;
2491 	  cum->regno += int_nregs;
2492 	  cum->sse_regno += sse_nregs;
2493 	}
2494       else
2495 	cum->words += words;
2496     }
2497   else
2498     {
2499       if (TARGET_SSE && SSE_REG_MODE_P (mode)
2500 	  && (!type || !AGGREGATE_TYPE_P (type)))
2501 	{
2502 	  cum->sse_words += words;
2503 	  cum->sse_nregs -= 1;
2504 	  cum->sse_regno += 1;
2505 	  if (cum->sse_nregs <= 0)
2506 	    {
2507 	      cum->sse_nregs = 0;
2508 	      cum->sse_regno = 0;
2509 	    }
2510 	}
2511       else if (TARGET_MMX && MMX_REG_MODE_P (mode)
2512 	       && (!type || !AGGREGATE_TYPE_P (type)))
2513 	{
2514 	  cum->mmx_words += words;
2515 	  cum->mmx_nregs -= 1;
2516 	  cum->mmx_regno += 1;
2517 	  if (cum->mmx_nregs <= 0)
2518 	    {
2519 	      cum->mmx_nregs = 0;
2520 	      cum->mmx_regno = 0;
2521 	    }
2522 	}
2523       else
2524 	{
2525 	  cum->words += words;
2526 	  cum->nregs -= words;
2527 	  cum->regno += words;
2528 
2529 	  if (cum->nregs <= 0)
2530 	    {
2531 	      cum->nregs = 0;
2532 	      cum->regno = 0;
2533 	    }
2534 	}
2535     }
2536   return;
2537 }
2538 
2539 /* Define where to put the arguments to a function.
2540    Value is zero to push the argument on the stack,
2541    or a hard register in which to store the argument.
2542 
2543    MODE is the argument's machine mode.
2544    TYPE is the data type of the argument (as a tree).
2545     This is null for libcalls where that information may
2546     not be available.
2547    CUM is a variable of type CUMULATIVE_ARGS which gives info about
2548     the preceding args and about the function being called.
2549    NAMED is nonzero if this argument is a named parameter
2550     (otherwise it is an extra parameter matching an ellipsis).  */
2551 
2552 rtx
2553 function_arg (CUMULATIVE_ARGS *cum,	/* current arg information */
2554 	      enum machine_mode mode,	/* current arg mode */
2555 	      tree type,	/* type of the argument or 0 if lib support */
2556 	      int named)	/* != 0 for normal args, == 0 for ...  args */
2557 {
2558   rtx ret   = NULL_RTX;
2559   int bytes =
2560     (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2561   int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2562   static bool warnedsse, warnedmmx;
2563 
2564   /* Handle a hidden AL argument containing number of registers for varargs
2565      x86-64 functions.  For i386 ABI just return constm1_rtx to avoid
2566      any AL settings.  */
2567   if (mode == VOIDmode)
2568     {
2569       if (TARGET_64BIT)
2570 	return GEN_INT (cum->maybe_vaarg
2571 			? (cum->sse_nregs < 0
2572 			   ? SSE_REGPARM_MAX
2573 			   : cum->sse_regno)
2574 			: -1);
2575       else
2576 	return constm1_rtx;
2577     }
2578   if (TARGET_64BIT)
2579     ret = construct_container (mode, type, 0, cum->nregs, cum->sse_nregs,
2580 			       &x86_64_int_parameter_registers [cum->regno],
2581 			       cum->sse_regno);
2582   else
2583     switch (mode)
2584       {
2585 	/* For now, pass fp/complex values on the stack.  */
2586       default:
2587 	break;
2588 
2589       case BLKmode:
2590 	if (bytes < 0)
2591 	  break;
2592 	/* FALLTHRU */
2593       case DImode:
2594       case SImode:
2595       case HImode:
2596       case QImode:
2597 	if (words <= cum->nregs)
2598 	  {
2599 	    int regno = cum->regno;
2600 
2601 	    /* Fastcall allocates the first two DWORD (SImode) or
2602 	       smaller arguments to ECX and EDX.  */
2603 	    if (cum->fastcall)
2604 	      {
2605 	        if (mode == BLKmode || mode == DImode)
2606 	          break;
2607 
2608 	        /* ECX not EAX is the first allocated register.  */
2609 	        if (regno == 0)
2610 		  regno = 2;
2611 	      }
2612 	    ret = gen_rtx_REG (mode, regno);
2613 	  }
2614 	break;
2615       case TImode:
2616       case V16QImode:
2617       case V8HImode:
2618       case V4SImode:
2619       case V2DImode:
2620       case V4SFmode:
2621       case V2DFmode:
2622 	if (!type || !AGGREGATE_TYPE_P (type))
2623 	  {
2624 	    if (!TARGET_SSE && !warnedsse && cum->warn_sse)
2625 	      {
2626 		warnedsse = true;
2627 		warning ("SSE vector argument without SSE enabled "
2628 			 "changes the ABI");
2629 	      }
2630 	    if (cum->sse_nregs)
2631 	      ret = gen_rtx_REG (mode, cum->sse_regno + FIRST_SSE_REG);
2632 	  }
2633 	break;
2634       case V8QImode:
2635       case V4HImode:
2636       case V2SImode:
2637       case V2SFmode:
2638 	if (!type || !AGGREGATE_TYPE_P (type))
2639 	  {
2640 	    if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
2641 	      {
2642 		warnedmmx = true;
2643 		warning ("MMX vector argument without MMX enabled "
2644 			 "changes the ABI");
2645 	      }
2646 	    if (cum->mmx_nregs)
2647 	      ret = gen_rtx_REG (mode, cum->mmx_regno + FIRST_MMX_REG);
2648 	  }
2649 	break;
2650       }
2651 
2652   if (TARGET_DEBUG_ARG)
2653     {
2654       fprintf (stderr,
2655 	       "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
2656 	       words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2657 
2658       if (ret)
2659 	print_simple_rtl (stderr, ret);
2660       else
2661 	fprintf (stderr, ", stack");
2662 
2663       fprintf (stderr, " )\n");
2664     }
2665 
2666   return ret;
2667 }
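
/* Example (illustrative only; the declaration is hypothetical): registers
   assigned above for an ia32 fastcall function.

     void __attribute__((fastcall)) f (int a, int b, int c);

     a -> ECX   (regno 0 is remapped to 2 for fastcall)
     b -> EDX
     c -> stack (only two registers are available)  */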
2668 
2669 /* A C expression that indicates when an argument must be passed by
2670    reference.  If nonzero for an argument, a copy of that argument is
2671    made in memory and a pointer to the argument is passed instead of
2672    the argument itself.  The pointer is passed in whatever way is
2673    appropriate for passing a pointer to that type.  */
2674 
2675 int
2676 function_arg_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
2677 				enum machine_mode mode ATTRIBUTE_UNUSED,
2678 				tree type, int named ATTRIBUTE_UNUSED)
2679 {
2680   if (!TARGET_64BIT)
2681     return 0;
2682 
2683   if (type && int_size_in_bytes (type) == -1)
2684     {
2685       if (TARGET_DEBUG_ARG)
2686 	fprintf (stderr, "function_arg_pass_by_reference\n");
2687       return 1;
2688     }
2689 
2690   return 0;
2691 }
2692 
2693 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
2694    ABI  */
2695 static bool
2696 contains_128bit_aligned_vector_p (tree type)
2697 {
2698   enum machine_mode mode = TYPE_MODE (type);
2699   if (SSE_REG_MODE_P (mode)
2700       && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
2701     return true;
2702   if (TYPE_ALIGN (type) < 128)
2703     return false;
2704 
2705   if (AGGREGATE_TYPE_P (type))
2706     {
2707       /* Walk the aggregates recursively.  */
2708       if (TREE_CODE (type) == RECORD_TYPE
2709 	  || TREE_CODE (type) == UNION_TYPE
2710 	  || TREE_CODE (type) == QUAL_UNION_TYPE)
2711 	{
2712 	  tree field;
2713 
2714 	  if (TYPE_BINFO (type) != NULL
2715 	      && TYPE_BINFO_BASETYPES (type) != NULL)
2716 	    {
2717 	      tree bases = TYPE_BINFO_BASETYPES (type);
2718 	      int n_bases = TREE_VEC_LENGTH (bases);
2719 	      int i;
2720 
2721 	      for (i = 0; i < n_bases; ++i)
2722 		{
2723 		  tree binfo = TREE_VEC_ELT (bases, i);
2724 		  tree type = BINFO_TYPE (binfo);
2725 
2726 		  if (contains_128bit_aligned_vector_p (type))
2727 		    return true;
2728 		}
2729 	    }
2730 	  /* And now merge the fields of structure.  */
2731 	  for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2732 	    {
2733 	      if (TREE_CODE (field) == FIELD_DECL
2734 		  && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
2735 		return true;
2736 	    }
2737 	}
2738       /* Just for use if some languages pass arrays by value.  */
2739       else if (TREE_CODE (type) == ARRAY_TYPE)
2740 	{
2741 	  if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
2742 	    return true;
2743 	}
2744       else
2745 	abort ();
2746     }
2747   return false;
2748 }
2749 
2750 /* Gives the alignment boundary, in bits, of an argument with the
2751    specified mode and type.  */
2752 
2753 int
2754 ix86_function_arg_boundary (enum machine_mode mode, tree type)
2755 {
2756   int align;
2757   if (type)
2758     align = TYPE_ALIGN (type);
2759   else
2760     align = GET_MODE_ALIGNMENT (mode);
2761   if (align < PARM_BOUNDARY)
2762     align = PARM_BOUNDARY;
2763   if (!TARGET_64BIT)
2764     {
2765       /* i386 ABI defines all arguments to be 4 byte aligned.  We have to
2766 	 make an exception for SSE modes since these require 128bit
2767 	 alignment.
2768 
2769 	 The handling here differs from field_alignment.  ICC aligns MMX
2770 	 arguments to 4 byte boundaries, while structure fields are aligned
2771 	 to 8 byte boundaries.  */
2772       if (!type)
2773 	{
2774 	  if (!SSE_REG_MODE_P (mode))
2775 	    align = PARM_BOUNDARY;
2776 	}
2777       else
2778 	{
2779 	  if (!contains_128bit_aligned_vector_p (type))
2780 	    align = PARM_BOUNDARY;
2781 	}
2782     }
2783   if (align > 128)
2784     align = 128;
2785   return align;
2786 }
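
/* Example (illustrative only): boundaries computed above for ia32 argument
   passing.

     int, long long, double			-> 32 bits (PARM_BOUNDARY)
     __m128, or an aggregate containing a
     128-bit aligned vector			-> 128 bits  */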
2787 
2788 /* Return true if N is a possible register number of function value.  */
2789 bool
2790 ix86_function_value_regno_p (int regno)
2791 {
2792   if (!TARGET_64BIT)
2793     {
2794       return ((regno) == 0
2795 	      || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
2796 	      || ((regno) == FIRST_SSE_REG && TARGET_SSE));
2797     }
2798   return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
2799 	  || ((regno) == FIRST_SSE_REG && TARGET_SSE)
2800 	  || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
2801 }
2802 
2803 /* Define how to find the value returned by a function.
2804    VALTYPE is the data type of the value (as a tree).
2805    If the precise function being called is known, FUNC is its FUNCTION_DECL;
2806    otherwise, FUNC is 0.  */
2807 rtx
2808 ix86_function_value (tree valtype)
2809 {
2810   if (TARGET_64BIT)
2811     {
2812       rtx ret = construct_container (TYPE_MODE (valtype), valtype, 1,
2813 				     REGPARM_MAX, SSE_REGPARM_MAX,
2814 				     x86_64_int_return_registers, 0);
2815       /* For zero sized structures, construct_container returns NULL, but we need
2816          to keep the rest of the compiler happy by returning a meaningful value.  */
2817       if (!ret)
2818 	ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
2819       return ret;
2820     }
2821   else
2822     return gen_rtx_REG (TYPE_MODE (valtype),
2823 			ix86_value_regno (TYPE_MODE (valtype)));
2824 }
2825 
2826 /* Return true iff TYPE is returned in memory.  */
2827 int
2828 ix86_return_in_memory (tree type)
2829 {
2830   int needed_intregs, needed_sseregs, size;
2831   enum machine_mode mode = TYPE_MODE (type);
2832 
2833   if (TARGET_64BIT)
2834     return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
2835 
2836   if (mode == BLKmode)
2837     return 1;
2838 
2839   size = int_size_in_bytes (type);
2840 
2841   if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
2842     return 0;
2843 
2844   if (VECTOR_MODE_P (mode) || mode == TImode)
2845     {
2846       /* User-created vectors small enough to fit in EAX.  */
2847       if (size < 8)
2848 	return 0;
2849 
2850       /* MMX/3dNow values are returned on the stack, since we've
2851 	 got to EMMS/FEMMS before returning.  */
2852       if (size == 8)
2853 	return 1;
2854 
2855       /* SSE values are returned in XMM0.  */
2856       /* ??? Except when it doesn't exist?  We have a choice of
2857 	 either (1) being abi incompatible with a -march switch,
2858 	 or (2) generating an error here.  Given no good solution,
2859 	 I think the safest thing is one warning.  The user won't
2860 	 be able to use -Werror, but....  */
2861       if (size == 16)
2862 	{
2863 	  static bool warned;
2864 
2865 	  if (TARGET_SSE)
2866 	    return 0;
2867 
2868 	  if (!warned)
2869 	    {
2870 	      warned = true;
2871 	      warning ("SSE vector return without SSE enabled "
2872 		       "changes the ABI");
2873 	    }
2874 	  return 1;
2875 	}
2876     }
2877 
2878   if (mode == XFmode)
2879     return 0;
2880 
2881   if (size > 12)
2882     return 1;
2883   return 0;
2884 }
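
/* Example (illustrative only; the types are hypothetical): ia32 decisions
   made by the function above.

     struct { float x, y, z, w; }	-> memory (BLKmode aggregate)
     long double (XFmode)		-> registers (%st(0))
     __m64 (8-byte MMX vector)		-> memory (must EMMS before return)
     __m128 with -msse			-> %xmm0  */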
2885 
2886 /* Define how to find the value returned by a library function
2887    assuming the value has mode MODE.  */
2888 rtx
2889 ix86_libcall_value (enum machine_mode mode)
2890 {
2891   if (TARGET_64BIT)
2892     {
2893       switch (mode)
2894 	{
2895 	case SFmode:
2896 	case SCmode:
2897 	case DFmode:
2898 	case DCmode:
2899 	  return gen_rtx_REG (mode, FIRST_SSE_REG);
2900 	case XFmode:
2901 	case XCmode:
2902 	  return gen_rtx_REG (mode, FIRST_FLOAT_REG);
2903 	case TFmode:
2904 	case TCmode:
2905 	  return NULL;
2906 	default:
2907 	  return gen_rtx_REG (mode, 0);
2908 	}
2909     }
2910   else
2911     return gen_rtx_REG (mode, ix86_value_regno (mode));
2912 }
2913 
2914 /* Given a mode, return the register to use for a return value.  */
2915 
2916 static int
2917 ix86_value_regno (enum machine_mode mode)
2918 {
2919   /* Floating point return values in %st(0).  */
2920   if (GET_MODE_CLASS (mode) == MODE_FLOAT && TARGET_FLOAT_RETURNS_IN_80387)
2921     return FIRST_FLOAT_REG;
2922   /* 16-byte vector modes in %xmm0.  See ix86_return_in_memory for where
2923      we prevent this case when sse is not available.  */
2924   if (mode == TImode || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
2925     return FIRST_SSE_REG;
2926   /* Everything else in %eax.  */
2927   return 0;
2928 }
2929 
2930 /* Create the va_list data type.  */
2931 
2932 static tree
2933 ix86_build_builtin_va_list (void)
2934 {
2935   tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
2936 
2937   /* For i386 we use plain pointer to argument area.  */
2938   if (!TARGET_64BIT)
2939     return build_pointer_type (char_type_node);
2940 
2941   record = (*lang_hooks.types.make_type) (RECORD_TYPE);
2942   type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
2943 
2944   f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
2945 		      unsigned_type_node);
2946   f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
2947 		      unsigned_type_node);
2948   f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
2949 		      ptr_type_node);
2950   f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
2951 		      ptr_type_node);
2952 
2953   DECL_FIELD_CONTEXT (f_gpr) = record;
2954   DECL_FIELD_CONTEXT (f_fpr) = record;
2955   DECL_FIELD_CONTEXT (f_ovf) = record;
2956   DECL_FIELD_CONTEXT (f_sav) = record;
2957 
2958   TREE_CHAIN (record) = type_decl;
2959   TYPE_NAME (record) = type_decl;
2960   TYPE_FIELDS (record) = f_gpr;
2961   TREE_CHAIN (f_gpr) = f_fpr;
2962   TREE_CHAIN (f_fpr) = f_ovf;
2963   TREE_CHAIN (f_ovf) = f_sav;
2964 
2965   layout_type (record);
2966 
2967   /* The correct type is an array type of one element.  */
2968   return build_array_type (record, build_index_type (size_zero_node));
2969 }
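
/* For reference, the record built above corresponds to the va_list layout
   required by the x86-64 psABI; a rough C sketch of what user code sees:

     typedef struct __va_list_tag {
       unsigned int gp_offset;
       unsigned int fp_offset;
       void *overflow_arg_area;
       void *reg_save_area;
     } __builtin_va_list[1];

   gp_offset and fp_offset index into reg_save_area, while overflow_arg_area
   walks the caller's stack.  */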
2970 
2971 /* Perform any actions needed for a function that is receiving a
2972    variable number of arguments.
2973 
2974    CUM is as above.
2975 
2976    MODE and TYPE are the mode and type of the current parameter.
2977 
2978    PRETEND_SIZE is a variable that should be set to the amount of stack
2979    that must be pushed by the prolog to pretend that our caller pushed
2980    it.
2981 
2982    Normally, this macro will push all remaining incoming registers on the
2983    stack and set PRETEND_SIZE to the length of the registers pushed.  */
2984 
2985 void
2986 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
2987 			     tree type, int *pretend_size ATTRIBUTE_UNUSED,
2988 			     int no_rtl)
2989 {
2990   CUMULATIVE_ARGS next_cum;
2991   rtx save_area = NULL_RTX, mem;
2992   rtx label;
2993   rtx label_ref;
2994   rtx tmp_reg;
2995   rtx nsse_reg;
2996   int set;
2997   tree fntype;
2998   int stdarg_p;
2999   int i;
3000 
3001   if (!TARGET_64BIT)
3002     return;
3003 
3004   /* Indicate to allocate space on the stack for varargs save area.  */
3005   ix86_save_varrargs_registers = 1;
3006 
3007   cfun->stack_alignment_needed = 128;
3008 
3009   fntype = TREE_TYPE (current_function_decl);
3010   stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
3011 	      && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
3012 		  != void_type_node));
3013 
3014   /* For varargs, we do not want to skip the dummy va_dcl argument.
3015      For stdargs, we do want to skip the last named argument.  */
3016   next_cum = *cum;
3017   if (stdarg_p)
3018     function_arg_advance (&next_cum, mode, type, 1);
3019 
3020   if (!no_rtl)
3021     save_area = frame_pointer_rtx;
3022 
3023   set = get_varargs_alias_set ();
3024 
3025   for (i = next_cum.regno; i < ix86_regparm; i++)
3026     {
3027       mem = gen_rtx_MEM (Pmode,
3028 			 plus_constant (save_area, i * UNITS_PER_WORD));
3029       set_mem_alias_set (mem, set);
3030       emit_move_insn (mem, gen_rtx_REG (Pmode,
3031 					x86_64_int_parameter_registers[i]));
3032     }
3033 
3034   if (next_cum.sse_nregs)
3035     {
3036       /* Now emit code to save SSE registers.  The AX parameter contains the
3037 	 number of SSE parameter registers used to call this function.  We use
3038 	 the sse_prologue_save insn template, which produces a computed jump
3039 	 across the SSE saves.  We need some preparation work to get this working.  */
3040 
3041       label = gen_label_rtx ();
3042       label_ref = gen_rtx_LABEL_REF (Pmode, label);
3043 
3044       /* Compute the address to jump to:
3045          label - 4*eax + nnamed_sse_arguments*4 (each save insn is 4 bytes).  */
3046       tmp_reg = gen_reg_rtx (Pmode);
3047       nsse_reg = gen_reg_rtx (Pmode);
3048       emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
3049       emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
3050 			      gen_rtx_MULT (Pmode, nsse_reg,
3051 					    GEN_INT (4))));
3052       if (next_cum.sse_regno)
3053 	emit_move_insn
3054 	  (nsse_reg,
3055 	   gen_rtx_CONST (DImode,
3056 			  gen_rtx_PLUS (DImode,
3057 					label_ref,
3058 					GEN_INT (next_cum.sse_regno * 4))));
3059       else
3060 	emit_move_insn (nsse_reg, label_ref);
3061       emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
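      /* A note on the computation above: nsse_reg now holds, roughly,
	 label - 4*eax + 4*sse_regno.  The sse_prologue_save expansion emits
	 one 4-byte save per XMM register and places the label after the last
	 save, so jumping there executes only the saves for the registers that
	 may actually carry unnamed floating point arguments.  */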
3062 
3063       /* Compute the address of the memory block we save into.  We always use
3064 	 a pointer pointing 127 bytes after the first byte to store - this is
3065 	 needed to keep each instruction's size limited to 4 bytes.  */
3066       tmp_reg = gen_reg_rtx (Pmode);
3067       emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
3068 			      plus_constant (save_area,
3069 					     8 * REGPARM_MAX + 127)));
3070       mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
3071       set_mem_alias_set (mem, set);
3072       set_mem_align (mem, BITS_PER_WORD);
3073 
3074       /* And finally do the dirty job!  */
3075       emit_insn (gen_sse_prologue_save (mem, nsse_reg,
3076 					GEN_INT (next_cum.sse_regno), label));
3077     }
3078 
3079 }
3080 
3081 /* Implement va_start.  */
3082 
3083 void
3084 ix86_va_start (tree valist, rtx nextarg)
3085 {
3086   HOST_WIDE_INT words, n_gpr, n_fpr;
3087   tree f_gpr, f_fpr, f_ovf, f_sav;
3088   tree gpr, fpr, ovf, sav, t;
3089 
3090   /* Only 64bit target needs something special.  */
3091   if (!TARGET_64BIT)
3092     {
3093       std_expand_builtin_va_start (valist, nextarg);
3094       return;
3095     }
3096 
3097   f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3098   f_fpr = TREE_CHAIN (f_gpr);
3099   f_ovf = TREE_CHAIN (f_fpr);
3100   f_sav = TREE_CHAIN (f_ovf);
3101 
3102   valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
3103   gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
3104   fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
3105   ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
3106   sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
3107 
3108   /* Count number of gp and fp argument registers used.  */
3109   words = current_function_args_info.words;
3110   n_gpr = current_function_args_info.regno;
3111   n_fpr = current_function_args_info.sse_regno;
3112 
3113   if (TARGET_DEBUG_ARG)
3114     fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
3115 	     (int) words, (int) n_gpr, (int) n_fpr);
3116 
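  /* A brief sketch of the layout assumed here: gp_offset counts bytes into
     the integer part of the register save area (8 bytes per GP register
     already consumed by named arguments), while fp_offset starts after the
     REGPARM_MAX integer slots and advances 16 bytes per SSE register,
     matching the save area built by ix86_setup_incoming_varargs above.  */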
3117   t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
3118 	     build_int_2 (n_gpr * 8, 0));
3119   TREE_SIDE_EFFECTS (t) = 1;
3120   expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3121 
3122   t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
3123 	     build_int_2 (n_fpr * 16 + 8*REGPARM_MAX, 0));
3124   TREE_SIDE_EFFECTS (t) = 1;
3125   expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3126 
3127   /* Find the overflow area.  */
3128   t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
3129   if (words != 0)
3130     t = build (PLUS_EXPR, TREE_TYPE (ovf), t,
3131 	       build_int_2 (words * UNITS_PER_WORD, 0));
3132   t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
3133   TREE_SIDE_EFFECTS (t) = 1;
3134   expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3135 
3136   /* Find the register save area.
3137      The function prologue saves it right above the stack frame.  */
3138   t = make_tree (TREE_TYPE (sav), frame_pointer_rtx);
3139   t = build (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
3140   TREE_SIDE_EFFECTS (t) = 1;
3141   expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3142 }
3143 
3144 /* Implement va_arg.  */
3145 rtx
3146 ix86_va_arg (tree valist, tree type)
3147 {
3148   static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
3149   tree f_gpr, f_fpr, f_ovf, f_sav;
3150   tree gpr, fpr, ovf, sav, t;
3151   int size, rsize;
3152   rtx lab_false, lab_over = NULL_RTX;
3153   rtx addr_rtx, r;
3154   rtx container;
3155   int indirect_p = 0;
3156 
3157   /* Only 64bit target needs something special.  */
3158   if (!TARGET_64BIT)
3159     {
3160       return std_expand_builtin_va_arg (valist, type);
3161     }
3162 
3163   f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3164   f_fpr = TREE_CHAIN (f_gpr);
3165   f_ovf = TREE_CHAIN (f_fpr);
3166   f_sav = TREE_CHAIN (f_ovf);
3167 
3168   valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
3169   gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
3170   fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
3171   ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
3172   sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
3173 
3174   size = int_size_in_bytes (type);
3175   if (size == -1)
3176     {
3177       /* Passed by reference.  */
3178       indirect_p = 1;
3179       type = build_pointer_type (type);
3180       size = int_size_in_bytes (type);
3181     }
3182   rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3183 
3184   container = construct_container (TYPE_MODE (type), type, 0,
3185 				   REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
3186   /*
3187    * Pull the value out of the saved registers ...
3188    */
3189 
3190   addr_rtx = gen_reg_rtx (Pmode);
3191 
3192   if (container)
3193     {
3194       rtx int_addr_rtx, sse_addr_rtx;
3195       int needed_intregs, needed_sseregs;
3196       int need_temp;
3197 
3198       lab_over = gen_label_rtx ();
3199       lab_false = gen_label_rtx ();
3200 
3201       examine_argument (TYPE_MODE (type), type, 0,
3202 		        &needed_intregs, &needed_sseregs);
3203 
3204 
3205       need_temp = ((needed_intregs && TYPE_ALIGN (type) > 64)
3206 		   || TYPE_ALIGN (type) > 128);
3207 
3208       /* In case we are passing a structure, verify that it is a consecutive
3209          block in the register save area.  If not, we need to do moves.  */
3210       if (!need_temp && !REG_P (container))
3211 	{
3212 	  /* Verify that all registers are strictly consecutive  */
3213 	  if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
3214 	    {
3215 	      int i;
3216 
3217 	      for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
3218 		{
3219 		  rtx slot = XVECEXP (container, 0, i);
3220 		  if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
3221 		      || INTVAL (XEXP (slot, 1)) != i * 16)
3222 		    need_temp = 1;
3223 		}
3224 	    }
3225 	  else
3226 	    {
3227 	      int i;
3228 
3229 	      for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
3230 		{
3231 		  rtx slot = XVECEXP (container, 0, i);
3232 		  if (REGNO (XEXP (slot, 0)) != (unsigned int) i
3233 		      || INTVAL (XEXP (slot, 1)) != i * 8)
3234 		    need_temp = 1;
3235 		}
3236 	    }
3237 	}
3238       if (!need_temp)
3239 	{
3240 	  int_addr_rtx = addr_rtx;
3241 	  sse_addr_rtx = addr_rtx;
3242 	}
3243       else
3244 	{
3245 	  int_addr_rtx = gen_reg_rtx (Pmode);
3246 	  sse_addr_rtx = gen_reg_rtx (Pmode);
3247 	}
3248       /* First ensure that we fit completely in registers.  */
3249       if (needed_intregs)
3250 	{
3251 	  emit_cmp_and_jump_insns (expand_expr
3252 				   (gpr, NULL_RTX, SImode, EXPAND_NORMAL),
3253 				   GEN_INT ((REGPARM_MAX - needed_intregs +
3254 					     1) * 8), GE, const1_rtx, SImode,
3255 				   1, lab_false);
3256 	}
3257       if (needed_sseregs)
3258 	{
3259 	  emit_cmp_and_jump_insns (expand_expr
3260 				   (fpr, NULL_RTX, SImode, EXPAND_NORMAL),
3261 				   GEN_INT ((SSE_REGPARM_MAX -
3262 					     needed_sseregs + 1) * 16 +
3263 					    REGPARM_MAX * 8), GE, const1_rtx,
3264 				   SImode, 1, lab_false);
3265 	}
3266 
3267       /* Compute index to start of area used for integer regs.  */
3268       if (needed_intregs)
3269 	{
3270 	  t = build (PLUS_EXPR, ptr_type_node, sav, gpr);
3271 	  r = expand_expr (t, int_addr_rtx, Pmode, EXPAND_NORMAL);
3272 	  if (r != int_addr_rtx)
3273 	    emit_move_insn (int_addr_rtx, r);
3274 	}
3275       if (needed_sseregs)
3276 	{
3277 	  t = build (PLUS_EXPR, ptr_type_node, sav, fpr);
3278 	  r = expand_expr (t, sse_addr_rtx, Pmode, EXPAND_NORMAL);
3279 	  if (r != sse_addr_rtx)
3280 	    emit_move_insn (sse_addr_rtx, r);
3281 	}
3282       if (need_temp)
3283 	{
3284 	  int i;
3285 	  rtx mem;
3286 	  rtx x;
3287 
3288 	  /* Never use the memory itself, as it has the alias set.  */
3289 	  x = XEXP (assign_temp (type, 0, 1, 0), 0);
3290 	  mem = gen_rtx_MEM (BLKmode, x);
3291 	  force_operand (x, addr_rtx);
3292 	  set_mem_alias_set (mem, get_varargs_alias_set ());
3293 	  set_mem_align (mem, BITS_PER_UNIT);
3294 
3295 	  for (i = 0; i < XVECLEN (container, 0); i++)
3296 	    {
3297 	      rtx slot = XVECEXP (container, 0, i);
3298 	      rtx reg = XEXP (slot, 0);
3299 	      enum machine_mode mode = GET_MODE (reg);
3300 	      rtx src_addr;
3301 	      rtx src_mem;
3302 	      int src_offset;
3303 	      rtx dest_mem;
3304 
3305 	      if (SSE_REGNO_P (REGNO (reg)))
3306 		{
3307 		  src_addr = sse_addr_rtx;
3308 		  src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
3309 		}
3310 	      else
3311 		{
3312 		  src_addr = int_addr_rtx;
3313 		  src_offset = REGNO (reg) * 8;
3314 		}
3315 	      src_mem = gen_rtx_MEM (mode, src_addr);
3316 	      set_mem_alias_set (src_mem, get_varargs_alias_set ());
3317 	      src_mem = adjust_address (src_mem, mode, src_offset);
3318 	      dest_mem = adjust_address (mem, mode, INTVAL (XEXP (slot, 1)));
3319 	      emit_move_insn (dest_mem, src_mem);
3320 	    }
3321 	}
3322 
3323       if (needed_intregs)
3324 	{
3325 	  t =
3326 	    build (PLUS_EXPR, TREE_TYPE (gpr), gpr,
3327 		   build_int_2 (needed_intregs * 8, 0));
3328 	  t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
3329 	  TREE_SIDE_EFFECTS (t) = 1;
3330 	  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3331 	}
3332       if (needed_sseregs)
3333 	{
3334 	  t =
3335 	    build (PLUS_EXPR, TREE_TYPE (fpr), fpr,
3336 		   build_int_2 (needed_sseregs * 16, 0));
3337 	  t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
3338 	  TREE_SIDE_EFFECTS (t) = 1;
3339 	  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3340 	}
3341 
3342       emit_jump_insn (gen_jump (lab_over));
3343       emit_barrier ();
3344       emit_label (lab_false);
3345     }
3346 
3347   /* ... otherwise out of the overflow area.  */
3348 
3349   /* Care for on-stack alignment if needed.  */
3350   if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64)
3351     t = ovf;
3352   else
3353     {
3354       HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
3355       t = build (PLUS_EXPR, TREE_TYPE (ovf), ovf, build_int_2 (align - 1, 0));
3356       t = build (BIT_AND_EXPR, TREE_TYPE (t), t, build_int_2 (-align, -1));
3357     }
3358   t = save_expr (t);
3359 
3360   r = expand_expr (t, addr_rtx, Pmode, EXPAND_NORMAL);
3361   if (r != addr_rtx)
3362     emit_move_insn (addr_rtx, r);
3363 
3364   t =
3365     build (PLUS_EXPR, TREE_TYPE (t), t,
3366 	   build_int_2 (rsize * UNITS_PER_WORD, 0));
3367   t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
3368   TREE_SIDE_EFFECTS (t) = 1;
3369   expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3370 
3371   if (container)
3372     emit_label (lab_over);
3373 
3374   if (indirect_p)
3375     {
3376       r = gen_rtx_MEM (Pmode, addr_rtx);
3377       set_mem_alias_set (r, get_varargs_alias_set ());
3378       emit_move_insn (addr_rtx, r);
3379     }
3380 
3381   return addr_rtx;
3382 }
3383 
3384 /* Return nonzero if OP is either an i387 or SSE fp register.  */
3385 int
3386 any_fp_register_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3387 {
3388   return ANY_FP_REG_P (op);
3389 }
3390 
3391 /* Return nonzero if OP is an i387 fp register.  */
3392 int
3393 fp_register_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3394 {
3395   return FP_REG_P (op);
3396 }
3397 
3398 /* Return nonzero if OP is a non-fp register_operand.  */
3399 int
3400 register_and_not_any_fp_reg_operand (rtx op, enum machine_mode mode)
3401 {
3402   return register_operand (op, mode) && !ANY_FP_REG_P (op);
3403 }
3404 
3405 /* Return nonzero if OP is a register operand other than an
3406    i387 fp register.  */
3407 int
3408 register_and_not_fp_reg_operand (rtx op, enum machine_mode mode)
3409 {
3410   return register_operand (op, mode) && !FP_REG_P (op);
3411 }
3412 
3413 /* Return nonzero if OP is general operand representable on x86_64.  */
3414 
3415 int
3416 x86_64_general_operand (rtx op, enum machine_mode mode)
3417 {
3418   if (!TARGET_64BIT)
3419     return general_operand (op, mode);
3420   if (nonimmediate_operand (op, mode))
3421     return 1;
3422   return x86_64_sign_extended_value (op);
3423 }
3424 
3425 /* Return nonzero if OP is general operand representable on x86_64
3426    as either sign extended or zero extended constant.  */
3427 
3428 int
3429 x86_64_szext_general_operand (rtx op, enum machine_mode mode)
3430 {
3431   if (!TARGET_64BIT)
3432     return general_operand (op, mode);
3433   if (nonimmediate_operand (op, mode))
3434     return 1;
3435   return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
3436 }
3437 
3438 /* Return nonzero if OP is nonmemory operand representable on x86_64.  */
3439 
3440 int
3441 x86_64_nonmemory_operand (rtx op, enum machine_mode mode)
3442 {
3443   if (!TARGET_64BIT)
3444     return nonmemory_operand (op, mode);
3445   if (register_operand (op, mode))
3446     return 1;
3447   return x86_64_sign_extended_value (op);
3448 }
3449 
3450 /* Return nonzero if OP is nonmemory operand acceptable by movabs patterns.  */
3451 
3452 int
3453 x86_64_movabs_operand (rtx op, enum machine_mode mode)
3454 {
3455   if (!TARGET_64BIT || !flag_pic)
3456     return nonmemory_operand (op, mode);
3457   if (register_operand (op, mode) || x86_64_sign_extended_value (op))
3458     return 1;
3459   if (CONSTANT_P (op) && !symbolic_reference_mentioned_p (op))
3460     return 1;
3461   return 0;
3462 }
3463 
3464 /* Return nonzero if OPNUM's MEM should be matched
3465    in movabs* patterns.  */
3466 
3467 int
3468 ix86_check_movabs (rtx insn, int opnum)
3469 {
3470   rtx set, mem;
3471 
3472   set = PATTERN (insn);
3473   if (GET_CODE (set) == PARALLEL)
3474     set = XVECEXP (set, 0, 0);
3475   if (GET_CODE (set) != SET)
3476     abort ();
3477   mem = XEXP (set, opnum);
3478   while (GET_CODE (mem) == SUBREG)
3479     mem = SUBREG_REG (mem);
3480   if (GET_CODE (mem) != MEM)
3481     abort ();
3482   return (volatile_ok || !MEM_VOLATILE_P (mem));
3483 }
3484 
3485 /* Return nonzero if OP is nonmemory operand representable on x86_64.  */
3486 
3487 int
3488 x86_64_szext_nonmemory_operand (rtx op, enum machine_mode mode)
3489 {
3490   if (!TARGET_64BIT)
3491     return nonmemory_operand (op, mode);
3492   if (register_operand (op, mode))
3493     return 1;
3494   return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
3495 }
3496 
3497 /* Return nonzero if OP is immediate operand representable on x86_64.  */
3498 
3499 int
3500 x86_64_immediate_operand (rtx op, enum machine_mode mode)
3501 {
3502   if (!TARGET_64BIT)
3503     return immediate_operand (op, mode);
3504   return x86_64_sign_extended_value (op);
3505 }
3506 
3507 /* Return nonzero if OP is immediate operand representable on x86_64.  */
3508 
3509 int
3510 x86_64_zext_immediate_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3511 {
3512   return x86_64_zero_extended_value (op);
3513 }
3514 
3515 /* Return nonzero if OP is CONST_INT >= 1 and <= 31 (a valid operand
3516    for shift & compare patterns, as shifting by 0 does not change flags),
3517    else return zero.  */
3518 
3519 int
3520 const_int_1_31_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3521 {
3522   return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 1 && INTVAL (op) <= 31);
3523 }
3524 
3525 /* Returns 1 if OP is either a symbol reference or a sum of a symbol
3526    reference and a constant.  */
3527 
3528 int
3529 symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3530 {
3531   switch (GET_CODE (op))
3532     {
3533     case SYMBOL_REF:
3534     case LABEL_REF:
3535       return 1;
3536 
3537     case CONST:
3538       op = XEXP (op, 0);
3539       if (GET_CODE (op) == SYMBOL_REF
3540 	  || GET_CODE (op) == LABEL_REF
3541 	  || (GET_CODE (op) == UNSPEC
3542 	      && (XINT (op, 1) == UNSPEC_GOT
3543 		  || XINT (op, 1) == UNSPEC_GOTOFF
3544 		  || XINT (op, 1) == UNSPEC_GOTPCREL)))
3545 	return 1;
3546       if (GET_CODE (op) != PLUS
3547 	  || GET_CODE (XEXP (op, 1)) != CONST_INT)
3548 	return 0;
3549 
3550       op = XEXP (op, 0);
3551       if (GET_CODE (op) == SYMBOL_REF
3552 	  || GET_CODE (op) == LABEL_REF)
3553 	return 1;
3554       /* Only @GOTOFF gets offsets.  */
3555       if (GET_CODE (op) != UNSPEC
3556 	  || XINT (op, 1) != UNSPEC_GOTOFF)
3557 	return 0;
3558 
3559       op = XVECEXP (op, 0, 0);
3560       if (GET_CODE (op) == SYMBOL_REF
3561 	  || GET_CODE (op) == LABEL_REF)
3562 	return 1;
3563       return 0;
3564 
3565     default:
3566       return 0;
3567     }
3568 }
3569 
3570 /* Return true if the operand contains a @GOT or @GOTOFF reference.  */
3571 
3572 int
3573 pic_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3574 {
3575   if (GET_CODE (op) != CONST)
3576     return 0;
3577   op = XEXP (op, 0);
3578   if (TARGET_64BIT)
3579     {
3580       if (GET_CODE (op) == UNSPEC
3581 	  && XINT (op, 1) == UNSPEC_GOTPCREL)
3582 	return 1;
3583       if (GET_CODE (op) == PLUS
3584 	  && GET_CODE (XEXP (op, 0)) == UNSPEC
3585 	  && XINT (XEXP (op, 0), 1) == UNSPEC_GOTPCREL)
3586 	return 1;
3587     }
3588   else
3589     {
3590       if (GET_CODE (op) == UNSPEC)
3591 	return 1;
3592       if (GET_CODE (op) != PLUS
3593 	  || GET_CODE (XEXP (op, 1)) != CONST_INT)
3594 	return 0;
3595       op = XEXP (op, 0);
3596       if (GET_CODE (op) == UNSPEC)
3597 	return 1;
3598     }
3599   return 0;
3600 }
3601 
3602 /* Return true if OP is a symbolic operand that resolves locally.  */
3603 
3604 static int
3605 local_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3606 {
3607   if (GET_CODE (op) == CONST
3608       && GET_CODE (XEXP (op, 0)) == PLUS
3609       && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
3610     op = XEXP (XEXP (op, 0), 0);
3611 
3612   if (GET_CODE (op) == LABEL_REF)
3613     return 1;
3614 
3615   if (GET_CODE (op) != SYMBOL_REF)
3616     return 0;
3617 
3618   if (SYMBOL_REF_LOCAL_P (op))
3619     return 1;
3620 
3621   /* There is, however, a not insubstantial body of code in the rest of
3622      the compiler that assumes it can just stick the results of
3623      ASM_GENERATE_INTERNAL_LABEL in a symbol_ref and have done.  */
3624   /* ??? This is a hack.  Should update the body of the compiler to
3625      always create a DECL and invoke targetm.encode_section_info.  */
3626   if (strncmp (XSTR (op, 0), internal_label_prefix,
3627 	       internal_label_prefix_len) == 0)
3628     return 1;
3629 
3630   return 0;
3631 }
3632 
3633 /* Test for various thread-local symbols.  */
3634 
3635 int
3636 tls_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3637 {
3638   if (GET_CODE (op) != SYMBOL_REF)
3639     return 0;
3640   return SYMBOL_REF_TLS_MODEL (op);
3641 }
3642 
3643 static inline int
3644 tls_symbolic_operand_1 (rtx op, enum tls_model kind)
3645 {
3646   if (GET_CODE (op) != SYMBOL_REF)
3647     return 0;
3648   return SYMBOL_REF_TLS_MODEL (op) == kind;
3649 }
3650 
3651 int
3652 global_dynamic_symbolic_operand (rtx op,
3653 				 enum machine_mode mode ATTRIBUTE_UNUSED)
3654 {
3655   return tls_symbolic_operand_1 (op, TLS_MODEL_GLOBAL_DYNAMIC);
3656 }
3657 
3658 int
3659 local_dynamic_symbolic_operand (rtx op,
3660 				enum machine_mode mode ATTRIBUTE_UNUSED)
3661 {
3662   return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_DYNAMIC);
3663 }
3664 
3665 int
3666 initial_exec_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3667 {
3668   return tls_symbolic_operand_1 (op, TLS_MODEL_INITIAL_EXEC);
3669 }
3670 
3671 int
3672 local_exec_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3673 {
3674   return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_EXEC);
3675 }
3676 
3677 /* Test for a valid operand for a call instruction.  Don't allow the
3678    arg pointer register or virtual regs since they may decay into
3679    reg + const, which the patterns can't handle.  */
3680 
3681 int
3682 call_insn_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3683 {
3684   /* Disallow indirect through a virtual register.  This leads to
3685      compiler aborts when trying to eliminate them.  */
3686   if (GET_CODE (op) == REG
3687       && (op == arg_pointer_rtx
3688 	  || op == frame_pointer_rtx
3689 	  || (REGNO (op) >= FIRST_PSEUDO_REGISTER
3690 	      && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
3691     return 0;
3692 
3693   /* Disallow `call 1234'.  Due to varying assembler lameness this
3694      gets either rejected or translated to `call .+1234'.  */
3695   if (GET_CODE (op) == CONST_INT)
3696     return 0;
3697 
3698   /* Explicitly allow SYMBOL_REF even if pic.  */
3699   if (GET_CODE (op) == SYMBOL_REF)
3700     return 1;
3701 
3702   /* Otherwise we can allow any general_operand in the address.  */
3703   return general_operand (op, Pmode);
3704 }
3705 
3706 /* Test for a valid operand for a call instruction.  Don't allow the
3707    arg pointer register or virtual regs since they may decay into
3708    reg + const, which the patterns can't handle.  */
3709 
3710 int
3711 sibcall_insn_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3712 {
3713   /* Disallow indirect through a virtual register.  This leads to
3714      compiler aborts when trying to eliminate them.  */
3715   if (GET_CODE (op) == REG
3716       && (op == arg_pointer_rtx
3717 	  || op == frame_pointer_rtx
3718 	  || (REGNO (op) >= FIRST_PSEUDO_REGISTER
3719 	      && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
3720     return 0;
3721 
3722   /* Explicitly allow SYMBOL_REF even if pic.  */
3723   if (GET_CODE (op) == SYMBOL_REF)
3724     return 1;
3725 
3726   /* Otherwise we can only allow register operands.  */
3727   return register_operand (op, Pmode);
3728 }
3729 
3730 int
3731 constant_call_address_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3732 {
3733   if (GET_CODE (op) == CONST
3734       && GET_CODE (XEXP (op, 0)) == PLUS
3735       && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
3736     op = XEXP (XEXP (op, 0), 0);
3737   return GET_CODE (op) == SYMBOL_REF;
3738 }
3739 
3740 /* Match exactly zero and one.  */
3741 
3742 int
3743 const0_operand (rtx op, enum machine_mode mode)
3744 {
3745   return op == CONST0_RTX (mode);
3746 }
3747 
3748 int
3749 const1_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3750 {
3751   return op == const1_rtx;
3752 }
3753 
3754 /* Match 2, 4, or 8.  Used for leal multiplicands.  */
3755 
3756 int
3757 const248_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3758 {
3759   return (GET_CODE (op) == CONST_INT
3760 	  && (INTVAL (op) == 2 || INTVAL (op) == 4 || INTVAL (op) == 8));
3761 }
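
/* Hardware addressing modes accept index scale factors of 1, 2, 4 and 8, so
   a multiplication by 2, 4 or 8 can be folded into an lea such as
   "leal (%eax,%ebx,4), %ecx".  */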
3762 
3763 int
3764 const_0_to_3_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3765 {
3766   return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 4);
3767 }
3768 
3769 int
3770 const_0_to_7_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3771 {
3772   return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 8);
3773 }
3774 
3775 int
3776 const_0_to_15_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3777 {
3778   return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 16);
3779 }
3780 
3781 int
3782 const_0_to_255_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3783 {
3784   return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 256);
3785 }
3786 
3787 
3788 /* True if this is a constant appropriate for an increment or decrement.  */
3789 
3790 int
3791 incdec_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3792 {
3793   /* On Pentium4, the inc and dec operations cause an extra dependency on the
3794      flags register, since the carry flag is not set.  */
3795   if (TARGET_PENTIUM4 && !optimize_size)
3796     return 0;
3797   return op == const1_rtx || op == constm1_rtx;
3798 }
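
/* In practice this means that on Pentium 4 (unless optimizing for size)
   "addl $1, %eax" is emitted instead of "incl %eax", because inc/dec leave
   the carry flag untouched and therefore depend on the previous flags
   value.  */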
3799 
3800 /* Return nonzero if OP is acceptable as operand of DImode shift
3801    expander.  */
3802 
3803 int
3804 shiftdi_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3805 {
3806   if (TARGET_64BIT)
3807     return nonimmediate_operand (op, mode);
3808   else
3809     return register_operand (op, mode);
3810 }
3811 
3812 /* Return false if this is the stack pointer, or any other fake
3813    register eliminable to the stack pointer.  Otherwise, this is
3814    a register operand.
3815 
3816    This is used to prevent esp from being used as an index reg,
3817    which would only happen in pathological cases.  */
3818 
3819 int
3820 reg_no_sp_operand (rtx op, enum machine_mode mode)
3821 {
3822   rtx t = op;
3823   if (GET_CODE (t) == SUBREG)
3824     t = SUBREG_REG (t);
3825   if (t == stack_pointer_rtx || t == arg_pointer_rtx || t == frame_pointer_rtx)
3826     return 0;
3827 
3828   return register_operand (op, mode);
3829 }
3830 
3831 int
3832 mmx_reg_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3833 {
3834   return MMX_REG_P (op);
3835 }
3836 
3837 /* Return false if this is any eliminable register.  Otherwise
3838    general_operand.  */
3839 
3840 int
3841 general_no_elim_operand (rtx op, enum machine_mode mode)
3842 {
3843   rtx t = op;
3844   if (GET_CODE (t) == SUBREG)
3845     t = SUBREG_REG (t);
3846   if (t == arg_pointer_rtx || t == frame_pointer_rtx
3847       || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3848       || t == virtual_stack_dynamic_rtx)
3849     return 0;
3850   if (REG_P (t)
3851       && REGNO (t) >= FIRST_VIRTUAL_REGISTER
3852       && REGNO (t) <= LAST_VIRTUAL_REGISTER)
3853     return 0;
3854 
3855   return general_operand (op, mode);
3856 }
3857 
3858 /* Return false if this is any eliminable register.  Otherwise
3859    register_operand or const_int.  */
3860 
3861 int
3862 nonmemory_no_elim_operand (rtx op, enum machine_mode mode)
3863 {
3864   rtx t = op;
3865   if (GET_CODE (t) == SUBREG)
3866     t = SUBREG_REG (t);
3867   if (t == arg_pointer_rtx || t == frame_pointer_rtx
3868       || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3869       || t == virtual_stack_dynamic_rtx)
3870     return 0;
3871 
3872   return GET_CODE (op) == CONST_INT || register_operand (op, mode);
3873 }
3874 
3875 /* Return false if this is any eliminable register or stack register,
3876    otherwise work like register_operand.  */
3877 
3878 int
3879 index_register_operand (rtx op, enum machine_mode mode)
3880 {
3881   rtx t = op;
3882   if (GET_CODE (t) == SUBREG)
3883     t = SUBREG_REG (t);
3884   if (!REG_P (t))
3885     return 0;
3886   if (t == arg_pointer_rtx
3887       || t == frame_pointer_rtx
3888       || t == virtual_incoming_args_rtx
3889       || t == virtual_stack_vars_rtx
3890       || t == virtual_stack_dynamic_rtx
3891       || REGNO (t) == STACK_POINTER_REGNUM)
3892     return 0;
3893 
3894   return general_operand (op, mode);
3895 }
3896 
3897 /* Return true if op is a Q_REGS class register.  */
3898 
3899 int
3900 q_regs_operand (rtx op, enum machine_mode mode)
3901 {
3902   if (mode != VOIDmode && GET_MODE (op) != mode)
3903     return 0;
3904   if (GET_CODE (op) == SUBREG)
3905     op = SUBREG_REG (op);
3906   return ANY_QI_REG_P (op);
3907 }
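
/* In 32-bit mode these are %eax, %ebx, %ecx and %edx, the only registers
   whose low byte can be addressed directly; in 64-bit mode any general
   register qualifies.  */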
3908 
3909 /* Return true if op is a flags register.  */
3910 
3911 int
3912 flags_reg_operand (rtx op, enum machine_mode mode)
3913 {
3914   if (mode != VOIDmode && GET_MODE (op) != mode)
3915     return 0;
3916   return REG_P (op) && REGNO (op) == FLAGS_REG && GET_MODE (op) != VOIDmode;
3917 }
3918 
3919 /* Return true if op is a NON_Q_REGS class register.  */
3920 
3921 int
3922 non_q_regs_operand (rtx op, enum machine_mode mode)
3923 {
3924   if (mode != VOIDmode && GET_MODE (op) != mode)
3925     return 0;
3926   if (GET_CODE (op) == SUBREG)
3927     op = SUBREG_REG (op);
3928   return NON_QI_REG_P (op);
3929 }
3930 
3931 int
3932 zero_extended_scalar_load_operand (rtx op,
3933 				   enum machine_mode mode ATTRIBUTE_UNUSED)
3934 {
3935   unsigned n_elts;
3936   if (GET_CODE (op) != MEM)
3937     return 0;
3938   op = maybe_get_pool_constant (op);
3939   if (!op)
3940     return 0;
3941   if (GET_CODE (op) != CONST_VECTOR)
3942     return 0;
3943   n_elts =
3944     (GET_MODE_SIZE (GET_MODE (op)) /
3945      GET_MODE_SIZE (GET_MODE_INNER (GET_MODE (op))));
3946   for (n_elts--; n_elts > 0; n_elts--)
3947     {
3948       rtx elt = CONST_VECTOR_ELT (op, n_elts);
3949       if (elt != CONST0_RTX (GET_MODE_INNER (GET_MODE (op))))
3950 	return 0;
3951     }
3952   return 1;
3953 }
3954 
3955 /*  Return 1 when OP is an operand acceptable for a standard SSE move.  */
3956 int
3957 vector_move_operand (rtx op, enum machine_mode mode)
3958 {
3959   if (nonimmediate_operand (op, mode))
3960     return 1;
3961   if (GET_MODE (op) != mode && mode != VOIDmode)
3962     return 0;
3963   return (op == CONST0_RTX (GET_MODE (op)));
3964 }
3965 
3966 /* Return true if op is a valid address, and does not contain
3967    a segment override.  */
3968 
3969 int
3970 no_seg_address_operand (rtx op, enum machine_mode mode)
3971 {
3972   struct ix86_address parts;
3973 
3974   if (! address_operand (op, mode))
3975     return 0;
3976 
3977   if (! ix86_decompose_address (op, &parts))
3978     abort ();
3979 
3980   return parts.seg == SEG_DEFAULT;
3981 }
3982 
3983 /* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS
3984    insns.  */
3985 int
3986 sse_comparison_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3987 {
3988   enum rtx_code code = GET_CODE (op);
3989   switch (code)
3990     {
3991     /* Operations supported directly.  */
3992     case EQ:
3993     case LT:
3994     case LE:
3995     case UNORDERED:
3996     case NE:
3997     case UNGE:
3998     case UNGT:
3999     case ORDERED:
4000       return 1;
4001     /* These are equivalent to ones above in non-IEEE comparisons.  */
4002     case UNEQ:
4003     case UNLT:
4004     case UNLE:
4005     case LTGT:
4006     case GE:
4007     case GT:
4008       return !TARGET_IEEE_FP;
4009     default:
4010       return 0;
4011     }
4012 }
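
/* The eight codes accepted unconditionally above correspond to the eight
   immediate predicates of cmpps/cmpss (eq, lt, le, unord, neq, nlt, nle and
   ord); the remaining codes are only equivalent to one of those when IEEE
   NaN semantics may be ignored.  */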
4013 /* Return 1 if OP is a valid comparison operator in valid mode.  */
4014 int
4015 ix86_comparison_operator (rtx op, enum machine_mode mode)
4016 {
4017   enum machine_mode inmode;
4018   enum rtx_code code = GET_CODE (op);
4019   if (mode != VOIDmode && GET_MODE (op) != mode)
4020     return 0;
4021   if (GET_RTX_CLASS (code) != '<')
4022     return 0;
4023   inmode = GET_MODE (XEXP (op, 0));
4024 
4025   if (inmode == CCFPmode || inmode == CCFPUmode)
4026     {
4027       enum rtx_code second_code, bypass_code;
4028       ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
4029       return (bypass_code == NIL && second_code == NIL);
4030     }
4031   switch (code)
4032     {
4033     case EQ: case NE:
4034       return 1;
4035     case LT: case GE:
4036       if (inmode == CCmode || inmode == CCGCmode
4037 	  || inmode == CCGOCmode || inmode == CCNOmode)
4038 	return 1;
4039       return 0;
4040     case LTU: case GTU: case LEU: case ORDERED: case UNORDERED: case GEU:
4041       if (inmode == CCmode)
4042 	return 1;
4043       return 0;
4044     case GT: case LE:
4045       if (inmode == CCmode || inmode == CCGCmode || inmode == CCNOmode)
4046 	return 1;
4047       return 0;
4048     default:
4049       return 0;
4050     }
4051 }
4052 
4053 /* Return 1 if OP is a valid comparison operator testing whether the carry
4054    flag is set.  */
4055 int
4056 ix86_carry_flag_operator (rtx op, enum machine_mode mode)
4057 {
4058   enum machine_mode inmode;
4059   enum rtx_code code = GET_CODE (op);
4060 
4061   if (mode != VOIDmode && GET_MODE (op) != mode)
4062     return 0;
4063   if (GET_RTX_CLASS (code) != '<')
4064     return 0;
4065   inmode = GET_MODE (XEXP (op, 0));
4066   if (GET_CODE (XEXP (op, 0)) != REG
4067       || REGNO (XEXP (op, 0)) != 17
4068       || XEXP (op, 1) != const0_rtx)
4069     return 0;
4070 
4071   if (inmode == CCFPmode || inmode == CCFPUmode)
4072     {
4073       enum rtx_code second_code, bypass_code;
4074 
4075       ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
4076       if (bypass_code != NIL || second_code != NIL)
4077 	return 0;
4078       code = ix86_fp_compare_code_to_integer (code);
4079     }
4080   else if (inmode != CCmode)
4081     return 0;
4082   return code == LTU;
4083 }
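
/* In other words, the operator must have the shape
   (ltu (reg flags) (const_int 0)) once any FP comparison code has been
   mapped to an integer one; this is the form used by the add-with-carry and
   sbb patterns to consume a set carry flag.  */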
4084 
4085 /* Return 1 if OP is a comparison operator that can be issued by fcmov.  */
4086 
4087 int
4088 fcmov_comparison_operator (rtx op, enum machine_mode mode)
4089 {
4090   enum machine_mode inmode;
4091   enum rtx_code code = GET_CODE (op);
4092 
4093   if (mode != VOIDmode && GET_MODE (op) != mode)
4094     return 0;
4095   if (GET_RTX_CLASS (code) != '<')
4096     return 0;
4097   inmode = GET_MODE (XEXP (op, 0));
4098   if (inmode == CCFPmode || inmode == CCFPUmode)
4099     {
4100       enum rtx_code second_code, bypass_code;
4101 
4102       ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
4103       if (bypass_code != NIL || second_code != NIL)
4104 	return 0;
4105       code = ix86_fp_compare_code_to_integer (code);
4106     }
4107   /* The i387 supports just a limited set of condition codes.  */
4108   switch (code)
4109     {
4110     case LTU: case GTU: case LEU: case GEU:
4111       if (inmode == CCmode || inmode == CCFPmode || inmode == CCFPUmode)
4112 	return 1;
4113       return 0;
4114     case ORDERED: case UNORDERED:
4115     case EQ: case NE:
4116       return 1;
4117     default:
4118       return 0;
4119     }
4120 }
4121 
4122 /* Return 1 if OP is a binary operator that can be promoted to wider mode.  */
4123 
4124 int
4125 promotable_binary_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
4126 {
4127   switch (GET_CODE (op))
4128     {
4129     case MULT:
4130       /* Modern CPUs have the same latency for HImode and SImode multiplies,
4131          but the 386 and 486 do HImode multiplies faster.  */
4132       return ix86_tune > PROCESSOR_I486;
4133     case PLUS:
4134     case AND:
4135     case IOR:
4136     case XOR:
4137     case ASHIFT:
4138       return 1;
4139     default:
4140       return 0;
4141     }
4142 }
4143 
4144 /* Nearly general operand, but accept any const_double, since we wish
4145    to be able to drop them into memory rather than have them get pulled
4146    into registers.  */
4147 
4148 int
4149 cmp_fp_expander_operand (rtx op, enum machine_mode mode)
4150 {
4151   if (mode != VOIDmode && mode != GET_MODE (op))
4152     return 0;
4153   if (GET_CODE (op) == CONST_DOUBLE)
4154     return 1;
4155   return general_operand (op, mode);
4156 }
4157 
4158 /* Match an SI or HImode register for a zero_extract.  */
4159 
4160 int
4161 ext_register_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
4162 {
4163   int regno;
4164   if ((!TARGET_64BIT || GET_MODE (op) != DImode)
4165       && GET_MODE (op) != SImode && GET_MODE (op) != HImode)
4166     return 0;
4167 
4168   if (!register_operand (op, VOIDmode))
4169     return 0;
4170 
4171   /* Be careful to accept only registers having upper parts.  */
4172   regno = REG_P (op) ? REGNO (op) : REGNO (SUBREG_REG (op));
4173   return (regno > LAST_VIRTUAL_REGISTER || regno < 4);
4174 }
4175 
4176 /* Return 1 if this is a valid binary floating-point operation.
4177    OP is the expression matched, and MODE is its mode.  */
4178 
4179 int
4180 binary_fp_operator (rtx op, enum machine_mode mode)
4181 {
4182   if (mode != VOIDmode && mode != GET_MODE (op))
4183     return 0;
4184 
4185   switch (GET_CODE (op))
4186     {
4187     case PLUS:
4188     case MINUS:
4189     case MULT:
4190     case DIV:
4191       return GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT;
4192 
4193     default:
4194       return 0;
4195     }
4196 }
4197 
4198 int
4199 mult_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
4200 {
4201   return GET_CODE (op) == MULT;
4202 }
4203 
4204 int
4205 div_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
4206 {
4207   return GET_CODE (op) == DIV;
4208 }
4209 
4210 int
4211 arith_or_logical_operator (rtx op, enum machine_mode mode)
4212 {
4213   return ((mode == VOIDmode || GET_MODE (op) == mode)
4214           && (GET_RTX_CLASS (GET_CODE (op)) == 'c'
4215               || GET_RTX_CLASS (GET_CODE (op)) == '2'));
4216 }
4217 
4218 /* Returns 1 if OP is a memory operand with a displacement.  */
4219 
4220 int
4221 memory_displacement_operand (rtx op, enum machine_mode mode)
4222 {
4223   struct ix86_address parts;
4224 
4225   if (! memory_operand (op, mode))
4226     return 0;
4227 
4228   if (! ix86_decompose_address (XEXP (op, 0), &parts))
4229     abort ();
4230 
4231   return parts.disp != NULL_RTX;
4232 }
4233 
4234 /* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
4235    re-recognize the operand to avoid a copy_to_mode_reg that will fail.
4236 
4237    ??? It seems likely that this will only work because cmpsi is an
4238    expander, and no actual insns use this.  */
4239 
4240 int
4241 cmpsi_operand (rtx op, enum machine_mode mode)
4242 {
4243   if (nonimmediate_operand (op, mode))
4244     return 1;
4245 
4246   if (GET_CODE (op) == AND
4247       && GET_MODE (op) == SImode
4248       && GET_CODE (XEXP (op, 0)) == ZERO_EXTRACT
4249       && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
4250       && GET_CODE (XEXP (XEXP (op, 0), 2)) == CONST_INT
4251       && INTVAL (XEXP (XEXP (op, 0), 1)) == 8
4252       && INTVAL (XEXP (XEXP (op, 0), 2)) == 8
4253       && GET_CODE (XEXP (op, 1)) == CONST_INT)
4254     return 1;
4255 
4256   return 0;
4257 }
4258 
4259 /* Returns 1 if OP is a memory operand that cannot be represented by the
4260    modRM array.  */
4261 
4262 int
4263 long_memory_operand (rtx op, enum machine_mode mode)
4264 {
4265   if (! memory_operand (op, mode))
4266     return 0;
4267 
4268   return memory_address_length (op) != 0;
4269 }
4270 
4271 /* Return nonzero if the rtx is known to be aligned.  */
4272 
4273 int
4274 aligned_operand (rtx op, enum machine_mode mode)
4275 {
4276   struct ix86_address parts;
4277 
4278   if (!general_operand (op, mode))
4279     return 0;
4280 
4281   /* Registers and immediate operands are always "aligned".  */
4282   if (GET_CODE (op) != MEM)
4283     return 1;
4284 
4285   /* Don't even try to do any aligned optimizations with volatiles.  */
4286   if (MEM_VOLATILE_P (op))
4287     return 0;
4288 
4289   op = XEXP (op, 0);
4290 
4291   /* Pushes and pops are only valid on the stack pointer.  */
4292   if (GET_CODE (op) == PRE_DEC
4293       || GET_CODE (op) == POST_INC)
4294     return 1;
4295 
4296   /* Decode the address.  */
4297   if (! ix86_decompose_address (op, &parts))
4298     abort ();
4299 
4300   /* Look for some component that isn't known to be aligned.  */
4301   if (parts.index)
4302     {
4303       if (parts.scale < 4
4304 	  && REGNO_POINTER_ALIGN (REGNO (parts.index)) < 32)
4305 	return 0;
4306     }
4307   if (parts.base)
4308     {
4309       if (REGNO_POINTER_ALIGN (REGNO (parts.base)) < 32)
4310 	return 0;
4311     }
4312   if (parts.disp)
4313     {
4314       if (GET_CODE (parts.disp) != CONST_INT
4315 	  || (INTVAL (parts.disp) & 3) != 0)
4316 	return 0;
4317     }
4318 
4319   /* Didn't find one -- this must be an aligned address.  */
4320   return 1;
4321 }
4322 
4323 /* Initialize the table of extra 80387 mathematical constants.  */
4324 
4325 static void
4326 init_ext_80387_constants (void)
4327 {
4328   static const char * cst[5] =
4329   {
4330     "0.3010299956639811952256464283594894482",  /* 0: fldlg2  */
4331     "0.6931471805599453094286904741849753009",  /* 1: fldln2  */
4332     "1.4426950408889634073876517827983434472",  /* 2: fldl2e  */
4333     "3.3219280948873623478083405569094566090",  /* 3: fldl2t  */
4334     "3.1415926535897932385128089594061862044",  /* 4: fldpi   */
4335   };
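  /* For reference, these strings are the decimal expansions of log10(2),
     ln(2), log2(e), log2(10) and pi -- the values loaded by the fld*
     opcodes named in the comments above.  */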
4336   int i;
4337 
4338   for (i = 0; i < 5; i++)
4339     {
4340       real_from_string (&ext_80387_constants_table[i], cst[i]);
4341       /* Ensure each constant is rounded to XFmode precision.  */
4342       real_convert (&ext_80387_constants_table[i],
4343 		    XFmode, &ext_80387_constants_table[i]);
4344     }
4345 
4346   ext_80387_constants_init = 1;
4347 }
4348 
4349 /* Return true if the constant is something that can be loaded with
4350    a special instruction.  */
4351 
4352 int
4353 standard_80387_constant_p (rtx x)
4354 {
4355   if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
4356     return -1;
4357 
4358   if (x == CONST0_RTX (GET_MODE (x)))
4359     return 1;
4360   if (x == CONST1_RTX (GET_MODE (x)))
4361     return 2;
4362 
4363   /* For XFmode constants, try to find a special 80387 instruction on
4364      those CPUs that benefit from them.  */
4365   if (GET_MODE (x) == XFmode
4366       && x86_ext_80387_constants & TUNEMASK)
4367     {
4368       REAL_VALUE_TYPE r;
4369       int i;
4370 
4371       if (! ext_80387_constants_init)
4372 	init_ext_80387_constants ();
4373 
4374       REAL_VALUE_FROM_CONST_DOUBLE (r, x);
4375       for (i = 0; i < 5; i++)
4376         if (real_identical (&r, &ext_80387_constants_table[i]))
4377 	  return i + 3;
4378     }
4379 
4380   return 0;
4381 }
4382 
4383 /* Return the opcode of the special instruction to be used to load
4384    the constant X.  */
4385 
4386 const char *
4387 standard_80387_constant_opcode (rtx x)
4388 {
4389   switch (standard_80387_constant_p (x))
4390     {
4391     case 1:
4392       return "fldz";
4393     case 2:
4394       return "fld1";
4395     case 3:
4396       return "fldlg2";
4397     case 4:
4398       return "fldln2";
4399     case 5:
4400       return "fldl2e";
4401     case 6:
4402       return "fldl2t";
4403     case 7:
4404       return "fldpi";
4405     }
4406   abort ();
4407 }
4408 
4409 /* Return the CONST_DOUBLE representing the 80387 constant that is
4410    loaded by the specified special instruction.  The argument IDX
4411    matches the return value from standard_80387_constant_p.  */
4412 
4413 rtx
4414 standard_80387_constant_rtx (int idx)
4415 {
4416   int i;
4417 
4418   if (! ext_80387_constants_init)
4419     init_ext_80387_constants ();
4420 
4421   switch (idx)
4422     {
4423     case 3:
4424     case 4:
4425     case 5:
4426     case 6:
4427     case 7:
4428       i = idx - 3;
4429       break;
4430 
4431     default:
4432       abort ();
4433     }
4434 
4435   return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
4436 				       XFmode);
4437 }
4438 
4439 /* Return 1 if X is an FP constant that we can load into an SSE register
4440    without using memory.  */
4441 int
4442 standard_sse_constant_p (rtx x)
4443 {
4444   if (x == const0_rtx)
4445     return 1;
4446   return (x == CONST0_RTX (GET_MODE (x)));
4447 }
4448 
4449 /* Returns 1 if OP contains a symbol reference */
4450 
4451 int
4452 symbolic_reference_mentioned_p (rtx op)
4453 {
4454   const char *fmt;
4455   int i;
4456 
4457   if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
4458     return 1;
4459 
4460   fmt = GET_RTX_FORMAT (GET_CODE (op));
4461   for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
4462     {
4463       if (fmt[i] == 'E')
4464 	{
4465 	  int j;
4466 
4467 	  for (j = XVECLEN (op, i) - 1; j >= 0; j--)
4468 	    if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
4469 	      return 1;
4470 	}
4471 
4472       else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
4473 	return 1;
4474     }
4475 
4476   return 0;
4477 }
4478 
4479 /* Return 1 if it is appropriate to emit `ret' instructions in the
4480    body of a function.  Do this only if the epilogue is simple, needing a
4481    couple of insns.  Prior to reloading, we can't tell how many registers
4482    must be saved, so return 0 then.  Return 0 if there is no frame
4483    marker to de-allocate.
4484 
4485    If NON_SAVING_SETJMP is defined and true, then it is not possible
4486    for the epilogue to be simple, so return 0.  This is a special case
4487    since NON_SAVING_SETJMP will not cause regs_ever_live to change
4488    until final, but jump_optimize may need to know sooner if a
4489    `return' is OK.  */
4490 
4491 int
4492 ix86_can_use_return_insn_p (void)
4493 {
4494   struct ix86_frame frame;
4495 
4496 #ifdef NON_SAVING_SETJMP
4497   if (NON_SAVING_SETJMP && current_function_calls_setjmp)
4498     return 0;
4499 #endif
4500 
4501   if (! reload_completed || frame_pointer_needed)
4502     return 0;
4503 
4504   /* Don't allow more than 32768 bytes of popped arguments, since that's
4505      all we can do with one instruction.  */
4506   if (current_function_pops_args
4507       && current_function_args_size >= 32768)
4508     return 0;
4509 
4510   ix86_compute_frame_layout (&frame);
4511   return frame.to_allocate == 0 && frame.nregs == 0;
4512 }
4513 
4514 /* Return 1 if VALUE can be stored in the sign extended immediate field.  */
4515 int
4516 x86_64_sign_extended_value (rtx value)
4517 {
4518   switch (GET_CODE (value))
4519     {
4520       /* CONST_DOUBLEs never match, since HOST_BITS_PER_WIDE_INT is known
4521          to be at least 32 and thus all acceptable constants are
4522 	 represented as CONST_INT.  */
4523       case CONST_INT:
4524 	if (HOST_BITS_PER_WIDE_INT == 32)
4525 	  return 1;
4526 	else
4527 	  {
4528 	    HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (value), DImode);
4529 	    return trunc_int_for_mode (val, SImode) == val;
4530 	  }
4531 	break;
4532 
4533       /* For certain code models, the symbolic references are known to fit.
4534 	 In the CM_SMALL_PIC model we know it fits if it is local to the shared
4535 	 library.  Don't count TLS SYMBOL_REFs here, since they should fit
4536 	 only if inside of UNSPEC handled below.  */
4537       case SYMBOL_REF:
4538 	/* TLS symbols are not constant.  */
4539 	if (tls_symbolic_operand (value, Pmode))
4540 	  return false;
4541 	return (ix86_cmodel == CM_SMALL || ix86_cmodel == CM_KERNEL);
4542 
4543       /* For certain code models, the code is near as well.  */
4544       case LABEL_REF:
4545 	return (ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM
4546 		|| ix86_cmodel == CM_KERNEL);
4547 
4548       /* We also may accept the offsetted memory references in certain special
4549          cases.  */
4550       case CONST:
4551 	if (GET_CODE (XEXP (value, 0)) == UNSPEC)
4552 	  switch (XINT (XEXP (value, 0), 1))
4553 	    {
4554 	    case UNSPEC_GOTPCREL:
4555 	    case UNSPEC_DTPOFF:
4556 	    case UNSPEC_GOTNTPOFF:
4557 	    case UNSPEC_NTPOFF:
4558 	      return 1;
4559 	    default:
4560 	      break;
4561 	    }
4562 	if (GET_CODE (XEXP (value, 0)) == PLUS)
4563 	  {
4564 	    rtx op1 = XEXP (XEXP (value, 0), 0);
4565 	    rtx op2 = XEXP (XEXP (value, 0), 1);
4566 	    HOST_WIDE_INT offset;
4567 
4568 	    if (ix86_cmodel == CM_LARGE)
4569 	      return 0;
4570 	    if (GET_CODE (op2) != CONST_INT)
4571 	      return 0;
4572 	    offset = trunc_int_for_mode (INTVAL (op2), DImode);
4573 	    switch (GET_CODE (op1))
4574 	      {
4575 		case SYMBOL_REF:
4576 		  /* For CM_SMALL assume that the last object is 16MB below
4577 		     the end of the 31-bit boundary.  We may also accept pretty
4578 		     large negative constants knowing that all objects are
4579 		     in the positive half of the address space.  */
4580 		  if (ix86_cmodel == CM_SMALL
4581 		      && offset < 16*1024*1024
4582 		      && trunc_int_for_mode (offset, SImode) == offset)
4583 		    return 1;
4584 		  /* For CM_KERNEL we know that all objects reside in the
4585 		     negative half of the 32-bit address space.  We may not
4586 		     accept negative offsets, since they may take us just out
4587 		     of range, but we may accept pretty large positive ones.  */
4588 		  if (ix86_cmodel == CM_KERNEL
4589 		      && offset > 0
4590 		      && trunc_int_for_mode (offset, SImode) == offset)
4591 		    return 1;
4592 		  break;
4593 		case LABEL_REF:
4594 		  /* These conditions are similar to SYMBOL_REF ones, just the
4595 		     constraints for code models differ.  */
4596 		  if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
4597 		      && offset < 16*1024*1024
4598 		      && trunc_int_for_mode (offset, SImode) == offset)
4599 		    return 1;
4600 		  if (ix86_cmodel == CM_KERNEL
4601 		      && offset > 0
4602 		      && trunc_int_for_mode (offset, SImode) == offset)
4603 		    return 1;
4604 		  break;
4605 		case UNSPEC:
4606 		  switch (XINT (op1, 1))
4607 		    {
4608 		    case UNSPEC_DTPOFF:
4609 		    case UNSPEC_NTPOFF:
4610 		      if (offset > 0
4611 			  && trunc_int_for_mode (offset, SImode) == offset)
4612 			return 1;
4613 		    }
4614 		  break;
4615 		default:
4616 		  return 0;
4617 	      }
4618 	  }
4619 	return 0;
4620       default:
4621 	return 0;
4622     }
4623 }
4624 
4625 /* Return 1 if VALUE can be stored in the zero extended immediate field.  */
4626 int
4627 x86_64_zero_extended_value (rtx value)
4628 {
4629   switch (GET_CODE (value))
4630     {
4631       case CONST_DOUBLE:
4632 	if (HOST_BITS_PER_WIDE_INT == 32)
4633 	  return  (GET_MODE (value) == VOIDmode
4634 		   && !CONST_DOUBLE_HIGH (value));
4635 	else
4636 	  return 0;
4637       case CONST_INT:
4638 	if (HOST_BITS_PER_WIDE_INT == 32)
4639 	  return INTVAL (value) >= 0;
4640 	else
4641 	  return !(INTVAL (value) & ~(HOST_WIDE_INT) 0xffffffff);
4642 	break;
4643 
4644       /* For certain code models, the symbolic references are known to fit.  */
4645       case SYMBOL_REF:
4646 	/* TLS symbols are not constant.  */
4647 	if (tls_symbolic_operand (value, Pmode))
4648 	  return false;
4649 	return ix86_cmodel == CM_SMALL;
4650 
4651       /* For certain code models, the code is near as well.  */
4652       case LABEL_REF:
4653 	return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM;
4654 
4655       /* We may also accept offsetted memory references in certain special
4656          cases.  */
4657       case CONST:
4658 	if (GET_CODE (XEXP (value, 0)) == PLUS)
4659 	  {
4660 	    rtx op1 = XEXP (XEXP (value, 0), 0);
4661 	    rtx op2 = XEXP (XEXP (value, 0), 1);
4662 
4663 	    if (ix86_cmodel == CM_LARGE)
4664 	      return 0;
4665 	    switch (GET_CODE (op1))
4666 	      {
4667 		case SYMBOL_REF:
4668 		    return 0;
4669 		  /* For small code model we may accept pretty large positive
4670 		     offsets, since one bit is available for free.  Negative
4671 		     offsets are limited by the size of NULL pointer area
4672 		     specified by the ABI.  */
4673 		  if (ix86_cmodel == CM_SMALL
4674 		      && GET_CODE (op2) == CONST_INT
4675 		      && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
4676 		      && (trunc_int_for_mode (INTVAL (op2), SImode)
4677 			  == INTVAL (op2)))
4678 		    return 1;
4679 	          /* ??? For the kernel, we may accept adjustment of
4680 		     -0x10000000, since we know that it will just convert
4681 		     negative address space to positive, but perhaps this
4682 		     is not worthwhile.  */
4683 		  break;
4684 		case LABEL_REF:
4685 		  /* These conditions are similar to SYMBOL_REF ones, just the
4686 		     constraints for code models differ.  */
4687 		  if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
4688 		      && GET_CODE (op2) == CONST_INT
4689 		      && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
4690 		      && (trunc_int_for_mode (INTVAL (op2), SImode)
4691 			  == INTVAL (op2)))
4692 		    return 1;
4693 		  break;
4694 		default:
4695 		  return 0;
4696 	      }
4697 	  }
4698 	return 0;
4699       default:
4700 	return 0;
4701     }
4702 }
4703 
4704 /* Value should be nonzero if functions must have frame pointers.
4705    Zero means the frame pointer need not be set up (and parms may
4706    be accessed via the stack pointer) in functions that seem suitable.  */
4707 
4708 int
4709 ix86_frame_pointer_required (void)
4710 {
4711   /* If we accessed previous frames, then the generated code expects
4712      to be able to access the saved ebp value in our frame.  */
4713   if (cfun->machine->accesses_prev_frame)
4714     return 1;
4715 
4716   /* Several x86 OSes need a frame pointer for other reasons,
4717      usually pertaining to setjmp.  */
4718   if (SUBTARGET_FRAME_POINTER_REQUIRED)
4719     return 1;
4720 
4721   /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
4722      the frame pointer by default.  Turn it back on now if we've not
4723      got a leaf function.  */
4724   if (TARGET_OMIT_LEAF_FRAME_POINTER
4725       && (!current_function_is_leaf))
4726     return 1;
4727 
4728   if (current_function_profile)
4729     return 1;
4730 
4731   return 0;
4732 }
4733 
4734 /* Record that the current function accesses previous call frames.  */
4735 
4736 void
4737 ix86_setup_frame_addresses (void)
4738 {
4739   cfun->machine->accesses_prev_frame = 1;
4740 }
4741 
4742 #if defined(HAVE_GAS_HIDDEN) && defined(SUPPORTS_ONE_ONLY)
4743 # define USE_HIDDEN_LINKONCE 1
4744 #else
4745 # define USE_HIDDEN_LINKONCE 0
4746 #endif
4747 
4748 static int pic_labels_used;
4749 
4750 /* Fills in the label name that should be used for a pc thunk for
4751    the given register.  */
4752 
4753 static void
4754 get_pc_thunk_name (char name[32], unsigned int regno)
4755 {
4756   if (USE_HIDDEN_LINKONCE)
4757     sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
4758   else
4759     ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
4760 }
4761 
4762 
4763 /* This function emits, at end of file, the pc thunks used by -fpic: each
4764    one loads its register with the return address of the caller and returns.  */
4765 
4766 void
4767 ix86_file_end (void)
4768 {
4769   rtx xops[2];
4770   int regno;
4771 
4772   for (regno = 0; regno < 8; ++regno)
4773     {
4774       char name[32];
4775 
4776       if (! ((pic_labels_used >> regno) & 1))
4777 	continue;
4778 
4779       get_pc_thunk_name (name, regno);
4780 
4781       if (USE_HIDDEN_LINKONCE)
4782 	{
4783 	  tree decl;
4784 
4785 	  decl = build_decl (FUNCTION_DECL, get_identifier (name),
4786 			     error_mark_node);
4787 	  TREE_PUBLIC (decl) = 1;
4788 	  TREE_STATIC (decl) = 1;
4789 	  DECL_ONE_ONLY (decl) = 1;
4790 
4791 	  (*targetm.asm_out.unique_section) (decl, 0);
4792 	  named_section (decl, NULL, 0);
4793 
4794 	  (*targetm.asm_out.globalize_label) (asm_out_file, name);
4795 	  fputs ("\t.hidden\t", asm_out_file);
4796 	  assemble_name (asm_out_file, name);
4797 	  fputc ('\n', asm_out_file);
4798 	  ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
4799 	}
4800       else
4801 	{
4802 	  text_section ();
4803 	  ASM_OUTPUT_LABEL (asm_out_file, name);
4804 	}
4805 
4806       xops[0] = gen_rtx_REG (SImode, regno);
4807       xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
4808       output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
4809       output_asm_insn ("ret", xops);
4810     }
4811 
4812   if (NEED_INDICATE_EXEC_STACK)
4813     file_end_indicate_exec_stack ();
4814 }
4815 
4816 /* Emit code for the SET_GOT patterns.  */
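/* Two schemes are used below: without deep branch prediction (or without
   PIC at all) the GOT address is computed inline with a call to the next
   instruction followed by a pop; with deep branch prediction we instead
   call the shared pc thunk emitted by ix86_file_end above, which keeps the
   processor's call/return stack balanced.  */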
4817 
4818 const char *
4819 output_set_got (rtx dest)
4820 {
4821   rtx xops[3];
4822 
4823   xops[0] = dest;
4824   xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
4825 
4826   if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
4827     {
4828       xops[2] = gen_rtx_LABEL_REF (Pmode, gen_label_rtx ());
4829 
4830       if (!flag_pic)
4831 	output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
4832       else
4833 	output_asm_insn ("call\t%a2", xops);
4834 
4835 #if TARGET_MACHO
4836       /* Output the "canonical" label name ("Lxx$pb") here too.  This
4837          is what will be referred to by the Mach-O PIC subsystem.  */
4838       ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
4839 #endif
4840       (*targetm.asm_out.internal_label) (asm_out_file, "L",
4841 				 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
4842 
4843       if (flag_pic)
4844 	output_asm_insn ("pop{l}\t%0", xops);
4845     }
4846   else
4847     {
4848       char name[32];
4849       get_pc_thunk_name (name, REGNO (dest));
4850       pic_labels_used |= 1 << REGNO (dest);
4851 
4852       xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
4853       xops[2] = gen_rtx_MEM (QImode, xops[2]);
4854       output_asm_insn ("call\t%X2", xops);
4855     }
4856 
4857   if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
4858     output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
4859   else if (!TARGET_MACHO)
4860     output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %a1+(.-%a2)}", xops);
4861 
4862   return "";
4863 }
4864 
4865 /* Generate a "push" pattern for input ARG.  */
4866 
4867 static rtx
4868 gen_push (rtx arg)
4869 {
4870   return gen_rtx_SET (VOIDmode,
4871 		      gen_rtx_MEM (Pmode,
4872 				   gen_rtx_PRE_DEC (Pmode,
4873 						    stack_pointer_rtx)),
4874 		      arg);
4875 }
4876 
4877 /* Return >= 0 if there is an unused call-clobbered register available
4878    for the entire function.  */
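/* Only the call-clobbered registers %eax, %edx and %ecx (hard regs 0-2) are
   considered, and only in leaf functions that are not being profiled.  */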
4879 
4880 static unsigned int
4881 ix86_select_alt_pic_regnum (void)
4882 {
4883   if (current_function_is_leaf && !current_function_profile)
4884     {
4885       int i;
4886       for (i = 2; i >= 0; --i)
4887         if (!regs_ever_live[i])
4888 	  return i;
4889     }
4890 
4891   return INVALID_REGNUM;
4892 }
4893 
4894 /* Return 1 if we need to save REGNO.  */
4895 static int
4896 ix86_save_reg (unsigned int regno, int maybe_eh_return)
4897 {
4898   if (pic_offset_table_rtx
4899       && regno == REAL_PIC_OFFSET_TABLE_REGNUM
4900       && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
4901 	  || current_function_profile
4902 	  || current_function_calls_eh_return
4903 	  || current_function_uses_const_pool))
4904     {
4905       if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
4906 	return 0;
4907       return 1;
4908     }
4909 
4910   if (current_function_calls_eh_return && maybe_eh_return)
4911     {
4912       unsigned i;
4913       for (i = 0; ; i++)
4914 	{
4915 	  unsigned test = EH_RETURN_DATA_REGNO (i);
4916 	  if (test == INVALID_REGNUM)
4917 	    break;
4918 	  if (test == regno)
4919 	    return 1;
4920 	}
4921     }
4922 
4923   return (regs_ever_live[regno]
4924 	  && !call_used_regs[regno]
4925 	  && !fixed_regs[regno]
4926 	  && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
4927 }
4928 
4929 /* Return number of registers to be saved on the stack.  */
4930 
4931 static int
4932 ix86_nsaved_regs (void)
4933 {
4934   int nregs = 0;
4935   int regno;
4936 
4937   for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4938     if (ix86_save_reg (regno, true))
4939       nregs++;
4940   return nregs;
4941 }
4942 
4943 /* Return the offset between two registers, one to be eliminated, and the other
4944    its replacement, at the start of a routine.  */
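/* Only eliminations of the argument or frame pointer into either the hard
   frame pointer or the stack pointer are handled; anything else aborts.  */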
4945 
4946 HOST_WIDE_INT
4947 ix86_initial_elimination_offset (int from, int to)
4948 {
4949   struct ix86_frame frame;
4950   ix86_compute_frame_layout (&frame);
4951 
4952   if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4953     return frame.hard_frame_pointer_offset;
4954   else if (from == FRAME_POINTER_REGNUM
4955 	   && to == HARD_FRAME_POINTER_REGNUM)
4956     return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
4957   else
4958     {
4959       if (to != STACK_POINTER_REGNUM)
4960 	abort ();
4961       else if (from == ARG_POINTER_REGNUM)
4962 	return frame.stack_pointer_offset;
4963       else if (from != FRAME_POINTER_REGNUM)
4964 	abort ();
4965       else
4966 	return frame.stack_pointer_offset - frame.frame_pointer_offset;
4967     }
4968 }
4969 
4970 /* Fill structure ix86_frame about frame of currently computed function.  */
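/* The layout computed below, from higher to lower addresses, is roughly:

	return address
	saved frame pointer		(if frame_pointer_needed)
     <- hard_frame_pointer_offset
	saved registers			(nregs * UNITS_PER_WORD)
	va-arg save area		(x86-64 varargs only)
	padding1			(to stack_alignment_needed)
     <- frame_pointer_offset
	local variables			(get_frame_size ())
	outgoing arguments
	padding2			(to preferred_alignment)
     <- stack_pointer_offset

   with the red zone, when usable, carved off the bottom of the frame.  */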
4971 
4972 static void
4973 ix86_compute_frame_layout (struct ix86_frame *frame)
4974 {
4975   HOST_WIDE_INT total_size;
4976   int stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
4977   HOST_WIDE_INT offset;
4978   int preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
4979   HOST_WIDE_INT size = get_frame_size ();
4980 
4981   frame->nregs = ix86_nsaved_regs ();
4982   total_size = size;
4983 
4984   /* During reload iteration the number of registers saved can change.
4985      Recompute the value as needed.  Do not recompute when the number of
4986      registers didn't change, as reload makes multiple calls to this function
4987      and does not expect the decision to change within a single iteration.  */
4988   if (!optimize_size
4989       && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
4990     {
4991       int count = frame->nregs;
4992 
4993       cfun->machine->use_fast_prologue_epilogue_nregs = count;
4994       /* The fast prologue uses move instead of push to save registers.  This
4995          is significantly longer, but also executes faster as modern hardware
4996          can execute the moves in parallel, but can't do that for push/pop.
4997 
4998 	 Be careful about choosing which prologue to emit:  when the function
4999 	 takes many instructions to execute we may use the slow version, and
5000 	 likewise when the function is known to be outside a hot spot (known
5001 	 with feedback only).  Weight the size of the function by the number of
5002 	 registers to save, as it is cheap to use one or two push instructions
5003 	 but very slow to use many of them.  */
5004       if (count)
5005 	count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
5006       if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
5007 	  || (flag_branch_probabilities
5008 	      && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
5009         cfun->machine->use_fast_prologue_epilogue = false;
5010       else
5011         cfun->machine->use_fast_prologue_epilogue
5012 	   = !expensive_function_p (count);
5013     }
5014   if (TARGET_PROLOGUE_USING_MOVE
5015       && cfun->machine->use_fast_prologue_epilogue)
5016     frame->save_regs_using_mov = true;
5017   else
5018     frame->save_regs_using_mov = false;
5019 
5020 
5021   /* Skip return address and saved base pointer.  */
5022   offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
5023 
5024   frame->hard_frame_pointer_offset = offset;
5025 
5026   /* Do some sanity checking of stack_alignment_needed and
5027      preferred_alignment, since the i386 port is the only one using these
5028      features, which may break easily.  */
5029 
5030   if (size && !stack_alignment_needed)
5031     abort ();
5032   if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
5033     abort ();
5034   if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
5035     abort ();
5036   if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
5037     abort ();
5038 
5039   if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
5040     stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
5041 
5042   /* Register save area */
5043   offset += frame->nregs * UNITS_PER_WORD;
5044 
5045   /* Va-arg area */
5046   if (ix86_save_varrargs_registers)
5047     {
5048       offset += X86_64_VARARGS_SIZE;
5049       frame->va_arg_size = X86_64_VARARGS_SIZE;
5050     }
5051   else
5052     frame->va_arg_size = 0;
5053 
5054   /* Align start of frame for local function.  */
5055   frame->padding1 = ((offset + stack_alignment_needed - 1)
5056 		     & -stack_alignment_needed) - offset;
5057 
5058   offset += frame->padding1;
5059 
5060   /* Frame pointer points here.  */
5061   frame->frame_pointer_offset = offset;
5062 
5063   offset += size;
5064 
5065   /* Add outgoing arguments area.  Can be skipped if we eliminated
5066      all the function calls as dead code.
5067      Skipping is however impossible when the function calls alloca.  The
5068      alloca expander assumes that the last current_function_outgoing_args_size
5069      bytes of the stack frame are unused.  */
5070   if (ACCUMULATE_OUTGOING_ARGS
5071       && (!current_function_is_leaf || current_function_calls_alloca))
5072     {
5073       offset += current_function_outgoing_args_size;
5074       frame->outgoing_arguments_size = current_function_outgoing_args_size;
5075     }
5076   else
5077     frame->outgoing_arguments_size = 0;
5078 
5079   /* Align stack boundary.  Only needed if we're calling another function
5080      or using alloca.  */
5081   if (!current_function_is_leaf || current_function_calls_alloca)
5082     frame->padding2 = ((offset + preferred_alignment - 1)
5083 		       & -preferred_alignment) - offset;
5084   else
5085     frame->padding2 = 0;
5086 
5087   offset += frame->padding2;
5088 
5089   /* We've reached end of stack frame.  */
5090   frame->stack_pointer_offset = offset;
5091 
5092   /* Size the prologue needs to allocate.  */
5093   frame->to_allocate =
5094     (size + frame->padding1 + frame->padding2
5095      + frame->outgoing_arguments_size + frame->va_arg_size);
5096 
5097   if ((!frame->to_allocate && frame->nregs <= 1)
5098       || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
5099     frame->save_regs_using_mov = false;
5100 
5101   if (TARGET_RED_ZONE && current_function_sp_is_unchanging
5102       && current_function_is_leaf)
5103     {
5104       frame->red_zone_size = frame->to_allocate;
5105       if (frame->save_regs_using_mov)
5106 	frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
5107       if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
5108 	frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
5109     }
5110   else
5111     frame->red_zone_size = 0;
5112   frame->to_allocate -= frame->red_zone_size;
5113   frame->stack_pointer_offset -= frame->red_zone_size;
5114 #if 0
5115   fprintf (stderr, "nregs: %i\n", frame->nregs);
5116   fprintf (stderr, "size: %i\n", size);
5117   fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
5118   fprintf (stderr, "padding1: %i\n", frame->padding1);
5119   fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
5120   fprintf (stderr, "padding2: %i\n", frame->padding2);
5121   fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
5122   fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
5123   fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
5124   fprintf (stderr, "hard_frame_pointer_offset: %i\n",
5125 	   frame->hard_frame_pointer_offset);
5126   fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
5127 #endif
5128 }
5129 
5130 /* Emit code to save registers in the prologue.  */
5131 
5132 static void
5133 ix86_emit_save_regs (void)
5134 {
5135   int regno;
5136   rtx insn;
5137 
5138   for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
5139     if (ix86_save_reg (regno, true))
5140       {
5141 	insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
5142 	RTX_FRAME_RELATED_P (insn) = 1;
5143       }
5144 }
5145 
5146 /* Emit code to save registers using MOV insns.  First register
5147    is saved at POINTER + OFFSET.  */
5148 static void
5149 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
5150 {
5151   int regno;
5152   rtx insn;
5153 
5154   for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5155     if (ix86_save_reg (regno, true))
5156       {
5157 	insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
5158 					       Pmode, offset),
5159 			       gen_rtx_REG (Pmode, regno));
5160 	RTX_FRAME_RELATED_P (insn) = 1;
5161 	offset += UNITS_PER_WORD;
5162       }
5163 }
5164 
5165 /* Expand prologue or epilogue stack adjustment.
5166    The pattern exists to put a dependency on all ebp-based memory accesses.
5167    STYLE should be negative if the instructions should be marked as frame
5168    related, zero if the %r11 register is live and cannot be freely used,
5169    and positive otherwise.  */
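/* Note that on x86-64 an offset that does not fit in a signed 32-bit
   immediate is first loaded into %r11, which is why STYLE must be nonzero
   in that case.  */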
5170 
5171 static void
5172 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
5173 {
5174   rtx insn;
5175 
5176   if (! TARGET_64BIT)
5177     insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
5178   else if (x86_64_immediate_operand (offset, DImode))
5179     insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
5180   else
5181     {
5182       rtx r11;
5183       /* r11 is used by indirect sibcall return as well, set before the
5184 	 epilogue and used after the epilogue.  ATM indirect sibcall
5185 	 shouldn't be used together with huge frame sizes in one
5186 	 function because of the frame_size check in sibcall.c.  */
5187       if (style == 0)
5188 	abort ();
5189       r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
5190       insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
5191       if (style < 0)
5192 	RTX_FRAME_RELATED_P (insn) = 1;
5193       insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
5194 							       offset));
5195     }
5196   if (style < 0)
5197     RTX_FRAME_RELATED_P (insn) = 1;
5198 }
5199 
5200 /* Expand the prologue into a bunch of separate insns.  */
5201 
5202 void
5203 ix86_expand_prologue (void)
5204 {
5205   rtx insn;
5206   bool pic_reg_used;
5207   struct ix86_frame frame;
5208   HOST_WIDE_INT allocate;
5209 
5210   ix86_compute_frame_layout (&frame);
5211 
5212   /* Note: AT&T enter does NOT have reversed args.  Enter is probably
5213      slower on all targets.  Also sdb doesn't like it.  */
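  /* The frame-pointer prologue emitted below is therefore the usual
     push %ebp; mov %esp, %ebp pair, followed by the stack adjustment
     and the register saves.  */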
5214 
5215   if (frame_pointer_needed)
5216     {
5217       insn = emit_insn (gen_push (hard_frame_pointer_rtx));
5218       RTX_FRAME_RELATED_P (insn) = 1;
5219 
5220       insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
5221       RTX_FRAME_RELATED_P (insn) = 1;
5222     }
5223 
5224   allocate = frame.to_allocate;
5225 
5226   if (!frame.save_regs_using_mov)
5227     ix86_emit_save_regs ();
5228   else
5229     allocate += frame.nregs * UNITS_PER_WORD;
5230 
5231   /* When using the red zone we may start saving registers before allocating
5232      the stack frame, saving one cycle of the prologue.  */
5233   if (TARGET_RED_ZONE && frame.save_regs_using_mov)
5234     ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
5235 				   : stack_pointer_rtx,
5236 				   -frame.nregs * UNITS_PER_WORD);
5237 
5238   if (allocate == 0)
5239     ;
5240   else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
5241     pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5242 			       GEN_INT (-allocate), -1);
5243   else
5244     {
5245       /* Only valid for Win32.  */
5246       rtx eax = gen_rtx_REG (SImode, 0);
5247       bool eax_live = ix86_eax_live_at_start_p ();
5248 
5249       if (TARGET_64BIT)
5250         abort ();
5251 
5252       if (eax_live)
5253 	{
5254 	  emit_insn (gen_push (eax));
5255 	  allocate -= 4;
5256 	}
5257 
5258       insn = emit_move_insn (eax, GEN_INT (allocate));
5259       RTX_FRAME_RELATED_P (insn) = 1;
5260 
5261       insn = emit_insn (gen_allocate_stack_worker (eax));
5262       RTX_FRAME_RELATED_P (insn) = 1;
5263 
5264       if (eax_live)
5265 	{
5266 	  rtx t = plus_constant (stack_pointer_rtx, allocate);
5267 	  emit_move_insn (eax, gen_rtx_MEM (SImode, t));
5268 	}
5269     }
5270 
5271   if (frame.save_regs_using_mov && !TARGET_RED_ZONE)
5272     {
5273       if (!frame_pointer_needed || !frame.to_allocate)
5274         ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
5275       else
5276         ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
5277 				       -frame.nregs * UNITS_PER_WORD);
5278     }
5279 
5280   pic_reg_used = false;
5281   if (pic_offset_table_rtx
5282       && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
5283 	  || current_function_profile))
5284     {
5285       unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
5286 
5287       if (alt_pic_reg_used != INVALID_REGNUM)
5288 	REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
5289 
5290       pic_reg_used = true;
5291     }
5292 
5293   if (pic_reg_used)
5294     {
5295       insn = emit_insn (gen_set_got (pic_offset_table_rtx));
5296 
5297       /* Even with accurate pre-reload life analysis, we can wind up
5298 	 deleting all references to the pic register after reload.
5299 	 Consider if cross-jumping unifies two sides of a branch
5300 	 controlled by a comparison vs the only read from a global.
5301 	 In which case, allow the set_got to be deleted, though we're
5302 	 too late to do anything about the ebx save in the prologue.  */
5303       REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
5304     }
5305 
5306   /* Prevent function calls from being scheduled before the call to mcount.
5307      In the pic_reg_used case, make sure that the got load isn't deleted.  */
5308   if (current_function_profile)
5309     emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
5310 }
5311 
5312 /* Emit code to restore saved registers using MOV insns.  First register
5313    is restored from POINTER + OFFSET.  */
5314 static void
5315 ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
5316 				  int maybe_eh_return)
5317 {
5318   int regno;
5319   rtx base_address = gen_rtx_MEM (Pmode, pointer);
5320 
5321   for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5322     if (ix86_save_reg (regno, maybe_eh_return))
5323       {
5324 	/* Ensure that adjust_address won't be forced to produce a pointer
5325 	   out of the range allowed by the x86-64 instruction set.  */
5326 	if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
5327 	  {
5328 	    rtx r11;
5329 
5330 	    r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
5331 	    emit_move_insn (r11, GEN_INT (offset));
5332 	    emit_insn (gen_adddi3 (r11, r11, pointer));
5333 	    base_address = gen_rtx_MEM (Pmode, r11);
5334 	    offset = 0;
5335 	  }
5336 	emit_move_insn (gen_rtx_REG (Pmode, regno),
5337 			adjust_address (base_address, Pmode, offset));
5338 	offset += UNITS_PER_WORD;
5339       }
5340 }
5341 
5342 /* Restore function stack, frame, and registers.  */
5343 
5344 void
5345 ix86_expand_epilogue (int style)
5346 {
5347   int regno;
5348   int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
5349   struct ix86_frame frame;
5350   HOST_WIDE_INT offset;
5351 
5352   ix86_compute_frame_layout (&frame);
5353 
5354   /* Calculate start of saved registers relative to ebp.  Special care
5355      must be taken for the normal return case of a function using
5356      eh_return: the eax and edx registers are marked as saved, but not
5357      restored along this path.  */
5358   offset = frame.nregs;
5359   if (current_function_calls_eh_return && style != 2)
5360     offset -= 2;
5361   offset *= -UNITS_PER_WORD;
5362 
5363   /* If we're only restoring one register and sp is not valid then
5364      use a move instruction to restore the register, since it's
5365      less work than reloading sp and popping the register.
5366 
5367      The default code results in a stack adjustment using an add/lea
5368      instruction, while this code results in a LEAVE instruction (or discrete
5369      equivalent), so it is profitable in some other cases as well, especially
5370      when there are no registers to restore.  We also use this code when
5371      TARGET_USE_LEAVE and there is exactly one register to pop.  This
5372      heuristic may need some tuning in the future.  */
5373   if ((!sp_valid && frame.nregs <= 1)
5374       || (TARGET_EPILOGUE_USING_MOVE
5375 	  && cfun->machine->use_fast_prologue_epilogue
5376 	  && (frame.nregs > 1 || frame.to_allocate))
5377       || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
5378       || (frame_pointer_needed && TARGET_USE_LEAVE
5379 	  && cfun->machine->use_fast_prologue_epilogue
5380 	  && frame.nregs == 1)
5381       || current_function_calls_eh_return)
5382     {
5383       /* Restore registers.  We can use ebp or esp to address the memory
5384 	 locations.  If both are available, default to ebp, since offsets
5385 	 are known to be small.  The only exception is esp pointing directly
5386 	 to the end of the block of saved registers, where we may simplify
5387 	 the addressing mode.  */
5388 
5389       if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
5390 	ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
5391 					  frame.to_allocate, style == 2);
5392       else
5393 	ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
5394 					  offset, style == 2);
5395 
5396       /* eh_return epilogues need %ecx added to the stack pointer.  */
5397       if (style == 2)
5398 	{
5399 	  rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
5400 
5401 	  if (frame_pointer_needed)
5402 	    {
5403 	      tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
5404 	      tmp = plus_constant (tmp, UNITS_PER_WORD);
5405 	      emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
5406 
5407 	      tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
5408 	      emit_move_insn (hard_frame_pointer_rtx, tmp);
5409 
5410 	      pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
5411 					 const0_rtx, style);
5412 	    }
5413 	  else
5414 	    {
5415 	      tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
5416 	      tmp = plus_constant (tmp, (frame.to_allocate
5417                                          + frame.nregs * UNITS_PER_WORD));
5418 	      emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
5419 	    }
5420 	}
5421       else if (!frame_pointer_needed)
5422 	pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5423 				   GEN_INT (frame.to_allocate
5424 					    + frame.nregs * UNITS_PER_WORD),
5425 				   style);
5426       /* If not an i386, mov & pop is faster than "leave".  */
5427       else if (TARGET_USE_LEAVE || optimize_size
5428 	       || !cfun->machine->use_fast_prologue_epilogue)
5429 	emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5430       else
5431 	{
5432 	  pro_epilogue_adjust_stack (stack_pointer_rtx,
5433 				     hard_frame_pointer_rtx,
5434 				     const0_rtx, style);
5435 	  if (TARGET_64BIT)
5436 	    emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5437 	  else
5438 	    emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
5439 	}
5440     }
5441   else
5442     {
5443       /* First step is to deallocate the stack frame so that we can
5444 	 pop the registers.  */
5445       if (!sp_valid)
5446 	{
5447 	  if (!frame_pointer_needed)
5448 	    abort ();
5449 	  pro_epilogue_adjust_stack (stack_pointer_rtx,
5450 				     hard_frame_pointer_rtx,
5451 				     GEN_INT (offset), style);
5452 	}
5453       else if (frame.to_allocate)
5454 	pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5455 				   GEN_INT (frame.to_allocate), style);
5456 
5457       for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5458 	if (ix86_save_reg (regno, false))
5459 	  {
5460 	    if (TARGET_64BIT)
5461 	      emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
5462 	    else
5463 	      emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
5464 	  }
5465       if (frame_pointer_needed)
5466 	{
5467 	  /* Leave results in shorter dependency chains on CPUs that are
5468 	     able to grok it fast.  */
5469 	  if (TARGET_USE_LEAVE)
5470 	    emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5471 	  else if (TARGET_64BIT)
5472 	    emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5473 	  else
5474 	    emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
5475 	}
5476     }
5477 
5478   /* Sibcall epilogues don't want a return instruction.  */
5479   if (style == 0)
5480     return;
5481 
5482   if (current_function_pops_args && current_function_args_size)
5483     {
5484       rtx popc = GEN_INT (current_function_pops_args);
5485 
5486       /* i386 can only pop 64K bytes.  If asked to pop more, pop the
5487 	 return address, do an explicit add, and jump indirectly to the
5488 	 caller.  */
5489 
5490       if (current_function_pops_args >= 65536)
5491 	{
5492 	  rtx ecx = gen_rtx_REG (SImode, 2);
5493 
5494 	  /* There is no "pascal" calling convention in the 64-bit ABI.  */
5495 	  if (TARGET_64BIT)
5496 	    abort ();
5497 
5498 	  emit_insn (gen_popsi1 (ecx));
5499 	  emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
5500 	  emit_jump_insn (gen_return_indirect_internal (ecx));
5501 	}
5502       else
5503 	emit_jump_insn (gen_return_pop_internal (popc));
5504     }
5505   else
5506     emit_jump_insn (gen_return_internal ());
5507 }
5508 
5509 /* Reset from the function's potential modifications.  */
5510 
5511 static void
5512 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
5513 			       HOST_WIDE_INT size ATTRIBUTE_UNUSED)
5514 {
5515   if (pic_offset_table_rtx)
5516     REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
5517 }
5518 
5519 /* Extract the parts of an RTL expression that is a valid memory address
5520    for an instruction.  Return 0 if the structure of the address is
5521    grossly off.  Return -1 if the address contains ASHIFT, so it is not
5522    strictly valid but is still used for computing the length of a lea insn.  */
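/* For example, (plus (reg %ebx) (plus (mult (reg %ecx) (const_int 4))
   (const_int 12))) decomposes into base %ebx, index %ecx, scale 4 and
   displacement 12, while (ashift (reg %ecx) (const_int 2)), as generated
   for lea, yields index %ecx with scale 4 and a return value of -1.  */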
5523 
5524 static int
5525 ix86_decompose_address (rtx addr, struct ix86_address *out)
5526 {
5527   rtx base = NULL_RTX;
5528   rtx index = NULL_RTX;
5529   rtx disp = NULL_RTX;
5530   HOST_WIDE_INT scale = 1;
5531   rtx scale_rtx = NULL_RTX;
5532   int retval = 1;
5533   enum ix86_address_seg seg = SEG_DEFAULT;
5534 
5535   if (GET_CODE (addr) == REG || GET_CODE (addr) == SUBREG)
5536     base = addr;
5537   else if (GET_CODE (addr) == PLUS)
5538     {
5539       rtx addends[4], op;
5540       int n = 0, i;
5541 
5542       op = addr;
5543       do
5544 	{
5545 	  if (n >= 4)
5546 	    return 0;
5547 	  addends[n++] = XEXP (op, 1);
5548 	  op = XEXP (op, 0);
5549 	}
5550       while (GET_CODE (op) == PLUS);
5551       if (n >= 4)
5552 	return 0;
5553       addends[n] = op;
5554 
5555       for (i = n; i >= 0; --i)
5556 	{
5557 	  op = addends[i];
5558 	  switch (GET_CODE (op))
5559 	    {
5560 	    case MULT:
5561 	      if (index)
5562 		return 0;
5563 	      index = XEXP (op, 0);
5564 	      scale_rtx = XEXP (op, 1);
5565 	      break;
5566 
5567 	    case UNSPEC:
5568 	      if (XINT (op, 1) == UNSPEC_TP
5569 	          && TARGET_TLS_DIRECT_SEG_REFS
5570 	          && seg == SEG_DEFAULT)
5571 		seg = TARGET_64BIT ? SEG_FS : SEG_GS;
5572 	      else
5573 		return 0;
5574 	      break;
5575 
5576 	    case REG:
5577 	    case SUBREG:
5578 	      if (!base)
5579 		base = op;
5580 	      else if (!index)
5581 		index = op;
5582 	      else
5583 		return 0;
5584 	      break;
5585 
5586 	    case CONST:
5587 	    case CONST_INT:
5588 	    case SYMBOL_REF:
5589 	    case LABEL_REF:
5590 	      if (disp)
5591 		return 0;
5592 	      disp = op;
5593 	      break;
5594 
5595 	    default:
5596 	      return 0;
5597 	    }
5598 	}
5599     }
5600   else if (GET_CODE (addr) == MULT)
5601     {
5602       index = XEXP (addr, 0);		/* index*scale */
5603       scale_rtx = XEXP (addr, 1);
5604     }
5605   else if (GET_CODE (addr) == ASHIFT)
5606     {
5607       rtx tmp;
5608 
5609       /* We're called for lea too, which implements ashift on occasion.  */
5610       index = XEXP (addr, 0);
5611       tmp = XEXP (addr, 1);
5612       if (GET_CODE (tmp) != CONST_INT)
5613 	return 0;
5614       scale = INTVAL (tmp);
5615       if ((unsigned HOST_WIDE_INT) scale > 3)
5616 	return 0;
5617       scale = 1 << scale;
5618       retval = -1;
5619     }
5620   else
5621     disp = addr;			/* displacement */
5622 
5623   /* Extract the integral value of scale.  */
5624   if (scale_rtx)
5625     {
5626       if (GET_CODE (scale_rtx) != CONST_INT)
5627 	return 0;
5628       scale = INTVAL (scale_rtx);
5629     }
5630 
5631   /* Allow arg pointer and stack pointer as index if there is no scaling.  */
5632   if (base && index && scale == 1
5633       && (index == arg_pointer_rtx
5634 	  || index == frame_pointer_rtx
5635 	  || (REG_P (index) && REGNO (index) == STACK_POINTER_REGNUM)))
5636     {
5637       rtx tmp = base;
5638       base = index;
5639       index = tmp;
5640     }
5641 
5642   /* Special case: %ebp cannot be encoded as a base without a displacement.  */
5643   if ((base == hard_frame_pointer_rtx
5644        || base == frame_pointer_rtx
5645        || base == arg_pointer_rtx) && !disp)
5646     disp = const0_rtx;
5647 
5648   /* Special case: on K6, [%esi] makes the instruction vector decoded.
5649      Avoid this by transforming to [%esi+0].  */
5650   if (ix86_tune == PROCESSOR_K6 && !optimize_size
5651       && base && !index && !disp
5652       && REG_P (base)
5653       && REGNO_REG_CLASS (REGNO (base)) == SIREG)
5654     disp = const0_rtx;
5655 
5656   /* Special case: encode reg+reg instead of reg*2.  */
5657   if (!base && index && scale && scale == 2)
5658     base = index, scale = 1;
5659 
5660   /* Special case: scaling cannot be encoded without base or displacement.  */
5661   if (!base && !disp && index && scale != 1)
5662     disp = const0_rtx;
5663 
5664   out->base = base;
5665   out->index = index;
5666   out->disp = disp;
5667   out->scale = scale;
5668   out->seg = seg;
5669 
5670   return retval;
5671 }
5672 
5673 /* Return the cost of the memory address X.
5674    For i386, it is better to use a complex address than let gcc copy
5675    the address into a reg and make a new pseudo.  But not if the address
5676    requires two regs - that would mean more pseudos with longer
5677    lifetimes.  */
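/* For example, a bare symbolic displacement costs 0, a single hard register
   costs 1, and an address combining two distinct pseudo registers costs 3.  */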
5678 static int
5679 ix86_address_cost (rtx x)
5680 {
5681   struct ix86_address parts;
5682   int cost = 1;
5683 
5684   if (!ix86_decompose_address (x, &parts))
5685     abort ();
5686 
5687   /* More complex memory references are better.  */
5688   if (parts.disp && parts.disp != const0_rtx)
5689     cost--;
5690   if (parts.seg != SEG_DEFAULT)
5691     cost--;
5692 
5693   /* Attempt to minimize number of registers in the address.  */
5694   if ((parts.base
5695        && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
5696       || (parts.index
5697 	  && (!REG_P (parts.index)
5698 	      || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
5699     cost++;
5700 
5701   if (parts.base
5702       && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
5703       && parts.index
5704       && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
5705       && parts.base != parts.index)
5706     cost++;
5707 
5708   /* The AMD-K6 doesn't like addresses with the ModR/M byte set to
5709      00_xxx_100b, since its predecode logic can't detect the length of such
5710      instructions and decoding degenerates to the vector decoder.  Increase
5711      the cost of such addresses here.  The penalty is at least 2 cycles.  It
5712      may be worthwhile to split such addresses or even refuse them at all.
5713 
5714      The following addressing modes are affected:
5715       [base+scale*index]
5716       [scale*index+disp]
5717       [base+index]
5718 
5719      The first and last cases may be avoidable by explicitly coding a zero
5720      displacement in the memory address, but I don't have an AMD-K6 machine
5721      handy to check this theory.  */
5722 
5723   if (TARGET_K6
5724       && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
5725 	  || (parts.disp && !parts.base && parts.index && parts.scale != 1)
5726 	  || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
5727     cost += 10;
5728 
5729   return cost;
5730 }
5731 
5732 /* If X is a machine specific address (i.e. a symbol or label being
5733    referenced as a displacement from the GOT implemented using an
5734    UNSPEC), then return the base term.  Otherwise return X.  */
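/* For instance, on x86-64 the base term of
   (const (plus (unspec [(symbol_ref "foo")] UNSPEC_GOTPCREL) (const_int 4)))
   is the symbol_ref itself (using "foo" purely as an example).  */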
5735 
5736 rtx
5737 ix86_find_base_term (rtx x)
5738 {
5739   rtx term;
5740 
5741   if (TARGET_64BIT)
5742     {
5743       if (GET_CODE (x) != CONST)
5744 	return x;
5745       term = XEXP (x, 0);
5746       if (GET_CODE (term) == PLUS
5747 	  && (GET_CODE (XEXP (term, 1)) == CONST_INT
5748 	      || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
5749 	term = XEXP (term, 0);
5750       if (GET_CODE (term) != UNSPEC
5751 	  || XINT (term, 1) != UNSPEC_GOTPCREL)
5752 	return x;
5753 
5754       term = XVECEXP (term, 0, 0);
5755 
5756       if (GET_CODE (term) != SYMBOL_REF
5757 	  && GET_CODE (term) != LABEL_REF)
5758 	return x;
5759 
5760       return term;
5761     }
5762 
5763   term = ix86_delegitimize_address (x);
5764 
5765   if (GET_CODE (term) != SYMBOL_REF
5766       && GET_CODE (term) != LABEL_REF)
5767     return x;
5768 
5769   return term;
5770 }
5771 
5772 /* Determine if a given RTX is a valid constant.  We already know this
5773    satisfies CONSTANT_P.  */
5774 
5775 bool
5776 legitimate_constant_p (rtx x)
5777 {
5778   switch (GET_CODE (x))
5779     {
5780     case CONST:
5781       x = XEXP (x, 0);
5782 
5783       if (GET_CODE (x) == PLUS)
5784 	{
5785 	  if (GET_CODE (XEXP (x, 1)) != CONST_INT)
5786 	    return false;
5787 	  x = XEXP (x, 0);
5788 	}
5789 
5790       /* Only some unspecs are valid as "constants".  */
5791       if (GET_CODE (x) == UNSPEC)
5792 	switch (XINT (x, 1))
5793 	  {
5794 	  case UNSPEC_TPOFF:
5795 	  case UNSPEC_NTPOFF:
5796 	    return local_exec_symbolic_operand (XVECEXP (x, 0, 0), Pmode);
5797 	  case UNSPEC_DTPOFF:
5798 	    return local_dynamic_symbolic_operand (XVECEXP (x, 0, 0), Pmode);
5799 	  default:
5800 	    return false;
5801 	  }
5802 
5803       /* We must have drilled down to a symbol.  */
5804       if (!symbolic_operand (x, Pmode))
5805 	return false;
5806       /* FALLTHRU */
5807 
5808     case SYMBOL_REF:
5809       /* TLS symbols are never valid.  */
5810       if (tls_symbolic_operand (x, Pmode))
5811 	return false;
5812       break;
5813 
5814     default:
5815       break;
5816     }
5817 
5818   /* Otherwise we handle everything else in the move patterns.  */
5819   return true;
5820 }
5821 
5822 /* Determine if it's legal to put X into the constant pool.  This
5823    is not possible for the address of thread-local symbols, which
5824    is checked above.  */
5825 
5826 static bool
5827 ix86_cannot_force_const_mem (rtx x)
5828 {
5829   return !legitimate_constant_p (x);
5830 }
5831 
5832 /* Determine if a given RTX is a valid constant address.  */
5833 
5834 bool
5835 constant_address_p (rtx x)
5836 {
5837   return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
5838 }
5839 
5840 /* Nonzero if the constant value X is a legitimate general operand
5841    when generating PIC code.  It is given that flag_pic is on and
5842    that X satisfies CONSTANT_P or is a CONST_DOUBLE.  */
5843 
5844 bool
5845 legitimate_pic_operand_p (rtx x)
5846 {
5847   rtx inner;
5848 
5849   switch (GET_CODE (x))
5850     {
5851     case CONST:
5852       inner = XEXP (x, 0);
5853 
5854       /* Only some unspecs are valid as "constants".  */
5855       if (GET_CODE (inner) == UNSPEC)
5856 	switch (XINT (inner, 1))
5857 	  {
5858 	  case UNSPEC_TPOFF:
5859 	    return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
5860 	  default:
5861 	    return false;
5862 	  }
5863       /* FALLTHRU */
5864 
5865     case SYMBOL_REF:
5866     case LABEL_REF:
5867       return legitimate_pic_address_disp_p (x);
5868 
5869     default:
5870       return true;
5871     }
5872 }
5873 
5874 /* Determine if a given CONST RTX is a valid memory displacement
5875    in PIC mode.  */
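/* For instance, in ia32 PIC code (const (unspec [...] UNSPEC_GOT)) and
   @GOTOFF references to local symbols are accepted, while on x86-64 only
   UNSPEC_GOTPCREL-wrapped references (plus direct local symbols in the
   small PIC model) are.  */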
5876 
5877 int
5878 legitimate_pic_address_disp_p (rtx disp)
5879 {
5880   bool saw_plus;
5881 
5882   /* In 64bit mode we can allow direct addresses of symbols and labels
5883      when they are not dynamic symbols.  */
5884   if (TARGET_64BIT)
5885     {
5886       /* TLS references should always be enclosed in UNSPEC.  */
5887       if (tls_symbolic_operand (disp, GET_MODE (disp)))
5888 	return 0;
5889       if (GET_CODE (disp) == SYMBOL_REF
5890 	  && ix86_cmodel == CM_SMALL_PIC
5891 	  && SYMBOL_REF_LOCAL_P (disp))
5892 	return 1;
5893       if (GET_CODE (disp) == LABEL_REF)
5894 	return 1;
5895       if (GET_CODE (disp) == CONST
5896 	  && GET_CODE (XEXP (disp, 0)) == PLUS)
5897 	{
5898 	  rtx op0 = XEXP (XEXP (disp, 0), 0);
5899 	  rtx op1 = XEXP (XEXP (disp, 0), 1);
5900 
5901 	  /* TLS references should always be enclosed in UNSPEC.  */
5902 	  if (tls_symbolic_operand (op0, GET_MODE (op0)))
5903 	    return 0;
5904 	  if (((GET_CODE (op0) == SYMBOL_REF
5905 		&& ix86_cmodel == CM_SMALL_PIC
5906 		&& SYMBOL_REF_LOCAL_P (op0))
5907 	       || GET_CODE (op0) == LABEL_REF)
5908 	      && GET_CODE (op1) == CONST_INT
5909 	      && INTVAL (op1) < 16*1024*1024
5910 	      && INTVAL (op1) >= -16*1024*1024)
5911 	    return 1;
5912 	}
5913     }
5914   if (GET_CODE (disp) != CONST)
5915     return 0;
5916   disp = XEXP (disp, 0);
5917 
5918   if (TARGET_64BIT)
5919     {
5920       /* It is not safe to allow PLUS expressions.  This limits the allowed
5921          distance of GOT references.  We should not need these anyway.  */
5922       if (GET_CODE (disp) != UNSPEC
5923 	  || XINT (disp, 1) != UNSPEC_GOTPCREL)
5924 	return 0;
5925 
5926       if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
5927 	  && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
5928 	return 0;
5929       return 1;
5930     }
5931 
5932   saw_plus = false;
5933   if (GET_CODE (disp) == PLUS)
5934     {
5935       if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
5936 	return 0;
5937       disp = XEXP (disp, 0);
5938       saw_plus = true;
5939     }
5940 
5941   /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O.  */
5942   if (TARGET_MACHO && GET_CODE (disp) == MINUS)
5943     {
5944       if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
5945           || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
5946         if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
5947           {
5948             const char *sym_name = XSTR (XEXP (disp, 1), 0);
5949             if (! strcmp (sym_name, "<pic base>"))
5950               return 1;
5951           }
5952     }
5953 
5954   if (GET_CODE (disp) != UNSPEC)
5955     return 0;
5956 
5957   switch (XINT (disp, 1))
5958     {
5959     case UNSPEC_GOT:
5960       if (saw_plus)
5961 	return false;
5962       return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
5963     case UNSPEC_GOTOFF:
5964       if (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
5965 	  || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
5966         return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5967       return false;
5968     case UNSPEC_GOTTPOFF:
5969     case UNSPEC_GOTNTPOFF:
5970     case UNSPEC_INDNTPOFF:
5971       if (saw_plus)
5972 	return false;
5973       return initial_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5974     case UNSPEC_NTPOFF:
5975       return local_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5976     case UNSPEC_DTPOFF:
5977       return local_dynamic_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5978     }
5979 
5980   return 0;
5981 }
5982 
5983 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
5984    memory address for an instruction.  The MODE argument is the machine mode
5985    for the MEM expression that wants to use this address.
5986 
5987    It only recognizes addresses in canonical form.  LEGITIMIZE_ADDRESS should
5988    convert common non-canonical forms to canonical form so that they will
5989    be recognized.  */
5990 
5991 int
5992 legitimate_address_p (enum machine_mode mode, rtx addr, int strict)
5993 {
5994   struct ix86_address parts;
5995   rtx base, index, disp;
5996   HOST_WIDE_INT scale;
5997   const char *reason = NULL;
5998   rtx reason_rtx = NULL_RTX;
5999 
6000   if (TARGET_DEBUG_ADDR)
6001     {
6002       fprintf (stderr,
6003 	       "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
6004 	       GET_MODE_NAME (mode), strict);
6005       debug_rtx (addr);
6006     }
6007 
6008   if (ix86_decompose_address (addr, &parts) <= 0)
6009     {
6010       reason = "decomposition failed";
6011       goto report_error;
6012     }
6013 
6014   base = parts.base;
6015   index = parts.index;
6016   disp = parts.disp;
6017   scale = parts.scale;
6018 
6019   /* Validate base register.
6020 
6021      Don't allow SUBREG's here, it can lead to spill failures when the base
6022      is one word out of a two word structure, which is represented internally
6023      as a DImode int.  */
6024 
6025   if (base)
6026     {
6027       reason_rtx = base;
6028 
6029       if (GET_CODE (base) != REG)
6030 	{
6031 	  reason = "base is not a register";
6032 	  goto report_error;
6033 	}
6034 
6035       if (GET_MODE (base) != Pmode)
6036 	{
6037 	  reason = "base is not in Pmode";
6038 	  goto report_error;
6039 	}
6040 
6041       if ((strict && ! REG_OK_FOR_BASE_STRICT_P (base))
6042 	  || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (base)))
6043 	{
6044 	  reason = "base is not valid";
6045 	  goto report_error;
6046 	}
6047     }
6048 
6049   /* Validate index register.
6050 
6051      Don't allow SUBREG's here, it can lead to spill failures when the index
6052      is one word out of a two word structure, which is represented internally
6053      as a DImode int.  */
6054 
6055   if (index)
6056     {
6057       reason_rtx = index;
6058 
6059       if (GET_CODE (index) != REG)
6060 	{
6061 	  reason = "index is not a register";
6062 	  goto report_error;
6063 	}
6064 
6065       if (GET_MODE (index) != Pmode)
6066 	{
6067 	  reason = "index is not in Pmode";
6068 	  goto report_error;
6069 	}
6070 
6071       if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (index))
6072 	  || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (index)))
6073 	{
6074 	  reason = "index is not valid";
6075 	  goto report_error;
6076 	}
6077     }
6078 
6079   /* Validate scale factor.  */
6080   if (scale != 1)
6081     {
6082       reason_rtx = GEN_INT (scale);
6083       if (!index)
6084 	{
6085 	  reason = "scale without index";
6086 	  goto report_error;
6087 	}
6088 
6089       if (scale != 2 && scale != 4 && scale != 8)
6090 	{
6091 	  reason = "scale is not a valid multiplier";
6092 	  goto report_error;
6093 	}
6094     }
6095 
6096   /* Validate displacement.  */
6097   if (disp)
6098     {
6099       reason_rtx = disp;
6100 
6101       if (GET_CODE (disp) == CONST
6102 	  && GET_CODE (XEXP (disp, 0)) == UNSPEC)
6103 	switch (XINT (XEXP (disp, 0), 1))
6104 	  {
6105 	  case UNSPEC_GOT:
6106 	  case UNSPEC_GOTOFF:
6107 	  case UNSPEC_GOTPCREL:
6108 	    if (!flag_pic)
6109 	      abort ();
6110 	    goto is_legitimate_pic;
6111 
6112 	  case UNSPEC_GOTTPOFF:
6113 	  case UNSPEC_GOTNTPOFF:
6114 	  case UNSPEC_INDNTPOFF:
6115 	  case UNSPEC_NTPOFF:
6116 	  case UNSPEC_DTPOFF:
6117 	    break;
6118 
6119 	  default:
6120 	    reason = "invalid address unspec";
6121 	    goto report_error;
6122 	  }
6123 
6124       else if (flag_pic && (SYMBOLIC_CONST (disp)
6125 #if TARGET_MACHO
6126 			    && !machopic_operand_p (disp)
6127 #endif
6128 			    ))
6129 	{
6130 	is_legitimate_pic:
6131 	  if (TARGET_64BIT && (index || base))
6132 	    {
6133 	      /* foo@dtpoff(%rX) is ok.  */
6134 	      if (GET_CODE (disp) != CONST
6135 		  || GET_CODE (XEXP (disp, 0)) != PLUS
6136 		  || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
6137 		  || GET_CODE (XEXP (XEXP (disp, 0), 1)) != CONST_INT
6138 		  || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
6139 		      && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
6140 		{
6141 		  reason = "non-constant pic memory reference";
6142 		  goto report_error;
6143 		}
6144 	    }
6145 	  else if (! legitimate_pic_address_disp_p (disp))
6146 	    {
6147 	      reason = "displacement is an invalid pic construct";
6148 	      goto report_error;
6149 	    }
6150 
6151           /* This code used to verify that a symbolic pic displacement
6152 	     includes the pic_offset_table_rtx register.
6153 
6154 	     While this is a good idea, unfortunately these constructs may
6155 	     be created by the "adds using lea" optimization for incorrect
6156 	     code like:
6157 
6158 	     int a;
6159 	     int foo(int i)
6160 	       {
6161 	         return *(&a+i);
6162 	       }
6163 
6164 	     This code is nonsensical, but results in addressing the
6165 	     GOT table with a pic_offset_table_rtx base.  We can't
6166 	     just refuse it easily, since it gets matched by the
6167 	     "addsi3" pattern, which later gets split to lea when the
6168 	     output register differs from the input.  While this could
6169 	     be handled by a separate addsi pattern for this case that
6170 	     never results in lea, disabling this test seems to be the
6171 	     easier and correct fix for the crash.  */
6172 	}
6173       else if (GET_CODE (disp) != LABEL_REF
6174 	       && GET_CODE (disp) != CONST_INT
6175 	       && (GET_CODE (disp) != CONST
6176 		   || !legitimate_constant_p (disp))
6177 	       && (GET_CODE (disp) != SYMBOL_REF
6178 		   || !legitimate_constant_p (disp)))
6179 	{
6180 	  reason = "displacement is not constant";
6181 	  goto report_error;
6182 	}
6183       else if (TARGET_64BIT && !x86_64_sign_extended_value (disp))
6184 	{
6185 	  reason = "displacement is out of range";
6186 	  goto report_error;
6187 	}
6188     }
6189 
6190   /* Everything looks valid.  */
6191   if (TARGET_DEBUG_ADDR)
6192     fprintf (stderr, "Success.\n");
6193   return TRUE;
6194 
6195  report_error:
6196   if (TARGET_DEBUG_ADDR)
6197     {
6198       fprintf (stderr, "Error: %s\n", reason);
6199       debug_rtx (reason_rtx);
6200     }
6201   return FALSE;
6202 }
6203 
6204 /* Return a unique alias set for the GOT.  */
6205 
6206 static HOST_WIDE_INT
6207 ix86_GOT_alias_set (void)
6208 {
6209   static HOST_WIDE_INT set = -1;
6210   if (set == -1)
6211     set = new_alias_set ();
6212   return set;
6213 }
6214 
6215 /* Return a legitimate reference for ORIG (an address) using the
6216    register REG.  If REG is 0, a new pseudo is generated.
6217 
6218    There are two types of references that must be handled:
6219 
6220    1. Global data references must load the address from the GOT, via
6221       the PIC reg.  An insn is emitted to do this load, and the reg is
6222       returned.
6223 
6224    2. Static data references, constant pool addresses, and code labels
6225       compute the address as an offset from the GOT, whose base is in
6226       the PIC reg.  Static data objects have SYMBOL_FLAG_LOCAL set to
6227       differentiate them from global data objects.  The returned
6228       address is the PIC reg + an unspec constant.
6229 
6230    GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
6231    reg also appears in the address.  */
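/* For example, on ia32 a reference to a global symbol becomes
   (mem (plus pic_reg (const (unspec [sym] UNSPEC_GOT)))), i.e. a load from
   the GOT, while a local symbol becomes
   (plus pic_reg (const (unspec [sym] UNSPEC_GOTOFF))), a plain offset from
   the PIC base.  */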
6232 
6233 rtx
6234 legitimize_pic_address (rtx orig, rtx reg)
6235 {
6236   rtx addr = orig;
6237   rtx new = orig;
6238   rtx base;
6239 
6240 #if TARGET_MACHO
6241   if (reg == 0)
6242     reg = gen_reg_rtx (Pmode);
6243   /* Use the generic Mach-O PIC machinery.  */
6244   return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
6245 #endif
6246 
6247   if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
6248     new = addr;
6249   else if (!TARGET_64BIT && local_symbolic_operand (addr, Pmode))
6250     {
6251       /* This symbol may be referenced via a displacement from the PIC
6252 	 base address (@GOTOFF).  */
6253 
6254       if (reload_in_progress)
6255 	regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6256       if (GET_CODE (addr) == CONST)
6257 	addr = XEXP (addr, 0);
6258       if (GET_CODE (addr) == PLUS)
6259 	  {
6260             new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
6261 	    new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
6262 	  }
6263 	else
6264           new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
6265       new = gen_rtx_CONST (Pmode, new);
6266       new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6267 
6268       if (reg != 0)
6269 	{
6270 	  emit_move_insn (reg, new);
6271 	  new = reg;
6272 	}
6273     }
6274   else if (GET_CODE (addr) == SYMBOL_REF)
6275     {
6276       if (TARGET_64BIT)
6277 	{
6278 	  new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
6279 	  new = gen_rtx_CONST (Pmode, new);
6280 	  new = gen_rtx_MEM (Pmode, new);
6281 	  RTX_UNCHANGING_P (new) = 1;
6282 	  set_mem_alias_set (new, ix86_GOT_alias_set ());
6283 
6284 	  if (reg == 0)
6285 	    reg = gen_reg_rtx (Pmode);
6286 	  /* Use gen_movsi directly, otherwise the address is loaded
6287 	     into a register for CSE.  We don't want to CSE these addresses;
6288 	     instead we CSE addresses from the GOT table, so skip this.  */
6289 	  emit_insn (gen_movsi (reg, new));
6290 	  new = reg;
6291 	}
6292       else
6293 	{
6294 	  /* This symbol must be referenced via a load from the
6295 	     Global Offset Table (@GOT).  */
6296 
6297 	  if (reload_in_progress)
6298 	    regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6299 	  new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
6300 	  new = gen_rtx_CONST (Pmode, new);
6301 	  new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6302 	  new = gen_rtx_MEM (Pmode, new);
6303 	  RTX_UNCHANGING_P (new) = 1;
6304 	  set_mem_alias_set (new, ix86_GOT_alias_set ());
6305 
6306 	  if (reg == 0)
6307 	    reg = gen_reg_rtx (Pmode);
6308 	  emit_move_insn (reg, new);
6309 	  new = reg;
6310 	}
6311     }
6312   else
6313     {
6314       if (GET_CODE (addr) == CONST)
6315 	{
6316 	  addr = XEXP (addr, 0);
6317 
6318 	  /* We must match what we generated before.  Assume the only
6319 	     unspecs that can get here are ours.  Not that we could do
6320 	     anything with them anyway....  */
6321 	  if (GET_CODE (addr) == UNSPEC
6322 	      || (GET_CODE (addr) == PLUS
6323 		  && GET_CODE (XEXP (addr, 0)) == UNSPEC))
6324 	    return orig;
6325 	  if (GET_CODE (addr) != PLUS)
6326 	    abort ();
6327 	}
6328       if (GET_CODE (addr) == PLUS)
6329 	{
6330 	  rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
6331 
6332 	  /* Check first to see if this is a constant offset from a @GOTOFF
6333 	     symbol reference.  */
6334 	  if (local_symbolic_operand (op0, Pmode)
6335 	      && GET_CODE (op1) == CONST_INT)
6336 	    {
6337 	      if (!TARGET_64BIT)
6338 		{
6339 		  if (reload_in_progress)
6340 		    regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6341 		  new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
6342 					UNSPEC_GOTOFF);
6343 		  new = gen_rtx_PLUS (Pmode, new, op1);
6344 		  new = gen_rtx_CONST (Pmode, new);
6345 		  new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6346 
6347 		  if (reg != 0)
6348 		    {
6349 		      emit_move_insn (reg, new);
6350 		      new = reg;
6351 		    }
6352 		}
6353 	      else
6354 		{
6355 		  if (INTVAL (op1) < -16*1024*1024
6356 		      || INTVAL (op1) >= 16*1024*1024)
6357 		    new = gen_rtx_PLUS (Pmode, op0, force_reg (Pmode, op1));
6358 		}
6359 	    }
6360 	  else
6361 	    {
6362 	      base = legitimize_pic_address (XEXP (addr, 0), reg);
6363 	      new  = legitimize_pic_address (XEXP (addr, 1),
6364 					     base == reg ? NULL_RTX : reg);
6365 
6366 	      if (GET_CODE (new) == CONST_INT)
6367 		new = plus_constant (base, INTVAL (new));
6368 	      else
6369 		{
6370 		  if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
6371 		    {
6372 		      base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
6373 		      new = XEXP (new, 1);
6374 		    }
6375 		  new = gen_rtx_PLUS (Pmode, base, new);
6376 		}
6377 	    }
6378 	}
6379     }
6380   return new;
6381 }
6382 
6383 /* Load the thread pointer.  If TO_REG is true, force it into a register.  */
6384 
6385 static rtx
6386 get_thread_pointer (int to_reg)
6387 {
6388   rtx tp, reg, insn;
6389 
6390   tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
6391   if (!to_reg)
6392     return tp;
6393 
6394   reg = gen_reg_rtx (Pmode);
6395   insn = gen_rtx_SET (VOIDmode, reg, tp);
6396   insn = emit_insn (insn);
6397 
6398   return reg;
6399 }
6400 
6401 /* A subroutine of legitimize_address and ix86_expand_move.  FOR_MOV is
6402    false if we expect this to be used for a memory address and true if
6403    we expect to load the address into a register.  */
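
/* A simplified sketch of one of the cases handled below: with the GNU TLS
   dialect, a local-exec reference to symbol X becomes

     (plus <thread pointer>
           (const (unspec [X] UNSPEC_NTPOFF)))

   where <thread pointer> is the UNSPEC_TP rtx, possibly copied into a
   register first, and the unspec resolves to a link-time constant offset
   from the thread pointer.  */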
6404 
6405 static rtx
6406 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
6407 {
6408   rtx dest, base, off, pic;
6409   int type;
6410 
6411   switch (model)
6412     {
6413     case TLS_MODEL_GLOBAL_DYNAMIC:
6414       dest = gen_reg_rtx (Pmode);
6415       if (TARGET_64BIT)
6416 	{
6417 	  rtx rax = gen_rtx_REG (Pmode, 0), insns;
6418 
6419 	  start_sequence ();
6420 	  emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
6421 	  insns = get_insns ();
6422 	  end_sequence ();
6423 
6424 	  emit_libcall_block (insns, dest, rax, x);
6425 	}
6426       else
6427 	emit_insn (gen_tls_global_dynamic_32 (dest, x));
6428       break;
6429 
6430     case TLS_MODEL_LOCAL_DYNAMIC:
6431       base = gen_reg_rtx (Pmode);
6432       if (TARGET_64BIT)
6433 	{
6434 	  rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
6435 
6436 	  start_sequence ();
6437 	  emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
6438 	  insns = get_insns ();
6439 	  end_sequence ();
6440 
6441 	  note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
6442 	  note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
6443 	  emit_libcall_block (insns, base, rax, note);
6444 	}
6445       else
6446 	emit_insn (gen_tls_local_dynamic_base_32 (base));
6447 
6448       off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
6449       off = gen_rtx_CONST (Pmode, off);
6450 
6451       return gen_rtx_PLUS (Pmode, base, off);
6452 
6453     case TLS_MODEL_INITIAL_EXEC:
6454       if (TARGET_64BIT)
6455 	{
6456 	  pic = NULL;
6457 	  type = UNSPEC_GOTNTPOFF;
6458 	}
6459       else if (flag_pic)
6460 	{
6461 	  if (reload_in_progress)
6462 	    regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6463 	  pic = pic_offset_table_rtx;
6464 	  type = TARGET_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
6465 	}
6466       else if (!TARGET_GNU_TLS)
6467 	{
6468 	  pic = gen_reg_rtx (Pmode);
6469 	  emit_insn (gen_set_got (pic));
6470 	  type = UNSPEC_GOTTPOFF;
6471 	}
6472       else
6473 	{
6474 	  pic = NULL;
6475 	  type = UNSPEC_INDNTPOFF;
6476 	}
6477 
6478       off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
6479       off = gen_rtx_CONST (Pmode, off);
6480       if (pic)
6481 	off = gen_rtx_PLUS (Pmode, pic, off);
6482       off = gen_rtx_MEM (Pmode, off);
6483       RTX_UNCHANGING_P (off) = 1;
6484       set_mem_alias_set (off, ix86_GOT_alias_set ());
6485 
6486       if (TARGET_64BIT || TARGET_GNU_TLS)
6487 	{
6488           base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
6489 	  off = force_reg (Pmode, off);
6490 	  return gen_rtx_PLUS (Pmode, base, off);
6491 	}
6492       else
6493 	{
6494 	  base = get_thread_pointer (true);
6495 	  dest = gen_reg_rtx (Pmode);
6496 	  emit_insn (gen_subsi3 (dest, base, off));
6497 	}
6498       break;
6499 
6500     case TLS_MODEL_LOCAL_EXEC:
6501       off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
6502 			    (TARGET_64BIT || TARGET_GNU_TLS)
6503 			    ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
6504       off = gen_rtx_CONST (Pmode, off);
6505 
6506       if (TARGET_64BIT || TARGET_GNU_TLS)
6507 	{
6508 	  base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
6509 	  return gen_rtx_PLUS (Pmode, base, off);
6510 	}
6511       else
6512 	{
6513 	  base = get_thread_pointer (true);
6514 	  dest = gen_reg_rtx (Pmode);
6515 	  emit_insn (gen_subsi3 (dest, base, off));
6516 	}
6517       break;
6518 
6519     default:
6520       abort ();
6521     }
6522 
6523   return dest;
6524 }
6525 
6526 /* Try machine-dependent ways of modifying an illegitimate address
6527    to be legitimate.  If we find one, return the new, valid address.
6528    This macro is used in only one place: `memory_address' in explow.c.
6529 
6530    OLDX is the address as it was before break_out_memory_refs was called.
6531    In some cases it is useful to look at this to decide what needs to be done.
6532 
6533    MODE and WIN are passed so that this macro can use
6534    GO_IF_LEGITIMATE_ADDRESS.
6535 
6536    It is always safe for this macro to do nothing.  It exists to recognize
6537    opportunities to optimize the output.
6538 
6539    For the 80386, we handle X+REG by loading X into a register R and
6540    using R+REG.  R will go in a general reg and indexing will be used.
6541    However, if REG is a broken-out memory address or multiplication,
6542    nothing needs to be done because REG can certainly go in a general reg.
6543 
6544    When -fpic is used, special handling is needed for symbolic references.
6545    See comments by legitimize_pic_address in i386.c for details.  */
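
/* One of the rewrites performed below, shown as a sketch: a small constant
   left shift inside an address is turned into a multiply so that it matches
   the x86 scaled-index form, e.g.

     (plus (ashift (reg A) (const_int 2)) (reg B))
       ==> (plus (mult (reg A) (const_int 4)) (reg B))  */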
6546 
6547 rtx
6548 legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
6549 {
6550   int changed = 0;
6551   unsigned log;
6552 
6553   if (TARGET_DEBUG_ADDR)
6554     {
6555       fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
6556 	       GET_MODE_NAME (mode));
6557       debug_rtx (x);
6558     }
6559 
6560   log = tls_symbolic_operand (x, mode);
6561   if (log)
6562     return legitimize_tls_address (x, log, false);
6563 
6564   if (flag_pic && SYMBOLIC_CONST (x))
6565     return legitimize_pic_address (x, 0);
6566 
6567   /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
6568   if (GET_CODE (x) == ASHIFT
6569       && GET_CODE (XEXP (x, 1)) == CONST_INT
6570       && (log = (unsigned) INTVAL (XEXP (x, 1))) < 4)
6571     {
6572       changed = 1;
6573       x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
6574 			GEN_INT (1 << log));
6575     }
6576 
6577   if (GET_CODE (x) == PLUS)
6578     {
6579       /* Canonicalize shifts by 0, 1, 2, 3 into multiply.  */
6580 
6581       if (GET_CODE (XEXP (x, 0)) == ASHIFT
6582 	  && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
6583 	  && (log = (unsigned) INTVAL (XEXP (XEXP (x, 0), 1))) < 4)
6584 	{
6585 	  changed = 1;
6586 	  XEXP (x, 0) = gen_rtx_MULT (Pmode,
6587 				      force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
6588 				      GEN_INT (1 << log));
6589 	}
6590 
6591       if (GET_CODE (XEXP (x, 1)) == ASHIFT
6592 	  && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
6593 	  && (log = (unsigned) INTVAL (XEXP (XEXP (x, 1), 1))) < 4)
6594 	{
6595 	  changed = 1;
6596 	  XEXP (x, 1) = gen_rtx_MULT (Pmode,
6597 				      force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
6598 				      GEN_INT (1 << log));
6599 	}
6600 
6601       /* Put multiply first if it isn't already.  */
6602       if (GET_CODE (XEXP (x, 1)) == MULT)
6603 	{
6604 	  rtx tmp = XEXP (x, 0);
6605 	  XEXP (x, 0) = XEXP (x, 1);
6606 	  XEXP (x, 1) = tmp;
6607 	  changed = 1;
6608 	}
6609 
6610       /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
6611 	 into (plus (plus (mult (reg) (const)) (reg)) (const)).  This can be
6612 	 created by virtual register instantiation, register elimination, and
6613 	 similar optimizations.  */
6614       if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
6615 	{
6616 	  changed = 1;
6617 	  x = gen_rtx_PLUS (Pmode,
6618 			    gen_rtx_PLUS (Pmode, XEXP (x, 0),
6619 					  XEXP (XEXP (x, 1), 0)),
6620 			    XEXP (XEXP (x, 1), 1));
6621 	}
6622 
6623       /* Canonicalize
6624 	 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
6625 	 into (plus (plus (mult (reg) (const)) (reg)) (const)).  */
6626       else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
6627 	       && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
6628 	       && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
6629 	       && CONSTANT_P (XEXP (x, 1)))
6630 	{
6631 	  rtx constant;
6632 	  rtx other = NULL_RTX;
6633 
6634 	  if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6635 	    {
6636 	      constant = XEXP (x, 1);
6637 	      other = XEXP (XEXP (XEXP (x, 0), 1), 1);
6638 	    }
6639 	  else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
6640 	    {
6641 	      constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
6642 	      other = XEXP (x, 1);
6643 	    }
6644 	  else
6645 	    constant = 0;
6646 
6647 	  if (constant)
6648 	    {
6649 	      changed = 1;
6650 	      x = gen_rtx_PLUS (Pmode,
6651 				gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
6652 					      XEXP (XEXP (XEXP (x, 0), 1), 0)),
6653 				plus_constant (other, INTVAL (constant)));
6654 	    }
6655 	}
6656 
6657       if (changed && legitimate_address_p (mode, x, FALSE))
6658 	return x;
6659 
6660       if (GET_CODE (XEXP (x, 0)) == MULT)
6661 	{
6662 	  changed = 1;
6663 	  XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
6664 	}
6665 
6666       if (GET_CODE (XEXP (x, 1)) == MULT)
6667 	{
6668 	  changed = 1;
6669 	  XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
6670 	}
6671 
6672       if (changed
6673 	  && GET_CODE (XEXP (x, 1)) == REG
6674 	  && GET_CODE (XEXP (x, 0)) == REG)
6675 	return x;
6676 
6677       if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
6678 	{
6679 	  changed = 1;
6680 	  x = legitimize_pic_address (x, 0);
6681 	}
6682 
6683       if (changed && legitimate_address_p (mode, x, FALSE))
6684 	return x;
6685 
6686       if (GET_CODE (XEXP (x, 0)) == REG)
6687 	{
6688 	  rtx temp = gen_reg_rtx (Pmode);
6689 	  rtx val  = force_operand (XEXP (x, 1), temp);
6690 	  if (val != temp)
6691 	    emit_move_insn (temp, val);
6692 
6693 	  XEXP (x, 1) = temp;
6694 	  return x;
6695 	}
6696 
6697       else if (GET_CODE (XEXP (x, 1)) == REG)
6698 	{
6699 	  rtx temp = gen_reg_rtx (Pmode);
6700 	  rtx val  = force_operand (XEXP (x, 0), temp);
6701 	  if (val != temp)
6702 	    emit_move_insn (temp, val);
6703 
6704 	  XEXP (x, 0) = temp;
6705 	  return x;
6706 	}
6707     }
6708 
6709   return x;
6710 }
6711 
6712 /* Print an integer constant expression in assembler syntax.  Addition
6713    and subtraction are the only arithmetic that may appear in these
6714    expressions.  FILE is the stdio stream to write to, X is the rtx, and
6715    CODE is the operand print code from the output string.  */
6716 
6717 static void
6718 output_pic_addr_const (FILE *file, rtx x, int code)
6719 {
6720   char buf[256];
6721 
6722   switch (GET_CODE (x))
6723     {
6724     case PC:
6725       if (flag_pic)
6726 	putc ('.', file);
6727       else
6728 	abort ();
6729       break;
6730 
6731     case SYMBOL_REF:
6732       assemble_name (file, XSTR (x, 0));
6733       if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
6734 	fputs ("@PLT", file);
6735       break;
6736 
6737     case LABEL_REF:
6738       x = XEXP (x, 0);
6739       /* FALLTHRU */
6740     case CODE_LABEL:
6741       ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
6742       assemble_name (asm_out_file, buf);
6743       break;
6744 
6745     case CONST_INT:
6746       fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
6747       break;
6748 
6749     case CONST:
6750       /* This used to output parentheses around the expression,
6751 	 but that does not work on the 386 (either ATT or BSD assembler).  */
6752       output_pic_addr_const (file, XEXP (x, 0), code);
6753       break;
6754 
6755     case CONST_DOUBLE:
6756       if (GET_MODE (x) == VOIDmode)
6757 	{
6758 	  /* We can use %d if the number is <32 bits and positive.  */
6759 	  if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
6760 	    fprintf (file, "0x%lx%08lx",
6761 		     (unsigned long) CONST_DOUBLE_HIGH (x),
6762 		     (unsigned long) CONST_DOUBLE_LOW (x));
6763 	  else
6764 	    fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
6765 	}
6766       else
6767 	/* We can't handle floating point constants;
6768 	   PRINT_OPERAND must handle them.  */
6769 	output_operand_lossage ("floating constant misused");
6770       break;
6771 
6772     case PLUS:
6773       /* Some assemblers need integer constants to appear first.  */
6774       if (GET_CODE (XEXP (x, 0)) == CONST_INT)
6775 	{
6776 	  output_pic_addr_const (file, XEXP (x, 0), code);
6777 	  putc ('+', file);
6778 	  output_pic_addr_const (file, XEXP (x, 1), code);
6779 	}
6780       else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6781 	{
6782 	  output_pic_addr_const (file, XEXP (x, 1), code);
6783 	  putc ('+', file);
6784 	  output_pic_addr_const (file, XEXP (x, 0), code);
6785 	}
6786       else
6787 	abort ();
6788       break;
6789 
6790     case MINUS:
6791       if (!TARGET_MACHO)
6792 	putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
6793       output_pic_addr_const (file, XEXP (x, 0), code);
6794       putc ('-', file);
6795       output_pic_addr_const (file, XEXP (x, 1), code);
6796       if (!TARGET_MACHO)
6797 	putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
6798       break;
6799 
6800      case UNSPEC:
6801        if (XVECLEN (x, 0) != 1)
6802 	 abort ();
6803        output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
6804        switch (XINT (x, 1))
6805 	{
6806 	case UNSPEC_GOT:
6807 	  fputs ("@GOT", file);
6808 	  break;
6809 	case UNSPEC_GOTOFF:
6810 	  fputs ("@GOTOFF", file);
6811 	  break;
6812 	case UNSPEC_GOTPCREL:
6813 	  fputs ("@GOTPCREL(%rip)", file);
6814 	  break;
6815 	case UNSPEC_GOTTPOFF:
6816 	  /* FIXME: This might be @TPOFF in Sun ld too.  */
6817 	  fputs ("@GOTTPOFF", file);
6818 	  break;
6819 	case UNSPEC_TPOFF:
6820 	  fputs ("@TPOFF", file);
6821 	  break;
6822 	case UNSPEC_NTPOFF:
6823 	  if (TARGET_64BIT)
6824 	    fputs ("@TPOFF", file);
6825 	  else
6826 	    fputs ("@NTPOFF", file);
6827 	  break;
6828 	case UNSPEC_DTPOFF:
6829 	  fputs ("@DTPOFF", file);
6830 	  break;
6831 	case UNSPEC_GOTNTPOFF:
6832 	  if (TARGET_64BIT)
6833 	    fputs ("@GOTTPOFF(%rip)", file);
6834 	  else
6835 	    fputs ("@GOTNTPOFF", file);
6836 	  break;
6837 	case UNSPEC_INDNTPOFF:
6838 	  fputs ("@INDNTPOFF", file);
6839 	  break;
6840 	default:
6841 	  output_operand_lossage ("invalid UNSPEC as operand");
6842 	  break;
6843 	}
6844        break;
6845 
6846     default:
6847       output_operand_lossage ("invalid expression as operand");
6848     }
6849 }
6850 
6851 /* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
6852    We need to handle our special PIC relocations.  */
6853 
6854 void
6855 i386_dwarf_output_addr_const (FILE *file, rtx x)
6856 {
6857 #ifdef ASM_QUAD
6858   fprintf (file, "%s", TARGET_64BIT ? ASM_QUAD : ASM_LONG);
6859 #else
6860   if (TARGET_64BIT)
6861     abort ();
6862   fprintf (file, "%s", ASM_LONG);
6863 #endif
6864   if (flag_pic)
6865     output_pic_addr_const (file, x, '\0');
6866   else
6867     output_addr_const (file, x);
6868   fputc ('\n', file);
6869 }
6870 
6871 /* This is called from dwarf2out.c via ASM_OUTPUT_DWARF_DTPREL.
6872    We need to emit DTP-relative relocations.  */
6873 
6874 void
6875 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
6876 {
6877   fputs (ASM_LONG, file);
6878   output_addr_const (file, x);
6879   fputs ("@DTPOFF", file);
6880   switch (size)
6881     {
6882     case 4:
6883       break;
6884     case 8:
6885       fputs (", 0", file);
6886       break;
6887     default:
6888       abort ();
6889    }
6890 }
6891 
6892 /* In the name of slightly smaller debug output, and to cater to
6893    general assembler lossage, recognize PIC+GOTOFF and turn it back
6894    into a direct symbol reference.  */
6895 
6896 static rtx
6897 ix86_delegitimize_address (rtx orig_x)
6898 {
6899   rtx x = orig_x, y;
6900 
6901   if (GET_CODE (x) == MEM)
6902     x = XEXP (x, 0);
6903 
6904   if (TARGET_64BIT)
6905     {
6906       if (GET_CODE (x) != CONST
6907 	  || GET_CODE (XEXP (x, 0)) != UNSPEC
6908 	  || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
6909 	  || GET_CODE (orig_x) != MEM)
6910 	return orig_x;
6911       return XVECEXP (XEXP (x, 0), 0, 0);
6912     }
6913 
6914   if (GET_CODE (x) != PLUS
6915       || GET_CODE (XEXP (x, 1)) != CONST)
6916     return orig_x;
6917 
6918   if (GET_CODE (XEXP (x, 0)) == REG
6919       && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
6920     /* %ebx + GOT/GOTOFF */
6921     y = NULL;
6922   else if (GET_CODE (XEXP (x, 0)) == PLUS)
6923     {
6924       /* %ebx + %reg * scale + GOT/GOTOFF */
6925       y = XEXP (x, 0);
6926       if (GET_CODE (XEXP (y, 0)) == REG
6927 	  && REGNO (XEXP (y, 0)) == PIC_OFFSET_TABLE_REGNUM)
6928 	y = XEXP (y, 1);
6929       else if (GET_CODE (XEXP (y, 1)) == REG
6930 	       && REGNO (XEXP (y, 1)) == PIC_OFFSET_TABLE_REGNUM)
6931 	y = XEXP (y, 0);
6932       else
6933 	return orig_x;
6934       if (GET_CODE (y) != REG
6935 	  && GET_CODE (y) != MULT
6936 	  && GET_CODE (y) != ASHIFT)
6937 	return orig_x;
6938     }
6939   else
6940     return orig_x;
6941 
6942   x = XEXP (XEXP (x, 1), 0);
6943   if (GET_CODE (x) == UNSPEC
6944       && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6945 	  || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
6946     {
6947       if (y)
6948 	return gen_rtx_PLUS (Pmode, y, XVECEXP (x, 0, 0));
6949       return XVECEXP (x, 0, 0);
6950     }
6951 
6952   if (GET_CODE (x) == PLUS
6953       && GET_CODE (XEXP (x, 0)) == UNSPEC
6954       && GET_CODE (XEXP (x, 1)) == CONST_INT
6955       && ((XINT (XEXP (x, 0), 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6956 	  || (XINT (XEXP (x, 0), 1) == UNSPEC_GOTOFF
6957 	      && GET_CODE (orig_x) != MEM)))
6958     {
6959       x = gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
6960       if (y)
6961 	return gen_rtx_PLUS (Pmode, y, x);
6962       return x;
6963     }
6964 
6965   return orig_x;
6966 }
6967 
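/* Write to FILE the assembler condition-code suffix ("e", "ne", "g", "b",
   ...) for comparison CODE in flags mode MODE.  If REVERSE is nonzero the
   condition is reversed first; FP selects the alternate spellings ("nbe",
   "nb", "u", "nu") used with fcmov to sidestep assembler problems.  */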
6968 static void
6969 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
6970 		    int fp, FILE *file)
6971 {
6972   const char *suffix;
6973 
6974   if (mode == CCFPmode || mode == CCFPUmode)
6975     {
6976       enum rtx_code second_code, bypass_code;
6977       ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
6978       if (bypass_code != NIL || second_code != NIL)
6979 	abort ();
6980       code = ix86_fp_compare_code_to_integer (code);
6981       mode = CCmode;
6982     }
6983   if (reverse)
6984     code = reverse_condition (code);
6985 
6986   switch (code)
6987     {
6988     case EQ:
6989       suffix = "e";
6990       break;
6991     case NE:
6992       suffix = "ne";
6993       break;
6994     case GT:
6995       if (mode != CCmode && mode != CCNOmode && mode != CCGCmode)
6996 	abort ();
6997       suffix = "g";
6998       break;
6999     case GTU:
7000       /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
7001 	 Those same assemblers have the same but opposite lossage on cmov.  */
7002       if (mode != CCmode)
7003 	abort ();
7004       suffix = fp ? "nbe" : "a";
7005       break;
7006     case LT:
7007       if (mode == CCNOmode || mode == CCGOCmode)
7008 	suffix = "s";
7009       else if (mode == CCmode || mode == CCGCmode)
7010 	suffix = "l";
7011       else
7012 	abort ();
7013       break;
7014     case LTU:
7015       if (mode != CCmode)
7016 	abort ();
7017       suffix = "b";
7018       break;
7019     case GE:
7020       if (mode == CCNOmode || mode == CCGOCmode)
7021 	suffix = "ns";
7022       else if (mode == CCmode || mode == CCGCmode)
7023 	suffix = "ge";
7024       else
7025 	abort ();
7026       break;
7027     case GEU:
7028       /* ??? As above.  */
7029       if (mode != CCmode)
7030 	abort ();
7031       suffix = fp ? "nb" : "ae";
7032       break;
7033     case LE:
7034       if (mode != CCmode && mode != CCGCmode && mode != CCNOmode)
7035 	abort ();
7036       suffix = "le";
7037       break;
7038     case LEU:
7039       if (mode != CCmode)
7040 	abort ();
7041       suffix = "be";
7042       break;
7043     case UNORDERED:
7044       suffix = fp ? "u" : "p";
7045       break;
7046     case ORDERED:
7047       suffix = fp ? "nu" : "np";
7048       break;
7049     default:
7050       abort ();
7051     }
7052   fputs (suffix, file);
7053 }
7054 
7055 /* Print the name of register X to FILE based on its machine mode and number.
7056    If CODE is 'w', pretend the mode is HImode.
7057    If CODE is 'b', pretend the mode is QImode.
7058    If CODE is 'k', pretend the mode is SImode.
7059    If CODE is 'q', pretend the mode is DImode.
7060    If CODE is 'h', pretend the reg is the `high' byte register.
7061    If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.  */
7062 
7063 void
7064 print_reg (rtx x, int code, FILE *file)
7065 {
7066   if (REGNO (x) == ARG_POINTER_REGNUM
7067       || REGNO (x) == FRAME_POINTER_REGNUM
7068       || REGNO (x) == FLAGS_REG
7069       || REGNO (x) == FPSR_REG)
7070     abort ();
7071 
7072   if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
7073     putc ('%', file);
7074 
7075   if (code == 'w' || MMX_REG_P (x))
7076     code = 2;
7077   else if (code == 'b')
7078     code = 1;
7079   else if (code == 'k')
7080     code = 4;
7081   else if (code == 'q')
7082     code = 8;
7083   else if (code == 'y')
7084     code = 3;
7085   else if (code == 'h')
7086     code = 0;
7087   else
7088     code = GET_MODE_SIZE (GET_MODE (x));
7089 
7090   /* Irritatingly, AMD extended registers use a different naming convention
7091      from the normal registers.  */
7092   if (REX_INT_REG_P (x))
7093     {
7094       if (!TARGET_64BIT)
7095 	abort ();
7096       switch (code)
7097 	{
7098 	  case 0:
7099 	    error ("extended registers have no high halves");
7100 	    break;
7101 	  case 1:
7102 	    fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
7103 	    break;
7104 	  case 2:
7105 	    fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
7106 	    break;
7107 	  case 4:
7108 	    fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
7109 	    break;
7110 	  case 8:
7111 	    fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
7112 	    break;
7113 	  default:
7114 	    error ("unsupported operand size for extended register");
7115 	    break;
7116 	}
7117       return;
7118     }
7119   switch (code)
7120     {
7121     case 3:
7122       if (STACK_TOP_P (x))
7123 	{
7124 	  fputs ("st(0)", file);
7125 	  break;
7126 	}
7127       /* FALLTHRU */
7128     case 8:
7129     case 4:
7130     case 12:
7131       if (! ANY_FP_REG_P (x))
7132 	putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
7133       /* FALLTHRU */
7134     case 16:
7135     case 2:
7136     normal:
7137       fputs (hi_reg_name[REGNO (x)], file);
7138       break;
7139     case 1:
7140       if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
7141 	goto normal;
7142       fputs (qi_reg_name[REGNO (x)], file);
7143       break;
7144     case 0:
7145       if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
7146 	goto normal;
7147       fputs (qi_high_reg_name[REGNO (x)], file);
7148       break;
7149     default:
7150       abort ();
7151     }
7152 }
7153 
7154 /* Locate some local-dynamic symbol still in use by this function
7155    so that we can print its name in some tls_local_dynamic_base
7156    pattern.  */
7157 
7158 static const char *
7159 get_some_local_dynamic_name (void)
7160 {
7161   rtx insn;
7162 
7163   if (cfun->machine->some_ld_name)
7164     return cfun->machine->some_ld_name;
7165 
7166   for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
7167     if (INSN_P (insn)
7168 	&& for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
7169       return cfun->machine->some_ld_name;
7170 
7171   abort ();
7172 }
7173 
7174 static int
7175 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
7176 {
7177   rtx x = *px;
7178 
7179   if (GET_CODE (x) == SYMBOL_REF
7180       && local_dynamic_symbolic_operand (x, Pmode))
7181     {
7182       cfun->machine->some_ld_name = XSTR (x, 0);
7183       return 1;
7184     }
7185 
7186   return 0;
7187 }
7188 
7189 /* Meaning of CODE:
7190    L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
7191    C -- print opcode suffix for set/cmov insn.
7192    c -- like C, but print reversed condition
7193    F,f -- likewise, but for floating-point.
7194    O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
7195         otherwise nothing
7196    R -- print the prefix for register names.
7197    z -- print the opcode suffix for the size of the current operand.
7198    * -- print a star (in certain assembler syntax)
7199    A -- print an absolute memory reference.
7200    w -- print the operand as if it's a "word" (HImode) even if it isn't.
7201    s -- print a shift double count, followed by the assembler's argument
7202 	delimiter.
7203    b -- print the QImode name of the register for the indicated operand.
7204 	%b0 would print %al if operands[0] is reg 0.
7205    w --  likewise, print the HImode name of the register.
7206    k --  likewise, print the SImode name of the register.
7207    q --  likewise, print the DImode name of the register.
7208    h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
7209    y -- print "st(0)" instead of "st" as a register.
7210    D -- print condition for SSE cmp instruction.
7211    P -- if PIC, print an @PLT suffix.
7212    X -- don't print any sort of PIC '@' suffix for a symbol.
7213    & -- print some in-use local-dynamic symbol name.
7214  */
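
/* A small worked example of the codes above (sketch): if operands[0] is
   reg 0 (the ax register family), then in AT&T output "%k0" prints "%eax"
   and "%w0" prints "%ax", while "%z1" appended to a mnemonic emits the
   size suffix appropriate for the mode of operands[1].  */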
7215 
7216 void
7217 print_operand (FILE *file, rtx x, int code)
7218 {
7219   if (code)
7220     {
7221       switch (code)
7222 	{
7223 	case '*':
7224 	  if (ASSEMBLER_DIALECT == ASM_ATT)
7225 	    putc ('*', file);
7226 	  return;
7227 
7228 	case '&':
7229 	  assemble_name (file, get_some_local_dynamic_name ());
7230 	  return;
7231 
7232 	case 'A':
7233 	  if (ASSEMBLER_DIALECT == ASM_ATT)
7234 	    putc ('*', file);
7235 	  else if (ASSEMBLER_DIALECT == ASM_INTEL)
7236 	    {
7237 	      /* Intel syntax.  For absolute addresses, registers should not
7238 		 be surrounded by brackets.  */
7239 	      if (GET_CODE (x) != REG)
7240 		{
7241 		  putc ('[', file);
7242 		  PRINT_OPERAND (file, x, 0);
7243 		  putc (']', file);
7244 		  return;
7245 		}
7246 	    }
7247 	  else
7248 	    abort ();
7249 
7250 	  PRINT_OPERAND (file, x, 0);
7251 	  return;
7252 
7253 
7254 	case 'L':
7255 	  if (ASSEMBLER_DIALECT == ASM_ATT)
7256 	    putc ('l', file);
7257 	  return;
7258 
7259 	case 'W':
7260 	  if (ASSEMBLER_DIALECT == ASM_ATT)
7261 	    putc ('w', file);
7262 	  return;
7263 
7264 	case 'B':
7265 	  if (ASSEMBLER_DIALECT == ASM_ATT)
7266 	    putc ('b', file);
7267 	  return;
7268 
7269 	case 'Q':
7270 	  if (ASSEMBLER_DIALECT == ASM_ATT)
7271 	    putc ('l', file);
7272 	  return;
7273 
7274 	case 'S':
7275 	  if (ASSEMBLER_DIALECT == ASM_ATT)
7276 	    putc ('s', file);
7277 	  return;
7278 
7279 	case 'T':
7280 	  if (ASSEMBLER_DIALECT == ASM_ATT)
7281 	    putc ('t', file);
7282 	  return;
7283 
7284 	case 'z':
7285 	  /* 387 opcodes don't get size suffixes if the operands are
7286 	     registers.  */
7287 	  if (STACK_REG_P (x))
7288 	    return;
7289 
7290 	  /* Likewise if using Intel opcodes.  */
7291 	  if (ASSEMBLER_DIALECT == ASM_INTEL)
7292 	    return;
7293 
7294 	  /* Derive the opcode suffix from the size of the operand.  */
7295 	  switch (GET_MODE_SIZE (GET_MODE (x)))
7296 	    {
7297 	    case 2:
7298 #ifdef HAVE_GAS_FILDS_FISTS
7299 	      putc ('s', file);
7300 #endif
7301 	      return;
7302 
7303 	    case 4:
7304 	      if (GET_MODE (x) == SFmode)
7305 		{
7306 		  putc ('s', file);
7307 		  return;
7308 		}
7309 	      else
7310 		putc ('l', file);
7311 	      return;
7312 
7313 	    case 12:
7314 	    case 16:
7315 	      putc ('t', file);
7316 	      return;
7317 
7318 	    case 8:
7319 	      if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
7320 		{
7321 #ifdef GAS_MNEMONICS
7322 		  putc ('q', file);
7323 #else
7324 		  putc ('l', file);
7325 		  putc ('l', file);
7326 #endif
7327 		}
7328 	      else
7329 	        putc ('l', file);
7330 	      return;
7331 
7332 	    default:
7333 	      abort ();
7334 	    }
7335 
7336 	case 'b':
7337 	case 'w':
7338 	case 'k':
7339 	case 'q':
7340 	case 'h':
7341 	case 'y':
7342 	case 'X':
7343 	case 'P':
7344 	  break;
7345 
7346 	case 's':
7347 	  if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
7348 	    {
7349 	      PRINT_OPERAND (file, x, 0);
7350 	      putc (',', file);
7351 	    }
7352 	  return;
7353 
7354 	case 'D':
7355 	  /* Little bit of braindamage here.  The SSE compare instructions
7356 	     use completely different names for the comparisons than the
7357 	     fp conditional moves do.  */
7358 	  switch (GET_CODE (x))
7359 	    {
7360 	    case EQ:
7361 	    case UNEQ:
7362 	      fputs ("eq", file);
7363 	      break;
7364 	    case LT:
7365 	    case UNLT:
7366 	      fputs ("lt", file);
7367 	      break;
7368 	    case LE:
7369 	    case UNLE:
7370 	      fputs ("le", file);
7371 	      break;
7372 	    case UNORDERED:
7373 	      fputs ("unord", file);
7374 	      break;
7375 	    case NE:
7376 	    case LTGT:
7377 	      fputs ("neq", file);
7378 	      break;
7379 	    case UNGE:
7380 	    case GE:
7381 	      fputs ("nlt", file);
7382 	      break;
7383 	    case UNGT:
7384 	    case GT:
7385 	      fputs ("nle", file);
7386 	      break;
7387 	    case ORDERED:
7388 	      fputs ("ord", file);
7389 	      break;
7390 	    default:
7391 	      abort ();
7392 	      break;
7393 	    }
7394 	  return;
7395 	case 'O':
7396 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7397 	  if (ASSEMBLER_DIALECT == ASM_ATT)
7398 	    {
7399 	      switch (GET_MODE (x))
7400 		{
7401 		case HImode: putc ('w', file); break;
7402 		case SImode:
7403 		case SFmode: putc ('l', file); break;
7404 		case DImode:
7405 		case DFmode: putc ('q', file); break;
7406 		default: abort ();
7407 		}
7408 	      putc ('.', file);
7409 	    }
7410 #endif
7411 	  return;
7412 	case 'C':
7413 	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
7414 	  return;
7415 	case 'F':
7416 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7417 	  if (ASSEMBLER_DIALECT == ASM_ATT)
7418 	    putc ('.', file);
7419 #endif
7420 	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
7421 	  return;
7422 
7423 	  /* Like above, but reverse condition */
7424 	case 'c':
7425 	  /* Check to see if argument to %c is really a constant
7426 	     and not a condition code which needs to be reversed.  */
7427 	  if (GET_RTX_CLASS (GET_CODE (x)) != '<')
7428 	  {
7429 	    output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
7430 	     return;
7431 	  }
7432 	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
7433 	  return;
7434 	case 'f':
7435 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7436 	  if (ASSEMBLER_DIALECT == ASM_ATT)
7437 	    putc ('.', file);
7438 #endif
7439 	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
7440 	  return;
7441 	case '+':
7442 	  {
7443 	    rtx x;
7444 
7445 	    if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
7446 	      return;
7447 
7448 	    x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
7449 	    if (x)
7450 	      {
7451 		int pred_val = INTVAL (XEXP (x, 0));
7452 
7453 		if (pred_val < REG_BR_PROB_BASE * 45 / 100
7454 		    || pred_val > REG_BR_PROB_BASE * 55 / 100)
7455 		  {
7456 		    int taken = pred_val > REG_BR_PROB_BASE / 2;
7457 		    int cputaken = final_forward_branch_p (current_output_insn) == 0;
7458 
7459 		    /* Emit hints only when the default branch prediction
7460 		       heuristics would fail.  */
7461 		    if (taken != cputaken)
7462 		      {
7463 			/* We use 3e (DS) prefix for taken branches and
7464 			   2e (CS) prefix for not taken branches.  */
7465 			if (taken)
7466 			  fputs ("ds ; ", file);
7467 			else
7468 			  fputs ("cs ; ", file);
7469 		      }
7470 		  }
7471 	      }
7472 	    return;
7473 	  }
7474 	default:
7475 	    output_operand_lossage ("invalid operand code `%c'", code);
7476 	}
7477     }
7478 
7479   if (GET_CODE (x) == REG)
7480     print_reg (x, code, file);
7481 
7482   else if (GET_CODE (x) == MEM)
7483     {
7484       /* No `byte ptr' prefix for call instructions.  */
7485       if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
7486 	{
7487 	  const char * size;
7488 	  switch (GET_MODE_SIZE (GET_MODE (x)))
7489 	    {
7490 	    case 1: size = "BYTE"; break;
7491 	    case 2: size = "WORD"; break;
7492 	    case 4: size = "DWORD"; break;
7493 	    case 8: size = "QWORD"; break;
7494 	    case 12: size = "XWORD"; break;
7495 	    case 16: size = "XMMWORD"; break;
7496 	    default:
7497 	      abort ();
7498 	    }
7499 
7500 	  /* Check for explicit size override (codes 'b', 'w' and 'k')  */
7501 	  if (code == 'b')
7502 	    size = "BYTE";
7503 	  else if (code == 'w')
7504 	    size = "WORD";
7505 	  else if (code == 'k')
7506 	    size = "DWORD";
7507 
7508 	  fputs (size, file);
7509 	  fputs (" PTR ", file);
7510 	}
7511 
7512       x = XEXP (x, 0);
7513       /* Avoid (%rip) for call operands.  */
7514       if (CONSTANT_ADDRESS_P (x) && code == 'P'
7515 	       && GET_CODE (x) != CONST_INT)
7516 	output_addr_const (file, x);
7517       else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
7518 	output_operand_lossage ("invalid constraints for operand");
7519       else
7520 	output_address (x);
7521     }
7522 
7523   else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
7524     {
7525       REAL_VALUE_TYPE r;
7526       long l;
7527 
7528       REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7529       REAL_VALUE_TO_TARGET_SINGLE (r, l);
7530 
7531       if (ASSEMBLER_DIALECT == ASM_ATT)
7532 	putc ('$', file);
7533       fprintf (file, "0x%08lx", l);
7534     }
7535 
7536   /* These float cases don't actually occur as immediate operands.  */
7537   else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
7538     {
7539       char dstr[30];
7540 
7541       real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
7542       fprintf (file, "%s", dstr);
7543     }
7544 
7545   else if (GET_CODE (x) == CONST_DOUBLE
7546 	   && GET_MODE (x) == XFmode)
7547     {
7548       char dstr[30];
7549 
7550       real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
7551       fprintf (file, "%s", dstr);
7552     }
7553 
7554   else
7555     {
7556       if (code != 'P')
7557 	{
7558 	  if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
7559 	    {
7560 	      if (ASSEMBLER_DIALECT == ASM_ATT)
7561 		putc ('$', file);
7562 	    }
7563 	  else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
7564 		   || GET_CODE (x) == LABEL_REF)
7565 	    {
7566 	      if (ASSEMBLER_DIALECT == ASM_ATT)
7567 		putc ('$', file);
7568 	      else
7569 		fputs ("OFFSET FLAT:", file);
7570 	    }
7571 	}
7572       if (GET_CODE (x) == CONST_INT)
7573 	fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
7574       else if (flag_pic)
7575 	output_pic_addr_const (file, x, code);
7576       else
7577 	output_addr_const (file, x);
7578     }
7579 }
7580 
7581 /* Print a memory operand whose address is ADDR.  */
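
/* Illustration (sketch): for an address decomposed into base, index, scale
   and displacement, the AT&T branch below prints "disp(%base,%index,scale)"
   while the Intel branch prints "[base+disp+index*scale]"; any segment
   override is printed in front of either form.  */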
7582 
7583 void
7584 print_operand_address (FILE *file, rtx addr)
7585 {
7586   struct ix86_address parts;
7587   rtx base, index, disp;
7588   int scale;
7589 
7590   if (! ix86_decompose_address (addr, &parts))
7591     abort ();
7592 
7593   base = parts.base;
7594   index = parts.index;
7595   disp = parts.disp;
7596   scale = parts.scale;
7597 
7598   switch (parts.seg)
7599     {
7600     case SEG_DEFAULT:
7601       break;
7602     case SEG_FS:
7603     case SEG_GS:
7604       if (USER_LABEL_PREFIX[0] == 0)
7605 	putc ('%', file);
7606       fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
7607       break;
7608     default:
7609       abort ();
7610     }
7611 
7612   if (!base && !index)
7613     {
7614       /* A displacement-only address requires special attention.  */
7615 
7616       if (GET_CODE (disp) == CONST_INT)
7617 	{
7618 	  if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
7619 	    {
7620 	      if (USER_LABEL_PREFIX[0] == 0)
7621 		putc ('%', file);
7622 	      fputs ("ds:", file);
7623 	    }
7624 	  fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
7625 	}
7626       else if (flag_pic)
7627 	output_pic_addr_const (file, disp, 0);
7628       else
7629 	output_addr_const (file, disp);
7630 
7631       /* Use the one-byte-shorter RIP-relative addressing for 64-bit mode.  */
7632       if (TARGET_64BIT
7633 	  && ((GET_CODE (disp) == SYMBOL_REF
7634 	       && ! tls_symbolic_operand (disp, GET_MODE (disp)))
7635 	      || GET_CODE (disp) == LABEL_REF
7636 	      || (GET_CODE (disp) == CONST
7637 		  && GET_CODE (XEXP (disp, 0)) == PLUS
7638 		  && (GET_CODE (XEXP (XEXP (disp, 0), 0)) == SYMBOL_REF
7639 		      || GET_CODE (XEXP (XEXP (disp, 0), 0)) == LABEL_REF)
7640 		  && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)))
7641 	fputs ("(%rip)", file);
7642     }
7643   else
7644     {
7645       if (ASSEMBLER_DIALECT == ASM_ATT)
7646 	{
7647 	  if (disp)
7648 	    {
7649 	      if (flag_pic)
7650 		output_pic_addr_const (file, disp, 0);
7651 	      else if (GET_CODE (disp) == LABEL_REF)
7652 		output_asm_label (disp);
7653 	      else
7654 		output_addr_const (file, disp);
7655 	    }
7656 
7657 	  putc ('(', file);
7658 	  if (base)
7659 	    print_reg (base, 0, file);
7660 	  if (index)
7661 	    {
7662 	      putc (',', file);
7663 	      print_reg (index, 0, file);
7664 	      if (scale != 1)
7665 		fprintf (file, ",%d", scale);
7666 	    }
7667 	  putc (')', file);
7668 	}
7669       else
7670 	{
7671 	  rtx offset = NULL_RTX;
7672 
7673 	  if (disp)
7674 	    {
7675 	      /* Pull out the offset of a symbol; print any symbol itself.  */
7676 	      if (GET_CODE (disp) == CONST
7677 		  && GET_CODE (XEXP (disp, 0)) == PLUS
7678 		  && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
7679 		{
7680 		  offset = XEXP (XEXP (disp, 0), 1);
7681 		  disp = gen_rtx_CONST (VOIDmode,
7682 					XEXP (XEXP (disp, 0), 0));
7683 		}
7684 
7685 	      if (flag_pic)
7686 		output_pic_addr_const (file, disp, 0);
7687 	      else if (GET_CODE (disp) == LABEL_REF)
7688 		output_asm_label (disp);
7689 	      else if (GET_CODE (disp) == CONST_INT)
7690 		offset = disp;
7691 	      else
7692 		output_addr_const (file, disp);
7693 	    }
7694 
7695 	  putc ('[', file);
7696 	  if (base)
7697 	    {
7698 	      print_reg (base, 0, file);
7699 	      if (offset)
7700 		{
7701 		  if (INTVAL (offset) >= 0)
7702 		    putc ('+', file);
7703 		  fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
7704 		}
7705 	    }
7706 	  else if (offset)
7707 	    fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
7708 	  else
7709 	    putc ('0', file);
7710 
7711 	  if (index)
7712 	    {
7713 	      putc ('+', file);
7714 	      print_reg (index, 0, file);
7715 	      if (scale != 1)
7716 		fprintf (file, "*%d", scale);
7717 	    }
7718 	  putc (']', file);
7719 	}
7720     }
7721 }
7722 
7723 bool
7724 output_addr_const_extra (FILE *file, rtx x)
7725 {
7726   rtx op;
7727 
7728   if (GET_CODE (x) != UNSPEC)
7729     return false;
7730 
7731   op = XVECEXP (x, 0, 0);
7732   switch (XINT (x, 1))
7733     {
7734     case UNSPEC_GOTTPOFF:
7735       output_addr_const (file, op);
7736       /* FIXME: This might be @TPOFF in Sun ld.  */
7737       fputs ("@GOTTPOFF", file);
7738       break;
7739     case UNSPEC_TPOFF:
7740       output_addr_const (file, op);
7741       fputs ("@TPOFF", file);
7742       break;
7743     case UNSPEC_NTPOFF:
7744       output_addr_const (file, op);
7745       if (TARGET_64BIT)
7746 	fputs ("@TPOFF", file);
7747       else
7748 	fputs ("@NTPOFF", file);
7749       break;
7750     case UNSPEC_DTPOFF:
7751       output_addr_const (file, op);
7752       fputs ("@DTPOFF", file);
7753       break;
7754     case UNSPEC_GOTNTPOFF:
7755       output_addr_const (file, op);
7756       if (TARGET_64BIT)
7757 	fputs ("@GOTTPOFF(%rip)", file);
7758       else
7759 	fputs ("@GOTNTPOFF", file);
7760       break;
7761     case UNSPEC_INDNTPOFF:
7762       output_addr_const (file, op);
7763       fputs ("@INDNTPOFF", file);
7764       break;
7765 
7766     default:
7767       return false;
7768     }
7769 
7770   return true;
7771 }
7772 
7773 /* Split one or more DImode RTL references into pairs of SImode
7774    references.  The RTL can be REG, offsettable MEM, integer constant, or
7775    CONST_DOUBLE.  "operands" is a pointer to an array of DImode RTL to
7776    split and "num" is its length.  lo_half and hi_half are output arrays
7777    that parallel "operands".  */
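
/* For instance (sketch): a DImode MEM is split by adjust_address into two
   SImode MEMs at byte offsets 0 and 4, while a REG or constant goes through
   simplify_gen_subreg to yield its low and high SImode words.  */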
7778 
7779 void
7780 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
7781 {
7782   while (num--)
7783     {
7784       rtx op = operands[num];
7785 
7786       /* simplify_subreg refuses to split volatile memory addresses,
7787          but we still have to handle them.  */
7788       if (GET_CODE (op) == MEM)
7789 	{
7790 	  lo_half[num] = adjust_address (op, SImode, 0);
7791 	  hi_half[num] = adjust_address (op, SImode, 4);
7792 	}
7793       else
7794 	{
7795 	  lo_half[num] = simplify_gen_subreg (SImode, op,
7796 					      GET_MODE (op) == VOIDmode
7797 					      ? DImode : GET_MODE (op), 0);
7798 	  hi_half[num] = simplify_gen_subreg (SImode, op,
7799 					      GET_MODE (op) == VOIDmode
7800 					      ? DImode : GET_MODE (op), 4);
7801 	}
7802     }
7803 }
7804 /* Split one or more TImode RTL references into pairs of DImode
7805    references.  The RTL can be REG, offsettable MEM, integer constant, or
7806    CONST_DOUBLE.  "operands" is a pointer to an array of TImode RTL to
7807    split and "num" is its length.  lo_half and hi_half are output arrays
7808    that parallel "operands".  */
7809 
7810 void
7811 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
7812 {
7813   while (num--)
7814     {
7815       rtx op = operands[num];
7816 
7817       /* simplify_subreg refuses to split volatile memory addresses, but we
7818          still have to handle them.  */
7819       if (GET_CODE (op) == MEM)
7820 	{
7821 	  lo_half[num] = adjust_address (op, DImode, 0);
7822 	  hi_half[num] = adjust_address (op, DImode, 8);
7823 	}
7824       else
7825 	{
7826 	  lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
7827 	  hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
7828 	}
7829     }
7830 }
7831 
7832 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
7833    MINUS, MULT or DIV.  OPERANDS are the insn operands, where operands[3]
7834    is the expression of the binary operation.  The output may either be
7835    emitted here, or returned to the caller, like all output_* functions.
7836 
7837    There is no guarantee that the operands are the same mode, as they
7838    might be within FLOAT or FLOAT_EXTEND expressions.  */
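
/* Example of the templates returned below (sketch): for an SFmode PLUS with
   SSE registers the function returns "addss\t{%2, %0|%0, %2}", while a plain
   387 add returns "fadd" followed by one of the operand-order/popping
   variants selected further down.  */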
7839 
7840 #ifndef SYSV386_COMPAT
7841 /* Set to 1 for compatibility with brain-damaged assemblers.  No-one
7842    wants to fix the assemblers because that causes incompatibility
7843    with gcc.  No-one wants to fix gcc because that causes
7844    incompatibility with assemblers...  You can use the option of
7845    -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way.  */
7846 #define SYSV386_COMPAT 1
7847 #endif
7848 
7849 const char *
7850 output_387_binary_op (rtx insn, rtx *operands)
7851 {
7852   static char buf[30];
7853   const char *p;
7854   const char *ssep;
7855   int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]) | SSE_REG_P (operands[2]);
7856 
7857 #ifdef ENABLE_CHECKING
7858   /* Even if we do not want to check the inputs, this documents the input
7859      constraints, which helps in understanding the following code.  */
7860   if (STACK_REG_P (operands[0])
7861       && ((REG_P (operands[1])
7862 	   && REGNO (operands[0]) == REGNO (operands[1])
7863 	   && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
7864 	  || (REG_P (operands[2])
7865 	      && REGNO (operands[0]) == REGNO (operands[2])
7866 	      && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
7867       && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
7868     ; /* ok */
7869   else if (!is_sse)
7870     abort ();
7871 #endif
7872 
7873   switch (GET_CODE (operands[3]))
7874     {
7875     case PLUS:
7876       if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7877 	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7878 	p = "fiadd";
7879       else
7880 	p = "fadd";
7881       ssep = "add";
7882       break;
7883 
7884     case MINUS:
7885       if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7886 	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7887 	p = "fisub";
7888       else
7889 	p = "fsub";
7890       ssep = "sub";
7891       break;
7892 
7893     case MULT:
7894       if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7895 	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7896 	p = "fimul";
7897       else
7898 	p = "fmul";
7899       ssep = "mul";
7900       break;
7901 
7902     case DIV:
7903       if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7904 	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7905 	p = "fidiv";
7906       else
7907 	p = "fdiv";
7908       ssep = "div";
7909       break;
7910 
7911     default:
7912       abort ();
7913     }
7914 
7915   if (is_sse)
7916    {
7917       strcpy (buf, ssep);
7918       if (GET_MODE (operands[0]) == SFmode)
7919 	strcat (buf, "ss\t{%2, %0|%0, %2}");
7920       else
7921 	strcat (buf, "sd\t{%2, %0|%0, %2}");
7922       return buf;
7923    }
7924   strcpy (buf, p);
7925 
7926   switch (GET_CODE (operands[3]))
7927     {
7928     case MULT:
7929     case PLUS:
7930       if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
7931 	{
7932 	  rtx temp = operands[2];
7933 	  operands[2] = operands[1];
7934 	  operands[1] = temp;
7935 	}
7936 
7937       /* Now we know operands[0] == operands[1].  */
7938 
7939       if (GET_CODE (operands[2]) == MEM)
7940 	{
7941 	  p = "%z2\t%2";
7942 	  break;
7943 	}
7944 
7945       if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7946 	{
7947 	  if (STACK_TOP_P (operands[0]))
7948 	    /* How is it that we are storing to a dead operand[2]?
7949 	       Well, presumably operands[1] is dead too.  We can't
7950 	       store the result to st(0) as st(0) gets popped on this
7951 	       instruction.  Instead store to operands[2] (which I
7952 	       think has to be st(1)).  st(1) will be popped later.
7953 	       gcc <= 2.8.1 didn't have this check and generated
7954 	       assembly code that the Unixware assembler rejected.  */
7955 	    p = "p\t{%0, %2|%2, %0}";	/* st(1) = st(0) op st(1); pop */
7956 	  else
7957 	    p = "p\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0); pop */
7958 	  break;
7959 	}
7960 
7961       if (STACK_TOP_P (operands[0]))
7962 	p = "\t{%y2, %0|%0, %y2}";	/* st(0) = st(0) op st(r2) */
7963       else
7964 	p = "\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0) */
7965       break;
7966 
7967     case MINUS:
7968     case DIV:
7969       if (GET_CODE (operands[1]) == MEM)
7970 	{
7971 	  p = "r%z1\t%1";
7972 	  break;
7973 	}
7974 
7975       if (GET_CODE (operands[2]) == MEM)
7976 	{
7977 	  p = "%z2\t%2";
7978 	  break;
7979 	}
7980 
7981       if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7982 	{
7983 #if SYSV386_COMPAT
7984 	  /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
7985 	     derived assemblers, confusingly reverse the direction of
7986 	     the operation for fsub{r} and fdiv{r} when the
7987 	     destination register is not st(0).  The Intel assembler
7988 	     doesn't have this brain damage.  Read !SYSV386_COMPAT to
7989 	     figure out what the hardware really does.  */
7990 	  if (STACK_TOP_P (operands[0]))
7991 	    p = "{p\t%0, %2|rp\t%2, %0}";
7992 	  else
7993 	    p = "{rp\t%2, %0|p\t%0, %2}";
7994 #else
7995 	  if (STACK_TOP_P (operands[0]))
7996 	    /* As above for fmul/fadd, we can't store to st(0).  */
7997 	    p = "rp\t{%0, %2|%2, %0}";	/* st(1) = st(0) op st(1); pop */
7998 	  else
7999 	    p = "p\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0); pop */
8000 #endif
8001 	  break;
8002 	}
8003 
8004       if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
8005 	{
8006 #if SYSV386_COMPAT
8007 	  if (STACK_TOP_P (operands[0]))
8008 	    p = "{rp\t%0, %1|p\t%1, %0}";
8009 	  else
8010 	    p = "{p\t%1, %0|rp\t%0, %1}";
8011 #else
8012 	  if (STACK_TOP_P (operands[0]))
8013 	    p = "p\t{%0, %1|%1, %0}";	/* st(1) = st(1) op st(0); pop */
8014 	  else
8015 	    p = "rp\t{%1, %0|%0, %1}";	/* st(r2) = st(0) op st(r2); pop */
8016 #endif
8017 	  break;
8018 	}
8019 
8020       if (STACK_TOP_P (operands[0]))
8021 	{
8022 	  if (STACK_TOP_P (operands[1]))
8023 	    p = "\t{%y2, %0|%0, %y2}";	/* st(0) = st(0) op st(r2) */
8024 	  else
8025 	    p = "r\t{%y1, %0|%0, %y1}";	/* st(0) = st(r1) op st(0) */
8026 	  break;
8027 	}
8028       else if (STACK_TOP_P (operands[1]))
8029 	{
8030 #if SYSV386_COMPAT
8031 	  p = "{\t%1, %0|r\t%0, %1}";
8032 #else
8033 	  p = "r\t{%1, %0|%0, %1}";	/* st(r2) = st(0) op st(r2) */
8034 #endif
8035 	}
8036       else
8037 	{
8038 #if SYSV386_COMPAT
8039 	  p = "{r\t%2, %0|\t%0, %2}";
8040 #else
8041 	  p = "\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0) */
8042 #endif
8043 	}
8044       break;
8045 
8046     default:
8047       abort ();
8048     }
8049 
8050   strcat (buf, p);
8051   return buf;
8052 }
8053 
8054 /* Output code to initialize control word copies used by
8055    trunc?f?i patterns.  NORMAL is set to the current control word, while
8056    ROUND_DOWN is set to a control word that rounds toward zero (truncates).  */
8057 void
8058 emit_i387_cw_initialization (rtx normal, rtx round_down)
8059 {
8060   rtx reg = gen_reg_rtx (HImode);
8061 
8062   emit_insn (gen_x86_fnstcw_1 (normal));
8063   emit_move_insn (reg, normal);
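  /* Both alternatives below set the two rounding-control bits of the copy
     (mask 0xc00 of the x87 control word; the insv variant writes 0xc into
     the high byte, which sets the same bits).  With RC = 11 the FPU rounds
     toward zero, which is what the trunc?f?i patterns need.  */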
8064   if (!TARGET_PARTIAL_REG_STALL && !optimize_size
8065       && !TARGET_64BIT)
8066     emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
8067   else
8068     emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0xc00)));
8069   emit_move_insn (round_down, reg);
8070 }
8071 
8072 /* Output code for INSN to convert a float to a signed int.  OPERANDS
8073    are the insn operands.  The output may be [HSD]Imode and the input
8074    operand may be [SDX]Fmode.  */
8075 
8076 const char *
8077 output_fix_trunc (rtx insn, rtx *operands)
8078 {
8079   int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
8080   int dimode_p = GET_MODE (operands[0]) == DImode;
8081 
8082   /* Jump through a hoop or two for DImode, since the hardware has no
8083      non-popping instruction.  We used to do this a different way, but
8084      that was somewhat fragile and broke with post-reload splitters.  */
8085   if (dimode_p && !stack_top_dies)
8086     output_asm_insn ("fld\t%y1", operands);
8087 
8088   if (!STACK_TOP_P (operands[1]))
8089     abort ();
8090 
8091   if (GET_CODE (operands[0]) != MEM)
8092     abort ();
8093 
8094   output_asm_insn ("fldcw\t%3", operands);
8095   if (stack_top_dies || dimode_p)
8096     output_asm_insn ("fistp%z0\t%0", operands);
8097   else
8098     output_asm_insn ("fist%z0\t%0", operands);
8099   output_asm_insn ("fldcw\t%2", operands);
8100 
8101   return "";
8102 }
8103 
8104 /* Output code for INSN to compare OPERANDS.  EFLAGS_P is 1 when fcomi
8105    should be used and 2 when fnstsw should be used.  UNORDERED_P is true
8106    when fucom should be used.  */
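
/* For example (sketch): with SSE operands in SFmode and UNORDERED_P set the
   function returns "ucomiss\t{%1, %0|%0, %1}"; the 387 cases pick a template
   from the fcom/fcomp/fucom/fcomi family selected further down.  */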
8107 
8108 const char *
8109 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
8110 {
8111   int stack_top_dies;
8112   rtx cmp_op0 = operands[0];
8113   rtx cmp_op1 = operands[1];
8114   int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]);
8115 
8116   if (eflags_p == 2)
8117     {
8118       cmp_op0 = cmp_op1;
8119       cmp_op1 = operands[2];
8120     }
8121   if (is_sse)
8122     {
8123       if (GET_MODE (operands[0]) == SFmode)
8124 	if (unordered_p)
8125 	  return "ucomiss\t{%1, %0|%0, %1}";
8126 	else
8127 	  return "comiss\t{%1, %0|%0, %1}";
8128       else
8129 	if (unordered_p)
8130 	  return "ucomisd\t{%1, %0|%0, %1}";
8131 	else
8132 	  return "comisd\t{%1, %0|%0, %1}";
8133     }
8134 
8135   if (! STACK_TOP_P (cmp_op0))
8136     abort ();
8137 
8138   stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
8139 
8140   if (STACK_REG_P (cmp_op1)
8141       && stack_top_dies
8142       && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
8143       && REGNO (cmp_op1) != FIRST_STACK_REG)
8144     {
8145       /* If both the top of the 387 stack and the other operand (also a
8146 	 stack register) die, then this must be a `fcompp' float
8147 	 compare.  */
8148 
8149       if (eflags_p == 1)
8150 	{
8151 	  /* There is no double popping fcomi variant.  Fortunately,
8152 	     eflags is immune from the fstp's cc clobbering.  */
8153 	  if (unordered_p)
8154 	    output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
8155 	  else
8156 	    output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
8157 	  return "fstp\t%y0";
8158 	}
8159       else
8160 	{
8161 	  if (eflags_p == 2)
8162 	    {
8163 	      if (unordered_p)
8164 		return "fucompp\n\tfnstsw\t%0";
8165 	      else
8166 		return "fcompp\n\tfnstsw\t%0";
8167 	    }
8168 	  else
8169 	    {
8170 	      if (unordered_p)
8171 		return "fucompp";
8172 	      else
8173 		return "fcompp";
8174 	    }
8175 	}
8176     }
8177   else
8178     {
8179       /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies.  */
8180 
8181       static const char * const alt[24] =
8182       {
8183 	"fcom%z1\t%y1",
8184 	"fcomp%z1\t%y1",
8185 	"fucom%z1\t%y1",
8186 	"fucomp%z1\t%y1",
8187 
8188 	"ficom%z1\t%y1",
8189 	"ficomp%z1\t%y1",
8190 	NULL,
8191 	NULL,
8192 
8193 	"fcomi\t{%y1, %0|%0, %y1}",
8194 	"fcomip\t{%y1, %0|%0, %y1}",
8195 	"fucomi\t{%y1, %0|%0, %y1}",
8196 	"fucomip\t{%y1, %0|%0, %y1}",
8197 
8198 	NULL,
8199 	NULL,
8200 	NULL,
8201 	NULL,
8202 
8203 	"fcom%z2\t%y2\n\tfnstsw\t%0",
8204 	"fcomp%z2\t%y2\n\tfnstsw\t%0",
8205 	"fucom%z2\t%y2\n\tfnstsw\t%0",
8206 	"fucomp%z2\t%y2\n\tfnstsw\t%0",
8207 
8208 	"ficom%z2\t%y2\n\tfnstsw\t%0",
8209 	"ficomp%z2\t%y2\n\tfnstsw\t%0",
8210 	NULL,
8211 	NULL
8212       };
8213 
8214       int mask;
8215       const char *ret;
8216 
8217       mask  = eflags_p << 3;
8218       mask |= (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT) << 2;
8219       mask |= unordered_p << 1;
8220       mask |= stack_top_dies;
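      /* For instance (illustration only): a plain non-popping fcom of an
	 FP operand with the stack top surviving gives eflags_p = 0,
	 integer operand = 0, unordered_p = 0 and stack_top_dies = 0,
	 i.e. mask 0, selecting the "fcom%z1\t%y1" template above.  */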
8221 
8222       if (mask >= 24)
8223 	abort ();
8224       ret = alt[mask];
8225       if (ret == NULL)
8226 	abort ();
8227 
8228       return ret;
8229     }
8230 }
8231 
8232 void
8233 ix86_output_addr_vec_elt (FILE *file, int value)
8234 {
8235   const char *directive = ASM_LONG;
8236 
8237   if (TARGET_64BIT)
8238     {
8239 #ifdef ASM_QUAD
8240       directive = ASM_QUAD;
8241 #else
8242       abort ();
8243 #endif
8244     }
8245 
8246   fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
8247 }
8248 
8249 void
8250 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
8251 {
8252   if (TARGET_64BIT)
8253     fprintf (file, "%s%s%d-%s%d\n",
8254 	     ASM_LONG, LPREFIX, value, LPREFIX, rel);
8255   else if (HAVE_AS_GOTOFF_IN_DATA)
8256     fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
8257 #if TARGET_MACHO
8258   else if (TARGET_MACHO)
8259     {
8260       fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
8261       machopic_output_function_base_name (file);
8262       fprintf (file, "\n");
8263     }
8264 #endif
8265   else
8266     asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
8267 		 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
8268 }
8269 
8270 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
8271    for the target.  */
8272 
8273 void
8274 ix86_expand_clear (rtx dest)
8275 {
8276   rtx tmp;
8277 
8278   /* We play register width games, which are only valid after reload.  */
8279   if (!reload_completed)
8280     abort ();
8281 
8282   /* Avoid HImode and its attendant prefix byte.  */
8283   if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
8284     dest = gen_rtx_REG (SImode, REGNO (dest));
8285 
8286   tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
8287 
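  /* When the xor form is chosen below, the insn clobbers the flags, so an
     explicit CLOBBER of the flags register (hard register 17) is attached
     to the pattern; the plain "mov $0" alternative leaves the flags
     untouched.  */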
8288   /* This predicate should match that for movsi_xor and movdi_xor_rex64.  */
8289   if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
8290     {
8291       rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
8292       tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
8293     }
8294 
8295   emit_insn (tmp);
8296 }
8297 
8298 /* X is an unchanging MEM.  If it is a constant pool reference, return
8299    the constant pool rtx, else NULL.  */
8300 
8301 static rtx
8302 maybe_get_pool_constant (rtx x)
8303 {
8304   x = ix86_delegitimize_address (XEXP (x, 0));
8305 
8306   if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8307     return get_pool_constant (x);
8308 
8309   return NULL_RTX;
8310 }
8311 
8312 void
8313 ix86_expand_move (enum machine_mode mode, rtx operands[])
8314 {
8315   int strict = (reload_in_progress || reload_completed);
8316   rtx op0, op1;
8317   enum tls_model model;
8318 
8319   op0 = operands[0];
8320   op1 = operands[1];
8321 
8322   model = tls_symbolic_operand (op1, Pmode);
8323   if (model)
8324     {
8325       op1 = legitimize_tls_address (op1, model, true);
8326       op1 = force_operand (op1, op0);
8327       if (op1 == op0)
8328 	return;
8329     }
8330 
8331   if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
8332     {
8333 #if TARGET_MACHO
8334       if (MACHOPIC_PURE)
8335 	{
8336 	  rtx temp = ((reload_in_progress
8337 		       || ((op0 && GET_CODE (op0) == REG)
8338 			   && mode == Pmode))
8339 		      ? op0 : gen_reg_rtx (Pmode));
8340 	  op1 = machopic_indirect_data_reference (op1, temp);
8341 	  op1 = machopic_legitimize_pic_address (op1, mode,
8342 						 temp == op1 ? 0 : temp);
8343 	}
8344       else if (MACHOPIC_INDIRECT)
8345 	op1 = machopic_indirect_data_reference (op1, 0);
8346       if (op0 == op1)
8347 	return;
8348 #else
8349       if (GET_CODE (op0) == MEM)
8350 	op1 = force_reg (Pmode, op1);
8351       else
8352 	{
8353 	  rtx temp = op0;
8354 	  if (GET_CODE (temp) != REG)
8355 	    temp = gen_reg_rtx (Pmode);
8356 	  temp = legitimize_pic_address (op1, temp);
8357 	  if (temp == op0)
8358 	    return;
8359 	  op1 = temp;
8360 	}
8361 #endif /* TARGET_MACHO */
8362     }
8363   else
8364     {
8365       if (GET_CODE (op0) == MEM
8366 	  && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
8367 	      || !push_operand (op0, mode))
8368 	  && GET_CODE (op1) == MEM)
8369 	op1 = force_reg (mode, op1);
8370 
8371       if (push_operand (op0, mode)
8372 	  && ! general_no_elim_operand (op1, mode))
8373 	op1 = copy_to_mode_reg (mode, op1);
8374 
8375       /* Force large constants in 64-bit compilation into a register
8376 	 to get them CSEed.  */
8377       if (TARGET_64BIT && mode == DImode
8378 	  && immediate_operand (op1, mode)
8379 	  && !x86_64_zero_extended_value (op1)
8380 	  && !register_operand (op0, mode)
8381 	  && optimize && !reload_completed && !reload_in_progress)
8382 	op1 = copy_to_mode_reg (mode, op1);
8383 
8384       if (FLOAT_MODE_P (mode))
8385 	{
8386 	  /* If we are loading a floating point constant to a register,
8387 	     force the value to memory now, since we'll get better code
8388 	     out of the back end.  */
8389 
8390 	  if (strict)
8391 	    ;
8392 	  else if (GET_CODE (op1) == CONST_DOUBLE)
8393 	    {
8394 	      op1 = validize_mem (force_const_mem (mode, op1));
8395 	      if (!register_operand (op0, mode))
8396 		{
8397 		  rtx temp = gen_reg_rtx (mode);
8398 		  emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
8399 		  emit_move_insn (op0, temp);
8400 		  return;
8401 		}
8402 	    }
8403 	}
8404     }
8405 
8406   emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
8407 }
8408 
8409 void
8410 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
8411 {
8412   /* Force constants other than zero into memory.  We do not know how
8413      the instructions used to build constants modify the upper 64 bits
8414      of the register; once we have that information we may be able
8415      to handle some of them more efficiently.  */
8416   if ((reload_in_progress | reload_completed) == 0
8417       && register_operand (operands[0], mode)
8418       && CONSTANT_P (operands[1]) && operands[1] != CONST0_RTX (mode))
8419     operands[1] = validize_mem (force_const_mem (mode, operands[1]));
8420 
8421   /* Make operand1 a register if it isn't already.  */
8422   if (!no_new_pseudos
8423       && !register_operand (operands[0], mode)
8424       && !register_operand (operands[1], mode))
8425     {
8426       rtx temp = force_reg (GET_MODE (operands[1]), operands[1]);
8427       emit_move_insn (operands[0], temp);
8428       return;
8429     }
8430 
8431   emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
8432 }
8433 
8434 /* Attempt to expand a binary operator.  Make the expansion closer to the
8435    actual machine than just general_operand, which would allow 3 separate
8436    memory references (one output, two input) in a single insn.  */
8437 
8438 void
8439 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
8440 			     rtx operands[])
8441 {
8442   int matching_memory;
8443   rtx src1, src2, dst, op, clob;
8444 
8445   dst = operands[0];
8446   src1 = operands[1];
8447   src2 = operands[2];
8448 
8449   /* Recognize <var1> = <value> <op> <var1> for commutative operators */
8450   if (GET_RTX_CLASS (code) == 'c'
8451       && (rtx_equal_p (dst, src2)
8452 	  || immediate_operand (src1, mode)))
8453     {
8454       rtx temp = src1;
8455       src1 = src2;
8456       src2 = temp;
8457     }
8458 
8459   /* If the destination is memory, and we do not have matching source
8460      operands, do things in registers.  */
8461   matching_memory = 0;
8462   if (GET_CODE (dst) == MEM)
8463     {
8464       if (rtx_equal_p (dst, src1))
8465 	matching_memory = 1;
8466       else if (GET_RTX_CLASS (code) == 'c'
8467 	       && rtx_equal_p (dst, src2))
8468 	matching_memory = 2;
8469       else
8470 	dst = gen_reg_rtx (mode);
8471     }
8472 
8473   /* Both source operands cannot be in memory.  */
8474   if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
8475     {
8476       if (matching_memory != 2)
8477 	src2 = force_reg (mode, src2);
8478       else
8479 	src1 = force_reg (mode, src1);
8480     }
8481 
8482   /* If the operation is not commutative, source 1 cannot be a constant
8483      or non-matching memory.  */
8484   if ((CONSTANT_P (src1)
8485        || (!matching_memory && GET_CODE (src1) == MEM))
8486       && GET_RTX_CLASS (code) != 'c')
8487     src1 = force_reg (mode, src1);
8488 
8489   /* If optimizing, copy to regs to improve CSE */
8490   if (optimize && ! no_new_pseudos)
8491     {
8492       if (GET_CODE (dst) == MEM)
8493 	dst = gen_reg_rtx (mode);
8494       if (GET_CODE (src1) == MEM)
8495 	src1 = force_reg (mode, src1);
8496       if (GET_CODE (src2) == MEM)
8497 	src2 = force_reg (mode, src2);
8498     }
8499 
8500   /* Emit the instruction.  */
8501 
8502   op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
8503   if (reload_in_progress)
8504     {
8505       /* Reload doesn't know about the flags register, and doesn't know that
8506          it doesn't want to clobber it.  We can only do this with PLUS.  */
8507       if (code != PLUS)
8508 	abort ();
8509       emit_insn (op);
8510     }
8511   else
8512     {
8513       clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
8514       emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
8515     }
8516 
8517   /* Fix up the destination if needed.  */
8518   if (dst != operands[0])
8519     emit_move_insn (operands[0], dst);
8520 }
8521 
8522 /* Return TRUE or FALSE depending on whether the binary operator meets the
8523    appropriate constraints.  */
8524 
8525 int
8526 ix86_binary_operator_ok (enum rtx_code code,
8527 			 enum machine_mode mode ATTRIBUTE_UNUSED,
8528 			 rtx operands[3])
8529 {
8530   /* Both source operands cannot be in memory.  */
8531   if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
8532     return 0;
8533   /* If the operation is not commutative, source 1 cannot be a constant.  */
8534   if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != 'c')
8535     return 0;
8536   /* If the destination is memory, we must have a matching source operand.  */
8537   if (GET_CODE (operands[0]) == MEM
8538       && ! (rtx_equal_p (operands[0], operands[1])
8539 	    || (GET_RTX_CLASS (code) == 'c'
8540 		&& rtx_equal_p (operands[0], operands[2]))))
8541     return 0;
8542   /* If the operation is not commutative and source 1 is memory, we must
8543      have a matching destination.  */
8544   if (GET_CODE (operands[1]) == MEM
8545       && GET_RTX_CLASS (code) != 'c'
8546       && ! rtx_equal_p (operands[0], operands[1]))
8547     return 0;
8548   return 1;
8549 }
8550 
8551 /* Attempt to expand a unary operator.  Make the expansion closer to the
8552    actual machine than just general_operand, which would allow 2 separate
8553    memory references (one output, one input) in a single insn.  */
8554 
8555 void
8556 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
8557 			    rtx operands[])
8558 {
8559   int matching_memory;
8560   rtx src, dst, op, clob;
8561 
8562   dst = operands[0];
8563   src = operands[1];
8564 
8565   /* If the destination is memory, and we do not have matching source
8566      operands, do things in registers.  */
8567   matching_memory = 0;
8568   if (GET_CODE (dst) == MEM)
8569     {
8570       if (rtx_equal_p (dst, src))
8571 	matching_memory = 1;
8572       else
8573 	dst = gen_reg_rtx (mode);
8574     }
8575 
8576   /* When the source operand is memory, the destination must match.  */
8577   if (!matching_memory && GET_CODE (src) == MEM)
8578     src = force_reg (mode, src);
8579 
8580   /* If optimizing, copy to regs to improve CSE */
8581   if (optimize && ! no_new_pseudos)
8582     {
8583       if (GET_CODE (dst) == MEM)
8584 	dst = gen_reg_rtx (mode);
8585       if (GET_CODE (src) == MEM)
8586 	src = force_reg (mode, src);
8587     }
8588 
8589   /* Emit the instruction.  */
8590 
8591   op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
8592   if (reload_in_progress || code == NOT)
8593     {
8594       /* Reload doesn't know about the flags register, and doesn't know that
8595          it doesn't want to clobber it.  */
8596       if (code != NOT)
8597         abort ();
8598       emit_insn (op);
8599     }
8600   else
8601     {
8602       clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
8603       emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
8604     }
8605 
8606   /* Fix up the destination if needed.  */
8607   if (dst != operands[0])
8608     emit_move_insn (operands[0], dst);
8609 }
8610 
8611 /* Return TRUE or FALSE depending on whether the unary operator meets the
8612    appropriate constraints.  */
8613 
8614 int
8615 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
8616 			enum machine_mode mode ATTRIBUTE_UNUSED,
8617 			rtx operands[2] ATTRIBUTE_UNUSED)
8618 {
8619   /* If one of the operands is memory, the source and destination must match.  */
8620   if ((GET_CODE (operands[0]) == MEM
8621        || GET_CODE (operands[1]) == MEM)
8622       && ! rtx_equal_p (operands[0], operands[1]))
8623     return FALSE;
8624   return TRUE;
8625 }
8626 
8627 /* Return TRUE or FALSE depending on whether the first SET in INSN
8628    has source and destination with matching CC modes, and that the
8629    CC mode is at least as constrained as REQ_MODE.  */
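/* For example, a SET whose destination has CCNOmode satisfies a request
   for CCmode only when the comparison's second operand is const0_rtx.  */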
8630 
8631 int
8632 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
8633 {
8634   rtx set;
8635   enum machine_mode set_mode;
8636 
8637   set = PATTERN (insn);
8638   if (GET_CODE (set) == PARALLEL)
8639     set = XVECEXP (set, 0, 0);
8640   if (GET_CODE (set) != SET)
8641     abort ();
8642   if (GET_CODE (SET_SRC (set)) != COMPARE)
8643     abort ();
8644 
8645   set_mode = GET_MODE (SET_DEST (set));
8646   switch (set_mode)
8647     {
8648     case CCNOmode:
8649       if (req_mode != CCNOmode
8650 	  && (req_mode != CCmode
8651 	      || XEXP (SET_SRC (set), 1) != const0_rtx))
8652 	return 0;
8653       break;
8654     case CCmode:
8655       if (req_mode == CCGCmode)
8656 	return 0;
8657       /* FALLTHRU */
8658     case CCGCmode:
8659       if (req_mode == CCGOCmode || req_mode == CCNOmode)
8660 	return 0;
8661       /* FALLTHRU */
8662     case CCGOCmode:
8663       if (req_mode == CCZmode)
8664 	return 0;
8665       /* FALLTHRU */
8666     case CCZmode:
8667       break;
8668 
8669     default:
8670       abort ();
8671     }
8672 
8673   return (GET_MODE (SET_SRC (set)) == set_mode);
8674 }
8675 
8676 /* Generate insn patterns to do an integer compare of OPERANDS.  */
8677 
8678 static rtx
8679 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
8680 {
8681   enum machine_mode cmpmode;
8682   rtx tmp, flags;
8683 
8684   cmpmode = SELECT_CC_MODE (code, op0, op1);
8685   flags = gen_rtx_REG (cmpmode, FLAGS_REG);
8686 
8687   /* This is very simple, but making the interface the same as in the
8688      FP case makes the rest of the code easier.  */
8689   tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
8690   emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
8691 
8692   /* Return the test that should be put into the flags user, i.e.
8693      the bcc, scc, or cmov instruction.  */
8694   return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
8695 }
8696 
8697 /* Figure out whether to use ordered or unordered fp comparisons.
8698    Return the appropriate mode to use.  */
8699 
8700 enum machine_mode
8701 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
8702 {
8703   /* ??? In order to make all comparisons reversible, we do all comparisons
8704      non-trapping when compiling for IEEE.  Once gcc is able to distinguish
8705      all forms of trapping and nontrapping comparisons, we can make inequality
8706      comparisons trapping again, since it results in better code when using
8707      FCOM based compares.  */
8708   return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
8709 }
8710 
8711 enum machine_mode
8712 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
8713 {
8714   if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
8715     return ix86_fp_compare_mode (code);
8716   switch (code)
8717     {
8718       /* Only zero flag is needed.  */
8719     case EQ:			/* ZF=0 */
8720     case NE:			/* ZF!=0 */
8721       return CCZmode;
8722       /* Codes needing carry flag.  */
8723     case GEU:			/* CF=0 */
8724     case GTU:			/* CF=0 & ZF=0 */
8725     case LTU:			/* CF=1 */
8726     case LEU:			/* CF=1 | ZF=1 */
8727       return CCmode;
8728       /* Codes possibly doable only with sign flag when
8729          comparing against zero.  */
8730     case GE:			/* SF=OF   or   SF=0 */
8731     case LT:			/* SF<>OF  or   SF=1 */
8732       if (op1 == const0_rtx)
8733 	return CCGOCmode;
8734       else
8735 	/* For other cases the carry flag is not required.  */
8736 	return CCGCmode;
8737       /* Codes doable only with sign flag when comparing
8738          against zero, but we miss jump instruction for it
8739          so we need to use relational tests against overflow
8740          that thus needs to be zero.  */
8741     case GT:			/* ZF=0 & SF=OF */
8742     case LE:			/* ZF=1 | SF<>OF */
8743       if (op1 == const0_rtx)
8744 	return CCNOmode;
8745       else
8746 	return CCGCmode;
8747       /* The strcmp pattern does a (use flags), and combine may ask us for
8748 	 the proper mode.  */
8749     case USE:
8750       return CCmode;
8751     default:
8752       abort ();
8753     }
8754 }
8755 
8756 /* Return the fixed registers used for condition codes.  */
8757 
8758 static bool
8759 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
8760 {
8761   *p1 = FLAGS_REG;
8762   *p2 = FPSR_REG;
8763   return true;
8764 }
8765 
8766 /* If two condition code modes are compatible, return a condition code
8767    mode which is compatible with both.  Otherwise, return
8768    VOIDmode.  */
8769 
8770 static enum machine_mode
8771 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
8772 {
8773   if (m1 == m2)
8774     return m1;
8775 
8776   if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
8777     return VOIDmode;
8778 
8779   if ((m1 == CCGCmode && m2 == CCGOCmode)
8780       || (m1 == CCGOCmode && m2 == CCGCmode))
8781     return CCGCmode;
8782 
8783   switch (m1)
8784     {
8785     default:
8786       abort ();
8787 
8788     case CCmode:
8789     case CCGCmode:
8790     case CCGOCmode:
8791     case CCNOmode:
8792     case CCZmode:
8793       switch (m2)
8794 	{
8795 	default:
8796 	  return VOIDmode;
8797 
8798 	case CCmode:
8799 	case CCGCmode:
8800 	case CCGOCmode:
8801 	case CCNOmode:
8802 	case CCZmode:
8803 	  return CCmode;
8804 	}
8805 
8806     case CCFPmode:
8807     case CCFPUmode:
8808       /* These are only compatible with themselves, which we already
8809 	 checked above.  */
8810       return VOIDmode;
8811     }
8812 }
8813 
8814 /* Return true if we should use an FCOMI instruction for this fp comparison.  */
8815 
8816 int
8817 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
8818 {
8819   enum rtx_code swapped_code = swap_condition (code);
8820   return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
8821 	  || (ix86_fp_comparison_cost (swapped_code)
8822 	      == ix86_fp_comparison_fcomi_cost (swapped_code)));
8823 }
8824 
8825 /* Swap, force into registers, or otherwise massage the two operands
8826    to a fp comparison.  The operands are updated in place; the new
8827    comparison code is returned.  */
8828 
8829 static enum rtx_code
8830 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
8831 {
8832   enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
8833   rtx op0 = *pop0, op1 = *pop1;
8834   enum machine_mode op_mode = GET_MODE (op0);
8835   int is_sse = SSE_REG_P (op0) | SSE_REG_P (op1);
8836 
8837   /* All of the unordered compare instructions only work on registers.
8838      The same is true of the XFmode compare instructions.  The same is
8839      true of the fcomi compare instructions.  */
8840 
8841   if (!is_sse
8842       && (fpcmp_mode == CCFPUmode
8843 	  || op_mode == XFmode
8844 	  || ix86_use_fcomi_compare (code)))
8845     {
8846       op0 = force_reg (op_mode, op0);
8847       op1 = force_reg (op_mode, op1);
8848     }
8849   else
8850     {
8851       /* %%% We only allow op1 in memory; op0 must be st(0).  So swap
8852 	 things around if they appear profitable, otherwise force op0
8853 	 into a register.  */
8854 
8855       if (standard_80387_constant_p (op0) == 0
8856 	  || (GET_CODE (op0) == MEM
8857 	      && ! (standard_80387_constant_p (op1) == 0
8858 		    || GET_CODE (op1) == MEM)))
8859 	{
8860 	  rtx tmp;
8861 	  tmp = op0, op0 = op1, op1 = tmp;
8862 	  code = swap_condition (code);
8863 	}
8864 
8865       if (GET_CODE (op0) != REG)
8866 	op0 = force_reg (op_mode, op0);
8867 
8868       if (CONSTANT_P (op1))
8869 	{
8870 	  if (standard_80387_constant_p (op1))
8871 	    op1 = force_reg (op_mode, op1);
8872 	  else
8873 	    op1 = validize_mem (force_const_mem (op_mode, op1));
8874 	}
8875     }
8876 
8877   /* Try to rearrange the comparison to make it cheaper.  */
8878   if (ix86_fp_comparison_cost (code)
8879       > ix86_fp_comparison_cost (swap_condition (code))
8880       && (GET_CODE (op1) == REG || !no_new_pseudos))
8881     {
8882       rtx tmp;
8883       tmp = op0, op0 = op1, op1 = tmp;
8884       code = swap_condition (code);
8885       if (GET_CODE (op0) != REG)
8886 	op0 = force_reg (op_mode, op0);
8887     }
8888 
8889   *pop0 = op0;
8890   *pop1 = op1;
8891   return code;
8892 }
8893 
8894 /* Convert the comparison codes we use to represent FP comparisons into the
8895    integer code that will result in a proper branch.  Return UNKNOWN if no
8896    such code is available.  */
8897 static enum rtx_code
8898 ix86_fp_compare_code_to_integer (enum rtx_code code)
8899 {
8900   switch (code)
8901     {
8902     case GT:
8903       return GTU;
8904     case GE:
8905       return GEU;
8906     case ORDERED:
8907     case UNORDERED:
8908       return code;
8909       break;
8910     case UNEQ:
8911       return EQ;
8912       break;
8913     case UNLT:
8914       return LTU;
8915       break;
8916     case UNLE:
8917       return LEU;
8918       break;
8919     case LTGT:
8920       return NE;
8921       break;
8922     default:
8923       return UNKNOWN;
8924     }
8925 }
8926 
8927 /* Split comparison code CODE into comparisons we can do using branch
8928    instructions.  BYPASS_CODE is the comparison code for the branch that
8929    will branch around FIRST_CODE and SECOND_CODE.  If one of the branches
8930    is not required, its code is set to NIL.
8931    We never require more than two branches.  */
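/* For example, under IEEE math an EQ compare is split into a UNEQ test
   plus an UNORDERED bypass branch, since ZF=1 alone cannot distinguish
   equality from an unordered (NaN) operand pair.  */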
8932 static void
8933 ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
8934 			  enum rtx_code *first_code,
8935 			  enum rtx_code *second_code)
8936 {
8937   *first_code = code;
8938   *bypass_code = NIL;
8939   *second_code = NIL;
8940 
8941   /* The fcomi comparison sets flags as follows:
8942 
8943      cmp    ZF PF CF
8944      >      0  0  0
8945      <      0  0  1
8946      =      1  0  0
8947      un     1  1  1 */
8948 
8949   switch (code)
8950     {
8951     case GT:			/* GTU - CF=0 & ZF=0 */
8952     case GE:			/* GEU - CF=0 */
8953     case ORDERED:		/* PF=0 */
8954     case UNORDERED:		/* PF=1 */
8955     case UNEQ:			/* EQ - ZF=1 */
8956     case UNLT:			/* LTU - CF=1 */
8957     case UNLE:			/* LEU - CF=1 | ZF=1 */
8958     case LTGT:			/* EQ - ZF=0 */
8959       break;
8960     case LT:			/* LTU - CF=1 - fails on unordered */
8961       *first_code = UNLT;
8962       *bypass_code = UNORDERED;
8963       break;
8964     case LE:			/* LEU - CF=1 | ZF=1 - fails on unordered */
8965       *first_code = UNLE;
8966       *bypass_code = UNORDERED;
8967       break;
8968     case EQ:			/* EQ - ZF=1 - fails on unordered */
8969       *first_code = UNEQ;
8970       *bypass_code = UNORDERED;
8971       break;
8972     case NE:			/* NE - ZF=0 - fails on unordered */
8973       *first_code = LTGT;
8974       *second_code = UNORDERED;
8975       break;
8976     case UNGE:			/* GEU - CF=0 - fails on unordered */
8977       *first_code = GE;
8978       *second_code = UNORDERED;
8979       break;
8980     case UNGT:			/* GTU - CF=0 & ZF=0 - fails on unordered */
8981       *first_code = GT;
8982       *second_code = UNORDERED;
8983       break;
8984     default:
8985       abort ();
8986     }
8987   if (!TARGET_IEEE_FP)
8988     {
8989       *second_code = NIL;
8990       *bypass_code = NIL;
8991     }
8992 }
8993 
8994 /* Return the cost of a comparison done using fcom + arithmetic operations
8995    on AX.  All following functions use the number of instructions as a cost
8996    metric.  In the future this should be tweaked to compute bytes for
8997    optimize_size and take into account the performance of various instructions on various CPUs.  */
8998 static int
8999 ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
9000 {
9001   if (!TARGET_IEEE_FP)
9002     return 4;
9003   /* The cost of code output by ix86_expand_fp_compare.  */
9004   switch (code)
9005     {
9006     case UNLE:
9007     case UNLT:
9008     case LTGT:
9009     case GT:
9010     case GE:
9011     case UNORDERED:
9012     case ORDERED:
9013     case UNEQ:
9014       return 4;
9015       break;
9016     case LT:
9017     case NE:
9018     case EQ:
9019     case UNGE:
9020       return 5;
9021       break;
9022     case LE:
9023     case UNGT:
9024       return 6;
9025       break;
9026     default:
9027       abort ();
9028     }
9029 }
9030 
9031 /* Return cost of comparison done using fcomi operation.
9032    See ix86_fp_comparison_arithmetics_cost for the metrics.  */
9033 static int
9034 ix86_fp_comparison_fcomi_cost (enum rtx_code code)
9035 {
9036   enum rtx_code bypass_code, first_code, second_code;
9037   /* Return arbitrarily high cost when instruction is not supported - this
9038      prevents gcc from using it.  */
9039   if (!TARGET_CMOVE)
9040     return 1024;
9041   ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9042   return (bypass_code != NIL || second_code != NIL) + 2;
9043 }
9044 
9045 /* Return cost of comparison done using sahf operation.
9046    See ix86_fp_comparison_arithmetics_cost for the metrics.  */
9047 static int
9048 ix86_fp_comparison_sahf_cost (enum rtx_code code)
9049 {
9050   enum rtx_code bypass_code, first_code, second_code;
9051   /* Return arbitrarily high cost when instruction is not preferred - this
9052      prevents gcc from using it.  */
9053   if (!TARGET_USE_SAHF && !optimize_size)
9054     return 1024;
9055   ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9056   return (bypass_code != NIL || second_code != NIL) + 3;
9057 }
9058 
9059 /* Compute cost of the comparison done using any method.
9060    See ix86_fp_comparison_arithmetics_cost for the metrics.  */
9061 static int
9062 ix86_fp_comparison_cost (enum rtx_code code)
9063 {
9064   int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
9065   int min;
9066 
9067   fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
9068   sahf_cost = ix86_fp_comparison_sahf_cost (code);
9069 
9070   min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
9071   if (min > sahf_cost)
9072     min = sahf_cost;
9073   if (min > fcomi_cost)
9074     min = fcomi_cost;
9075   return min;
9076 }
9077 
9078 /* Generate insn patterns to do a floating point compare of OPERANDS.  */
9079 
9080 static rtx
9081 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
9082 			rtx *second_test, rtx *bypass_test)
9083 {
9084   enum machine_mode fpcmp_mode, intcmp_mode;
9085   rtx tmp, tmp2;
9086   int cost = ix86_fp_comparison_cost (code);
9087   enum rtx_code bypass_code, first_code, second_code;
9088 
9089   fpcmp_mode = ix86_fp_compare_mode (code);
9090   code = ix86_prepare_fp_compare_args (code, &op0, &op1);
9091 
9092   if (second_test)
9093     *second_test = NULL_RTX;
9094   if (bypass_test)
9095     *bypass_test = NULL_RTX;
9096 
9097   ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9098 
9099   /* Do fcomi/sahf based test when profitable.  */
9100   if ((bypass_code == NIL || bypass_test)
9101       && (second_code == NIL || second_test)
9102       && ix86_fp_comparison_arithmetics_cost (code) > cost)
9103     {
9104       if (TARGET_CMOVE)
9105 	{
9106 	  tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
9107 	  tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
9108 			     tmp);
9109 	  emit_insn (tmp);
9110 	}
9111       else
9112 	{
9113 	  tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
9114 	  tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
9115 	  if (!scratch)
9116 	    scratch = gen_reg_rtx (HImode);
9117 	  emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
9118 	  emit_insn (gen_x86_sahf_1 (scratch));
9119 	}
9120 
9121       /* The FP codes work out to act like unsigned.  */
9122       intcmp_mode = fpcmp_mode;
9123       code = first_code;
9124       if (bypass_code != NIL)
9125 	*bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
9126 				       gen_rtx_REG (intcmp_mode, FLAGS_REG),
9127 				       const0_rtx);
9128       if (second_code != NIL)
9129 	*second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
9130 				       gen_rtx_REG (intcmp_mode, FLAGS_REG),
9131 				       const0_rtx);
9132     }
9133   else
9134     {
9135       /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first.  */
9136       tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
9137       tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
9138       if (!scratch)
9139 	scratch = gen_reg_rtx (HImode);
9140       emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
9141 
9142       /* In the unordered case, we have to check C2 for NaN's, which
9143 	 doesn't happen to work out to anything nice combination-wise.
9144 	 So do some bit twiddling on the value we've got in AH to come
9145 	 up with an appropriate set of condition codes.  */
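      /* After fnstsw, AH holds the x87 condition bits: C0 in 0x01, C2 in
	 0x04 and C3 in 0x40; the 0x45 masks used below therefore select
	 C3|C2|C0.  */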
9146 
9147       intcmp_mode = CCNOmode;
9148       switch (code)
9149 	{
9150 	case GT:
9151 	case UNGT:
9152 	  if (code == GT || !TARGET_IEEE_FP)
9153 	    {
9154 	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
9155 	      code = EQ;
9156 	    }
9157 	  else
9158 	    {
9159 	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9160 	      emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
9161 	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
9162 	      intcmp_mode = CCmode;
9163 	      code = GEU;
9164 	    }
9165 	  break;
9166 	case LT:
9167 	case UNLT:
9168 	  if (code == LT && TARGET_IEEE_FP)
9169 	    {
9170 	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9171 	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
9172 	      intcmp_mode = CCmode;
9173 	      code = EQ;
9174 	    }
9175 	  else
9176 	    {
9177 	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
9178 	      code = NE;
9179 	    }
9180 	  break;
9181 	case GE:
9182 	case UNGE:
9183 	  if (code == GE || !TARGET_IEEE_FP)
9184 	    {
9185 	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
9186 	      code = EQ;
9187 	    }
9188 	  else
9189 	    {
9190 	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9191 	      emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
9192 					     GEN_INT (0x01)));
9193 	      code = NE;
9194 	    }
9195 	  break;
9196 	case LE:
9197 	case UNLE:
9198 	  if (code == LE && TARGET_IEEE_FP)
9199 	    {
9200 	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9201 	      emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
9202 	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
9203 	      intcmp_mode = CCmode;
9204 	      code = LTU;
9205 	    }
9206 	  else
9207 	    {
9208 	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
9209 	      code = NE;
9210 	    }
9211 	  break;
9212 	case EQ:
9213 	case UNEQ:
9214 	  if (code == EQ && TARGET_IEEE_FP)
9215 	    {
9216 	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9217 	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
9218 	      intcmp_mode = CCmode;
9219 	      code = EQ;
9220 	    }
9221 	  else
9222 	    {
9223 	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
9224 	      code = NE;
9225 	      break;
9226 	    }
9227 	  break;
9228 	case NE:
9229 	case LTGT:
9230 	  if (code == NE && TARGET_IEEE_FP)
9231 	    {
9232 	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9233 	      emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
9234 					     GEN_INT (0x40)));
9235 	      code = NE;
9236 	    }
9237 	  else
9238 	    {
9239 	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
9240 	      code = EQ;
9241 	    }
9242 	  break;
9243 
9244 	case UNORDERED:
9245 	  emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
9246 	  code = NE;
9247 	  break;
9248 	case ORDERED:
9249 	  emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
9250 	  code = EQ;
9251 	  break;
9252 
9253 	default:
9254 	  abort ();
9255 	}
9256     }
9257 
9258   /* Return the test that should be put into the flags user, i.e.
9259      the bcc, scc, or cmov instruction.  */
9260   return gen_rtx_fmt_ee (code, VOIDmode,
9261 			 gen_rtx_REG (intcmp_mode, FLAGS_REG),
9262 			 const0_rtx);
9263 }
9264 
9265 rtx
9266 ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
9267 {
9268   rtx op0, op1, ret;
9269   op0 = ix86_compare_op0;
9270   op1 = ix86_compare_op1;
9271 
9272   if (second_test)
9273     *second_test = NULL_RTX;
9274   if (bypass_test)
9275     *bypass_test = NULL_RTX;
9276 
9277   if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
9278     ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
9279 				  second_test, bypass_test);
9280   else
9281     ret = ix86_expand_int_compare (code, op0, op1);
9282 
9283   return ret;
9284 }
9285 
9286 /* Return true if the CODE will result in a nontrivial jump sequence.  */
9287 bool
9288 ix86_fp_jump_nontrivial_p (enum rtx_code code)
9289 {
9290   enum rtx_code bypass_code, first_code, second_code;
9291   if (!TARGET_CMOVE)
9292     return true;
9293   ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9294   return bypass_code != NIL || second_code != NIL;
9295 }
9296 
9297 void
9298 ix86_expand_branch (enum rtx_code code, rtx label)
9299 {
9300   rtx tmp;
9301 
9302   switch (GET_MODE (ix86_compare_op0))
9303     {
9304     case QImode:
9305     case HImode:
9306     case SImode:
9307       simple:
9308       tmp = ix86_expand_compare (code, NULL, NULL);
9309       tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
9310 				  gen_rtx_LABEL_REF (VOIDmode, label),
9311 				  pc_rtx);
9312       emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
9313       return;
9314 
9315     case SFmode:
9316     case DFmode:
9317     case XFmode:
9318       {
9319 	rtvec vec;
9320 	int use_fcomi;
9321 	enum rtx_code bypass_code, first_code, second_code;
9322 
9323 	code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
9324 					     &ix86_compare_op1);
9325 
9326 	ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9327 
9328 	/* Check whether we will use the natural sequence with one jump.  If
9329 	   so, we can expand the jump early.  Otherwise delay expansion by
9330 	   creating a compound insn so as not to confuse the optimizers.  */
9331 	if (bypass_code == NIL && second_code == NIL
9332 	    && TARGET_CMOVE)
9333 	  {
9334 	    ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
9335 				  gen_rtx_LABEL_REF (VOIDmode, label),
9336 				  pc_rtx, NULL_RTX);
9337 	  }
9338 	else
9339 	  {
9340 	    tmp = gen_rtx_fmt_ee (code, VOIDmode,
9341 				  ix86_compare_op0, ix86_compare_op1);
9342 	    tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
9343 					gen_rtx_LABEL_REF (VOIDmode, label),
9344 					pc_rtx);
9345 	    tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
9346 
9347 	    use_fcomi = ix86_use_fcomi_compare (code);
9348 	    vec = rtvec_alloc (3 + !use_fcomi);
9349 	    RTVEC_ELT (vec, 0) = tmp;
9350 	    RTVEC_ELT (vec, 1)
9351 	      = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
9352 	    RTVEC_ELT (vec, 2)
9353 	      = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
9354 	    if (! use_fcomi)
9355 	      RTVEC_ELT (vec, 3)
9356 		= gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
9357 
9358 	    emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
9359 	  }
9360 	return;
9361       }
9362 
9363     case DImode:
9364       if (TARGET_64BIT)
9365 	goto simple;
9366       /* Expand DImode branch into multiple compare+branch.  */
9367       {
9368 	rtx lo[2], hi[2], label2;
9369 	enum rtx_code code1, code2, code3;
9370 
9371 	if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
9372 	  {
9373 	    tmp = ix86_compare_op0;
9374 	    ix86_compare_op0 = ix86_compare_op1;
9375 	    ix86_compare_op1 = tmp;
9376 	    code = swap_condition (code);
9377 	  }
9378 	split_di (&ix86_compare_op0, 1, lo+0, hi+0);
9379 	split_di (&ix86_compare_op1, 1, lo+1, hi+1);
9380 
9381 	/* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
9382 	   avoid two branches.  This costs one extra insn, so disable when
9383 	   optimizing for size.  */
9384 
9385 	if ((code == EQ || code == NE)
9386 	    && (!optimize_size
9387 	        || hi[1] == const0_rtx || lo[1] == const0_rtx))
9388 	  {
9389 	    rtx xor0, xor1;
9390 
9391 	    xor1 = hi[0];
9392 	    if (hi[1] != const0_rtx)
9393 	      xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
9394 				   NULL_RTX, 0, OPTAB_WIDEN);
9395 
9396 	    xor0 = lo[0];
9397 	    if (lo[1] != const0_rtx)
9398 	      xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
9399 				   NULL_RTX, 0, OPTAB_WIDEN);
9400 
9401 	    tmp = expand_binop (SImode, ior_optab, xor1, xor0,
9402 				NULL_RTX, 0, OPTAB_WIDEN);
9403 
9404 	    ix86_compare_op0 = tmp;
9405 	    ix86_compare_op1 = const0_rtx;
9406 	    ix86_expand_branch (code, label);
9407 	    return;
9408 	  }
9409 
9410 	/* Otherwise, if we are doing less-than or greater-or-equal-than,
9411 	   op1 is a constant, and the low word is zero, then we can just
9412 	   examine the high word.  */
9413 
9414 	if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
9415 	  switch (code)
9416 	    {
9417 	    case LT: case LTU: case GE: case GEU:
9418 	      ix86_compare_op0 = hi[0];
9419 	      ix86_compare_op1 = hi[1];
9420 	      ix86_expand_branch (code, label);
9421 	      return;
9422 	    default:
9423 	      break;
9424 	    }
9425 
9426 	/* Otherwise, we need two or three jumps.  */
9427 
9428 	label2 = gen_label_rtx ();
9429 
9430 	code1 = code;
9431 	code2 = swap_condition (code);
9432 	code3 = unsigned_condition (code);
9433 
9434 	switch (code)
9435 	  {
9436 	  case LT: case GT: case LTU: case GTU:
9437 	    break;
9438 
9439 	  case LE:   code1 = LT;  code2 = GT;  break;
9440 	  case GE:   code1 = GT;  code2 = LT;  break;
9441 	  case LEU:  code1 = LTU; code2 = GTU; break;
9442 	  case GEU:  code1 = GTU; code2 = LTU; break;
9443 
9444 	  case EQ:   code1 = NIL; code2 = NE;  break;
9445 	  case NE:   code2 = NIL; break;
9446 
9447 	  default:
9448 	    abort ();
9449 	  }
9450 
9451 	/*
9452 	 * a < b =>
9453 	 *    if (hi(a) < hi(b)) goto true;
9454 	 *    if (hi(a) > hi(b)) goto false;
9455 	 *    if (lo(a) < lo(b)) goto true;
9456 	 *  false:
9457 	 */
9458 
9459 	ix86_compare_op0 = hi[0];
9460 	ix86_compare_op1 = hi[1];
9461 
9462 	if (code1 != NIL)
9463 	  ix86_expand_branch (code1, label);
9464 	if (code2 != NIL)
9465 	  ix86_expand_branch (code2, label2);
9466 
9467 	ix86_compare_op0 = lo[0];
9468 	ix86_compare_op1 = lo[1];
9469 	ix86_expand_branch (code3, label);
9470 
9471 	if (code2 != NIL)
9472 	  emit_label (label2);
9473 	return;
9474       }
9475 
9476     default:
9477       abort ();
9478     }
9479 }
9480 
9481 /* Split branch based on floating point condition.  */
9482 void
9483 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
9484 		      rtx target1, rtx target2, rtx tmp)
9485 {
9486   rtx second, bypass;
9487   rtx label = NULL_RTX;
9488   rtx condition;
9489   int bypass_probability = -1, second_probability = -1, probability = -1;
9490   rtx i;
9491 
9492   if (target2 != pc_rtx)
9493     {
9494       rtx tmp = target2;
9495       code = reverse_condition_maybe_unordered (code);
9496       target2 = target1;
9497       target1 = tmp;
9498     }
9499 
9500   condition = ix86_expand_fp_compare (code, op1, op2,
9501 				      tmp, &second, &bypass);
9502 
9503   if (split_branch_probability >= 0)
9504     {
9505       /* Distribute the probabilities across the jumps.
9506 	 Assume that BYPASS and SECOND always test
9507 	 for UNORDERED.  */
9508       probability = split_branch_probability;
9509 
9510       /* A value of 1 is low enough that there is no need for the probability
9511 	 to be updated.  Later we may run some experiments and see
9512 	 if unordered values are more frequent in practice.  */
9513       if (bypass)
9514 	bypass_probability = 1;
9515       if (second)
9516 	second_probability = 1;
9517     }
9518   if (bypass != NULL_RTX)
9519     {
9520       label = gen_label_rtx ();
9521       i = emit_jump_insn (gen_rtx_SET
9522 			  (VOIDmode, pc_rtx,
9523 			   gen_rtx_IF_THEN_ELSE (VOIDmode,
9524 						 bypass,
9525 						 gen_rtx_LABEL_REF (VOIDmode,
9526 								    label),
9527 						 pc_rtx)));
9528       if (bypass_probability >= 0)
9529 	REG_NOTES (i)
9530 	  = gen_rtx_EXPR_LIST (REG_BR_PROB,
9531 			       GEN_INT (bypass_probability),
9532 			       REG_NOTES (i));
9533     }
9534   i = emit_jump_insn (gen_rtx_SET
9535 		      (VOIDmode, pc_rtx,
9536 		       gen_rtx_IF_THEN_ELSE (VOIDmode,
9537 					     condition, target1, target2)));
9538   if (probability >= 0)
9539     REG_NOTES (i)
9540       = gen_rtx_EXPR_LIST (REG_BR_PROB,
9541 			   GEN_INT (probability),
9542 			   REG_NOTES (i));
9543   if (second != NULL_RTX)
9544     {
9545       i = emit_jump_insn (gen_rtx_SET
9546 			  (VOIDmode, pc_rtx,
9547 			   gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
9548 						 target2)));
9549       if (second_probability >= 0)
9550 	REG_NOTES (i)
9551 	  = gen_rtx_EXPR_LIST (REG_BR_PROB,
9552 			       GEN_INT (second_probability),
9553 			       REG_NOTES (i));
9554     }
9555   if (label != NULL_RTX)
9556     emit_label (label);
9557 }
9558 
9559 int
9560 ix86_expand_setcc (enum rtx_code code, rtx dest)
9561 {
9562   rtx ret, tmp, tmpreg, equiv;
9563   rtx second_test, bypass_test;
9564 
9565   if (GET_MODE (ix86_compare_op0) == DImode
9566       && !TARGET_64BIT)
9567     return 0; /* FAIL */
9568 
9569   if (GET_MODE (dest) != QImode)
9570     abort ();
9571 
9572   ret = ix86_expand_compare (code, &second_test, &bypass_test);
9573   PUT_MODE (ret, QImode);
9574 
9575   tmp = dest;
9576   tmpreg = dest;
9577 
9578   emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
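  /* If the FP compare produced a second or a bypass test, combine the extra
     setcc result with the main one below: OR for a second test, AND (with
     the condition reversed) for a bypass test.  */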
9579   if (bypass_test || second_test)
9580     {
9581       rtx test = second_test;
9582       int bypass = 0;
9583       rtx tmp2 = gen_reg_rtx (QImode);
9584       if (bypass_test)
9585 	{
9586 	  if (second_test)
9587 	    abort ();
9588 	  test = bypass_test;
9589 	  bypass = 1;
9590 	  PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
9591 	}
9592       PUT_MODE (test, QImode);
9593       emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
9594 
9595       if (bypass)
9596 	emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
9597       else
9598 	emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
9599     }
9600 
9601   /* Attach a REG_EQUAL note describing the comparison result.  */
9602   equiv = simplify_gen_relational (code, QImode,
9603 				   GET_MODE (ix86_compare_op0),
9604 				   ix86_compare_op0, ix86_compare_op1);
9605   set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
9606 
9607   return 1; /* DONE */
9608 }
9609 
9610 /* Expand a comparison setting or clearing the carry flag.  Return true
9611    when successful, and set *POP to the resulting comparison operation.  */
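/* For example, a==0 is rewritten below as the unsigned compare a < 1,
   which sets the carry flag exactly when a is zero.  */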
9612 static bool
9613 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
9614 {
9615   enum machine_mode mode =
9616     GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
9617 
9618   /* Do not handle DImode compares that go through the special path.  Also we
9619      can't deal with FP compares yet, though it would be possible to add them.  */
9620   if ((mode == DImode && !TARGET_64BIT))
9621     return false;
9622   if (FLOAT_MODE_P (mode))
9623     {
9624       rtx second_test = NULL, bypass_test = NULL;
9625       rtx compare_op, compare_seq;
9626 
9627       /* Shortcut:  the following common codes never translate into carry flag compares.  */
9628       if (code == EQ || code == NE || code == UNEQ || code == LTGT
9629 	  || code == ORDERED || code == UNORDERED)
9630 	return false;
9631 
9632       /* These comparisons require the zero flag; swap the operands so they won't.  */
9633       if ((code == GT || code == UNLE || code == LE || code == UNGT)
9634 	  && !TARGET_IEEE_FP)
9635 	{
9636 	  rtx tmp = op0;
9637 	  op0 = op1;
9638 	  op1 = tmp;
9639 	  code = swap_condition (code);
9640 	}
9641 
9642       /* Try to expand the comparison and verify that we end up with a carry
9643 	 flag based comparison.  This fails to be true only when we decide to
9644 	 expand the comparison using arithmetic, which is not a common scenario.  */
9645       start_sequence ();
9646       compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
9647 					   &second_test, &bypass_test);
9648       compare_seq = get_insns ();
9649       end_sequence ();
9650 
9651       if (second_test || bypass_test)
9652 	return false;
9653       if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
9654 	  || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
9655         code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
9656       else
9657 	code = GET_CODE (compare_op);
9658       if (code != LTU && code != GEU)
9659 	return false;
9660       emit_insn (compare_seq);
9661       *pop = compare_op;
9662       return true;
9663     }
9664   if (!INTEGRAL_MODE_P (mode))
9665     return false;
9666   switch (code)
9667     {
9668     case LTU:
9669     case GEU:
9670       break;
9671 
9672     /* Convert a==0 into (unsigned)a<1.  */
9673     case EQ:
9674     case NE:
9675       if (op1 != const0_rtx)
9676 	return false;
9677       op1 = const1_rtx;
9678       code = (code == EQ ? LTU : GEU);
9679       break;
9680 
9681     /* Convert a>b into b<a or a>=b+1.  */
9682     case GTU:
9683     case LEU:
9684       if (GET_CODE (op1) == CONST_INT)
9685 	{
9686 	  op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
9687 	  /* Bail out on overflow.  We could still swap the operands, but that
9688 	     would force loading the constant into a register.  */
9689 	  if (op1 == const0_rtx
9690 	      || !x86_64_immediate_operand (op1, GET_MODE (op1)))
9691 	    return false;
9692 	  code = (code == GTU ? GEU : LTU);
9693 	}
9694       else
9695 	{
9696 	  rtx tmp = op1;
9697 	  op1 = op0;
9698 	  op0 = tmp;
9699 	  code = (code == GTU ? LTU : GEU);
9700 	}
9701       break;
9702 
9703     /* Convert a>=0 into (unsigned)a<0x80000000.  */
9704     case LT:
9705     case GE:
9706       if (mode == DImode || op1 != const0_rtx)
9707 	return false;
9708       op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
9709       code = (code == LT ? GEU : LTU);
9710       break;
9711     case LE:
9712     case GT:
9713       if (mode == DImode || op1 != constm1_rtx)
9714 	return false;
9715       op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
9716       code = (code == LE ? GEU : LTU);
9717       break;
9718 
9719     default:
9720       return false;
9721     }
9722   /* Swapping operands may cause a constant to appear as the first operand.  */
9723   if (!nonimmediate_operand (op0, VOIDmode))
9724     {
9725       if (no_new_pseudos)
9726 	return false;
9727       op0 = force_reg (mode, op0);
9728     }
9729   ix86_compare_op0 = op0;
9730   ix86_compare_op1 = op1;
9731   *pop = ix86_expand_compare (code, NULL, NULL);
9732   if (GET_CODE (*pop) != LTU && GET_CODE (*pop) != GEU)
9733     abort ();
9734   return true;
9735 }
9736 
9737 int
9738 ix86_expand_int_movcc (rtx operands[])
9739 {
9740   enum rtx_code code = GET_CODE (operands[1]), compare_code;
9741   rtx compare_seq, compare_op;
9742   rtx second_test, bypass_test;
9743   enum machine_mode mode = GET_MODE (operands[0]);
9744   bool sign_bit_compare_p = false;
9745 
9746   start_sequence ();
9747   compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9748   compare_seq = get_insns ();
9749   end_sequence ();
9750 
9751   compare_code = GET_CODE (compare_op);
9752 
9753   if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
9754       || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
9755     sign_bit_compare_p = true;
9756 
9757   /* Don't attempt mode expansion here -- if we had to expand 5 or 6
9758      HImode insns, we'd be swallowed in word prefix ops.  */
9759 
9760   if ((mode != HImode || TARGET_FAST_PREFIX)
9761       && (mode != DImode || TARGET_64BIT)
9762       && GET_CODE (operands[2]) == CONST_INT
9763       && GET_CODE (operands[3]) == CONST_INT)
9764     {
9765       rtx out = operands[0];
9766       HOST_WIDE_INT ct = INTVAL (operands[2]);
9767       HOST_WIDE_INT cf = INTVAL (operands[3]);
9768       HOST_WIDE_INT diff;
9769 
9770       diff = ct - cf;
9771       /* Sign bit compares are better done using shifts than by using
9772 	 sbb.  */
9773       if (sign_bit_compare_p
9774 	  || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
9775 					     ix86_compare_op1, &compare_op))
9776 	{
9777 	  /* Detect overlap between destination and compare sources.  */
9778 	  rtx tmp = out;
9779 
9780           if (!sign_bit_compare_p)
9781 	    {
9782 	      bool fpcmp = false;
9783 
9784 	      compare_code = GET_CODE (compare_op);
9785 
9786 	      if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
9787 		  || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
9788 		{
9789 		  fpcmp = true;
9790 		  compare_code = ix86_fp_compare_code_to_integer (compare_code);
9791 		}
9792 
9793 	      /* To simplify the rest of the code, restrict to the GEU case.  */
9794 	      if (compare_code == LTU)
9795 		{
9796 		  HOST_WIDE_INT tmp = ct;
9797 		  ct = cf;
9798 		  cf = tmp;
9799 		  compare_code = reverse_condition (compare_code);
9800 		  code = reverse_condition (code);
9801 		}
9802 	      else
9803 		{
9804 		  if (fpcmp)
9805 		    PUT_CODE (compare_op,
9806 			      reverse_condition_maybe_unordered
9807 			        (GET_CODE (compare_op)));
9808 		  else
9809 		    PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
9810 		}
9811 	      diff = ct - cf;
9812 
9813 	      if (reg_overlap_mentioned_p (out, ix86_compare_op0)
9814 		  || reg_overlap_mentioned_p (out, ix86_compare_op1))
9815 		tmp = gen_reg_rtx (mode);
9816 
9817 	      if (mode == DImode)
9818 		emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
9819 	      else
9820 		emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
9821 	    }
9822 	  else
9823 	    {
9824 	      if (code == GT || code == GE)
9825 		code = reverse_condition (code);
9826 	      else
9827 		{
9828 		  HOST_WIDE_INT tmp = ct;
9829 		  ct = cf;
9830 		  cf = tmp;
9831 		  diff = ct - cf;
9832 		}
9833 	      tmp = emit_store_flag (tmp, code, ix86_compare_op0,
9834 				     ix86_compare_op1, VOIDmode, 0, -1);
9835 	    }
9836 
9837 	  if (diff == 1)
9838 	    {
9839 	      /*
9840 	       * cmpl op0,op1
9841 	       * sbbl dest,dest
9842 	       * [addl dest, ct]
9843 	       *
9844 	       * Size 5 - 8.
9845 	       */
9846 	      if (ct)
9847 		tmp = expand_simple_binop (mode, PLUS,
9848 					   tmp, GEN_INT (ct),
9849 					   copy_rtx (tmp), 1, OPTAB_DIRECT);
9850 	    }
9851 	  else if (cf == -1)
9852 	    {
9853 	      /*
9854 	       * cmpl op0,op1
9855 	       * sbbl dest,dest
9856 	       * orl $ct, dest
9857 	       *
9858 	       * Size 8.
9859 	       */
9860 	      tmp = expand_simple_binop (mode, IOR,
9861 					 tmp, GEN_INT (ct),
9862 					 copy_rtx (tmp), 1, OPTAB_DIRECT);
9863 	    }
9864 	  else if (diff == -1 && ct)
9865 	    {
9866 	      /*
9867 	       * cmpl op0,op1
9868 	       * sbbl dest,dest
9869 	       * notl dest
9870 	       * [addl dest, cf]
9871 	       *
9872 	       * Size 8 - 11.
9873 	       */
9874 	      tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
9875 	      if (cf)
9876 		tmp = expand_simple_binop (mode, PLUS,
9877 					   copy_rtx (tmp), GEN_INT (cf),
9878 					   copy_rtx (tmp), 1, OPTAB_DIRECT);
9879 	    }
9880 	  else
9881 	    {
9882 	      /*
9883 	       * cmpl op0,op1
9884 	       * sbbl dest,dest
9885 	       * [notl dest]
9886 	       * andl cf - ct, dest
9887 	       * [addl dest, ct]
9888 	       *
9889 	       * Size 8 - 11.
9890 	       */
9891 
9892 	      if (cf == 0)
9893 		{
9894 		  cf = ct;
9895 		  ct = 0;
9896 		  tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
9897 		}
9898 
9899 	      tmp = expand_simple_binop (mode, AND,
9900 					 copy_rtx (tmp),
9901 					 gen_int_mode (cf - ct, mode),
9902 					 copy_rtx (tmp), 1, OPTAB_DIRECT);
9903 	      if (ct)
9904 		tmp = expand_simple_binop (mode, PLUS,
9905 					   copy_rtx (tmp), GEN_INT (ct),
9906 					   copy_rtx (tmp), 1, OPTAB_DIRECT);
9907 	    }
9908 
9909 	  if (!rtx_equal_p (tmp, out))
9910 	    emit_move_insn (copy_rtx (out), copy_rtx (tmp));
9911 
9912 	  return 1; /* DONE */
9913 	}
9914 
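      /* Canonicalize so that diff = ct - cf is positive; e.g. for
	 "(a < b) ? 2 : 7" the constants are swapped and the condition
	 reversed to (a >= b).  For FP compares the maybe_unordered reversal
	 is used (see the comment below).  */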
9915       if (diff < 0)
9916 	{
9917 	  HOST_WIDE_INT tmp;
9918 	  tmp = ct, ct = cf, cf = tmp;
9919 	  diff = -diff;
9920 	  if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
9921 	    {
9922 	      /* We may be reversing an unordered compare to a normal compare,
9923 		 which is not valid in general (we may convert a non-trapping
9924 		 condition into a trapping one); however, on i386 we currently
9925 		 emit all comparisons unordered.  */
9926 	      compare_code = reverse_condition_maybe_unordered (compare_code);
9927 	      code = reverse_condition_maybe_unordered (code);
9928 	    }
9929 	  else
9930 	    {
9931 	      compare_code = reverse_condition (compare_code);
9932 	      code = reverse_condition (code);
9933 	    }
9934 	}
9935 
9936       compare_code = NIL;
9937       if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
9938 	  && GET_CODE (ix86_compare_op1) == CONST_INT)
9939 	{
9940 	  if (ix86_compare_op1 == const0_rtx
9941 	      && (code == LT || code == GE))
9942 	    compare_code = code;
9943 	  else if (ix86_compare_op1 == constm1_rtx)
9944 	    {
9945 	      if (code == LE)
9946 		compare_code = LT;
9947 	      else if (code == GT)
9948 		compare_code = GE;
9949 	    }
9950 	}
9951 
9952       /* Optimize dest = (op0 < 0) ? -1 : cf.  */
9953       if (compare_code != NIL
9954 	  && GET_MODE (ix86_compare_op0) == GET_MODE (out)
9955 	  && (cf == -1 || ct == -1))
9956 	{
9957 	  /* If lea code below could be used, only optimize
9958 	     if it results in a 2 insn sequence.  */
9959 
9960 	  if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
9961 		 || diff == 3 || diff == 5 || diff == 9)
9962 	      || (compare_code == LT && ct == -1)
9963 	      || (compare_code == GE && cf == -1))
9964 	    {
9965 	      /*
9966 	       * notl op1	(if necessary)
9967 	       * sarl $31, op1
9968 	       * orl cf, op1
9969 	       */
9970 	      if (ct != -1)
9971 		{
9972 		  cf = ct;
9973 		  ct = -1;
9974 		  code = reverse_condition (code);
9975 		}
9976 
9977 	      out = emit_store_flag (out, code, ix86_compare_op0,
9978 				     ix86_compare_op1, VOIDmode, 0, -1);
9979 
9980 	      out = expand_simple_binop (mode, IOR,
9981 					 out, GEN_INT (cf),
9982 					 out, 1, OPTAB_DIRECT);
9983 	      if (out != operands[0])
9984 		emit_move_insn (operands[0], out);
9985 
9986 	      return 1; /* DONE */
9987 	    }
9988 	}
9989 
9990 
9991       if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
9992 	   || diff == 3 || diff == 5 || diff == 9)
9993 	  && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
9994 	  && (mode != DImode || x86_64_sign_extended_value (GEN_INT (cf))))
9995 	{
9996 	  /*
9997 	   * xorl dest,dest
9998 	   * cmpl op1,op2
9999 	   * setcc dest
10000 	   * lea cf(dest*(ct-cf)),dest
10001 	   *
10002 	   * Size 14.
10003 	   *
10004 	   * This also catches the degenerate setcc-only case.
10005 	   */
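	  /* Illustrative instance: for ct = 5, cf = 1 (diff = 4) this emits
	     roughly
		xorl  %eax, %eax
		cmpl  op1, op2
		setcc %al
		leal  1(,%eax,4), %eax
	     scaling and offsetting the 0/1 setcc result with a single lea.  */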
10006 
10007 	  rtx tmp;
10008 	  int nops;
10009 
10010 	  out = emit_store_flag (out, code, ix86_compare_op0,
10011 				 ix86_compare_op1, VOIDmode, 0, 1);
10012 
10013 	  nops = 0;
10014 	  /* On x86_64 the lea instruction operates on Pmode, so we need
10015 	     to do the arithmetic in the proper mode to match.  */
10016 	  if (diff == 1)
10017 	    tmp = copy_rtx (out);
10018 	  else
10019 	    {
10020 	      rtx out1;
10021 	      out1 = copy_rtx (out);
10022 	      tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
10023 	      nops++;
10024 	      if (diff & 1)
10025 		{
10026 		  tmp = gen_rtx_PLUS (mode, tmp, out1);
10027 		  nops++;
10028 		}
10029 	    }
10030 	  if (cf != 0)
10031 	    {
10032 	      tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
10033 	      nops++;
10034 	    }
10035 	  if (!rtx_equal_p (tmp, out))
10036 	    {
10037 	      if (nops == 1)
10038 		out = force_operand (tmp, copy_rtx (out));
10039 	      else
10040 		emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
10041 	    }
10042 	  if (!rtx_equal_p (out, operands[0]))
10043 	    emit_move_insn (operands[0], copy_rtx (out));
10044 
10045 	  return 1; /* DONE */
10046 	}
10047 
10048       /*
10049        * General case:			Jumpful:
10050        *   xorl dest,dest		cmpl op1, op2
10051        *   cmpl op1, op2		movl ct, dest
10052        *   setcc dest			jcc 1f
10053        *   decl dest			movl cf, dest
10054        *   andl (cf-ct),dest		1:
10055        *   addl ct,dest
10056        *
10057        * Size 20.			Size 14.
10058        *
10059        * This is reasonably steep, but branch mispredict costs are
10060        * high on modern cpus, so consider failing only if optimizing
10061        * for space.
10062        */
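      /* Worked example of the setcc sequence above: for
	 "x = (a < b) ? 10 : 20" the steps give 1/0 after setcc, 0/-1 after
	 decl, 0/10 after "andl $(20-10)" and 10/20 after "addl $10", i.e.
	 ct for the true arm and cf for the false arm, with no branch.  */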
10063 
10064       if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
10065 	  && BRANCH_COST >= 2)
10066 	{
10067 	  if (cf == 0)
10068 	    {
10069 	      cf = ct;
10070 	      ct = 0;
10071 	      if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
10072 		/* We may be reversing an unordered compare to a normal compare,
10073 		   which is not valid in general (we may convert a non-trapping
10074 		   condition into a trapping one); however, on i386 we currently
10075 		   emit all comparisons unordered.  */
10076 		code = reverse_condition_maybe_unordered (code);
10077 	      else
10078 		{
10079 		  code = reverse_condition (code);
10080 		  if (compare_code != NIL)
10081 		    compare_code = reverse_condition (compare_code);
10082 		}
10083 	    }
10084 
10085 	  if (compare_code != NIL)
10086 	    {
10087 	      /* notl op1	(if needed)
10088 		 sarl $31, op1
10089 		 andl (cf-ct), op1
10090 		 addl ct, op1
10091 
10092 		 For x < 0 (resp. x <= -1) there will be no notl,
10093 		 so if possible swap the constants to get rid of the
10094 		 complement.
10095 		 True/false will be -1/0 while code below (store flag
10096 		 followed by decrement) is 0/-1, so the constants need
10097 		 to be exchanged once more.  */
10098 
10099 	      if (compare_code == GE || !cf)
10100 		{
10101 		  code = reverse_condition (code);
10102 		  compare_code = LT;
10103 		}
10104 	      else
10105 		{
10106 		  HOST_WIDE_INT tmp = cf;
10107 		  cf = ct;
10108 		  ct = tmp;
10109 		}
10110 
10111 	      out = emit_store_flag (out, code, ix86_compare_op0,
10112 				     ix86_compare_op1, VOIDmode, 0, -1);
10113 	    }
10114 	  else
10115 	    {
10116 	      out = emit_store_flag (out, code, ix86_compare_op0,
10117 				     ix86_compare_op1, VOIDmode, 0, 1);
10118 
10119 	      out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
10120 					 copy_rtx (out), 1, OPTAB_DIRECT);
10121 	    }
10122 
10123 	  out = expand_simple_binop (mode, AND, copy_rtx (out),
10124 				     gen_int_mode (cf - ct, mode),
10125 				     copy_rtx (out), 1, OPTAB_DIRECT);
10126 	  if (ct)
10127 	    out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
10128 				       copy_rtx (out), 1, OPTAB_DIRECT);
10129 	  if (!rtx_equal_p (out, operands[0]))
10130 	    emit_move_insn (operands[0], copy_rtx (out));
10131 
10132 	  return 1; /* DONE */
10133 	}
10134     }
10135 
10136   if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
10137     {
10138       /* Try a few things more with specific constants and a variable.  */
10139 
10140       optab op;
10141       rtx var, orig_out, out, tmp;
10142 
10143       if (BRANCH_COST <= 2)
10144 	return 0; /* FAIL */
10145 
10146       /* If one of the two operands is an interesting constant, load a
10147 	 constant with the above and mask it in with a logical operation.  */
10148 
10149       if (GET_CODE (operands[2]) == CONST_INT)
10150 	{
10151 	  var = operands[3];
10152 	  if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
10153 	    operands[3] = constm1_rtx, op = and_optab;
10154 	  else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
10155 	    operands[3] = const0_rtx, op = ior_optab;
10156 	  else
10157 	    return 0; /* FAIL */
10158 	}
10159       else if (GET_CODE (operands[3]) == CONST_INT)
10160 	{
10161 	  var = operands[2];
10162 	  if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
10163 	    operands[2] = constm1_rtx, op = and_optab;
10164 	  else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
10165 	    operands[2] = const0_rtx, op = ior_optab;
10166 	  else
10167 	    return 0; /* FAIL */
10168 	}
10169       else
10170         return 0; /* FAIL */
10171 
10172       orig_out = operands[0];
10173       tmp = gen_reg_rtx (mode);
10174       operands[0] = tmp;
10175 
10176       /* Recurse to get the constant loaded.  */
10177       if (ix86_expand_int_movcc (operands) == 0)
10178         return 0; /* FAIL */
10179 
10180       /* Mask in the interesting variable.  */
10181       out = expand_binop (mode, op, var, tmp, orig_out, 0,
10182 			  OPTAB_WIDEN);
10183       if (!rtx_equal_p (out, orig_out))
10184 	emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
10185 
10186       return 1; /* DONE */
10187     }
10188 
10189   /*
10190    * For comparison with above,
10191    *
10192    * movl cf,dest
10193    * movl ct,tmp
10194    * cmpl op1,op2
10195    * cmovcc tmp,dest
10196    *
10197    * Size 15.
10198    */
10199 
10200   if (! nonimmediate_operand (operands[2], mode))
10201     operands[2] = force_reg (mode, operands[2]);
10202   if (! nonimmediate_operand (operands[3], mode))
10203     operands[3] = force_reg (mode, operands[3]);
10204 
10205   if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
10206     {
10207       rtx tmp = gen_reg_rtx (mode);
10208       emit_move_insn (tmp, operands[3]);
10209       operands[3] = tmp;
10210     }
10211   if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
10212     {
10213       rtx tmp = gen_reg_rtx (mode);
10214       emit_move_insn (tmp, operands[2]);
10215       operands[2] = tmp;
10216     }
10217 
10218   if (! register_operand (operands[2], VOIDmode)
10219       && (mode == QImode
10220           || ! register_operand (operands[3], VOIDmode)))
10221     operands[2] = force_reg (mode, operands[2]);
10222 
10223   if (mode == QImode
10224       && ! register_operand (operands[3], VOIDmode))
10225     operands[3] = force_reg (mode, operands[3]);
10226 
10227   emit_insn (compare_seq);
10228   emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10229 			  gen_rtx_IF_THEN_ELSE (mode,
10230 						compare_op, operands[2],
10231 						operands[3])));
10232   if (bypass_test)
10233     emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
10234 			    gen_rtx_IF_THEN_ELSE (mode,
10235 				  bypass_test,
10236 				  copy_rtx (operands[3]),
10237 				  copy_rtx (operands[0]))));
10238   if (second_test)
10239     emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
10240 			    gen_rtx_IF_THEN_ELSE (mode,
10241 				  second_test,
10242 				  copy_rtx (operands[2]),
10243 				  copy_rtx (operands[0]))));
10244 
10245   return 1; /* DONE */
10246 }
10247 
10248 int
10249 ix86_expand_fp_movcc (rtx operands[])
10250 {
10251   enum rtx_code code;
10252   rtx tmp;
10253   rtx compare_op, second_test, bypass_test;
10254 
10255   /* For SF/DFmode conditional moves based on comparisons
10256      in same mode, we may want to use SSE min/max instructions.  */
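  /* For example, "d = (a < b) ? a : b" in SFmode maps directly to a single
     minss below, and the mirrored comparison to maxss.  */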
10257   if (((TARGET_SSE_MATH && GET_MODE (operands[0]) == SFmode)
10258        || (TARGET_SSE2 && TARGET_SSE_MATH && GET_MODE (operands[0]) == DFmode))
10259       && GET_MODE (ix86_compare_op0) == GET_MODE (operands[0])
10260       /* The SSE comparisons do not support the LTGT/UNEQ pair.  */
10261       && (!TARGET_IEEE_FP
10262 	  || (GET_CODE (operands[1]) != LTGT && GET_CODE (operands[1]) != UNEQ))
10263       /* We may be called from the post-reload splitter.  */
10264       && (!REG_P (operands[0])
10265 	  || SSE_REG_P (operands[0])
10266 	  || REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER))
10267     {
10268       rtx op0 = ix86_compare_op0, op1 = ix86_compare_op1;
10269       code = GET_CODE (operands[1]);
10270 
10271       /* See if we have (cross) match between comparison operands and
10272          conditional move operands.  */
10273       if (rtx_equal_p (operands[2], op1))
10274 	{
10275 	  rtx tmp = op0;
10276 	  op0 = op1;
10277 	  op1 = tmp;
10278 	  code = reverse_condition_maybe_unordered (code);
10279 	}
10280       if (rtx_equal_p (operands[2], op0) && rtx_equal_p (operands[3], op1))
10281 	{
10282 	  /* Check for min operation.  */
10283 	  if (code == LT || code == UNLE)
10284 	    {
10285 	       if (code == UNLE)
10286 		{
10287 		  rtx tmp = op0;
10288 		  op0 = op1;
10289 		  op1 = tmp;
10290 		}
10291 	       operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
10292 	       if (memory_operand (op0, VOIDmode))
10293 		 op0 = force_reg (GET_MODE (operands[0]), op0);
10294 	       if (GET_MODE (operands[0]) == SFmode)
10295 		 emit_insn (gen_minsf3 (operands[0], op0, op1));
10296 	       else
10297 		 emit_insn (gen_mindf3 (operands[0], op0, op1));
10298 	       return 1;
10299 	    }
10300 	  /* Check for max operation.  */
10301 	  if (code == GT || code == UNGE)
10302 	    {
10303 	       if (code == UNGE)
10304 		{
10305 		  rtx tmp = op0;
10306 		  op0 = op1;
10307 		  op1 = tmp;
10308 		}
10309 	       operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
10310 	       if (memory_operand (op0, VOIDmode))
10311 		 op0 = force_reg (GET_MODE (operands[0]), op0);
10312 	       if (GET_MODE (operands[0]) == SFmode)
10313 		 emit_insn (gen_maxsf3 (operands[0], op0, op1));
10314 	       else
10315 		 emit_insn (gen_maxdf3 (operands[0], op0, op1));
10316 	       return 1;
10317 	    }
10318 	}
10319       /* Arrange for the condition to be a valid sse_comparison_operator.
10320 	 In non-IEEE mode, also try to canonicalize the destination operand
10321 	 to be first in the comparison - this helps reload avoid extra
10322 	 moves.  */
10323       if (!sse_comparison_operator (operands[1], VOIDmode)
10324 	  || (rtx_equal_p (operands[0], ix86_compare_op1) && !TARGET_IEEE_FP))
10325 	{
10326 	  rtx tmp = ix86_compare_op0;
10327 	  ix86_compare_op0 = ix86_compare_op1;
10328 	  ix86_compare_op1 = tmp;
10329 	  operands[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands[1])),
10330 					VOIDmode, ix86_compare_op0,
10331 					ix86_compare_op1);
10332 	}
10333       /* Similarly, try to arrange for the result to be the first operand of
10334 	 the conditional move.  The NE comparison is not supported on SSE, so
10335 	 try to avoid it as well.  */
10336       if ((rtx_equal_p (operands[0], operands[3])
10337 	   && (!TARGET_IEEE_FP || GET_CODE (operands[1]) != EQ))
10338 	  || (GET_CODE (operands[1]) == NE && TARGET_IEEE_FP))
10339 	{
10340 	  rtx tmp = operands[2];
10341 	  operands[2] = operands[3];
10342 	  operands[3] = tmp;
10343 	  operands[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
10344 					  (GET_CODE (operands[1])),
10345 					VOIDmode, ix86_compare_op0,
10346 					ix86_compare_op1);
10347 	}
10348       if (GET_MODE (operands[0]) == SFmode)
10349 	emit_insn (gen_sse_movsfcc (operands[0], operands[1],
10350 				    operands[2], operands[3],
10351 				    ix86_compare_op0, ix86_compare_op1));
10352       else
10353 	emit_insn (gen_sse_movdfcc (operands[0], operands[1],
10354 				    operands[2], operands[3],
10355 				    ix86_compare_op0, ix86_compare_op1));
10356       return 1;
10357     }
10358 
10359   /* The floating point conditional move instructions don't directly
10360      support conditions resulting from a signed integer comparison.  */
10361 
10362   code = GET_CODE (operands[1]);
10363   compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
10364 
10365   /* The floating point conditional move instructions don't directly
10366      support signed integer comparisons.  */
10367 
10368   if (!fcmov_comparison_operator (compare_op, VOIDmode))
10369     {
10370       if (second_test != NULL || bypass_test != NULL)
10371 	abort ();
10372       tmp = gen_reg_rtx (QImode);
10373       ix86_expand_setcc (code, tmp);
10374       code = NE;
10375       ix86_compare_op0 = tmp;
10376       ix86_compare_op1 = const0_rtx;
10377       compare_op = ix86_expand_compare (code,  &second_test, &bypass_test);
10378     }
10379   if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
10380     {
10381       tmp = gen_reg_rtx (GET_MODE (operands[0]));
10382       emit_move_insn (tmp, operands[3]);
10383       operands[3] = tmp;
10384     }
10385   if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
10386     {
10387       tmp = gen_reg_rtx (GET_MODE (operands[0]));
10388       emit_move_insn (tmp, operands[2]);
10389       operands[2] = tmp;
10390     }
10391 
10392   emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10393 			  gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
10394 				compare_op,
10395 				operands[2],
10396 				operands[3])));
10397   if (bypass_test)
10398     emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10399 			    gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
10400 				  bypass_test,
10401 				  operands[3],
10402 				  operands[0])));
10403   if (second_test)
10404     emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10405 			    gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
10406 				  second_test,
10407 				  operands[2],
10408 				  operands[0])));
10409 
10410   return 1;
10411 }
10412 
10413 /* Expand conditional increment or decrement using adc/sbb instructions.
10414    The default case using setcc followed by the conditional move can be
10415    done by generic code.  */
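/* For instance, "x = y + (a < b)" with unsigned operands can be emitted as a
   compare of a with b followed by "adc $0" (x = y + CF); the mirrored sbb
   form covers the decrement case.  (Illustrative example.)  */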
10416 int
10417 ix86_expand_int_addcc (rtx operands[])
10418 {
10419   enum rtx_code code = GET_CODE (operands[1]);
10420   rtx compare_op;
10421   rtx val = const0_rtx;
10422   bool fpcmp = false;
10423   enum machine_mode mode = GET_MODE (operands[0]);
10424 
10425   if (operands[3] != const1_rtx
10426       && operands[3] != constm1_rtx)
10427     return 0;
10428   if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
10429 				       ix86_compare_op1, &compare_op))
10430      return 0;
10431   code = GET_CODE (compare_op);
10432 
10433   if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
10434       || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
10435     {
10436       fpcmp = true;
10437       code = ix86_fp_compare_code_to_integer (code);
10438     }
10439 
10440   if (code != LTU)
10441     {
10442       val = constm1_rtx;
10443       if (fpcmp)
10444 	PUT_CODE (compare_op,
10445 		  reverse_condition_maybe_unordered
10446 		    (GET_CODE (compare_op)));
10447       else
10448 	PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
10449     }
10450   PUT_MODE (compare_op, mode);
10451 
10452   /* Construct either adc or sbb insn.  */
10453   if ((code == LTU) == (operands[3] == constm1_rtx))
10454     {
10455       switch (GET_MODE (operands[0]))
10456 	{
10457 	  case QImode:
10458             emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
10459 	    break;
10460 	  case HImode:
10461             emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
10462 	    break;
10463 	  case SImode:
10464             emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
10465 	    break;
10466 	  case DImode:
10467             emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
10468 	    break;
10469 	  default:
10470 	    abort ();
10471 	}
10472     }
10473   else
10474     {
10475       switch (GET_MODE (operands[0]))
10476 	{
10477 	  case QImode:
10478             emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
10479 	    break;
10480 	  case HImode:
10481             emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
10482 	    break;
10483 	  case SImode:
10484             emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
10485 	    break;
10486 	  case DImode:
10487             emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
10488 	    break;
10489 	  default:
10490 	    abort ();
10491 	}
10492     }
10493   return 1; /* DONE */
10494 }
10495 
10496 
10497 /* Split operands 0 and 1 into SImode parts.  Similar to split_di, but
10498    works for floating point parameters and non-offsettable memories.
10499    For pushes, it returns just stack offsets; the values will be saved
10500    in the right order.  At most three parts are generated.  */
10501 
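/* For example, on a 32-bit target a DFmode operand is split into two SImode
   parts (least significant word first) and XFmode into three; on a 64-bit
   target XFmode/TFmode is split into a DImode part plus an SImode or DImode
   upper part.  */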
10502 static int
10503 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
10504 {
10505   int size;
10506 
10507   if (!TARGET_64BIT)
10508     size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
10509   else
10510     size = (GET_MODE_SIZE (mode) + 4) / 8;
10511 
10512   if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
10513     abort ();
10514   if (size < 2 || size > 3)
10515     abort ();
10516 
10517   /* Optimize constant pool references to immediates.  This is used by fp
10518      moves, which force all constants to memory to allow combining.  */
10519   if (GET_CODE (operand) == MEM && RTX_UNCHANGING_P (operand))
10520     {
10521       rtx tmp = maybe_get_pool_constant (operand);
10522       if (tmp)
10523 	operand = tmp;
10524     }
10525 
10526   if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
10527     {
10528       /* The only non-offsettable memories we handle are pushes.  */
10529       if (! push_operand (operand, VOIDmode))
10530 	abort ();
10531 
10532       operand = copy_rtx (operand);
10533       PUT_MODE (operand, Pmode);
10534       parts[0] = parts[1] = parts[2] = operand;
10535     }
10536   else if (!TARGET_64BIT)
10537     {
10538       if (mode == DImode)
10539 	split_di (&operand, 1, &parts[0], &parts[1]);
10540       else
10541 	{
10542 	  if (REG_P (operand))
10543 	    {
10544 	      if (!reload_completed)
10545 		abort ();
10546 	      parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
10547 	      parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
10548 	      if (size == 3)
10549 		parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
10550 	    }
10551 	  else if (offsettable_memref_p (operand))
10552 	    {
10553 	      operand = adjust_address (operand, SImode, 0);
10554 	      parts[0] = operand;
10555 	      parts[1] = adjust_address (operand, SImode, 4);
10556 	      if (size == 3)
10557 		parts[2] = adjust_address (operand, SImode, 8);
10558 	    }
10559 	  else if (GET_CODE (operand) == CONST_DOUBLE)
10560 	    {
10561 	      REAL_VALUE_TYPE r;
10562 	      long l[4];
10563 
10564 	      REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
10565 	      switch (mode)
10566 		{
10567 		case XFmode:
10568 		  REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
10569 		  parts[2] = gen_int_mode (l[2], SImode);
10570 		  break;
10571 		case DFmode:
10572 		  REAL_VALUE_TO_TARGET_DOUBLE (r, l);
10573 		  break;
10574 		default:
10575 		  abort ();
10576 		}
10577 	      parts[1] = gen_int_mode (l[1], SImode);
10578 	      parts[0] = gen_int_mode (l[0], SImode);
10579 	    }
10580 	  else
10581 	    abort ();
10582 	}
10583     }
10584   else
10585     {
10586       if (mode == TImode)
10587 	split_ti (&operand, 1, &parts[0], &parts[1]);
10588       if (mode == XFmode || mode == TFmode)
10589 	{
10590 	  enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
10591 	  if (REG_P (operand))
10592 	    {
10593 	      if (!reload_completed)
10594 		abort ();
10595 	      parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
10596 	      parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
10597 	    }
10598 	  else if (offsettable_memref_p (operand))
10599 	    {
10600 	      operand = adjust_address (operand, DImode, 0);
10601 	      parts[0] = operand;
10602 	      parts[1] = adjust_address (operand, upper_mode, 8);
10603 	    }
10604 	  else if (GET_CODE (operand) == CONST_DOUBLE)
10605 	    {
10606 	      REAL_VALUE_TYPE r;
10607 	      long l[3];
10608 
10609 	      REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
10610 	      real_to_target (l, &r, mode);
10611 	      /* Do not use shift by 32 to avoid warning on 32bit systems.  */
10612 	      if (HOST_BITS_PER_WIDE_INT >= 64)
10613 	        parts[0]
10614 		  = gen_int_mode
10615 		      ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
10616 		       + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
10617 		       DImode);
10618 	      else
10619 	        parts[0] = immed_double_const (l[0], l[1], DImode);
10620 	      if (upper_mode == SImode)
10621 	        parts[1] = gen_int_mode (l[2], SImode);
10622 	      else if (HOST_BITS_PER_WIDE_INT >= 64)
10623 	        parts[1]
10624 		  = gen_int_mode
10625 		      ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
10626 		       + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
10627 		       DImode);
10628 	      else
10629 	        parts[1] = immed_double_const (l[2], l[3], DImode);
10630 	    }
10631 	  else
10632 	    abort ();
10633 	}
10634     }
10635 
10636   return size;
10637 }
10638 
10639 /* Emit insns to perform a move or push of DI, DF, and XF values.
10640    The value is split into word-sized parts, which are moved in an order
10641    that avoids clobbering a part that is still needed as a source.
10642    Operands 2-4 receive the destination parts; operands 5-7 the source parts.  */
10643 
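/* For example, a DImode register-to-memory move on a 32-bit target becomes
   two SImode moves; the parts are emitted in an order (or via an lea of the
   address) chosen so that a destination register is not clobbered while it
   is still needed to address the source.  */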
10644 void
10645 ix86_split_long_move (rtx operands[])
10646 {
10647   rtx part[2][3];
10648   int nparts;
10649   int push = 0;
10650   int collisions = 0;
10651   enum machine_mode mode = GET_MODE (operands[0]);
10652 
10653   /* The DFmode expanders may ask us to move a double.
10654      For a 64-bit target this is a single move.  By handling that case
10655      here we simplify the i386.md splitters.  */
10656   if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
10657     {
10658       /* Optimize constant pool references to immediates.  This is used by
10659 	 fp moves, which force all constants to memory to allow combining.  */
10660 
10661       if (GET_CODE (operands[1]) == MEM
10662 	  && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
10663 	  && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
10664 	operands[1] = get_pool_constant (XEXP (operands[1], 0));
10665       if (push_operand (operands[0], VOIDmode))
10666 	{
10667 	  operands[0] = copy_rtx (operands[0]);
10668 	  PUT_MODE (operands[0], Pmode);
10669 	}
10670       else
10671         operands[0] = gen_lowpart (DImode, operands[0]);
10672       operands[1] = gen_lowpart (DImode, operands[1]);
10673       emit_move_insn (operands[0], operands[1]);
10674       return;
10675     }
10676 
10677   /* The only non-offsettable memory we handle is push.  */
10678   if (push_operand (operands[0], VOIDmode))
10679     push = 1;
10680   else if (GET_CODE (operands[0]) == MEM
10681 	   && ! offsettable_memref_p (operands[0]))
10682     abort ();
10683 
10684   nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
10685   ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
10686 
10687   /* When emitting push, take care for source operands on the stack.  */
10688   if (push && GET_CODE (operands[1]) == MEM
10689       && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
10690     {
10691       if (nparts == 3)
10692 	part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
10693 				     XEXP (part[1][2], 0));
10694       part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
10695 				   XEXP (part[1][1], 0));
10696     }
10697 
10698   /* We need to do the copy in the right order in case an address register
10699      of the source overlaps the destination.  */
10700   if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
10701     {
10702       if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
10703 	collisions++;
10704       if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
10705 	collisions++;
10706       if (nparts == 3
10707 	  && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
10708 	collisions++;
10709 
10710       /* Collision in the middle part can be handled by reordering.  */
10711       if (collisions == 1 && nparts == 3
10712 	  && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
10713 	{
10714 	  rtx tmp;
10715 	  tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
10716 	  tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
10717 	}
10718 
10719       /* If there are more collisions, we can't handle it by reordering.
10720 	 Do an lea to the last part and use only one colliding move.  */
10721       else if (collisions > 1)
10722 	{
10723 	  rtx base;
10724 
10725 	  collisions = 1;
10726 
10727 	  base = part[0][nparts - 1];
10728 
10729 	  /* Handle the case when the last part isn't valid for lea.
10730 	     Happens in 64-bit mode storing the 12-byte XFmode.  */
10731 	  if (GET_MODE (base) != Pmode)
10732 	    base = gen_rtx_REG (Pmode, REGNO (base));
10733 
10734 	  emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
10735 	  part[1][0] = replace_equiv_address (part[1][0], base);
10736 	  part[1][1] = replace_equiv_address (part[1][1],
10737 				      plus_constant (base, UNITS_PER_WORD));
10738 	  if (nparts == 3)
10739 	    part[1][2] = replace_equiv_address (part[1][2],
10740 				      plus_constant (base, 8));
10741 	}
10742     }
10743 
10744   if (push)
10745     {
10746       if (!TARGET_64BIT)
10747 	{
10748 	  if (nparts == 3)
10749 	    {
10750 	      if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
10751                 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
10752 	      emit_move_insn (part[0][2], part[1][2]);
10753 	    }
10754 	}
10755       else
10756 	{
10757 	  /* In 64-bit mode a 32-bit push is not available.  If the operand is
10758 	     a register, that is OK - we just use the larger counterpart.  We
10759 	     also retype memory - this comes from an attempt to avoid a REX
10760 	     prefix on the move of the second half of a TFmode value.  */
10761 	  if (GET_MODE (part[1][1]) == SImode)
10762 	    {
10763 	      if (GET_CODE (part[1][1]) == MEM)
10764 		part[1][1] = adjust_address (part[1][1], DImode, 0);
10765 	      else if (REG_P (part[1][1]))
10766 		part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
10767 	      else
10768 		abort ();
10769 	      if (GET_MODE (part[1][0]) == SImode)
10770 		part[1][0] = part[1][1];
10771 	    }
10772 	}
10773       emit_move_insn (part[0][1], part[1][1]);
10774       emit_move_insn (part[0][0], part[1][0]);
10775       return;
10776     }
10777 
10778   /* Choose correct order to not overwrite the source before it is copied.  */
10779   if ((REG_P (part[0][0])
10780        && REG_P (part[1][1])
10781        && (REGNO (part[0][0]) == REGNO (part[1][1])
10782 	   || (nparts == 3
10783 	       && REGNO (part[0][0]) == REGNO (part[1][2]))))
10784       || (collisions > 0
10785 	  && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
10786     {
10787       if (nparts == 3)
10788 	{
10789 	  operands[2] = part[0][2];
10790 	  operands[3] = part[0][1];
10791 	  operands[4] = part[0][0];
10792 	  operands[5] = part[1][2];
10793 	  operands[6] = part[1][1];
10794 	  operands[7] = part[1][0];
10795 	}
10796       else
10797 	{
10798 	  operands[2] = part[0][1];
10799 	  operands[3] = part[0][0];
10800 	  operands[5] = part[1][1];
10801 	  operands[6] = part[1][0];
10802 	}
10803     }
10804   else
10805     {
10806       if (nparts == 3)
10807 	{
10808 	  operands[2] = part[0][0];
10809 	  operands[3] = part[0][1];
10810 	  operands[4] = part[0][2];
10811 	  operands[5] = part[1][0];
10812 	  operands[6] = part[1][1];
10813 	  operands[7] = part[1][2];
10814 	}
10815       else
10816 	{
10817 	  operands[2] = part[0][0];
10818 	  operands[3] = part[0][1];
10819 	  operands[5] = part[1][0];
10820 	  operands[6] = part[1][1];
10821 	}
10822     }
10823   emit_move_insn (operands[2], operands[5]);
10824   emit_move_insn (operands[3], operands[6]);
10825   if (nparts == 3)
10826     emit_move_insn (operands[4], operands[7]);
10827 
10828   return;
10829 }
10830 
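/* Split a DImode left shift into SImode operations.  For a constant shift
   count of 32 or more, the low input word becomes the (possibly further
   shifted) high result and the low result is zeroed; otherwise shld/sal are
   used, with a conditional fix-up emitted for variable counts.  */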
10831 void
10832 ix86_split_ashldi (rtx *operands, rtx scratch)
10833 {
10834   rtx low[2], high[2];
10835   int count;
10836 
10837   if (GET_CODE (operands[2]) == CONST_INT)
10838     {
10839       split_di (operands, 2, low, high);
10840       count = INTVAL (operands[2]) & 63;
10841 
10842       if (count >= 32)
10843 	{
10844 	  emit_move_insn (high[0], low[1]);
10845 	  emit_move_insn (low[0], const0_rtx);
10846 
10847 	  if (count > 32)
10848 	    emit_insn (gen_ashlsi3 (high[0], high[0], GEN_INT (count - 32)));
10849 	}
10850       else
10851 	{
10852 	  if (!rtx_equal_p (operands[0], operands[1]))
10853 	    emit_move_insn (operands[0], operands[1]);
10854 	  emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
10855 	  emit_insn (gen_ashlsi3 (low[0], low[0], GEN_INT (count)));
10856 	}
10857     }
10858   else
10859     {
10860       if (!rtx_equal_p (operands[0], operands[1]))
10861 	emit_move_insn (operands[0], operands[1]);
10862 
10863       split_di (operands, 1, low, high);
10864 
10865       emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
10866       emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
10867 
10868       if (TARGET_CMOVE && (! no_new_pseudos || scratch))
10869 	{
10870 	  if (! no_new_pseudos)
10871 	    scratch = force_reg (SImode, const0_rtx);
10872 	  else
10873 	    emit_move_insn (scratch, const0_rtx);
10874 
10875 	  emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2],
10876 					  scratch));
10877 	}
10878       else
10879 	emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
10880     }
10881 }
10882 
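/* Split a DImode arithmetic right shift into SImode operations.  For a
   constant shift count of 32 or more, the high input word becomes the
   (possibly further shifted) low result and the high result is filled with
   copies of the sign bit; otherwise shrd/sar are used, with a conditional
   fix-up emitted for variable counts.  */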
10883 void
10884 ix86_split_ashrdi (rtx *operands, rtx scratch)
10885 {
10886   rtx low[2], high[2];
10887   int count;
10888 
10889   if (GET_CODE (operands[2]) == CONST_INT)
10890     {
10891       split_di (operands, 2, low, high);
10892       count = INTVAL (operands[2]) & 63;
10893 
10894       if (count >= 32)
10895 	{
10896 	  emit_move_insn (low[0], high[1]);
10897 
10898 	  if (! reload_completed)
10899 	    emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31)));
10900 	  else
10901 	    {
10902 	      emit_move_insn (high[0], low[0]);
10903 	      emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
10904 	    }
10905 
10906 	  if (count > 32)
10907 	    emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
10908 	}
10909       else
10910 	{
10911 	  if (!rtx_equal_p (operands[0], operands[1]))
10912 	    emit_move_insn (operands[0], operands[1]);
10913 	  emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
10914 	  emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
10915 	}
10916     }
10917   else
10918     {
10919       if (!rtx_equal_p (operands[0], operands[1]))
10920 	emit_move_insn (operands[0], operands[1]);
10921 
10922       split_di (operands, 1, low, high);
10923 
10924       emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
10925       emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));
10926 
10927       if (TARGET_CMOVE && (! no_new_pseudos || scratch))
10928 	{
10929 	  if (! no_new_pseudos)
10930 	    scratch = gen_reg_rtx (SImode);
10931 	  emit_move_insn (scratch, high[0]);
10932 	  emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
10933 	  emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
10934 					  scratch));
10935 	}
10936       else
10937 	emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
10938     }
10939 }
10940 
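/* Split a DImode logical right shift into SImode operations.  For a constant
   shift count of 32 or more, the high input word becomes the (possibly
   further shifted) low result and the high result is zeroed; otherwise
   shrd/shr are used, with a conditional fix-up emitted for variable counts.  */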
10941 void
10942 ix86_split_lshrdi (rtx *operands, rtx scratch)
10943 {
10944   rtx low[2], high[2];
10945   int count;
10946 
10947   if (GET_CODE (operands[2]) == CONST_INT)
10948     {
10949       split_di (operands, 2, low, high);
10950       count = INTVAL (operands[2]) & 63;
10951 
10952       if (count >= 32)
10953 	{
10954 	  emit_move_insn (low[0], high[1]);
10955 	  emit_move_insn (high[0], const0_rtx);
10956 
10957 	  if (count > 32)
10958 	    emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
10959 	}
10960       else
10961 	{
10962 	  if (!rtx_equal_p (operands[0], operands[1]))
10963 	    emit_move_insn (operands[0], operands[1]);
10964 	  emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
10965 	  emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
10966 	}
10967     }
10968   else
10969     {
10970       if (!rtx_equal_p (operands[0], operands[1]))
10971 	emit_move_insn (operands[0], operands[1]);
10972 
10973       split_di (operands, 1, low, high);
10974 
10975       emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
10976       emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));
10977 
10978       /* Heh.  By reversing the arguments, we can reuse this pattern.  */
10979       if (TARGET_CMOVE && (! no_new_pseudos || scratch))
10980 	{
10981 	  if (! no_new_pseudos)
10982 	    scratch = force_reg (SImode, const0_rtx);
10983 	  else
10984 	    emit_move_insn (scratch, const0_rtx);
10985 
10986 	  emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
10987 					  scratch));
10988 	}
10989       else
10990 	emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
10991     }
10992 }
10993 
10994 /* Helper function for the string operations below.  Test VARIABLE for
10995    alignment to VALUE bytes; if it is aligned, jump to the returned label.  */
10996 static rtx
10997 ix86_expand_aligntest (rtx variable, int value)
10998 {
10999   rtx label = gen_label_rtx ();
11000   rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
11001   if (GET_MODE (variable) == DImode)
11002     emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
11003   else
11004     emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
11005   emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
11006 			   1, label);
11007   return label;
11008 }
11009 
11010 /* Decrease COUNTREG by VALUE.  */
11011 static void
11012 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
11013 {
11014   if (GET_MODE (countreg) == DImode)
11015     emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
11016   else
11017     emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
11018 }
11019 
11020 /* Zero extend EXP (possibly SImode) into a Pmode register.  */
11021 rtx
11022 ix86_zero_extend_to_Pmode (rtx exp)
11023 {
11024   rtx r;
11025   if (GET_MODE (exp) == VOIDmode)
11026     return force_reg (Pmode, exp);
11027   if (GET_MODE (exp) == Pmode)
11028     return copy_to_mode_reg (Pmode, exp);
11029   r = gen_reg_rtx (Pmode);
11030   emit_insn (gen_zero_extendsidi2 (r, exp));
11031   return r;
11032 }
11033 
11034 /* Expand string move (memcpy) operation.  Use i386 string operations when
11035    profitable.  expand_clrstr contains similar code.  */
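/* Roughly: when not optimizing for speed and the count is unknown or not a
   multiple of four, a plain "rep movsb" is emitted; for known counts with
   sufficient alignment a "rep movsl" ("rep movsq" on 64-bit) plus a short
   tail copy is emitted; otherwise run-time alignment tests are emitted
   around the rep move.  */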
11036 int
11037 ix86_expand_movstr (rtx dst, rtx src, rtx count_exp, rtx align_exp)
11038 {
11039   rtx srcreg, destreg, countreg, srcexp, destexp;
11040   enum machine_mode counter_mode;
11041   HOST_WIDE_INT align = 0;
11042   unsigned HOST_WIDE_INT count = 0;
11043 
11044   if (GET_CODE (align_exp) == CONST_INT)
11045     align = INTVAL (align_exp);
11046 
11047   /* Can't use any of this if the user has appropriated esi or edi.  */
11048   if (global_regs[4] || global_regs[5])
11049     return 0;
11050 
11051   /* This simple hack avoids all inlining code and simplifies code below.  */
11052   if (!TARGET_ALIGN_STRINGOPS)
11053     align = 64;
11054 
11055   if (GET_CODE (count_exp) == CONST_INT)
11056     {
11057       count = INTVAL (count_exp);
11058       if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
11059 	return 0;
11060     }
11061 
11062   /* Figure out proper mode for counter.  For 32bits it is always SImode,
11063      for 64bits use SImode when possible, otherwise DImode.
11064      Set count to number of bytes copied when known at compile time.  */
11065   if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
11066       || x86_64_zero_extended_value (count_exp))
11067     counter_mode = SImode;
11068   else
11069     counter_mode = DImode;
11070 
11071   if (counter_mode != SImode && counter_mode != DImode)
11072     abort ();
11073 
11074   destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
11075   if (destreg != XEXP (dst, 0))
11076     dst = replace_equiv_address_nv (dst, destreg);
11077   srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
11078   if (srcreg != XEXP (src, 0))
11079     src = replace_equiv_address_nv (src, srcreg);
11080 
11081   /* When optimizing for size emit simple rep ; movsb instruction for
11082      counts not divisible by 4.  */
11083 
11084   if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
11085     {
11086       emit_insn (gen_cld ());
11087       countreg = ix86_zero_extend_to_Pmode (count_exp);
11088       destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
11089       srcexp = gen_rtx_PLUS (Pmode, srcreg, countreg);
11090       emit_insn (gen_rep_mov (destreg, dst, srcreg, src, countreg,
11091 			      destexp, srcexp));
11092     }
11093 
11094   /* For constant aligned (or small unaligned) copies use rep movsl
11095      followed by code copying the rest.  For PentiumPro ensure 8 byte
11096      alignment to allow rep movsl acceleration.  */
11097 
11098   else if (count != 0
11099 	   && (align >= 8
11100 	       || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
11101 	       || optimize_size || count < (unsigned int) 64))
11102     {
11103       unsigned HOST_WIDE_INT offset = 0;
11104       int size = TARGET_64BIT && !optimize_size ? 8 : 4;
11105       rtx srcmem, dstmem;
11106 
11107       emit_insn (gen_cld ());
11108       if (count & ~(size - 1))
11109 	{
11110 	  countreg = copy_to_mode_reg (counter_mode,
11111 				       GEN_INT ((count >> (size == 4 ? 2 : 3))
11112 						& (TARGET_64BIT ? -1 : 0x3fffffff)));
11113 	  countreg = ix86_zero_extend_to_Pmode (countreg);
11114 
11115 	  destexp = gen_rtx_ASHIFT (Pmode, countreg,
11116 				    GEN_INT (size == 4 ? 2 : 3));
11117 	  srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
11118 	  destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
11119 
11120 	  emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
11121 				  countreg, destexp, srcexp));
11122 	  offset = count & ~(size - 1);
11123 	}
11124       if (size == 8 && (count & 0x04))
11125 	{
11126 	  srcmem = adjust_automodify_address_nv (src, SImode, srcreg,
11127 						 offset);
11128 	  dstmem = adjust_automodify_address_nv (dst, SImode, destreg,
11129 						 offset);
11130 	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11131 	  offset += 4;
11132 	}
11133       if (count & 0x02)
11134 	{
11135 	  srcmem = adjust_automodify_address_nv (src, HImode, srcreg,
11136 						 offset);
11137 	  dstmem = adjust_automodify_address_nv (dst, HImode, destreg,
11138 						 offset);
11139 	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11140 	  offset += 2;
11141 	}
11142       if (count & 0x01)
11143 	{
11144 	  srcmem = adjust_automodify_address_nv (src, QImode, srcreg,
11145 						 offset);
11146 	  dstmem = adjust_automodify_address_nv (dst, QImode, destreg,
11147 						 offset);
11148 	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11149 	}
11150     }
11151   /* The generic code based on the glibc implementation:
11152      - align destination to 4 bytes (8 byte alignment is used for PentiumPro
11153      allowing accelerated copying there)
11154      - copy the data using rep movsl
11155      - copy the rest.  */
11156   else
11157     {
11158       rtx countreg2;
11159       rtx label = NULL;
11160       rtx srcmem, dstmem;
11161       int desired_alignment = (TARGET_PENTIUMPRO
11162 			       && (count == 0 || count >= (unsigned int) 260)
11163 			       ? 8 : UNITS_PER_WORD);
11164       /* Get rid of MEM_OFFSETs, they won't be accurate.  */
11165       dst = change_address (dst, BLKmode, destreg);
11166       src = change_address (src, BLKmode, srcreg);
11167 
11168       /* In case we don't know anything about the alignment, default to
11169          the library version, since it is usually equally fast and results
11170          in shorter code.
11171 
11172 	 Also emit the call when we know that the count is large and the call
11173 	 overhead will not be important.  */
11174       if (!TARGET_INLINE_ALL_STRINGOPS
11175 	  && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
11176 	return 0;
11177 
11178       if (TARGET_SINGLE_STRINGOP)
11179 	emit_insn (gen_cld ());
11180 
11181       countreg2 = gen_reg_rtx (Pmode);
11182       countreg = copy_to_mode_reg (counter_mode, count_exp);
11183 
11184       /* We don't use loops to align destination and to copy parts smaller
11185          than 4 bytes, because gcc is able to optimize such code better (in
11186          the case the destination or the count really is aligned, gcc is often
11187          able to predict the branches) and also it is friendlier to the
11188          hardware branch prediction.
11189 
11190          Using loops is beneficial for generic case, because we can
11191          handle small counts using the loops.  Many CPUs (such as Athlon)
11192          have large REP prefix setup costs.
11193 
11194          This is quite costly.  Maybe we can revisit this decision later or
11195          add some customizability to this code.  */
11196 
11197       if (count == 0 && align < desired_alignment)
11198 	{
11199 	  label = gen_label_rtx ();
11200 	  emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
11201 				   LEU, 0, counter_mode, 1, label);
11202 	}
11203       if (align <= 1)
11204 	{
11205 	  rtx label = ix86_expand_aligntest (destreg, 1);
11206 	  srcmem = change_address (src, QImode, srcreg);
11207 	  dstmem = change_address (dst, QImode, destreg);
11208 	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11209 	  ix86_adjust_counter (countreg, 1);
11210 	  emit_label (label);
11211 	  LABEL_NUSES (label) = 1;
11212 	}
11213       if (align <= 2)
11214 	{
11215 	  rtx label = ix86_expand_aligntest (destreg, 2);
11216 	  srcmem = change_address (src, HImode, srcreg);
11217 	  dstmem = change_address (dst, HImode, destreg);
11218 	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11219 	  ix86_adjust_counter (countreg, 2);
11220 	  emit_label (label);
11221 	  LABEL_NUSES (label) = 1;
11222 	}
11223       if (align <= 4 && desired_alignment > 4)
11224 	{
11225 	  rtx label = ix86_expand_aligntest (destreg, 4);
11226 	  srcmem = change_address (src, SImode, srcreg);
11227 	  dstmem = change_address (dst, SImode, destreg);
11228 	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11229 	  ix86_adjust_counter (countreg, 4);
11230 	  emit_label (label);
11231 	  LABEL_NUSES (label) = 1;
11232 	}
11233 
11234       if (label && desired_alignment > 4 && !TARGET_64BIT)
11235 	{
11236 	  emit_label (label);
11237 	  LABEL_NUSES (label) = 1;
11238 	  label = NULL_RTX;
11239 	}
11240       if (!TARGET_SINGLE_STRINGOP)
11241 	emit_insn (gen_cld ());
11242       if (TARGET_64BIT)
11243 	{
11244 	  emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
11245 				  GEN_INT (3)));
11246 	  destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
11247 	}
11248       else
11249 	{
11250 	  emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
11251 	  destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
11252 	}
11253       srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
11254       destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
11255       emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
11256 			      countreg2, destexp, srcexp));
11257 
11258       if (label)
11259 	{
11260 	  emit_label (label);
11261 	  LABEL_NUSES (label) = 1;
11262 	}
11263       if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
11264 	{
11265 	  srcmem = change_address (src, SImode, srcreg);
11266 	  dstmem = change_address (dst, SImode, destreg);
11267 	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11268 	}
11269       if ((align <= 4 || count == 0) && TARGET_64BIT)
11270 	{
11271 	  rtx label = ix86_expand_aligntest (countreg, 4);
11272 	  srcmem = change_address (src, SImode, srcreg);
11273 	  dstmem = change_address (dst, SImode, destreg);
11274 	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11275 	  emit_label (label);
11276 	  LABEL_NUSES (label) = 1;
11277 	}
11278       if (align > 2 && count != 0 && (count & 2))
11279 	{
11280 	  srcmem = change_address (src, HImode, srcreg);
11281 	  dstmem = change_address (dst, HImode, destreg);
11282 	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11283 	}
11284       if (align <= 2 || count == 0)
11285 	{
11286 	  rtx label = ix86_expand_aligntest (countreg, 2);
11287 	  srcmem = change_address (src, HImode, srcreg);
11288 	  dstmem = change_address (dst, HImode, destreg);
11289 	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11290 	  emit_label (label);
11291 	  LABEL_NUSES (label) = 1;
11292 	}
11293       if (align > 1 && count != 0 && (count & 1))
11294 	{
11295 	  srcmem = change_address (src, QImode, srcreg);
11296 	  dstmem = change_address (dst, QImode, destreg);
11297 	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11298 	}
11299       if (align <= 1 || count == 0)
11300 	{
11301 	  rtx label = ix86_expand_aligntest (countreg, 1);
11302 	  srcmem = change_address (src, QImode, srcreg);
11303 	  dstmem = change_address (dst, QImode, destreg);
11304 	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11305 	  emit_label (label);
11306 	  LABEL_NUSES (label) = 1;
11307 	}
11308     }
11309 
11310   return 1;
11311 }
11312 
11313 /* Expand string clear operation (bzero).  Use i386 string operations when
11314    profitable.  expand_movstr contains similar code.  */
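/* The structure mirrors ix86_expand_movstr above: a plain "rep stosb" when
   not optimizing for speed, "rep stosl"/"rep stosq" plus a short tail for
   aligned constant counts, and a run-time aligned variant otherwise.  */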
11315 int
11316 ix86_expand_clrstr (rtx dst, rtx count_exp, rtx align_exp)
11317 {
11318   rtx destreg, zeroreg, countreg, destexp;
11319   enum machine_mode counter_mode;
11320   HOST_WIDE_INT align = 0;
11321   unsigned HOST_WIDE_INT count = 0;
11322 
11323   if (GET_CODE (align_exp) == CONST_INT)
11324     align = INTVAL (align_exp);
11325 
11326   /* Can't use any of this if the user has appropriated esi.  */
11327   if (global_regs[4])
11328     return 0;
11329 
11330   /* This simple hack avoids all inlining code and simplifies code below.  */
11331   if (!TARGET_ALIGN_STRINGOPS)
11332     align = 32;
11333 
11334   if (GET_CODE (count_exp) == CONST_INT)
11335     {
11336       count = INTVAL (count_exp);
11337       if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
11338 	return 0;
11339     }
11340   /* Figure out proper mode for counter.  For 32bits it is always SImode,
11341      for 64bits use SImode when possible, otherwise DImode.
11342      Set count to number of bytes copied when known at compile time.  */
11343   if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
11344       || x86_64_zero_extended_value (count_exp))
11345     counter_mode = SImode;
11346   else
11347     counter_mode = DImode;
11348 
11349   destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
11350   if (destreg != XEXP (dst, 0))
11351     dst = replace_equiv_address_nv (dst, destreg);
11352 
11353   emit_insn (gen_cld ());
11354 
11355   /* When optimizing for size emit simple rep ; movsb instruction for
11356      counts not divisible by 4.  */
11357 
11358   if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
11359     {
11360       countreg = ix86_zero_extend_to_Pmode (count_exp);
11361       zeroreg = copy_to_mode_reg (QImode, const0_rtx);
11362       destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
11363       emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg, destexp));
11364     }
11365   else if (count != 0
11366 	   && (align >= 8
11367 	       || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
11368 	       || optimize_size || count < (unsigned int) 64))
11369     {
11370       int size = TARGET_64BIT && !optimize_size ? 8 : 4;
11371       unsigned HOST_WIDE_INT offset = 0;
11372 
11373       zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
11374       if (count & ~(size - 1))
11375 	{
11376 	  countreg = copy_to_mode_reg (counter_mode,
11377 				       GEN_INT ((count >> (size == 4 ? 2 : 3))
11378 						& (TARGET_64BIT ? -1 : 0x3fffffff)));
11379 	  countreg = ix86_zero_extend_to_Pmode (countreg);
11380 	  destexp = gen_rtx_ASHIFT (Pmode, countreg, GEN_INT (size == 4 ? 2 : 3));
11381 	  destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
11382 	  emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg, destexp));
11383 	  offset = count & ~(size - 1);
11384 	}
11385       if (size == 8 && (count & 0x04))
11386 	{
11387 	  rtx mem = adjust_automodify_address_nv (dst, SImode, destreg,
11388 						  offset);
11389 	  emit_insn (gen_strset (destreg, mem,
11390 				 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11391 	  offset += 4;
11392 	}
11393       if (count & 0x02)
11394 	{
11395 	  rtx mem = adjust_automodify_address_nv (dst, HImode, destreg,
11396 						  offset);
11397 	  emit_insn (gen_strset (destreg, mem,
11398 				 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11399 	  offset += 2;
11400 	}
11401       if (count & 0x01)
11402 	{
11403 	  rtx mem = adjust_automodify_address_nv (dst, QImode, destreg,
11404 						  offset);
11405 	  emit_insn (gen_strset (destreg, mem,
11406 				 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11407 	}
11408     }
11409   else
11410     {
11411       rtx countreg2;
11412       rtx label = NULL;
11413       /* Compute desired alignment of the string operation.  */
11414       int desired_alignment = (TARGET_PENTIUMPRO
11415 			       && (count == 0 || count >= (unsigned int) 260)
11416 			       ? 8 : UNITS_PER_WORD);
11417 
11418       /* In case we don't know anything about the alignment, default to
11419          the library version, since it is usually equally fast and results
11420          in shorter code.
11421 
11422 	 Also emit the call when we know that the count is large and the call
11423 	 overhead will not be important.  */
11424       if (!TARGET_INLINE_ALL_STRINGOPS
11425 	  && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
11426 	return 0;
11427 
11428       if (TARGET_SINGLE_STRINGOP)
11429 	emit_insn (gen_cld ());
11430 
11431       countreg2 = gen_reg_rtx (Pmode);
11432       countreg = copy_to_mode_reg (counter_mode, count_exp);
11433       zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
11434       /* Get rid of MEM_OFFSET, it won't be accurate.  */
11435       dst = change_address (dst, BLKmode, destreg);
11436 
11437       if (count == 0 && align < desired_alignment)
11438 	{
11439 	  label = gen_label_rtx ();
11440 	  emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
11441 				   LEU, 0, counter_mode, 1, label);
11442 	}
11443       if (align <= 1)
11444 	{
11445 	  rtx label = ix86_expand_aligntest (destreg, 1);
11446 	  emit_insn (gen_strset (destreg, dst,
11447 				 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11448 	  ix86_adjust_counter (countreg, 1);
11449 	  emit_label (label);
11450 	  LABEL_NUSES (label) = 1;
11451 	}
11452       if (align <= 2)
11453 	{
11454 	  rtx label = ix86_expand_aligntest (destreg, 2);
11455 	  emit_insn (gen_strset (destreg, dst,
11456 				 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11457 	  ix86_adjust_counter (countreg, 2);
11458 	  emit_label (label);
11459 	  LABEL_NUSES (label) = 1;
11460 	}
11461       if (align <= 4 && desired_alignment > 4)
11462 	{
11463 	  rtx label = ix86_expand_aligntest (destreg, 4);
11464 	  emit_insn (gen_strset (destreg, dst,
11465 				 (TARGET_64BIT
11466 				  ? gen_rtx_SUBREG (SImode, zeroreg, 0)
11467 				  : zeroreg)));
11468 	  ix86_adjust_counter (countreg, 4);
11469 	  emit_label (label);
11470 	  LABEL_NUSES (label) = 1;
11471 	}
11472 
11473       if (label && desired_alignment > 4 && !TARGET_64BIT)
11474 	{
11475 	  emit_label (label);
11476 	  LABEL_NUSES (label) = 1;
11477 	  label = NULL_RTX;
11478 	}
11479 
11480       if (!TARGET_SINGLE_STRINGOP)
11481 	emit_insn (gen_cld ());
11482       if (TARGET_64BIT)
11483 	{
11484 	  emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
11485 				  GEN_INT (3)));
11486 	  destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
11487 	}
11488       else
11489 	{
11490 	  emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
11491 	  destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
11492 	}
11493       destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
11494       emit_insn (gen_rep_stos (destreg, countreg2, dst, zeroreg, destexp));
11495 
11496       if (label)
11497 	{
11498 	  emit_label (label);
11499 	  LABEL_NUSES (label) = 1;
11500 	}
11501 
11502       if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
11503 	emit_insn (gen_strset (destreg, dst,
11504 			       gen_rtx_SUBREG (SImode, zeroreg, 0)));
11505       if (TARGET_64BIT && (align <= 4 || count == 0))
11506 	{
11507 	  rtx label = ix86_expand_aligntest (countreg, 4);
11508 	  emit_insn (gen_strset (destreg, dst,
11509 				 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11510 	  emit_label (label);
11511 	  LABEL_NUSES (label) = 1;
11512 	}
11513       if (align > 2 && count != 0 && (count & 2))
11514 	emit_insn (gen_strset (destreg, dst,
11515 			       gen_rtx_SUBREG (HImode, zeroreg, 0)));
11516       if (align <= 2 || count == 0)
11517 	{
11518 	  rtx label = ix86_expand_aligntest (countreg, 2);
11519 	  emit_insn (gen_strset (destreg, dst,
11520 				 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11521 	  emit_label (label);
11522 	  LABEL_NUSES (label) = 1;
11523 	}
11524       if (align > 1 && count != 0 && (count & 1))
11525 	emit_insn (gen_strset (destreg, dst,
11526 			       gen_rtx_SUBREG (QImode, zeroreg, 0)));
11527       if (align <= 1 || count == 0)
11528 	{
11529 	  rtx label = ix86_expand_aligntest (countreg, 1);
11530 	  emit_insn (gen_strset (destreg, dst,
11531 				 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11532 	  emit_label (label);
11533 	  LABEL_NUSES (label) = 1;
11534 	}
11535     }
11536   return 1;
11537 }
11538 
11539 /* Expand strlen.  */
11540 int
11541 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
11542 {
11543   rtx addr, scratch1, scratch2, scratch3, scratch4;
11544 
11545   /* The generic case of the strlen expander is long.  Avoid expanding it
11546      unless TARGET_INLINE_ALL_STRINGOPS.  */
11547 
11548   if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
11549       && !TARGET_INLINE_ALL_STRINGOPS
11550       && !optimize_size
11551       && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
11552     return 0;
11553 
11554   addr = force_reg (Pmode, XEXP (src, 0));
11555   scratch1 = gen_reg_rtx (Pmode);
11556 
11557   if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
11558       && !optimize_size)
11559     {
11560       /* Well it seems that some optimizer does not combine a call like
11561          foo(strlen(bar), strlen(bar));
11562          when the move and the subtraction are done here.  It does calculate
11563          the length just once when these instructions are done inside of
11564          output_strlen_unroll().  But since &bar[strlen(bar)] is often used
11565          and this uses one fewer register for the lifetime of
11566          output_strlen_unroll(), this is better.  */
11567 
11568       emit_move_insn (out, addr);
11569 
11570       ix86_expand_strlensi_unroll_1 (out, src, align);
11571 
11572       /* strlensi_unroll_1 returns the address of the zero at the end of
11573          the string, like memchr(), so compute the length by subtracting
11574          the start address.  */
11575       if (TARGET_64BIT)
11576 	emit_insn (gen_subdi3 (out, out, addr));
11577       else
11578 	emit_insn (gen_subsi3 (out, out, addr));
11579     }
11580   else
11581     {
11582       rtx unspec;
11583       scratch2 = gen_reg_rtx (Pmode);
11584       scratch3 = gen_reg_rtx (Pmode);
11585       scratch4 = force_reg (Pmode, constm1_rtx);
11586 
11587       emit_move_insn (scratch3, addr);
11588       eoschar = force_reg (QImode, eoschar);
11589 
11590       emit_insn (gen_cld ());
11591       src = replace_equiv_address_nv (src, scratch3);
11592 
11593       /* If .md starts supporting :P, this can be done in .md.  */
11594       unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
11595 						 scratch4), UNSPEC_SCAS);
11596       emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
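      /* The insn above expands to a "repnz scasb", so SCRATCH1 now holds the
	 residual count register: it started at -1 and was decremented once
	 per byte scanned, including the terminating zero.  If N bytes were
	 scanned, SCRATCH1 == -1 - N, hence ~SCRATCH1 - 1 == N - 1, which is
	 exactly the string length; that is what the one's complement and the
	 add of -1 below compute.  */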
11597       if (TARGET_64BIT)
11598 	{
11599 	  emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
11600 	  emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
11601 	}
11602       else
11603 	{
11604 	  emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
11605 	  emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
11606 	}
11607     }
11608   return 1;
11609 }
11610 
11611 /* Expand the appropriate insns for doing strlen if not just doing
11612    repnz; scasb
11613 
11614    out = result, initialized with the start address
11615    align_rtx = alignment of the address.
11616    scratch = scratch register, initialized with the start address when
11617 	not aligned, otherwise undefined
11618 
11619    This is just the body.  It needs the initializations mentioned above and
11620    some address computation at the end.  These things are done in i386.md.  */
11621 
11622 static void
11623 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
11624 {
11625   int align;
11626   rtx tmp;
11627   rtx align_2_label = NULL_RTX;
11628   rtx align_3_label = NULL_RTX;
11629   rtx align_4_label = gen_label_rtx ();
11630   rtx end_0_label = gen_label_rtx ();
11631   rtx mem;
11632   rtx tmpreg = gen_reg_rtx (SImode);
11633   rtx scratch = gen_reg_rtx (SImode);
11634   rtx cmp;
11635 
11636   align = 0;
11637   if (GET_CODE (align_rtx) == CONST_INT)
11638     align = INTVAL (align_rtx);
11639 
11640   /* Loop to check 1..3 bytes for null to get an aligned pointer.  */
11641 
11642   /* Is there a known alignment and is it less than 4?  */
11643   if (align < 4)
11644     {
11645       rtx scratch1 = gen_reg_rtx (Pmode);
11646       emit_move_insn (scratch1, out);
11647       /* Is there a known alignment and is it not 2? */
11648       if (align != 2)
11649 	{
11650 	  align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
11651 	  align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
11652 
11653 	  /* Leave just the 3 lower bits.  */
11654 	  align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
11655 				    NULL_RTX, 0, OPTAB_WIDEN);
11656 
11657 	  emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
11658 				   Pmode, 1, align_4_label);
11659 	  emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), EQ, NULL,
11660 				   Pmode, 1, align_2_label);
11661 	  emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), GTU, NULL,
11662 				   Pmode, 1, align_3_label);
11663 	}
11664       else
11665         {
11666 	  /* Since the alignment is 2, we have to check 2 or 0 bytes;
11667 	     check whether it is aligned to a 4-byte boundary.  */
11668 
11669 	  align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (2),
11670 				    NULL_RTX, 0, OPTAB_WIDEN);
11671 
11672 	  emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
11673 				   Pmode, 1, align_4_label);
11674         }
11675 
11676       mem = change_address (src, QImode, out);
11677 
11678       /* Now compare the bytes.  */
11679 
11680       /* Compare the first n unaligned bytes on a byte-by-byte basis.  */
11681       emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
11682 			       QImode, 1, end_0_label);
11683 
11684       /* Increment the address.  */
11685       if (TARGET_64BIT)
11686 	emit_insn (gen_adddi3 (out, out, const1_rtx));
11687       else
11688 	emit_insn (gen_addsi3 (out, out, const1_rtx));
11689 
11690       /* Not needed with an alignment of 2 */
11691       if (align != 2)
11692 	{
11693 	  emit_label (align_2_label);
11694 
11695 	  emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
11696 				   end_0_label);
11697 
11698 	  if (TARGET_64BIT)
11699 	    emit_insn (gen_adddi3 (out, out, const1_rtx));
11700 	  else
11701 	    emit_insn (gen_addsi3 (out, out, const1_rtx));
11702 
11703 	  emit_label (align_3_label);
11704 	}
11705 
11706       emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
11707 			       end_0_label);
11708 
11709       if (TARGET_64BIT)
11710 	emit_insn (gen_adddi3 (out, out, const1_rtx));
11711       else
11712 	emit_insn (gen_addsi3 (out, out, const1_rtx));
11713     }
11714 
11715   /* Generate a loop to check 4 bytes at a time.  It is not a good idea to
11716      align this loop; doing so only makes the program larger and does not
11717      help to speed it up.  */
11718   emit_label (align_4_label);
11719 
11720   mem = change_address (src, SImode, out);
11721   emit_move_insn (scratch, mem);
11722   if (TARGET_64BIT)
11723     emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
11724   else
11725     emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
11726 
11727   /* This formula yields a nonzero result iff one of the bytes is zero.
11728      This saves three branches inside the loop and many cycles.  */
11729 
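  /* The three insns below compute
	 tmpreg = (scratch - 0x01010101) & ~scratch & 0x80808080,
     the classic "does this word contain a zero byte" test: a borrow can
     only start at a zero byte, and a nonzero byte in 1..0x7f can never
     have its high bit clear in SCRATCH yet set in SCRATCH - 0x01010101,
     so a marker bit ends up set iff the corresponding byte (or a lower
     one, once a zero has already been passed) is zero.  For example,
     scratch == 0x41410041 yields tmpreg == 0x00008000.  */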
11730   emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
11731   emit_insn (gen_one_cmplsi2 (scratch, scratch));
11732   emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
11733   emit_insn (gen_andsi3 (tmpreg, tmpreg,
11734 			 gen_int_mode (0x80808080, SImode)));
11735   emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
11736 			   align_4_label);
11737 
11738   if (TARGET_CMOVE)
11739     {
11740        rtx reg = gen_reg_rtx (SImode);
11741        rtx reg2 = gen_reg_rtx (Pmode);
11742        emit_move_insn (reg, tmpreg);
11743        emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
11744 
11745        /* If zero is not in the first two bytes, move two bytes forward.  */
11746        emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
11747        tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11748        tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
11749        emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
11750 			       gen_rtx_IF_THEN_ELSE (SImode, tmp,
11751 						     reg,
11752 						     tmpreg)));
11753        /* Emit lea manually to avoid clobbering of flags.  */
11754        emit_insn (gen_rtx_SET (SImode, reg2,
11755 			       gen_rtx_PLUS (Pmode, out, GEN_INT (2))));
11756 
11757        tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11758        tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
11759        emit_insn (gen_rtx_SET (VOIDmode, out,
11760 			       gen_rtx_IF_THEN_ELSE (Pmode, tmp,
11761 						     reg2,
11762 						     out)));
11763 
11764     }
11765   else
11766     {
11767        rtx end_2_label = gen_label_rtx ();
11768        /* Is zero in the first two bytes? */
11769 
11770        emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
11771        tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11772        tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
11773        tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
11774                             gen_rtx_LABEL_REF (VOIDmode, end_2_label),
11775                             pc_rtx);
11776        tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
11777        JUMP_LABEL (tmp) = end_2_label;
11778 
11779        /* Not in the first two.  Move two bytes forward.  */
11780        emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
11781        if (TARGET_64BIT)
11782 	 emit_insn (gen_adddi3 (out, out, GEN_INT (2)));
11783        else
11784 	 emit_insn (gen_addsi3 (out, out, GEN_INT (2)));
11785 
11786        emit_label (end_2_label);
11787 
11788     }
11789 
11790   /* Avoid branch in fixing the byte.  */
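  /* At this point the low byte of TMPREG has bit 7 set iff the zero byte is
     the lower of the two remaining candidate bytes, and OUT points four
     bytes past the first byte of that pair (the loop advanced it by 4, plus
     2 more if the zero was not in the first half of the word).  Adding the
     byte to itself moves that marker bit into the carry flag, so the
     subtract-with-borrow of 3 below backs OUT up by 4 or 3 bytes, leaving
     it pointing at the terminating zero.  */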
11791   tmpreg = gen_lowpart (QImode, tmpreg);
11792   emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
11793   cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, 17), const0_rtx);
11794   if (TARGET_64BIT)
11795     emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
11796   else
11797     emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));
11798 
11799   emit_label (end_0_label);
11800 }
11801 
11802 void
11803 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
11804 		  rtx callarg2 ATTRIBUTE_UNUSED,
11805 		  rtx pop, int sibcall)
11806 {
11807   rtx use = NULL, call;
11808 
11809   if (pop == const0_rtx)
11810     pop = NULL;
11811   if (TARGET_64BIT && pop)
11812     abort ();
11813 
11814 #if TARGET_MACHO
11815   if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
11816     fnaddr = machopic_indirect_call_target (fnaddr);
11817 #else
11818   /* Static functions and indirect calls don't need the pic register.  */
11819   if (! TARGET_64BIT && flag_pic
11820       && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
11821       && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
11822     use_reg (&use, pic_offset_table_rtx);
11823 
11824   if (TARGET_64BIT && INTVAL (callarg2) >= 0)
11825     {
11826       rtx al = gen_rtx_REG (QImode, 0);
11827       emit_move_insn (al, callarg2);
11828       use_reg (&use, al);
11829     }
11830 #endif /* TARGET_MACHO */
11831 
11832   if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
11833     {
11834       fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
11835       fnaddr = gen_rtx_MEM (QImode, fnaddr);
11836     }
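  /* For a 64-bit sibling call through a pointer, the address is first moved
     into %r11: it is neither callee-saved nor used for argument passing in
     the x86-64 ABI, so it is still free to hold the target at the point of
     the tail call.  */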
11837   if (sibcall && TARGET_64BIT
11838       && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
11839     {
11840       rtx addr;
11841       addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
11842       fnaddr = gen_rtx_REG (Pmode, FIRST_REX_INT_REG + 3 /* R11 */);
11843       emit_move_insn (fnaddr, addr);
11844       fnaddr = gen_rtx_MEM (QImode, fnaddr);
11845     }
11846 
11847   call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
11848   if (retval)
11849     call = gen_rtx_SET (VOIDmode, retval, call);
11850   if (pop)
11851     {
11852       pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
11853       pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
11854       call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
11855     }
11856 
11857   call = emit_call_insn (call);
11858   if (use)
11859     CALL_INSN_FUNCTION_USAGE (call) = use;
11860 }
11861 
11862 
11863 /* Clear stack slot assignments remembered from previous functions.
11864    This is called from INIT_EXPANDERS once before RTL is emitted for each
11865    function.  */
11866 
11867 static struct machine_function *
11868 ix86_init_machine_status (void)
11869 {
11870   struct machine_function *f;
11871 
11872   f = ggc_alloc_cleared (sizeof (struct machine_function));
11873   f->use_fast_prologue_epilogue_nregs = -1;
11874 
11875   return f;
11876 }
11877 
11878 /* Return a MEM corresponding to a stack slot with mode MODE.
11879    Allocate a new slot if necessary.
11880 
11881    The RTL for a function can have several slots available: N is
11882    which slot to use.  */
11883 
11884 rtx
11885 assign_386_stack_local (enum machine_mode mode, int n)
11886 {
11887   struct stack_local_entry *s;
11888 
11889   if (n < 0 || n >= MAX_386_STACK_LOCALS)
11890     abort ();
11891 
11892   for (s = ix86_stack_locals; s; s = s->next)
11893     if (s->mode == mode && s->n == n)
11894       return s->rtl;
11895 
11896   s = (struct stack_local_entry *)
11897     ggc_alloc (sizeof (struct stack_local_entry));
11898   s->n = n;
11899   s->mode = mode;
11900   s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
11901 
11902   s->next = ix86_stack_locals;
11903   ix86_stack_locals = s;
11904   return s->rtl;
11905 }
11906 
11907 /* Construct the SYMBOL_REF for the tls_get_addr function.  */
11908 
11909 static GTY(()) rtx ix86_tls_symbol;
11910 rtx
11911 ix86_tls_get_addr (void)
11912 {
11913 
11914   if (!ix86_tls_symbol)
11915     {
11916       ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
11917 					    (TARGET_GNU_TLS && !TARGET_64BIT)
11918 					    ? "___tls_get_addr"
11919 					    : "__tls_get_addr");
11920     }
11921 
11922   return ix86_tls_symbol;
11923 }
11924 
11925 /* Calculate the length of the memory address in the instruction
11926    encoding.  Does not include the one-byte modrm, opcode, or prefix.  */
11927 
11928 static int
11929 memory_address_length (rtx addr)
11930 {
11931   struct ix86_address parts;
11932   rtx base, index, disp;
11933   int len;
11934 
11935   if (GET_CODE (addr) == PRE_DEC
11936       || GET_CODE (addr) == POST_INC
11937       || GET_CODE (addr) == PRE_MODIFY
11938       || GET_CODE (addr) == POST_MODIFY)
11939     return 0;
11940 
11941   if (! ix86_decompose_address (addr, &parts))
11942     abort ();
11943 
11944   base = parts.base;
11945   index = parts.index;
11946   disp = parts.disp;
11947   len = 0;
11948 
11949   /* Rule of thumb:
11950        - esp as the base always wants an index,
11951        - ebp as the base always wants a displacement.  */
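  /* Illustrative values computed by this function (displacement bytes plus
     one extra byte when the SIB / two-byte modrm form is needed; the modrm
     byte itself, the opcode and any prefixes are not counted):

	 (%eax)          -> 0
	 (%esp)          -> 1   SIB byte
	 12(%ebp)        -> 1   disp8
	 foo             -> 4   disp32, no base or index
	 8(%eax,%ebx,4)  -> 2   disp8 + SIB byte  */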
11952 
11953   /* Register Indirect.  */
11954   if (base && !index && !disp)
11955     {
11956       /* esp (for its index) and ebp (for its displacement) need
11957 	 the two-byte modrm form.  */
11958       if (addr == stack_pointer_rtx
11959 	  || addr == arg_pointer_rtx
11960 	  || addr == frame_pointer_rtx
11961 	  || addr == hard_frame_pointer_rtx)
11962 	len = 1;
11963     }
11964 
11965   /* Direct Addressing.  */
11966   else if (disp && !base && !index)
11967     len = 4;
11968 
11969   else
11970     {
11971       /* Find the length of the displacement constant.  */
11972       if (disp)
11973 	{
11974 	  if (GET_CODE (disp) == CONST_INT
11975 	      && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K')
11976 	      && base)
11977 	    len = 1;
11978 	  else
11979 	    len = 4;
11980 	}
11981       /* ebp always wants a displacement.  */
11982       else if (base == hard_frame_pointer_rtx)
11983         len = 1;
11984 
11985       /* An index requires the two-byte modrm form....  */
11986       if (index
11987 	  /* ...like esp, which always wants an index.  */
11988 	  || base == stack_pointer_rtx
11989 	  || base == arg_pointer_rtx
11990 	  || base == frame_pointer_rtx)
11991 	len += 1;
11992     }
11993 
11994   return len;
11995 }
11996 
11997 /* Compute default value for "length_immediate" attribute.  When SHORTFORM
11998    is set, expect that the insn has an 8-bit immediate alternative.  */
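/* For example, "addl $3, %eax" can use the sign-extended 8-bit immediate
   form, so with SHORTFORM set this returns 1, while "addl $300, %eax"
   needs a full SImode immediate and returns 4; DImode instructions also
   return 4 because their immediates are 32-bit sign-extended.  */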
11999 int
12000 ix86_attr_length_immediate_default (rtx insn, int shortform)
12001 {
12002   int len = 0;
12003   int i;
12004   extract_insn_cached (insn);
12005   for (i = recog_data.n_operands - 1; i >= 0; --i)
12006     if (CONSTANT_P (recog_data.operand[i]))
12007       {
12008 	if (len)
12009 	  abort ();
12010 	if (shortform
12011 	    && GET_CODE (recog_data.operand[i]) == CONST_INT
12012 	    && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
12013 	  len = 1;
12014 	else
12015 	  {
12016 	    switch (get_attr_mode (insn))
12017 	      {
12018 		case MODE_QI:
12019 		  len+=1;
12020 		  break;
12021 		case MODE_HI:
12022 		  len+=2;
12023 		  break;
12024 		case MODE_SI:
12025 		  len+=4;
12026 		  break;
12027 		/* Immediates for DImode instructions are encoded as 32-bit sign-extended values.  */
12028 		case MODE_DI:
12029 		  len+=4;
12030 		  break;
12031 		default:
12032 		  fatal_insn ("unknown insn mode", insn);
12033 	      }
12034 	  }
12035       }
12036   return len;
12037 }
12038 /* Compute default value for "length_address" attribute.  */
12039 int
12040 ix86_attr_length_address_default (rtx insn)
12041 {
12042   int i;
12043 
12044   if (get_attr_type (insn) == TYPE_LEA)
12045     {
12046       rtx set = PATTERN (insn);
12047       if (GET_CODE (set) == SET)
12048 	;
12049       else if (GET_CODE (set) == PARALLEL
12050 	       && GET_CODE (XVECEXP (set, 0, 0)) == SET)
12051 	set = XVECEXP (set, 0, 0);
12052       else
12053 	{
12054 #ifdef ENABLE_CHECKING
12055 	  abort ();
12056 #endif
12057 	  return 0;
12058 	}
12059 
12060       return memory_address_length (SET_SRC (set));
12061     }
12062 
12063   extract_insn_cached (insn);
12064   for (i = recog_data.n_operands - 1; i >= 0; --i)
12065     if (GET_CODE (recog_data.operand[i]) == MEM)
12066       {
12067 	return memory_address_length (XEXP (recog_data.operand[i], 0));
12068 	break;
12069       }
12070   return 0;
12071 }
12072 
12073 /* Return the maximum number of instructions a cpu can issue.  */
12074 
12075 static int
12076 ix86_issue_rate (void)
12077 {
12078   switch (ix86_tune)
12079     {
12080     case PROCESSOR_PENTIUM:
12081     case PROCESSOR_K6:
12082       return 2;
12083 
12084     case PROCESSOR_PENTIUMPRO:
12085     case PROCESSOR_PENTIUM4:
12086     case PROCESSOR_ATHLON:
12087     case PROCESSOR_K8:
12088       return 3;
12089 
12090     default:
12091       return 1;
12092     }
12093 }
12094 
12095 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads the flags
12096    set by DEP_INSN and nothing else set by DEP_INSN.  */
12097 
12098 static int
12099 ix86_flags_dependant (rtx insn, rtx dep_insn, enum attr_type insn_type)
12100 {
12101   rtx set, set2;
12102 
12103   /* Simplify the test for uninteresting insns.  */
12104   if (insn_type != TYPE_SETCC
12105       && insn_type != TYPE_ICMOV
12106       && insn_type != TYPE_FCMOV
12107       && insn_type != TYPE_IBR)
12108     return 0;
12109 
12110   if ((set = single_set (dep_insn)) != 0)
12111     {
12112       set = SET_DEST (set);
12113       set2 = NULL_RTX;
12114     }
12115   else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
12116 	   && XVECLEN (PATTERN (dep_insn), 0) == 2
12117 	   && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
12118 	   && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
12119     {
12120       set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
12121       set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
12122     }
12123   else
12124     return 0;
12125 
12126   if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
12127     return 0;
12128 
12129   /* This test is true if the dependent insn reads the flags but
12130      not any other potentially set register.  */
12131   if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
12132     return 0;
12133 
12134   if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
12135     return 0;
12136 
12137   return 1;
12138 }
12139 
12140 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
12141    address with operands set by DEP_INSN.  */
12142 
12143 static int
12144 ix86_agi_dependant (rtx insn, rtx dep_insn, enum attr_type insn_type)
12145 {
12146   rtx addr;
12147 
12148   if (insn_type == TYPE_LEA
12149       && TARGET_PENTIUM)
12150     {
12151       addr = PATTERN (insn);
12152       if (GET_CODE (addr) == SET)
12153 	;
12154       else if (GET_CODE (addr) == PARALLEL
12155 	       && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
12156 	addr = XVECEXP (addr, 0, 0);
12157       else
12158 	abort ();
12159       addr = SET_SRC (addr);
12160     }
12161   else
12162     {
12163       int i;
12164       extract_insn_cached (insn);
12165       for (i = recog_data.n_operands - 1; i >= 0; --i)
12166 	if (GET_CODE (recog_data.operand[i]) == MEM)
12167 	  {
12168 	    addr = XEXP (recog_data.operand[i], 0);
12169 	    goto found;
12170 	  }
12171       return 0;
12172     found:;
12173     }
12174 
12175   return modified_in_p (addr, dep_insn);
12176 }
12177 
12178 static int
12179 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
12180 {
12181   enum attr_type insn_type, dep_insn_type;
12182   enum attr_memory memory, dep_memory;
12183   rtx set, set2;
12184   int dep_insn_code_number;
12185 
12186   /* Anti and output dependencies have zero cost on all CPUs.  */
12187   if (REG_NOTE_KIND (link) != 0)
12188     return 0;
12189 
12190   dep_insn_code_number = recog_memoized (dep_insn);
12191 
12192   /* If we can't recognize the insns, we can't really do anything.  */
12193   if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
12194     return cost;
12195 
12196   insn_type = get_attr_type (insn);
12197   dep_insn_type = get_attr_type (dep_insn);
12198 
12199   switch (ix86_tune)
12200     {
12201     case PROCESSOR_PENTIUM:
12202       /* Address Generation Interlock adds a cycle of latency.  */
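      /* For example, in "movl %eax, %ebx; movl (%ebx), %ecx" the load's
	 address depends on the %ebx written by the preceding insn, which
	 stalls address generation on the Pentium for one cycle.  */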
12203       if (ix86_agi_dependant (insn, dep_insn, insn_type))
12204 	cost += 1;
12205 
12206       /* ??? Compares pair with jump/setcc.  */
12207       if (ix86_flags_dependant (insn, dep_insn, insn_type))
12208 	cost = 0;
12209 
12210       /* Floating point stores require value to be ready one cycle earlier.  */
12211       if (insn_type == TYPE_FMOV
12212 	  && get_attr_memory (insn) == MEMORY_STORE
12213 	  && !ix86_agi_dependant (insn, dep_insn, insn_type))
12214 	cost += 1;
12215       break;
12216 
12217     case PROCESSOR_PENTIUMPRO:
12218       memory = get_attr_memory (insn);
12219       dep_memory = get_attr_memory (dep_insn);
12220 
12221       /* Since we can't represent delayed latencies of load+operation,
12222 	 increase the cost here for non-imov insns.  */
12223       if (dep_insn_type != TYPE_IMOV
12224           && dep_insn_type != TYPE_FMOV
12225           && (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH))
12226 	cost += 1;
12227 
12228       /* INT->FP conversion is expensive.  */
12229       if (get_attr_fp_int_src (dep_insn))
12230 	cost += 5;
12231 
12232       /* There is one cycle extra latency between an FP op and a store.  */
12233       if (insn_type == TYPE_FMOV
12234 	  && (set = single_set (dep_insn)) != NULL_RTX
12235 	  && (set2 = single_set (insn)) != NULL_RTX
12236 	  && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
12237 	  && GET_CODE (SET_DEST (set2)) == MEM)
12238 	cost += 1;
12239 
12240       /* Account for the ability of the reorder buffer to hide the latency
12241 	 of a load by executing it in parallel with the previous instruction
12242 	 when the previous instruction is not needed to compute the address.  */
12243       if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
12244 	  && !ix86_agi_dependant (insn, dep_insn, insn_type))
12245 	{
12246 	  /* Claim moves to take one cycle, as the core can issue one load
12247 	     at a time and the next load can start a cycle later.  */
12248 	  if (dep_insn_type == TYPE_IMOV
12249 	      || dep_insn_type == TYPE_FMOV)
12250 	    cost = 1;
12251 	  else if (cost > 1)
12252 	    cost--;
12253 	}
12254       break;
12255 
12256     case PROCESSOR_K6:
12257       memory = get_attr_memory (insn);
12258       dep_memory = get_attr_memory (dep_insn);
12259       /* The esp dependency is resolved before the instruction is really
12260          finished.  */
12261       if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
12262 	  && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
12263 	return 1;
12264 
12265       /* Since we can't represent delayed latencies of load+operation,
12266 	 increase the cost here for non-imov insns.  */
12267       if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
12268 	cost += (dep_insn_type != TYPE_IMOV) ? 2 : 1;
12269 
12270       /* INT->FP conversion is expensive.  */
12271       if (get_attr_fp_int_src (dep_insn))
12272 	cost += 5;
12273 
12274       /* Account for the ability of the reorder buffer to hide the latency
12275 	 of a load by executing it in parallel with the previous instruction
12276 	 when the previous instruction is not needed to compute the address.  */
12277       if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
12278 	  && !ix86_agi_dependant (insn, dep_insn, insn_type))
12279 	{
12280 	  /* Claim moves to take one cycle, as the core can issue one load
12281 	     at a time and the next load can start a cycle later.  */
12282 	  if (dep_insn_type == TYPE_IMOV
12283 	      || dep_insn_type == TYPE_FMOV)
12284 	    cost = 1;
12285 	  else if (cost > 2)
12286 	    cost -= 2;
12287 	  else
12288 	    cost = 1;
12289 	}
12290       break;
12291 
12292     case PROCESSOR_ATHLON:
12293     case PROCESSOR_K8:
12294       memory = get_attr_memory (insn);
12295       dep_memory = get_attr_memory (dep_insn);
12296 
12297       /* Account for the ability of the reorder buffer to hide the latency
12298 	 of a load by executing it in parallel with the previous instruction
12299 	 when the previous instruction is not needed to compute the address.  */
12300       if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
12301 	  && !ix86_agi_dependant (insn, dep_insn, insn_type))
12302 	{
12303 	  enum attr_unit unit = get_attr_unit (insn);
12304 	  int loadcost = 3;
12305 
12306 	  /* Because of the difference between the length of integer and
12307 	     floating unit pipeline preparation stages, the memory operands
12308 	     for floating point are cheaper.
12309 
12310 	     ??? For Athlon the difference is most probably 2.  */
12311 	  if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
12312 	    loadcost = 3;
12313 	  else
12314 	    loadcost = TARGET_ATHLON ? 2 : 0;
12315 
12316 	  if (cost >= loadcost)
12317 	    cost -= loadcost;
12318 	  else
12319 	    cost = 0;
12320 	}
12321 
12322     default:
12323       break;
12324     }
12325 
12326   return cost;
12327 }
12328 
12329 static union
12330 {
12331   struct ppro_sched_data
12332   {
12333     rtx decode[3];
12334     int issued_this_cycle;
12335   } ppro;
12336 } ix86_sched_data;
12337 
12338 static enum attr_ppro_uops
12339 ix86_safe_ppro_uops (rtx insn)
12340 {
12341   if (recog_memoized (insn) >= 0)
12342     return get_attr_ppro_uops (insn);
12343   else
12344     return PPRO_UOPS_MANY;
12345 }
12346 
12347 static void
12348 ix86_dump_ppro_packet (FILE *dump)
12349 {
12350   if (ix86_sched_data.ppro.decode[0])
12351     {
12352       fprintf (dump, "PPRO packet: %d",
12353 	       INSN_UID (ix86_sched_data.ppro.decode[0]));
12354       if (ix86_sched_data.ppro.decode[1])
12355 	fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[1]));
12356       if (ix86_sched_data.ppro.decode[2])
12357 	fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[2]));
12358       fputc ('\n', dump);
12359     }
12360 }
12361 
12362 /* We're beginning a new block.  Initialize data structures as necessary.  */
12363 
12364 static void
12365 ix86_sched_init (FILE *dump ATTRIBUTE_UNUSED,
12366 		 int sched_verbose ATTRIBUTE_UNUSED,
12367 		 int veclen ATTRIBUTE_UNUSED)
12368 {
12369   memset (&ix86_sched_data, 0, sizeof (ix86_sched_data));
12370 }
12371 
12372 /* Shift INSN to SLOT, and shift everything else down.  */
12373 
12374 static void
12375 ix86_reorder_insn (rtx *insnp, rtx *slot)
12376 {
12377   if (insnp != slot)
12378     {
12379       rtx insn = *insnp;
12380       do
12381 	insnp[0] = insnp[1];
12382       while (++insnp != slot);
12383       *insnp = insn;
12384     }
12385 }
12386 
12387 static void
12388 ix86_sched_reorder_ppro (rtx *ready, rtx *e_ready)
12389 {
12390   rtx decode[3];
12391   enum attr_ppro_uops cur_uops;
12392   int issued_this_cycle;
12393   rtx *insnp;
12394   int i;
12395 
12396   /* At this point .ppro.decode contains the state of the three
12397      decoders from last "cycle".  That is, those insns that were
12398      actually independent.  But here we're scheduling for the
12399      decoder, and we may find things that are decodable in the
12400      same cycle.  */
12401 
12402   memcpy (decode, ix86_sched_data.ppro.decode, sizeof (decode));
12403   issued_this_cycle = 0;
12404 
12405   insnp = e_ready;
12406   cur_uops = ix86_safe_ppro_uops (*insnp);
12407 
12408   /* If the decoders are empty, and we have a complex insn at the
12409      head of the priority queue, let it issue without complaint.  */
12410   if (decode[0] == NULL)
12411     {
12412       if (cur_uops == PPRO_UOPS_MANY)
12413 	{
12414 	  decode[0] = *insnp;
12415 	  goto ppro_done;
12416 	}
12417 
12418       /* Otherwise, search for a 2-4 uop insn to issue.  */
12419       while (cur_uops != PPRO_UOPS_FEW)
12420 	{
12421 	  if (insnp == ready)
12422 	    break;
12423 	  cur_uops = ix86_safe_ppro_uops (*--insnp);
12424 	}
12425 
12426       /* If so, move it to the head of the line.  */
12427       if (cur_uops == PPRO_UOPS_FEW)
12428 	ix86_reorder_insn (insnp, e_ready);
12429 
12430       /* Issue the head of the queue.  */
12431       issued_this_cycle = 1;
12432       decode[0] = *e_ready--;
12433     }
12434 
12435   /* Look for simple insns to fill in the other two slots.  */
12436   for (i = 1; i < 3; ++i)
12437     if (decode[i] == NULL)
12438       {
12439 	if (ready > e_ready)
12440 	  goto ppro_done;
12441 
12442 	insnp = e_ready;
12443 	cur_uops = ix86_safe_ppro_uops (*insnp);
12444 	while (cur_uops != PPRO_UOPS_ONE)
12445 	  {
12446 	    if (insnp == ready)
12447 	      break;
12448 	    cur_uops = ix86_safe_ppro_uops (*--insnp);
12449 	  }
12450 
12451 	/* Found one.  Move it to the head of the queue and issue it.  */
12452 	if (cur_uops == PPRO_UOPS_ONE)
12453 	  {
12454 	    ix86_reorder_insn (insnp, e_ready);
12455 	    decode[i] = *e_ready--;
12456 	    issued_this_cycle++;
12457 	    continue;
12458 	  }
12459 
12460 	/* ??? Didn't find one.  Ideally, here we would do a lazy split
12461 	   of 2-uop insns, issue one and queue the other.  */
12462       }
12463 
12464  ppro_done:
12465   if (issued_this_cycle == 0)
12466     issued_this_cycle = 1;
12467   ix86_sched_data.ppro.issued_this_cycle = issued_this_cycle;
12468 }
12469 
12470 /* We are about to begin issuing insns for this clock cycle.
12471    Override the default sort algorithm to better slot instructions.  */
12472 static int
12473 ix86_sched_reorder (FILE *dump ATTRIBUTE_UNUSED,
12474 		    int sched_verbose ATTRIBUTE_UNUSED, rtx *ready,
12475 		    int *n_readyp, int clock_var ATTRIBUTE_UNUSED)
12476 {
12477   int n_ready = *n_readyp;
12478   rtx *e_ready = ready + n_ready - 1;
12479 
12480   /* Make sure to go ahead and initialize key items in
12481      ix86_sched_data if we are not going to bother trying to
12482      reorder the ready queue.  */
12483   if (n_ready < 2)
12484     {
12485       ix86_sched_data.ppro.issued_this_cycle = 1;
12486       goto out;
12487     }
12488 
12489   switch (ix86_tune)
12490     {
12491     default:
12492       break;
12493 
12494     case PROCESSOR_PENTIUMPRO:
12495       ix86_sched_reorder_ppro (ready, e_ready);
12496       break;
12497     }
12498 
12499 out:
12500   return ix86_issue_rate ();
12501 }
12502 
12503 /* We are about to issue INSN.  Return the number of insns left on the
12504    ready queue that can be issued this cycle.  */
12505 
12506 static int
12507 ix86_variable_issue (FILE *dump, int sched_verbose, rtx insn,
12508 		     int can_issue_more)
12509 {
12510   int i;
12511   switch (ix86_tune)
12512     {
12513     default:
12514       return can_issue_more - 1;
12515 
12516     case PROCESSOR_PENTIUMPRO:
12517       {
12518 	enum attr_ppro_uops uops = ix86_safe_ppro_uops (insn);
12519 
12520 	if (uops == PPRO_UOPS_MANY)
12521 	  {
12522 	    if (sched_verbose)
12523 	      ix86_dump_ppro_packet (dump);
12524 	    ix86_sched_data.ppro.decode[0] = insn;
12525 	    ix86_sched_data.ppro.decode[1] = NULL;
12526 	    ix86_sched_data.ppro.decode[2] = NULL;
12527 	    if (sched_verbose)
12528 	      ix86_dump_ppro_packet (dump);
12529 	    ix86_sched_data.ppro.decode[0] = NULL;
12530 	  }
12531 	else if (uops == PPRO_UOPS_FEW)
12532 	  {
12533 	    if (sched_verbose)
12534 	      ix86_dump_ppro_packet (dump);
12535 	    ix86_sched_data.ppro.decode[0] = insn;
12536 	    ix86_sched_data.ppro.decode[1] = NULL;
12537 	    ix86_sched_data.ppro.decode[2] = NULL;
12538 	  }
12539 	else
12540 	  {
12541 	    for (i = 0; i < 3; ++i)
12542 	      if (ix86_sched_data.ppro.decode[i] == NULL)
12543 		{
12544 		  ix86_sched_data.ppro.decode[i] = insn;
12545 		  break;
12546 		}
12547 	    if (i == 3)
12548 	      abort ();
12549 	    if (i == 2)
12550 	      {
12551 	        if (sched_verbose)
12552 	          ix86_dump_ppro_packet (dump);
12553 		ix86_sched_data.ppro.decode[0] = NULL;
12554 		ix86_sched_data.ppro.decode[1] = NULL;
12555 		ix86_sched_data.ppro.decode[2] = NULL;
12556 	      }
12557 	  }
12558       }
12559       return --ix86_sched_data.ppro.issued_this_cycle;
12560     }
12561 }
12562 
12563 static int
12564 ia32_use_dfa_pipeline_interface (void)
12565 {
12566   if (TARGET_PENTIUM || TARGET_ATHLON_K8)
12567     return 1;
12568   return 0;
12569 }
12570 
12571 /* How many alternative schedules to try.  This should be as wide as the
12572    scheduling freedom in the DFA, but no wider.  Making this value too
12573    large results in extra work for the scheduler.  */
12574 
12575 static int
12576 ia32_multipass_dfa_lookahead (void)
12577 {
12578   if (ix86_tune == PROCESSOR_PENTIUM)
12579     return 2;
12580   else
12581    return 0;
12582 }
12583 
12584 
12585 /* Compute the alignment given to a constant that is being placed in memory.
12586    EXP is the constant and ALIGN is the alignment that the object would
12587    ordinarily have.
12588    The value of this function is used instead of that alignment to align
12589    the object.  */
12590 
12591 int
12592 ix86_constant_alignment (tree exp, int align)
12593 {
12594   if (TREE_CODE (exp) == REAL_CST)
12595     {
12596       if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
12597 	return 64;
12598       else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
12599 	return 128;
12600     }
12601   else if (!optimize_size && TREE_CODE (exp) == STRING_CST
12602 	   && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
12603     return BITS_PER_WORD;
12604 
12605   return align;
12606 }
12607 
12608 /* Compute the alignment for a static variable.
12609    TYPE is the data type, and ALIGN is the alignment that
12610    the object would ordinarily have.  The value of this function is used
12611    instead of that alignment to align the object.  */
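/* Note that ALIGN and the values returned below are measured in bits, so
   the 256 used for large aggregates is a 32-byte alignment, presumably
   chosen so that block clears and copies of such objects can use wider
   aligned accesses.  */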
12612 
12613 int
12614 ix86_data_alignment (tree type, int align)
12615 {
12616   if (AGGREGATE_TYPE_P (type)
12617        && TYPE_SIZE (type)
12618        && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12619        && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
12620 	   || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
12621     return 256;
12622 
12623   /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
12624      to a 16-byte boundary.  */
12625   if (TARGET_64BIT)
12626     {
12627       if (AGGREGATE_TYPE_P (type)
12628 	   && TYPE_SIZE (type)
12629 	   && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12630 	   && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
12631 	       || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
12632 	return 128;
12633     }
12634 
12635   if (TREE_CODE (type) == ARRAY_TYPE)
12636     {
12637       if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
12638 	return 64;
12639       if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
12640 	return 128;
12641     }
12642   else if (TREE_CODE (type) == COMPLEX_TYPE)
12643     {
12644 
12645       if (TYPE_MODE (type) == DCmode && align < 64)
12646 	return 64;
12647       if (TYPE_MODE (type) == XCmode && align < 128)
12648 	return 128;
12649     }
12650   else if ((TREE_CODE (type) == RECORD_TYPE
12651 	    || TREE_CODE (type) == UNION_TYPE
12652 	    || TREE_CODE (type) == QUAL_UNION_TYPE)
12653 	   && TYPE_FIELDS (type))
12654     {
12655       if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
12656 	return 64;
12657       if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
12658 	return 128;
12659     }
12660   else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
12661 	   || TREE_CODE (type) == INTEGER_TYPE)
12662     {
12663       if (TYPE_MODE (type) == DFmode && align < 64)
12664 	return 64;
12665       if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
12666 	return 128;
12667     }
12668 
12669   return align;
12670 }
12671 
12672 /* Compute the alignment for a local variable.
12673    TYPE is the data type, and ALIGN is the alignment that
12674    the object would ordinarily have.  The value of this macro is used
12675    instead of that alignment to align the object.  */
12676 
12677 int
12678 ix86_local_alignment (tree type, int align)
12679 {
12680   /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
12681      to a 16-byte boundary.  */
12682   if (TARGET_64BIT)
12683     {
12684       if (AGGREGATE_TYPE_P (type)
12685 	   && TYPE_SIZE (type)
12686 	   && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12687 	   && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
12688 	       || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
12689 	return 128;
12690     }
12691   if (TREE_CODE (type) == ARRAY_TYPE)
12692     {
12693       if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
12694 	return 64;
12695       if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
12696 	return 128;
12697     }
12698   else if (TREE_CODE (type) == COMPLEX_TYPE)
12699     {
12700       if (TYPE_MODE (type) == DCmode && align < 64)
12701 	return 64;
12702       if (TYPE_MODE (type) == XCmode && align < 128)
12703 	return 128;
12704     }
12705   else if ((TREE_CODE (type) == RECORD_TYPE
12706 	    || TREE_CODE (type) == UNION_TYPE
12707 	    || TREE_CODE (type) == QUAL_UNION_TYPE)
12708 	   && TYPE_FIELDS (type))
12709     {
12710       if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
12711 	return 64;
12712       if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
12713 	return 128;
12714     }
12715   else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
12716 	   || TREE_CODE (type) == INTEGER_TYPE)
12717     {
12718 
12719       if (TYPE_MODE (type) == DFmode && align < 64)
12720 	return 64;
12721       if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
12722 	return 128;
12723     }
12724   return align;
12725 }
12726 
12727 /* Emit RTL insns to initialize the variable parts of a trampoline.
12728    FNADDR is an RTX for the address of the function's pure code.
12729    CXT is an RTX for the static chain value for the function.  */
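/* Sketch of the bytes stored below (illustrative only, little-endian):

   32-bit:   b9 <cxt:4>       movl   $CXT, %ecx
	     e9 <rel:4>       jmp    FNADDR          (pc-relative)

   64-bit:   41 bb <imm:4>    movl   $FNADDR, %r11d  (zero-extendable case)
	or   49 bb <imm:8>    movabs $FNADDR, %r11
	     49 ba <imm:8>    movabs $CXT, %r10
	     49 ff e3         jmpq   *%r11  */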
12730 void
12731 x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
12732 {
12733   if (!TARGET_64BIT)
12734     {
12735       /* Compute offset from the end of the jmp to the target function.  */
12736       rtx disp = expand_binop (SImode, sub_optab, fnaddr,
12737 			       plus_constant (tramp, 10),
12738 			       NULL_RTX, 1, OPTAB_DIRECT);
12739       emit_move_insn (gen_rtx_MEM (QImode, tramp),
12740 		      gen_int_mode (0xb9, QImode));
12741       emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
12742       emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
12743 		      gen_int_mode (0xe9, QImode));
12744       emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
12745     }
12746   else
12747     {
12748       int offset = 0;
12749       /* Try to load the address using the shorter movl instead of movabs.
12750          We may want to support movq for kernel mode, but the kernel does
12751          not use trampolines at the moment.  */
12752       if (x86_64_zero_extended_value (fnaddr))
12753 	{
12754 	  fnaddr = copy_to_mode_reg (DImode, fnaddr);
12755 	  emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12756 			  gen_int_mode (0xbb41, HImode));
12757 	  emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
12758 			  gen_lowpart (SImode, fnaddr));
12759 	  offset += 6;
12760 	}
12761       else
12762 	{
12763 	  emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12764 			  gen_int_mode (0xbb49, HImode));
12765 	  emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
12766 			  fnaddr);
12767 	  offset += 10;
12768 	}
12769       /* Load static chain using movabs to r10.  */
12770       emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12771 		      gen_int_mode (0xba49, HImode));
12772       emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
12773 		      cxt);
12774       offset += 10;
12775       /* Jump to r11.  */
12776       emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12777 		      gen_int_mode (0xff49, HImode));
12778       emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
12779 		      gen_int_mode (0xe3, QImode));
12780       offset += 3;
12781       if (offset > TRAMPOLINE_SIZE)
12782 	abort ();
12783     }
12784 
12785 #ifdef ENABLE_EXECUTE_STACK
12786   emit_library_call (gen_rtx (SYMBOL_REF, Pmode, "__enable_execute_stack"),
12787 		     LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
12788 #endif
12789 }
12790 
12791 #define def_builtin(MASK, NAME, TYPE, CODE)			\
12792 do {								\
12793   if ((MASK) & target_flags					\
12794       && (!((MASK) & MASK_64BIT) || TARGET_64BIT))		\
12795     builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD,	\
12796 		      NULL, NULL_TREE);				\
12797 } while (0)
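/* A hypothetical use (the real calls appear later in this file) would look
   roughly like

     def_builtin (MASK_SSE, "__builtin_ia32_addps",
		  v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ADDPS);

   i.e. the builtin is registered only when the corresponding ISA mask bit
   is enabled, and MASK_64BIT entries only in 64-bit mode.  */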
12798 
12799 struct builtin_description
12800 {
12801   const unsigned int mask;
12802   const enum insn_code icode;
12803   const char *const name;
12804   const enum ix86_builtins code;
12805   const enum rtx_code comparison;
12806   const unsigned int flag;
12807 };
12808 
12809 static const struct builtin_description bdesc_comi[] =
12810 {
12811   { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
12812   { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
12813   { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
12814   { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
12815   { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
12816   { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
12817   { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
12818   { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
12819   { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
12820   { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
12821   { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
12822   { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
12823   { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
12824   { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
12825   { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
12826   { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
12827   { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
12828   { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
12829   { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
12830   { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
12831   { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
12832   { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
12833   { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
12834   { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
12835 };
12836 
12837 static const struct builtin_description bdesc_2arg[] =
12838 {
12839   /* SSE */
12840   { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
12841   { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
12842   { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
12843   { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
12844   { MASK_SSE, CODE_FOR_vmaddv4sf3,  "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
12845   { MASK_SSE, CODE_FOR_vmsubv4sf3,  "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
12846   { MASK_SSE, CODE_FOR_vmmulv4sf3,  "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
12847   { MASK_SSE, CODE_FOR_vmdivv4sf3,  "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
12848 
12849   { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
12850   { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
12851   { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
12852   { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 },
12853   { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 },
12854   { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
12855   { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 },
12856   { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 },
12857   { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 },
12858   { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 },
12859   { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 },
12860   { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 },
12861   { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
12862   { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
12863   { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
12864   { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
12865   { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 },
12866   { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 },
12867   { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 },
12868   { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
12869 
12870   { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
12871   { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
12872   { MASK_SSE, CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
12873   { MASK_SSE, CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
12874 
12875   { MASK_SSE, CODE_FOR_sse_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
12876   { MASK_SSE, CODE_FOR_sse_nandv4sf3,  "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
12877   { MASK_SSE, CODE_FOR_sse_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
12878   { MASK_SSE, CODE_FOR_sse_xorv4sf3,  "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
12879 
12880   { MASK_SSE, CODE_FOR_sse_movss,  "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
12881   { MASK_SSE, CODE_FOR_sse_movhlps,  "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
12882   { MASK_SSE, CODE_FOR_sse_movlhps,  "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
12883   { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
12884   { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
12885 
12886   /* MMX */
12887   { MASK_MMX, CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
12888   { MASK_MMX, CODE_FOR_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
12889   { MASK_MMX, CODE_FOR_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
12890   { MASK_MMX, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, 0, 0 },
12891   { MASK_MMX, CODE_FOR_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
12892   { MASK_MMX, CODE_FOR_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
12893   { MASK_MMX, CODE_FOR_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
12894   { MASK_MMX, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, 0, 0 },
12895 
12896   { MASK_MMX, CODE_FOR_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
12897   { MASK_MMX, CODE_FOR_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
12898   { MASK_MMX, CODE_FOR_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
12899   { MASK_MMX, CODE_FOR_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
12900   { MASK_MMX, CODE_FOR_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
12901   { MASK_MMX, CODE_FOR_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
12902   { MASK_MMX, CODE_FOR_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
12903   { MASK_MMX, CODE_FOR_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
12904 
12905   { MASK_MMX, CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
12906   { MASK_MMX, CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
12907   { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
12908 
12909   { MASK_MMX, CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
12910   { MASK_MMX, CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
12911   { MASK_MMX, CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
12912   { MASK_MMX, CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
12913 
12914   { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
12915   { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
12916 
12917   { MASK_MMX, CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
12918   { MASK_MMX, CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
12919   { MASK_MMX, CODE_FOR_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
12920   { MASK_MMX, CODE_FOR_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
12921   { MASK_MMX, CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
12922   { MASK_MMX, CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
12923 
12924   { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
12925   { MASK_SSE | MASK_3DNOW_A, CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
12926   { MASK_SSE | MASK_3DNOW_A, CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
12927   { MASK_SSE | MASK_3DNOW_A, CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
12928 
12929   { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
12930   { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
12931   { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
12932   { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
12933   { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
12934   { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
12935 
12936   /* Special.  */
12937   { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
12938   { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
12939   { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
12940 
12941   { MASK_SSE, CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
12942   { MASK_SSE, CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
12943   { MASK_SSE | MASK_64BIT, CODE_FOR_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 },
12944 
12945   { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
12946   { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
12947   { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
12948   { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
12949   { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
12950   { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
12951 
12952   { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
12953   { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
12954   { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
12955   { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
12956   { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
12957   { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
12958 
12959   { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
12960   { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
12961   { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
12962   { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
12963 
12964   { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
12965   { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
12966 
12967   /* SSE2 */
12968   { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
12969   { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
12970   { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
12971   { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
12972   { MASK_SSE2, CODE_FOR_vmaddv2df3,  "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
12973   { MASK_SSE2, CODE_FOR_vmsubv2df3,  "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
12974   { MASK_SSE2, CODE_FOR_vmmulv2df3,  "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
12975   { MASK_SSE2, CODE_FOR_vmdivv2df3,  "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
12976 
12977   { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
12978   { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
12979   { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
12980   { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, 1 },
12981   { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, 1 },
12982   { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
12983   { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, EQ, 0 },
12984   { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, LT, 0 },
12985   { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, LE, 0 },
12986   { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, LT, 1 },
12987   { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, LE, 1 },
12988   { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, UNORDERED, 0 },
12989   { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
12990   { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
12991   { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
12992   { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
12993   { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, EQ, 0 },
12994   { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, LT, 0 },
12995   { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, LE, 0 },
12996   { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, UNORDERED, 0 },
12997 
12998   { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
12999   { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
13000   { MASK_SSE2, CODE_FOR_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
13001   { MASK_SSE2, CODE_FOR_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
13002 
13003   { MASK_SSE2, CODE_FOR_sse2_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
13004   { MASK_SSE2, CODE_FOR_sse2_nandv2df3,  "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
13005   { MASK_SSE2, CODE_FOR_sse2_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
13006   { MASK_SSE2, CODE_FOR_sse2_xorv2df3,  "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
13007 
13008   { MASK_SSE2, CODE_FOR_sse2_movsd,  "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
13009   { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
13010   { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
13011 
13012   /* SSE2 MMX */
13013   { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
13014   { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
13015   { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
13016   { MASK_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
13017   { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
13018   { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
13019   { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
13020   { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
13021 
13022   { MASK_SSE2, CODE_FOR_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
13023   { MASK_SSE2, CODE_FOR_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
13024   { MASK_SSE2, CODE_FOR_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
13025   { MASK_SSE2, CODE_FOR_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
13026   { MASK_SSE2, CODE_FOR_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
13027   { MASK_SSE2, CODE_FOR_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
13028   { MASK_SSE2, CODE_FOR_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
13029   { MASK_SSE2, CODE_FOR_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
13030 
13031   { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
13032   { MASK_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
13033   { MASK_SSE2, CODE_FOR_sse2_umulsidi3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, 0, 0 },
13034   { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, 0, 0 },
13035 
13036   { MASK_SSE2, CODE_FOR_sse2_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
13037   { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
13038   { MASK_SSE2, CODE_FOR_sse2_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
13039   { MASK_SSE2, CODE_FOR_sse2_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
13040 
13041   { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
13042   { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
13043 
13044   { MASK_SSE2, CODE_FOR_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
13045   { MASK_SSE2, CODE_FOR_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
13046   { MASK_SSE2, CODE_FOR_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
13047   { MASK_SSE2, CODE_FOR_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
13048   { MASK_SSE2, CODE_FOR_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
13049   { MASK_SSE2, CODE_FOR_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
13050 
13051   { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
13052   { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
13053   { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
13054   { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
13055 
13056   { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
13057   { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
13058   { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
13059   { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
13060   { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
13061   { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
13062   { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
13063   { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },
13064 
13065   { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
13066   { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
13067   { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
13068 
13069   { MASK_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
13070   { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
13071 
13072   { MASK_SSE2, CODE_FOR_ashlv8hi3_ti, 0, IX86_BUILTIN_PSLLW128, 0, 0 },
13073   { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
13074   { MASK_SSE2, CODE_FOR_ashlv4si3_ti, 0, IX86_BUILTIN_PSLLD128, 0, 0 },
13075   { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
13076   { MASK_SSE2, CODE_FOR_ashlv2di3_ti, 0, IX86_BUILTIN_PSLLQ128, 0, 0 },
13077   { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
13078 
13079   { MASK_SSE2, CODE_FOR_lshrv8hi3_ti, 0, IX86_BUILTIN_PSRLW128, 0, 0 },
13080   { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
13081   { MASK_SSE2, CODE_FOR_lshrv4si3_ti, 0, IX86_BUILTIN_PSRLD128, 0, 0 },
13082   { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
13083   { MASK_SSE2, CODE_FOR_lshrv2di3_ti, 0, IX86_BUILTIN_PSRLQ128, 0, 0 },
13084   { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
13085 
13086   { MASK_SSE2, CODE_FOR_ashrv8hi3_ti, 0, IX86_BUILTIN_PSRAW128, 0, 0 },
13087   { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
13088   { MASK_SSE2, CODE_FOR_ashrv4si3_ti, 0, IX86_BUILTIN_PSRAD128, 0, 0 },
13089   { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
13090 
13091   { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
13092 
13093   { MASK_SSE2, CODE_FOR_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
13094   { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 },
13095   { MASK_SSE2, CODE_FOR_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
13096   { MASK_SSE2, CODE_FOR_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 },
13097 
13098   /* SSE3 */
13099   { MASK_SSE3, CODE_FOR_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, 0, 0 },
13100   { MASK_SSE3, CODE_FOR_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, 0, 0 },
13101   { MASK_SSE3, CODE_FOR_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, 0, 0 },
13102   { MASK_SSE3, CODE_FOR_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, 0, 0 },
13103   { MASK_SSE3, CODE_FOR_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, 0, 0 },
13104   { MASK_SSE3, CODE_FOR_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, 0, 0 }
13105 };
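
/* Illustrative sketch, not part of the original source: each named entry in
   bdesc_2arg becomes a two-operand builtin once ix86_init_mmx_sse_builtins
   registers it, and the intrinsic headers wrap it directly.  Assuming the
   usual GCC vector-extension typedef, user code such as

     typedef double __v2df __attribute__ ((vector_size (16)));

     __v2df
     add_pd_example (__v2df a, __v2df b)
     {
       return __builtin_ia32_addpd (a, b);    expanded via CODE_FOR_addv2df3
     }

   is accepted only when the SSE2 bit is enabled, because the entry's mask
   is MASK_SSE2.  */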
13106 
13107 static const struct builtin_description bdesc_1arg[] =
13108 {
13109   { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
13110   { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
13111 
13112   { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
13113   { MASK_SSE, CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
13114   { MASK_SSE, CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
13115 
13116   { MASK_SSE, CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
13117   { MASK_SSE, CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
13118   { MASK_SSE | MASK_64BIT, CODE_FOR_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 },
13119   { MASK_SSE, CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
13120   { MASK_SSE, CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
13121   { MASK_SSE | MASK_64BIT, CODE_FOR_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 },
13122 
13123   { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
13124   { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
13125   { MASK_SSE2, CODE_FOR_sse2_movq2dq, 0, IX86_BUILTIN_MOVQ2DQ, 0, 0 },
13126   { MASK_SSE2, CODE_FOR_sse2_movdq2q, 0, IX86_BUILTIN_MOVDQ2Q, 0, 0 },
13127 
13128   { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
13129 
13130   { MASK_SSE2, CODE_FOR_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
13131   { MASK_SSE2, CODE_FOR_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
13132 
13133   { MASK_SSE2, CODE_FOR_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
13134   { MASK_SSE2, CODE_FOR_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
13135   { MASK_SSE2, CODE_FOR_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
13136   { MASK_SSE2, CODE_FOR_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
13137   { MASK_SSE2, CODE_FOR_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
13138 
13139   { MASK_SSE2, CODE_FOR_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
13140 
13141   { MASK_SSE2, CODE_FOR_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
13142   { MASK_SSE2, CODE_FOR_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
13143   { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 },
13144   { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 },
13145 
13146   { MASK_SSE2, CODE_FOR_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
13147   { MASK_SSE2, CODE_FOR_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
13148   { MASK_SSE2, CODE_FOR_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },
13149 
13150   { MASK_SSE2, CODE_FOR_sse2_movq, 0, IX86_BUILTIN_MOVQ, 0, 0 },
13151 
13152   /* SSE3 */
13153   { MASK_SSE3, CODE_FOR_movshdup, 0, IX86_BUILTIN_MOVSHDUP, 0, 0 },
13154   { MASK_SSE3, CODE_FOR_movsldup, 0, IX86_BUILTIN_MOVSLDUP, 0, 0 },
13155   { MASK_SSE3, CODE_FOR_movddup,  0, IX86_BUILTIN_MOVDDUP, 0, 0 }
13156 };
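
/* Illustrative sketch, not part of the original source: the bdesc_1arg
   entries above carry a null name and get their user-visible names from the
   explicit def_builtin calls in ix86_init_mmx_sse_builtins below; expansion
   still looks the table up by builtin code.  For example, with SSE2 enabled:

     typedef double __v2df __attribute__ ((vector_size (16)));

     __v2df
     sqrt_pd_example (__v2df a)
     {
       return __builtin_ia32_sqrtpd (a);    one operand, CODE_FOR_sqrtv2df2
     }
*/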
13157 
13158 void
13159 ix86_init_builtins (void)
13160 {
13161   if (TARGET_MMX)
13162     ix86_init_mmx_sse_builtins ();
13163 }
13164 
13165 /* Set up all the MMX/SSE builtins.  This is not called if TARGET_MMX
13166    is zero.  Otherwise, if TARGET_SSE is not set, only expand the MMX
13167    builtins.  */
13168 static void
13169 ix86_init_mmx_sse_builtins (void)
13170 {
13171   const struct builtin_description * d;
13172   size_t i;
13173 
13174   tree pchar_type_node = build_pointer_type (char_type_node);
13175   tree pcchar_type_node = build_pointer_type (
13176 			     build_type_variant (char_type_node, 1, 0));
13177   tree pfloat_type_node = build_pointer_type (float_type_node);
13178   tree pcfloat_type_node = build_pointer_type (
13179 			     build_type_variant (float_type_node, 1, 0));
13180   tree pv2si_type_node = build_pointer_type (V2SI_type_node);
13181   tree pv2di_type_node = build_pointer_type (V2DI_type_node);
13182   tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
13183 
13184   /* Comparisons.  */
13185   tree int_ftype_v4sf_v4sf
13186     = build_function_type_list (integer_type_node,
13187 				V4SF_type_node, V4SF_type_node, NULL_TREE);
13188   tree v4si_ftype_v4sf_v4sf
13189     = build_function_type_list (V4SI_type_node,
13190 				V4SF_type_node, V4SF_type_node, NULL_TREE);
13191   /* MMX/SSE/integer conversions.  */
13192   tree int_ftype_v4sf
13193     = build_function_type_list (integer_type_node,
13194 				V4SF_type_node, NULL_TREE);
13195   tree int64_ftype_v4sf
13196     = build_function_type_list (long_long_integer_type_node,
13197 				V4SF_type_node, NULL_TREE);
13198   tree int_ftype_v8qi
13199     = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
13200   tree v4sf_ftype_v4sf_int
13201     = build_function_type_list (V4SF_type_node,
13202 				V4SF_type_node, integer_type_node, NULL_TREE);
13203   tree v4sf_ftype_v4sf_int64
13204     = build_function_type_list (V4SF_type_node,
13205 				V4SF_type_node, long_long_integer_type_node,
13206 				NULL_TREE);
13207   tree v4sf_ftype_v4sf_v2si
13208     = build_function_type_list (V4SF_type_node,
13209 				V4SF_type_node, V2SI_type_node, NULL_TREE);
13210   tree int_ftype_v4hi_int
13211     = build_function_type_list (integer_type_node,
13212 				V4HI_type_node, integer_type_node, NULL_TREE);
13213   tree v4hi_ftype_v4hi_int_int
13214     = build_function_type_list (V4HI_type_node, V4HI_type_node,
13215 				integer_type_node, integer_type_node,
13216 				NULL_TREE);
13217   /* Miscellaneous.  */
13218   tree v8qi_ftype_v4hi_v4hi
13219     = build_function_type_list (V8QI_type_node,
13220 				V4HI_type_node, V4HI_type_node, NULL_TREE);
13221   tree v4hi_ftype_v2si_v2si
13222     = build_function_type_list (V4HI_type_node,
13223 				V2SI_type_node, V2SI_type_node, NULL_TREE);
13224   tree v4sf_ftype_v4sf_v4sf_int
13225     = build_function_type_list (V4SF_type_node,
13226 				V4SF_type_node, V4SF_type_node,
13227 				integer_type_node, NULL_TREE);
13228   tree v2si_ftype_v4hi_v4hi
13229     = build_function_type_list (V2SI_type_node,
13230 				V4HI_type_node, V4HI_type_node, NULL_TREE);
13231   tree v4hi_ftype_v4hi_int
13232     = build_function_type_list (V4HI_type_node,
13233 				V4HI_type_node, integer_type_node, NULL_TREE);
13234   tree v4hi_ftype_v4hi_di
13235     = build_function_type_list (V4HI_type_node,
13236 				V4HI_type_node, long_long_unsigned_type_node,
13237 				NULL_TREE);
13238   tree v2si_ftype_v2si_di
13239     = build_function_type_list (V2SI_type_node,
13240 				V2SI_type_node, long_long_unsigned_type_node,
13241 				NULL_TREE);
13242   tree void_ftype_void
13243     = build_function_type (void_type_node, void_list_node);
13244   tree void_ftype_unsigned
13245     = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
13246   tree void_ftype_unsigned_unsigned
13247     = build_function_type_list (void_type_node, unsigned_type_node,
13248 				unsigned_type_node, NULL_TREE);
13249   tree void_ftype_pcvoid_unsigned_unsigned
13250     = build_function_type_list (void_type_node, const_ptr_type_node,
13251 				unsigned_type_node, unsigned_type_node,
13252 				NULL_TREE);
13253   tree unsigned_ftype_void
13254     = build_function_type (unsigned_type_node, void_list_node);
13255   tree di_ftype_void
13256     = build_function_type (long_long_unsigned_type_node, void_list_node);
13257   tree v4sf_ftype_void
13258     = build_function_type (V4SF_type_node, void_list_node);
13259   tree v2si_ftype_v4sf
13260     = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
13261   /* Loads/stores.  */
13262   tree void_ftype_v8qi_v8qi_pchar
13263     = build_function_type_list (void_type_node,
13264 				V8QI_type_node, V8QI_type_node,
13265 				pchar_type_node, NULL_TREE);
13266   tree v4sf_ftype_pcfloat
13267     = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
13268   /* @@@ the type is bogus */
13269   tree v4sf_ftype_v4sf_pv2si
13270     = build_function_type_list (V4SF_type_node,
13271 				V4SF_type_node, pv2si_type_node, NULL_TREE);
13272   tree void_ftype_pv2si_v4sf
13273     = build_function_type_list (void_type_node,
13274 				pv2si_type_node, V4SF_type_node, NULL_TREE);
13275   tree void_ftype_pfloat_v4sf
13276     = build_function_type_list (void_type_node,
13277 				pfloat_type_node, V4SF_type_node, NULL_TREE);
13278   tree void_ftype_pdi_di
13279     = build_function_type_list (void_type_node,
13280 				pdi_type_node, long_long_unsigned_type_node,
13281 				NULL_TREE);
13282   tree void_ftype_pv2di_v2di
13283     = build_function_type_list (void_type_node,
13284 				pv2di_type_node, V2DI_type_node, NULL_TREE);
13285   /* Normal vector unops.  */
13286   tree v4sf_ftype_v4sf
13287     = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
13288 
13289   /* Normal vector binops.  */
13290   tree v4sf_ftype_v4sf_v4sf
13291     = build_function_type_list (V4SF_type_node,
13292 				V4SF_type_node, V4SF_type_node, NULL_TREE);
13293   tree v8qi_ftype_v8qi_v8qi
13294     = build_function_type_list (V8QI_type_node,
13295 				V8QI_type_node, V8QI_type_node, NULL_TREE);
13296   tree v4hi_ftype_v4hi_v4hi
13297     = build_function_type_list (V4HI_type_node,
13298 				V4HI_type_node, V4HI_type_node, NULL_TREE);
13299   tree v2si_ftype_v2si_v2si
13300     = build_function_type_list (V2SI_type_node,
13301 				V2SI_type_node, V2SI_type_node, NULL_TREE);
13302   tree di_ftype_di_di
13303     = build_function_type_list (long_long_unsigned_type_node,
13304 				long_long_unsigned_type_node,
13305 				long_long_unsigned_type_node, NULL_TREE);
13306 
13307   tree v2si_ftype_v2sf
13308     = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
13309   tree v2sf_ftype_v2si
13310     = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
13311   tree v2si_ftype_v2si
13312     = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
13313   tree v2sf_ftype_v2sf
13314     = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
13315   tree v2sf_ftype_v2sf_v2sf
13316     = build_function_type_list (V2SF_type_node,
13317 				V2SF_type_node, V2SF_type_node, NULL_TREE);
13318   tree v2si_ftype_v2sf_v2sf
13319     = build_function_type_list (V2SI_type_node,
13320 				V2SF_type_node, V2SF_type_node, NULL_TREE);
13321   tree pint_type_node    = build_pointer_type (integer_type_node);
13322   tree pcint_type_node = build_pointer_type (
13323 			     build_type_variant (integer_type_node, 1, 0));
13324   tree pdouble_type_node = build_pointer_type (double_type_node);
13325   tree pcdouble_type_node = build_pointer_type (
13326 				build_type_variant (double_type_node, 1, 0));
13327   tree int_ftype_v2df_v2df
13328     = build_function_type_list (integer_type_node,
13329 				V2DF_type_node, V2DF_type_node, NULL_TREE);
13330 
13331   tree ti_ftype_void
13332     = build_function_type (intTI_type_node, void_list_node);
13333   tree v2di_ftype_void
13334     = build_function_type (V2DI_type_node, void_list_node);
13335   tree ti_ftype_ti_ti
13336     = build_function_type_list (intTI_type_node,
13337 				intTI_type_node, intTI_type_node, NULL_TREE);
13338   tree void_ftype_pcvoid
13339     = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
13340   tree v2di_ftype_di
13341     = build_function_type_list (V2DI_type_node,
13342 				long_long_unsigned_type_node, NULL_TREE);
13343   tree di_ftype_v2di
13344     = build_function_type_list (long_long_unsigned_type_node,
13345 				V2DI_type_node, NULL_TREE);
13346   tree v4sf_ftype_v4si
13347     = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
13348   tree v4si_ftype_v4sf
13349     = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
13350   tree v2df_ftype_v4si
13351     = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
13352   tree v4si_ftype_v2df
13353     = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
13354   tree v2si_ftype_v2df
13355     = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
13356   tree v4sf_ftype_v2df
13357     = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
13358   tree v2df_ftype_v2si
13359     = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
13360   tree v2df_ftype_v4sf
13361     = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
13362   tree int_ftype_v2df
13363     = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
13364   tree int64_ftype_v2df
13365     = build_function_type_list (long_long_integer_type_node,
13366 				V2DF_type_node, NULL_TREE);
13367   tree v2df_ftype_v2df_int
13368     = build_function_type_list (V2DF_type_node,
13369 				V2DF_type_node, integer_type_node, NULL_TREE);
13370   tree v2df_ftype_v2df_int64
13371     = build_function_type_list (V2DF_type_node,
13372 				V2DF_type_node, long_long_integer_type_node,
13373 				NULL_TREE);
13374   tree v4sf_ftype_v4sf_v2df
13375     = build_function_type_list (V4SF_type_node,
13376 				V4SF_type_node, V2DF_type_node, NULL_TREE);
13377   tree v2df_ftype_v2df_v4sf
13378     = build_function_type_list (V2DF_type_node,
13379 				V2DF_type_node, V4SF_type_node, NULL_TREE);
13380   tree v2df_ftype_v2df_v2df_int
13381     = build_function_type_list (V2DF_type_node,
13382 				V2DF_type_node, V2DF_type_node,
13383 				integer_type_node,
13384 				NULL_TREE);
13385   tree v2df_ftype_v2df_pv2si
13386     = build_function_type_list (V2DF_type_node,
13387 				V2DF_type_node, pv2si_type_node, NULL_TREE);
13388   tree void_ftype_pv2si_v2df
13389     = build_function_type_list (void_type_node,
13390 				pv2si_type_node, V2DF_type_node, NULL_TREE);
13391   tree void_ftype_pdouble_v2df
13392     = build_function_type_list (void_type_node,
13393 				pdouble_type_node, V2DF_type_node, NULL_TREE);
13394   tree void_ftype_pint_int
13395     = build_function_type_list (void_type_node,
13396 				pint_type_node, integer_type_node, NULL_TREE);
13397   tree void_ftype_v16qi_v16qi_pchar
13398     = build_function_type_list (void_type_node,
13399 				V16QI_type_node, V16QI_type_node,
13400 				pchar_type_node, NULL_TREE);
13401   tree v2df_ftype_pcdouble
13402     = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
13403   tree v2df_ftype_v2df_v2df
13404     = build_function_type_list (V2DF_type_node,
13405 				V2DF_type_node, V2DF_type_node, NULL_TREE);
13406   tree v16qi_ftype_v16qi_v16qi
13407     = build_function_type_list (V16QI_type_node,
13408 				V16QI_type_node, V16QI_type_node, NULL_TREE);
13409   tree v8hi_ftype_v8hi_v8hi
13410     = build_function_type_list (V8HI_type_node,
13411 				V8HI_type_node, V8HI_type_node, NULL_TREE);
13412   tree v4si_ftype_v4si_v4si
13413     = build_function_type_list (V4SI_type_node,
13414 				V4SI_type_node, V4SI_type_node, NULL_TREE);
13415   tree v2di_ftype_v2di_v2di
13416     = build_function_type_list (V2DI_type_node,
13417 				V2DI_type_node, V2DI_type_node, NULL_TREE);
13418   tree v2di_ftype_v2df_v2df
13419     = build_function_type_list (V2DI_type_node,
13420 				V2DF_type_node, V2DF_type_node, NULL_TREE);
13421   tree v2df_ftype_v2df
13422     = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
13423   tree v2df_ftype_double
13424     = build_function_type_list (V2DF_type_node, double_type_node, NULL_TREE);
13425   tree v2df_ftype_double_double
13426     = build_function_type_list (V2DF_type_node,
13427 				double_type_node, double_type_node, NULL_TREE);
13428   tree int_ftype_v8hi_int
13429     = build_function_type_list (integer_type_node,
13430 				V8HI_type_node, integer_type_node, NULL_TREE);
13431   tree v8hi_ftype_v8hi_int_int
13432     = build_function_type_list (V8HI_type_node,
13433 				V8HI_type_node, integer_type_node,
13434 				integer_type_node, NULL_TREE);
13435   tree v2di_ftype_v2di_int
13436     = build_function_type_list (V2DI_type_node,
13437 				V2DI_type_node, integer_type_node, NULL_TREE);
13438   tree v4si_ftype_v4si_int
13439     = build_function_type_list (V4SI_type_node,
13440 				V4SI_type_node, integer_type_node, NULL_TREE);
13441   tree v8hi_ftype_v8hi_int
13442     = build_function_type_list (V8HI_type_node,
13443 				V8HI_type_node, integer_type_node, NULL_TREE);
13444   tree v8hi_ftype_v8hi_v2di
13445     = build_function_type_list (V8HI_type_node,
13446 				V8HI_type_node, V2DI_type_node, NULL_TREE);
13447   tree v4si_ftype_v4si_v2di
13448     = build_function_type_list (V4SI_type_node,
13449 				V4SI_type_node, V2DI_type_node, NULL_TREE);
13450   tree v4si_ftype_v8hi_v8hi
13451     = build_function_type_list (V4SI_type_node,
13452 				V8HI_type_node, V8HI_type_node, NULL_TREE);
13453   tree di_ftype_v8qi_v8qi
13454     = build_function_type_list (long_long_unsigned_type_node,
13455 				V8QI_type_node, V8QI_type_node, NULL_TREE);
13456   tree v2di_ftype_v16qi_v16qi
13457     = build_function_type_list (V2DI_type_node,
13458 				V16QI_type_node, V16QI_type_node, NULL_TREE);
13459   tree int_ftype_v16qi
13460     = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
13461   tree v16qi_ftype_pcchar
13462     = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
13463   tree void_ftype_pchar_v16qi
13464     = build_function_type_list (void_type_node,
13465 			        pchar_type_node, V16QI_type_node, NULL_TREE);
13466   tree v4si_ftype_pcint
13467     = build_function_type_list (V4SI_type_node, pcint_type_node, NULL_TREE);
13468   tree void_ftype_pcint_v4si
13469     = build_function_type_list (void_type_node,
13470 			        pcint_type_node, V4SI_type_node, NULL_TREE);
13471   tree v2di_ftype_v2di
13472     = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
13473 
13474   tree float80_type;
13475   tree float128_type;
13476 
13477   /* The __float80 type.  */
13478   if (TYPE_MODE (long_double_type_node) == XFmode)
13479     (*lang_hooks.types.register_builtin_type) (long_double_type_node,
13480 					       "__float80");
13481   else
13482     {
13483       /* The __float80 type.  */
13484       float80_type = make_node (REAL_TYPE);
13485       TYPE_PRECISION (float80_type) = 96;
13486       layout_type (float80_type);
13487       (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
13488     }
13489 
13490   float128_type = make_node (REAL_TYPE);
13491   TYPE_PRECISION (float128_type) = 128;
13492   layout_type (float128_type);
13493   (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
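
  /* Illustrative sketch, not part of the original source: after the two
     register_builtin_type calls above, both names are usable as type
     specifiers in C code compiled for this target, e.g.

       __float80  e = 1.0L;
       __float128 q;

     Their layout follows the TYPE_PRECISION values set here (96 and 128
     bits respectively), not anything declared by the user.  */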
13494 
13495   /* Add all builtins that are more or less simple operations on two
13496      operands.  */
13497   for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
13498     {
13499       /* Use one of the operands; the target can have a different mode for
13500 	 mask-generating compares.  */
13501       enum machine_mode mode;
13502       tree type;
13503 
13504       if (d->name == 0)
13505 	continue;
13506       mode = insn_data[d->icode].operand[1].mode;
13507 
13508       switch (mode)
13509 	{
13510 	case V16QImode:
13511 	  type = v16qi_ftype_v16qi_v16qi;
13512 	  break;
13513 	case V8HImode:
13514 	  type = v8hi_ftype_v8hi_v8hi;
13515 	  break;
13516 	case V4SImode:
13517 	  type = v4si_ftype_v4si_v4si;
13518 	  break;
13519 	case V2DImode:
13520 	  type = v2di_ftype_v2di_v2di;
13521 	  break;
13522 	case V2DFmode:
13523 	  type = v2df_ftype_v2df_v2df;
13524 	  break;
13525 	case TImode:
13526 	  type = ti_ftype_ti_ti;
13527 	  break;
13528 	case V4SFmode:
13529 	  type = v4sf_ftype_v4sf_v4sf;
13530 	  break;
13531 	case V8QImode:
13532 	  type = v8qi_ftype_v8qi_v8qi;
13533 	  break;
13534 	case V4HImode:
13535 	  type = v4hi_ftype_v4hi_v4hi;
13536 	  break;
13537 	case V2SImode:
13538 	  type = v2si_ftype_v2si_v2si;
13539 	  break;
13540 	case DImode:
13541 	  type = di_ftype_di_di;
13542 	  break;
13543 
13544 	default:
13545 	  abort ();
13546 	}
13547 
13548       /* Override for comparisons.  */
13549       if (d->icode == CODE_FOR_maskcmpv4sf3
13550 	  || d->icode == CODE_FOR_maskncmpv4sf3
13551 	  || d->icode == CODE_FOR_vmmaskcmpv4sf3
13552 	  || d->icode == CODE_FOR_vmmaskncmpv4sf3)
13553 	type = v4si_ftype_v4sf_v4sf;
13554 
13555       if (d->icode == CODE_FOR_maskcmpv2df3
13556 	  || d->icode == CODE_FOR_maskncmpv2df3
13557 	  || d->icode == CODE_FOR_vmmaskcmpv2df3
13558 	  || d->icode == CODE_FOR_vmmaskncmpv2df3)
13559 	type = v2di_ftype_v2df_v2df;
13560 
13561       def_builtin (d->mask, d->name, type, d->code);
13562     }
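
  /* Illustrative note, not part of the original source: the switch above
     keys the prototype off operand 1 of the insn pattern, so for instance
     CODE_FOR_addv2df3 (operand mode V2DFmode) gets v2df_ftype_v2df_v2df,
     while the compare patterns are overridden afterwards because their
     result is a mask in an integer vector mode, e.g.

       __builtin_ia32_cmpeqpd : v2di_ftype_v2df_v2df
       __builtin_ia32_cmpeqps : v4si_ftype_v4sf_v4sf
  */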
13563 
13564   /* Add the remaining MMX insns with somewhat more complicated types.  */
13565   def_builtin (MASK_MMX, "__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO);
13566   def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
13567   def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
13568   def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
13569   def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
13570 
13571   def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
13572   def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
13573   def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
13574 
13575   def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
13576   def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
13577 
13578   def_builtin (MASK_MMX, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
13579   def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
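
  /* Illustrative sketch, not part of the original source: the shift builtins
     registered just above take the count as a 64-bit scalar, mirroring the
     hardware forms that shift by the low quadword of an MMX operand, e.g.

       typedef short __v4hi __attribute__ ((vector_size (8)));

       __v4hi
       sll_example (__v4hi x, long long n)
       {
         return __builtin_ia32_psllw (x, n);    type v4hi_ftype_v4hi_di
       }
  */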
13580 
13581   /* comi/ucomi insns.  */
13582   for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
13583     if (d->mask == MASK_SSE2)
13584       def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
13585     else
13586       def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
13587 
13588   def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
13589   def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
13590   def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
13591 
13592   def_builtin (MASK_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
13593   def_builtin (MASK_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
13594   def_builtin (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
13595   def_builtin (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
13596   def_builtin (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
13597   def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
13598   def_builtin (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
13599   def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
13600   def_builtin (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
13601   def_builtin (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
13602   def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);
13603 
13604   def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
13605   def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);
13606 
13607   def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
13608 
13609   def_builtin (MASK_SSE, "__builtin_ia32_loadaps", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADAPS);
13610   def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
13611   def_builtin (MASK_SSE, "__builtin_ia32_loadss", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADSS);
13612   def_builtin (MASK_SSE, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS);
13613   def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
13614   def_builtin (MASK_SSE, "__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS);
13615 
13616   def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
13617   def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
13618   def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
13619   def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
13620 
13621   def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
13622   def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
13623   def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
13624   def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
13625 
13626   def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
13627 
13628   def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
13629 
13630   def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
13631   def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
13632   def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
13633   def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
13634   def_builtin (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
13635   def_builtin (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
13636 
13637   def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
13638 
13639   /* Original 3DNow!  */
13640   def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
13641   def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
13642   def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
13643   def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
13644   def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
13645   def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
13646   def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
13647   def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
13648   def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
13649   def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
13650   def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
13651   def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
13652   def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
13653   def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
13654   def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
13655   def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
13656   def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
13657   def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
13658   def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
13659   def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
13660 
13661   /* 3DNow! extension as used in the Athlon CPU.  */
13662   def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
13663   def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
13664   def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
13665   def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
13666   def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
13667   def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
13668 
13669   def_builtin (MASK_SSE, "__builtin_ia32_setzerops", v4sf_ftype_void, IX86_BUILTIN_SSE_ZERO);
13670 
13671   /* SSE2 */
13672   def_builtin (MASK_SSE2, "__builtin_ia32_pextrw128", int_ftype_v8hi_int, IX86_BUILTIN_PEXTRW128);
13673   def_builtin (MASK_SSE2, "__builtin_ia32_pinsrw128", v8hi_ftype_v8hi_int_int, IX86_BUILTIN_PINSRW128);
13674 
13675   def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
13676   def_builtin (MASK_SSE2, "__builtin_ia32_movq2dq", v2di_ftype_di, IX86_BUILTIN_MOVQ2DQ);
13677   def_builtin (MASK_SSE2, "__builtin_ia32_movdq2q", di_ftype_v2di, IX86_BUILTIN_MOVDQ2Q);
13678 
13679   def_builtin (MASK_SSE2, "__builtin_ia32_loadapd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADAPD);
13680   def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
13681   def_builtin (MASK_SSE2, "__builtin_ia32_loadsd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADSD);
13682   def_builtin (MASK_SSE2, "__builtin_ia32_storeapd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREAPD);
13683   def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
13684   def_builtin (MASK_SSE2, "__builtin_ia32_storesd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORESD);
13685 
13686   def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADHPD);
13687   def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADLPD);
13688   def_builtin (MASK_SSE2, "__builtin_ia32_storehpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STOREHPD);
13689   def_builtin (MASK_SSE2, "__builtin_ia32_storelpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STORELPD);
13690 
13691   def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
13692   def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
13693   def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
13694   def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
13695   def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
13696 
13697   def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
13698   def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
13699   def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
13700   def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
13701 
13702   def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
13703   def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
13704 
13705   def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
13706 
13707   def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
13708   def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
13709 
13710   def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
13711   def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
13712   def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
13713   def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
13714   def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
13715 
13716   def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
13717 
13718   def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
13719   def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
13720   def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
13721   def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);
13722 
13723   def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
13724   def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
13725   def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
13726 
13727   def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
13728   def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
13729   def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
13730   def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
13731 
13732   def_builtin (MASK_SSE2, "__builtin_ia32_setpd1", v2df_ftype_double, IX86_BUILTIN_SETPD1);
13733   def_builtin (MASK_SSE2, "__builtin_ia32_setpd", v2df_ftype_double_double, IX86_BUILTIN_SETPD);
13734   def_builtin (MASK_SSE2, "__builtin_ia32_setzeropd", ti_ftype_void, IX86_BUILTIN_CLRPD);
13735   def_builtin (MASK_SSE2, "__builtin_ia32_loadpd1", v2df_ftype_pcdouble, IX86_BUILTIN_LOADPD1);
13736   def_builtin (MASK_SSE2, "__builtin_ia32_loadrpd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADRPD);
13737   def_builtin (MASK_SSE2, "__builtin_ia32_storepd1", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREPD1);
13738   def_builtin (MASK_SSE2, "__builtin_ia32_storerpd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORERPD);
13739 
13740   def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
13741   def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
13742   def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
13743 
13744   def_builtin (MASK_SSE2, "__builtin_ia32_loaddqa", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQA);
13745   def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
13746   def_builtin (MASK_SSE2, "__builtin_ia32_loadd", v4si_ftype_pcint, IX86_BUILTIN_LOADD);
13747   def_builtin (MASK_SSE2, "__builtin_ia32_storedqa", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQA);
13748   def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
13749   def_builtin (MASK_SSE2, "__builtin_ia32_stored", void_ftype_pcint_v4si, IX86_BUILTIN_STORED);
13750   def_builtin (MASK_SSE2, "__builtin_ia32_movq", v2di_ftype_v2di, IX86_BUILTIN_MOVQ);
13751 
13752   def_builtin (MASK_SSE, "__builtin_ia32_setzero128", v2di_ftype_void, IX86_BUILTIN_CLRTI);
13753 
13754   def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
13755   def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
13756   def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
13757 
13758   def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
13759   def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
13760   def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
13761 
13762   def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
13763   def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);
13764 
13765   def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
13766   def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
13767   def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
13768   def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
13769 
13770   def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
13771   def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
13772   def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
13773   def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
13774 
13775   def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
13776   def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
13777 
13778   def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
13779 
13780   /* Prescott New Instructions.  */
13781   def_builtin (MASK_SSE3, "__builtin_ia32_monitor",
13782 	       void_ftype_pcvoid_unsigned_unsigned,
13783 	       IX86_BUILTIN_MONITOR);
13784   def_builtin (MASK_SSE3, "__builtin_ia32_mwait",
13785 	       void_ftype_unsigned_unsigned,
13786 	       IX86_BUILTIN_MWAIT);
13787   def_builtin (MASK_SSE3, "__builtin_ia32_movshdup",
13788 	       v4sf_ftype_v4sf,
13789 	       IX86_BUILTIN_MOVSHDUP);
13790   def_builtin (MASK_SSE3, "__builtin_ia32_movsldup",
13791 	       v4sf_ftype_v4sf,
13792 	       IX86_BUILTIN_MOVSLDUP);
13793   def_builtin (MASK_SSE3, "__builtin_ia32_lddqu",
13794 	       v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);
13795   def_builtin (MASK_SSE3, "__builtin_ia32_loadddup",
13796 	       v2df_ftype_pcdouble, IX86_BUILTIN_LOADDDUP);
13797   def_builtin (MASK_SSE3, "__builtin_ia32_movddup",
13798 	       v2df_ftype_v2df, IX86_BUILTIN_MOVDDUP);
13799 }
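
/* Illustrative sketch, not part of the original source: the Prescott entries
   registered at the end of the function are what the SSE3 intrinsics header
   is expected to wrap, e.g. with SSE3 enabled:

     static void
     monitor_mwait_example (const void *p)
     {
       __builtin_ia32_monitor (p, 0, 0);    void (const void *, unsigned, unsigned)
       __builtin_ia32_mwait (0, 0);         void (unsigned, unsigned)
     }
*/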
13800 
13801 /* Errors in the source file can cause expand_expr to return const0_rtx
13802    where we expect a vector.  To avoid crashing, use one of the vector
13803    clear instructions.  */
13804 static rtx
13805 safe_vector_operand (rtx x, enum machine_mode mode)
13806 {
13807   if (x != const0_rtx)
13808     return x;
13809   x = gen_reg_rtx (mode);
13810 
13811   if (VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode))
13812     emit_insn (gen_mmx_clrdi (mode == DImode ? x
13813 			      : gen_rtx_SUBREG (DImode, x, 0)));
13814   else
13815     emit_insn (gen_sse_clrv4sf (mode == V4SFmode ? x
13816 				: gen_rtx_SUBREG (V4SFmode, x, 0),
13817 				CONST0_RTX (V4SFmode)));
13818   return x;
13819 }
13820 
13821 /* Subroutine of ix86_expand_builtin to take care of binop insns.  */
13822 
13823 static rtx
13824 ix86_expand_binop_builtin (enum insn_code icode, tree arglist, rtx target)
13825 {
13826   rtx pat;
13827   tree arg0 = TREE_VALUE (arglist);
13828   tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13829   rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13830   rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13831   enum machine_mode tmode = insn_data[icode].operand[0].mode;
13832   enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13833   enum machine_mode mode1 = insn_data[icode].operand[2].mode;
13834 
13835   if (VECTOR_MODE_P (mode0))
13836     op0 = safe_vector_operand (op0, mode0);
13837   if (VECTOR_MODE_P (mode1))
13838     op1 = safe_vector_operand (op1, mode1);
13839 
13840   if (! target
13841       || GET_MODE (target) != tmode
13842       || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13843     target = gen_reg_rtx (tmode);
13844 
13845   if (GET_MODE (op1) == SImode && mode1 == TImode)
13846     {
13847       rtx x = gen_reg_rtx (V4SImode);
13848       emit_insn (gen_sse2_loadd (x, op1));
13849       op1 = gen_lowpart (TImode, x);
13850     }
13851 
13852   /* In case the insn wants input operands in modes different from
13853      the result, abort.  */
13854   if ((GET_MODE (op0) != mode0 && GET_MODE (op0) != VOIDmode)
13855       || (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode))
13856     abort ();
13857 
13858   if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13859     op0 = copy_to_mode_reg (mode0, op0);
13860   if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13861     op1 = copy_to_mode_reg (mode1, op1);
13862 
13863   /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
13864      yet one of the two must not be a memory.  This is normally enforced
13865      by expanders, but we didn't bother to create one here.  */
13866   if (GET_CODE (op0) == MEM && GET_CODE (op1) == MEM)
13867     op0 = copy_to_mode_reg (mode0, op0);
13868 
13869   pat = GEN_FCN (icode) (target, op0, op1);
13870   if (! pat)
13871     return 0;
13872   emit_insn (pat);
13873   return target;
13874 }
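
/* Illustrative sketch, not part of the original source: a two-operand builtin
   such as

     typedef char __v8qi __attribute__ ((vector_size (8)));

     __v8qi
     padd_example (__v8qi a, __v8qi b)
     {
       return __builtin_ia32_paddb (a, b);
     }

   would reach this routine (via ix86_expand_builtin, defined later) with
   icode == CODE_FOR_addv8qi3; the operand predicates decide which operands
   may stay in memory, and one of them is copied to a register when both
   would otherwise be MEMs.  */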
13875 
13876 /* Subroutine of ix86_expand_builtin to take care of stores.  */
13877 
13878 static rtx
13879 ix86_expand_store_builtin (enum insn_code icode, tree arglist)
13880 {
13881   rtx pat;
13882   tree arg0 = TREE_VALUE (arglist);
13883   tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13884   rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13885   rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13886   enum machine_mode mode0 = insn_data[icode].operand[0].mode;
13887   enum machine_mode mode1 = insn_data[icode].operand[1].mode;
13888 
13889   if (VECTOR_MODE_P (mode1))
13890     op1 = safe_vector_operand (op1, mode1);
13891 
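  /* Operand 0 is the memory destination; OP0 currently holds its address,
     so force the address into a register and wrap it in a MEM of the
     destination mode.  The value being stored must itself be in a register.  */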
13892   op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13893   op1 = copy_to_mode_reg (mode1, op1);
13894 
13895   pat = GEN_FCN (icode) (op0, op1);
13896   if (pat)
13897     emit_insn (pat);
13898   return 0;
13899 }
13900 
13901 /* Subroutine of ix86_expand_builtin to take care of unop insns.  */
13902 
13903 static rtx
13904 ix86_expand_unop_builtin (enum insn_code icode, tree arglist,
13905 			  rtx target, int do_load)
13906 {
13907   rtx pat;
13908   tree arg0 = TREE_VALUE (arglist);
13909   rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13910   enum machine_mode tmode = insn_data[icode].operand[0].mode;
13911   enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13912 
13913   if (! target
13914       || GET_MODE (target) != tmode
13915       || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13916     target = gen_reg_rtx (tmode);
13917   if (do_load)
13918     op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13919   else
13920     {
13921       if (VECTOR_MODE_P (mode0))
13922 	op0 = safe_vector_operand (op0, mode0);
13923 
13924       if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13925 	op0 = copy_to_mode_reg (mode0, op0);
13926     }
13927 
13928   pat = GEN_FCN (icode) (target, op0);
13929   if (! pat)
13930     return 0;
13931   emit_insn (pat);
13932   return target;
13933 }
13934 
13935 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
13936    sqrtss, rsqrtss, rcpss.  */
13937 
13938 static rtx
13939 ix86_expand_unop1_builtin (enum insn_code icode, tree arglist, rtx target)
13940 {
13941   rtx pat;
13942   tree arg0 = TREE_VALUE (arglist);
13943   rtx op1, op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13944   enum machine_mode tmode = insn_data[icode].operand[0].mode;
13945   enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13946 
13947   if (! target
13948       || GET_MODE (target) != tmode
13949       || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13950     target = gen_reg_rtx (tmode);
13951 
13952   if (VECTOR_MODE_P (mode0))
13953     op0 = safe_vector_operand (op0, mode0);
13954 
13955   if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13956     op0 = copy_to_mode_reg (mode0, op0);
13957 
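  /* The scalar patterns used here take two vector inputs: the one the
     operation is applied to and the one supplying the untouched upper
     elements of the result.  Reuse OP0 for both.  */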
13958   op1 = op0;
13959   if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
13960     op1 = copy_to_mode_reg (mode0, op1);
13961 
13962   pat = GEN_FCN (icode) (target, op0, op1);
13963   if (! pat)
13964     return 0;
13965   emit_insn (pat);
13966   return target;
13967 }
13968 
13969 /* Subroutine of ix86_expand_builtin to take care of comparison insns.  */
13970 
13971 static rtx
13972 ix86_expand_sse_compare (const struct builtin_description *d, tree arglist,
13973 			 rtx target)
13974 {
13975   rtx pat;
13976   tree arg0 = TREE_VALUE (arglist);
13977   tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13978   rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13979   rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13980   rtx op2;
13981   enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
13982   enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
13983   enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
13984   enum rtx_code comparison = d->comparison;
13985 
13986   if (VECTOR_MODE_P (mode0))
13987     op0 = safe_vector_operand (op0, mode0);
13988   if (VECTOR_MODE_P (mode1))
13989     op1 = safe_vector_operand (op1, mode1);
13990 
13991   /* Swap operands if we have a comparison that isn't available in
13992      hardware.  */
13993   if (d->flag)
13994     {
13995       rtx tmp = gen_reg_rtx (mode1);
13996       emit_move_insn (tmp, op1);
13997       op1 = op0;
13998       op0 = tmp;
13999     }
14000 
14001   if (! target
14002       || GET_MODE (target) != tmode
14003       || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
14004     target = gen_reg_rtx (tmode);
14005 
14006   if (! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
14007     op0 = copy_to_mode_reg (mode0, op0);
14008   if (! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
14009     op1 = copy_to_mode_reg (mode1, op1);
14010 
14011   op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
14012   pat = GEN_FCN (d->icode) (target, op0, op1, op2);
14013   if (! pat)
14014     return 0;
14015   emit_insn (pat);
14016   return target;
14017 }
14018 
14019 /* Subroutine of ix86_expand_builtin to take care of comi insns.  */
14020 
14021 static rtx
14022 ix86_expand_sse_comi (const struct builtin_description *d, tree arglist,
14023 		      rtx target)
14024 {
14025   rtx pat;
14026   tree arg0 = TREE_VALUE (arglist);
14027   tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14028   rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14029   rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14030   rtx op2;
14031   enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
14032   enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
14033   enum rtx_code comparison = d->comparison;
14034 
14035   if (VECTOR_MODE_P (mode0))
14036     op0 = safe_vector_operand (op0, mode0);
14037   if (VECTOR_MODE_P (mode1))
14038     op1 = safe_vector_operand (op1, mode1);
14039 
14040   /* Swap operands if we have a comparison that isn't available in
14041      hardware.  */
14042   if (d->flag)
14043     {
14044       rtx tmp = op1;
14045       op1 = op0;
14046       op0 = tmp;
14047     }
14048 
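  /* The comi/ucomi patterns only set the flags.  Materialize a boolean
     result by clearing an SImode register and then setting its low byte
     from a comparison against the flags, as emitted below.  */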
14049   target = gen_reg_rtx (SImode);
14050   emit_move_insn (target, const0_rtx);
14051   target = gen_rtx_SUBREG (QImode, target, 0);
14052 
14053   if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
14054     op0 = copy_to_mode_reg (mode0, op0);
14055   if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
14056     op1 = copy_to_mode_reg (mode1, op1);
14057 
14058   op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
14059   pat = GEN_FCN (d->icode) (op0, op1);
14060   if (! pat)
14061     return 0;
14062   emit_insn (pat);
14063   emit_insn (gen_rtx_SET (VOIDmode,
14064 			  gen_rtx_STRICT_LOW_PART (VOIDmode, target),
14065 			  gen_rtx_fmt_ee (comparison, QImode,
14066 					  SET_DEST (pat),
14067 					  const0_rtx)));
14068 
14069   return SUBREG_REG (target);
14070 }
14071 
14072 /* Expand an expression EXP that calls a built-in function,
14073    with result going to TARGET if that's convenient
14074    (and in mode MODE if that's convenient).
14075    SUBTARGET may be used as the target for computing one of EXP's operands.
14076    IGNORE is nonzero if the value is to be ignored.  */
14077 
14078 rtx
14079 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
14080 		     enum machine_mode mode ATTRIBUTE_UNUSED,
14081 		     int ignore ATTRIBUTE_UNUSED)
14082 {
14083   const struct builtin_description *d;
14084   size_t i;
14085   enum insn_code icode;
14086   tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
14087   tree arglist = TREE_OPERAND (exp, 1);
14088   tree arg0, arg1, arg2;
14089   rtx op0, op1, op2, pat;
14090   enum machine_mode tmode, mode0, mode1, mode2;
14091   unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
14092 
14093   switch (fcode)
14094     {
14095     case IX86_BUILTIN_EMMS:
14096       emit_insn (gen_emms ());
14097       return 0;
14098 
14099     case IX86_BUILTIN_SFENCE:
14100       emit_insn (gen_sfence ());
14101       return 0;
14102 
14103     case IX86_BUILTIN_PEXTRW:
14104     case IX86_BUILTIN_PEXTRW128:
14105       icode = (fcode == IX86_BUILTIN_PEXTRW
14106 	       ? CODE_FOR_mmx_pextrw
14107 	       : CODE_FOR_sse2_pextrw);
14108       arg0 = TREE_VALUE (arglist);
14109       arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14110       op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14111       op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14112       tmode = insn_data[icode].operand[0].mode;
14113       mode0 = insn_data[icode].operand[1].mode;
14114       mode1 = insn_data[icode].operand[2].mode;
14115 
14116       if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14117 	op0 = copy_to_mode_reg (mode0, op0);
14118       if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14119 	{
14120 	  error ("selector must be an integer constant in the range 0..%i",
14121 		  fcode == IX86_BUILTIN_PEXTRW ? 3:7);
14122 	  return gen_reg_rtx (tmode);
14123 	}
14124       if (target == 0
14125 	  || GET_MODE (target) != tmode
14126 	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14127 	target = gen_reg_rtx (tmode);
14128       pat = GEN_FCN (icode) (target, op0, op1);
14129       if (! pat)
14130 	return 0;
14131       emit_insn (pat);
14132       return target;
14133 
14134     case IX86_BUILTIN_PINSRW:
14135     case IX86_BUILTIN_PINSRW128:
14136       icode = (fcode == IX86_BUILTIN_PINSRW
14137 	       ? CODE_FOR_mmx_pinsrw
14138 	       : CODE_FOR_sse2_pinsrw);
14139       arg0 = TREE_VALUE (arglist);
14140       arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14141       arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14142       op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14143       op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14144       op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14145       tmode = insn_data[icode].operand[0].mode;
14146       mode0 = insn_data[icode].operand[1].mode;
14147       mode1 = insn_data[icode].operand[2].mode;
14148       mode2 = insn_data[icode].operand[3].mode;
14149 
14150       if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14151 	op0 = copy_to_mode_reg (mode0, op0);
14152       if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14153 	op1 = copy_to_mode_reg (mode1, op1);
14154       if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
14155 	{
14156 	  error ("selector must be an integer constant in the range 0..%i",
14157 		  fcode == IX86_BUILTIN_PINSRW ? 15:255);
14158 	  return const0_rtx;
14159 	}
14160       if (target == 0
14161 	  || GET_MODE (target) != tmode
14162 	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14163 	target = gen_reg_rtx (tmode);
14164       pat = GEN_FCN (icode) (target, op0, op1, op2);
14165       if (! pat)
14166 	return 0;
14167       emit_insn (pat);
14168       return target;
14169 
14170     case IX86_BUILTIN_MASKMOVQ:
14171     case IX86_BUILTIN_MASKMOVDQU:
14172       icode = (fcode == IX86_BUILTIN_MASKMOVQ
14173 	       ? (TARGET_64BIT ? CODE_FOR_mmx_maskmovq_rex : CODE_FOR_mmx_maskmovq)
14174 	       : (TARGET_64BIT ? CODE_FOR_sse2_maskmovdqu_rex64
14175 		  : CODE_FOR_sse2_maskmovdqu));
14176       /* Note the arg order is different from the operand order.  */
14177       arg1 = TREE_VALUE (arglist);
14178       arg2 = TREE_VALUE (TREE_CHAIN (arglist));
14179       arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14180       op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14181       op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14182       op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14183       mode0 = insn_data[icode].operand[0].mode;
14184       mode1 = insn_data[icode].operand[1].mode;
14185       mode2 = insn_data[icode].operand[2].mode;
14186 
14187       if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
14188 	op0 = copy_to_mode_reg (mode0, op0);
14189       if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
14190 	op1 = copy_to_mode_reg (mode1, op1);
14191       if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
14192 	op2 = copy_to_mode_reg (mode2, op2);
14193       pat = GEN_FCN (icode) (op0, op1, op2);
14194       if (! pat)
14195 	return 0;
14196       emit_insn (pat);
14197       return 0;
14198 
14199     case IX86_BUILTIN_SQRTSS:
14200       return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2, arglist, target);
14201     case IX86_BUILTIN_RSQRTSS:
14202       return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2, arglist, target);
14203     case IX86_BUILTIN_RCPSS:
14204       return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target);
14205 
14206     case IX86_BUILTIN_LOADAPS:
14207       return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);
14208 
14209     case IX86_BUILTIN_LOADUPS:
14210       return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
14211 
14212     case IX86_BUILTIN_STOREAPS:
14213       return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist);
14214 
14215     case IX86_BUILTIN_STOREUPS:
14216       return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
14217 
14218     case IX86_BUILTIN_LOADSS:
14219       return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1);
14220 
14221     case IX86_BUILTIN_STORESS:
14222       return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist);
14223 
14224     case IX86_BUILTIN_LOADHPS:
14225     case IX86_BUILTIN_LOADLPS:
14226     case IX86_BUILTIN_LOADHPD:
14227     case IX86_BUILTIN_LOADLPD:
14228       icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_movhps
14229 	       : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_movlps
14230 	       : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_movhpd
14231 	       : CODE_FOR_sse2_movsd);
14232       arg0 = TREE_VALUE (arglist);
14233       arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14234       op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14235       op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14236       tmode = insn_data[icode].operand[0].mode;
14237       mode0 = insn_data[icode].operand[1].mode;
14238       mode1 = insn_data[icode].operand[2].mode;
14239 
14240       if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14241 	op0 = copy_to_mode_reg (mode0, op0);
14242       op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
14243       if (target == 0
14244 	  || GET_MODE (target) != tmode
14245 	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14246 	target = gen_reg_rtx (tmode);
14247       pat = GEN_FCN (icode) (target, op0, op1);
14248       if (! pat)
14249 	return 0;
14250       emit_insn (pat);
14251       return target;
14252 
14253     case IX86_BUILTIN_STOREHPS:
14254     case IX86_BUILTIN_STORELPS:
14255     case IX86_BUILTIN_STOREHPD:
14256     case IX86_BUILTIN_STORELPD:
14257       icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_movhps
14258 	       : fcode == IX86_BUILTIN_STORELPS ? CODE_FOR_sse_movlps
14259 	       : fcode == IX86_BUILTIN_STOREHPD ? CODE_FOR_sse2_movhpd
14260 	       : CODE_FOR_sse2_movsd);
14261       arg0 = TREE_VALUE (arglist);
14262       arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14263       op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14264       op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14265       mode0 = insn_data[icode].operand[1].mode;
14266       mode1 = insn_data[icode].operand[2].mode;
14267 
14268       op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
14269       if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14270 	op1 = copy_to_mode_reg (mode1, op1);
14271 
14272       pat = GEN_FCN (icode) (op0, op0, op1);
14273       if (! pat)
14274 	return 0;
14275       emit_insn (pat);
14276       return 0;
14277 
14278     case IX86_BUILTIN_MOVNTPS:
14279       return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
14280     case IX86_BUILTIN_MOVNTQ:
14281       return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);
14282 
14283     case IX86_BUILTIN_LDMXCSR:
14284       op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
14285       target = assign_386_stack_local (SImode, 0);
14286       emit_move_insn (target, op0);
14287       emit_insn (gen_ldmxcsr (target));
14288       return 0;
14289 
14290     case IX86_BUILTIN_STMXCSR:
14291       target = assign_386_stack_local (SImode, 0);
14292       emit_insn (gen_stmxcsr (target));
14293       return copy_to_mode_reg (SImode, target);
14294 
14295     case IX86_BUILTIN_SHUFPS:
14296     case IX86_BUILTIN_SHUFPD:
14297       icode = (fcode == IX86_BUILTIN_SHUFPS
14298 	       ? CODE_FOR_sse_shufps
14299 	       : CODE_FOR_sse2_shufpd);
14300       arg0 = TREE_VALUE (arglist);
14301       arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14302       arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14303       op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14304       op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14305       op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14306       tmode = insn_data[icode].operand[0].mode;
14307       mode0 = insn_data[icode].operand[1].mode;
14308       mode1 = insn_data[icode].operand[2].mode;
14309       mode2 = insn_data[icode].operand[3].mode;
14310 
14311       if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14312 	op0 = copy_to_mode_reg (mode0, op0);
14313       if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14314 	op1 = copy_to_mode_reg (mode1, op1);
14315       if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
14316 	{
14317 	  /* @@@ better error message */
14318 	  error ("mask must be an immediate");
14319 	  return gen_reg_rtx (tmode);
14320 	}
14321       if (target == 0
14322 	  || GET_MODE (target) != tmode
14323 	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14324 	target = gen_reg_rtx (tmode);
14325       pat = GEN_FCN (icode) (target, op0, op1, op2);
14326       if (! pat)
14327 	return 0;
14328       emit_insn (pat);
14329       return target;
14330 
14331     case IX86_BUILTIN_PSHUFW:
14332     case IX86_BUILTIN_PSHUFD:
14333     case IX86_BUILTIN_PSHUFHW:
14334     case IX86_BUILTIN_PSHUFLW:
14335       icode = (  fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
14336 	       : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
14337 	       : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
14338 	       : CODE_FOR_mmx_pshufw);
14339       arg0 = TREE_VALUE (arglist);
14340       arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14341       op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14342       op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14343       tmode = insn_data[icode].operand[0].mode;
14344       mode1 = insn_data[icode].operand[1].mode;
14345       mode2 = insn_data[icode].operand[2].mode;
14346 
14347       if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
14348 	op0 = copy_to_mode_reg (mode1, op0);
14349       if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
14350 	{
14351 	  /* @@@ better error message */
14352 	  error ("mask must be an immediate");
14353 	  return const0_rtx;
14354 	}
14355       if (target == 0
14356 	  || GET_MODE (target) != tmode
14357 	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14358 	target = gen_reg_rtx (tmode);
14359       pat = GEN_FCN (icode) (target, op0, op1);
14360       if (! pat)
14361 	return 0;
14362       emit_insn (pat);
14363       return target;
14364 
14365     case IX86_BUILTIN_PSLLDQI128:
14366     case IX86_BUILTIN_PSRLDQI128:
14367       icode = (  fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
14368 	       : CODE_FOR_sse2_lshrti3);
14369       arg0 = TREE_VALUE (arglist);
14370       arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14371       op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14372       op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14373       tmode = insn_data[icode].operand[0].mode;
14374       mode1 = insn_data[icode].operand[1].mode;
14375       mode2 = insn_data[icode].operand[2].mode;
14376 
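      /* These patterns shift a full TImode value; if the operand is not
	 already acceptable, copy it to a register and view it through a
	 subreg in the mode the pattern expects.  */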
14377       if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
14378 	{
14379 	  op0 = copy_to_reg (op0);
14380 	  op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
14381 	}
14382       if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
14383 	{
14384 	  error ("shift must be an immediate");
14385 	  return const0_rtx;
14386 	}
14387       target = gen_reg_rtx (V2DImode);
14388       pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0), op0, op1);
14389       if (! pat)
14390 	return 0;
14391       emit_insn (pat);
14392       return target;
14393 
14394     case IX86_BUILTIN_FEMMS:
14395       emit_insn (gen_femms ());
14396       return NULL_RTX;
14397 
14398     case IX86_BUILTIN_PAVGUSB:
14399       return ix86_expand_binop_builtin (CODE_FOR_pavgusb, arglist, target);
14400 
14401     case IX86_BUILTIN_PF2ID:
14402       return ix86_expand_unop_builtin (CODE_FOR_pf2id, arglist, target, 0);
14403 
14404     case IX86_BUILTIN_PFACC:
14405       return ix86_expand_binop_builtin (CODE_FOR_pfacc, arglist, target);
14406 
14407     case IX86_BUILTIN_PFADD:
14408      return ix86_expand_binop_builtin (CODE_FOR_addv2sf3, arglist, target);
14409 
14410     case IX86_BUILTIN_PFCMPEQ:
14411       return ix86_expand_binop_builtin (CODE_FOR_eqv2sf3, arglist, target);
14412 
14413     case IX86_BUILTIN_PFCMPGE:
14414       return ix86_expand_binop_builtin (CODE_FOR_gev2sf3, arglist, target);
14415 
14416     case IX86_BUILTIN_PFCMPGT:
14417       return ix86_expand_binop_builtin (CODE_FOR_gtv2sf3, arglist, target);
14418 
14419     case IX86_BUILTIN_PFMAX:
14420       return ix86_expand_binop_builtin (CODE_FOR_pfmaxv2sf3, arglist, target);
14421 
14422     case IX86_BUILTIN_PFMIN:
14423       return ix86_expand_binop_builtin (CODE_FOR_pfminv2sf3, arglist, target);
14424 
14425     case IX86_BUILTIN_PFMUL:
14426       return ix86_expand_binop_builtin (CODE_FOR_mulv2sf3, arglist, target);
14427 
14428     case IX86_BUILTIN_PFRCP:
14429       return ix86_expand_unop_builtin (CODE_FOR_pfrcpv2sf2, arglist, target, 0);
14430 
14431     case IX86_BUILTIN_PFRCPIT1:
14432       return ix86_expand_binop_builtin (CODE_FOR_pfrcpit1v2sf3, arglist, target);
14433 
14434     case IX86_BUILTIN_PFRCPIT2:
14435       return ix86_expand_binop_builtin (CODE_FOR_pfrcpit2v2sf3, arglist, target);
14436 
14437     case IX86_BUILTIN_PFRSQIT1:
14438       return ix86_expand_binop_builtin (CODE_FOR_pfrsqit1v2sf3, arglist, target);
14439 
14440     case IX86_BUILTIN_PFRSQRT:
14441       return ix86_expand_unop_builtin (CODE_FOR_pfrsqrtv2sf2, arglist, target, 0);
14442 
14443     case IX86_BUILTIN_PFSUB:
14444       return ix86_expand_binop_builtin (CODE_FOR_subv2sf3, arglist, target);
14445 
14446     case IX86_BUILTIN_PFSUBR:
14447       return ix86_expand_binop_builtin (CODE_FOR_subrv2sf3, arglist, target);
14448 
14449     case IX86_BUILTIN_PI2FD:
14450       return ix86_expand_unop_builtin (CODE_FOR_floatv2si2, arglist, target, 0);
14451 
14452     case IX86_BUILTIN_PMULHRW:
14453       return ix86_expand_binop_builtin (CODE_FOR_pmulhrwv4hi3, arglist, target);
14454 
14455     case IX86_BUILTIN_PF2IW:
14456       return ix86_expand_unop_builtin (CODE_FOR_pf2iw, arglist, target, 0);
14457 
14458     case IX86_BUILTIN_PFNACC:
14459       return ix86_expand_binop_builtin (CODE_FOR_pfnacc, arglist, target);
14460 
14461     case IX86_BUILTIN_PFPNACC:
14462       return ix86_expand_binop_builtin (CODE_FOR_pfpnacc, arglist, target);
14463 
14464     case IX86_BUILTIN_PI2FW:
14465       return ix86_expand_unop_builtin (CODE_FOR_pi2fw, arglist, target, 0);
14466 
14467     case IX86_BUILTIN_PSWAPDSI:
14468       return ix86_expand_unop_builtin (CODE_FOR_pswapdv2si2, arglist, target, 0);
14469 
14470     case IX86_BUILTIN_PSWAPDSF:
14471       return ix86_expand_unop_builtin (CODE_FOR_pswapdv2sf2, arglist, target, 0);
14472 
14473     case IX86_BUILTIN_SSE_ZERO:
14474       target = gen_reg_rtx (V4SFmode);
14475       emit_insn (gen_sse_clrv4sf (target, CONST0_RTX (V4SFmode)));
14476       return target;
14477 
14478     case IX86_BUILTIN_MMX_ZERO:
14479       target = gen_reg_rtx (DImode);
14480       emit_insn (gen_mmx_clrdi (target));
14481       return target;
14482 
14483     case IX86_BUILTIN_CLRTI:
14484       target = gen_reg_rtx (V2DImode);
14485       emit_insn (gen_sse2_clrti (simplify_gen_subreg (TImode, target, V2DImode, 0)));
14486       return target;
14487 
14488 
14489     case IX86_BUILTIN_SQRTSD:
14490       return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv2df2, arglist, target);
14491     case IX86_BUILTIN_LOADAPD:
14492       return ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist, target, 1);
14493     case IX86_BUILTIN_LOADUPD:
14494       return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);
14495 
14496     case IX86_BUILTIN_STOREAPD:
14497       return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
14498     case IX86_BUILTIN_STOREUPD:
14499       return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);
14500 
14501     case IX86_BUILTIN_LOADSD:
14502       return ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist, target, 1);
14503 
14504     case IX86_BUILTIN_STORESD:
14505       return ix86_expand_store_builtin (CODE_FOR_sse2_storesd, arglist);
14506 
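    /* Build a V2DFmode value with both elements equal to the scalar
       argument: spill the scalar to a stack slot, load it into the low
       element, then duplicate it across the vector with shufpd.  */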
14507     case IX86_BUILTIN_SETPD1:
14508       target = assign_386_stack_local (DFmode, 0);
14509       arg0 = TREE_VALUE (arglist);
14510       emit_move_insn (adjust_address (target, DFmode, 0),
14511 		      expand_expr (arg0, NULL_RTX, VOIDmode, 0));
14512       op0 = gen_reg_rtx (V2DFmode);
14513       emit_insn (gen_sse2_loadsd (op0, adjust_address (target, V2DFmode, 0)));
14514       emit_insn (gen_sse2_shufpd (op0, op0, op0, GEN_INT (0)));
14515       return op0;
14516 
14517     case IX86_BUILTIN_SETPD:
14518       target = assign_386_stack_local (V2DFmode, 0);
14519       arg0 = TREE_VALUE (arglist);
14520       arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14521       emit_move_insn (adjust_address (target, DFmode, 0),
14522 		      expand_expr (arg0, NULL_RTX, VOIDmode, 0));
14523       emit_move_insn (adjust_address (target, DFmode, 8),
14524 		      expand_expr (arg1, NULL_RTX, VOIDmode, 0));
14525       op0 = gen_reg_rtx (V2DFmode);
14526       emit_insn (gen_sse2_movapd (op0, target));
14527       return op0;
14528 
14529     case IX86_BUILTIN_LOADRPD:
14530       target = ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist,
14531 					 gen_reg_rtx (V2DFmode), 1);
14532       emit_insn (gen_sse2_shufpd (target, target, target, GEN_INT (1)));
14533       return target;
14534 
14535     case IX86_BUILTIN_LOADPD1:
14536       target = ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist,
14537 					 gen_reg_rtx (V2DFmode), 1);
14538       emit_insn (gen_sse2_shufpd (target, target, target, const0_rtx));
14539       return target;
14540 
14541     case IX86_BUILTIN_STOREPD1:
14542       return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
14543     case IX86_BUILTIN_STORERPD:
14544       return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
14545 
14546     case IX86_BUILTIN_CLRPD:
14547       target = gen_reg_rtx (V2DFmode);
14548       emit_insn (gen_sse_clrv2df (target));
14549       return target;
14550 
14551     case IX86_BUILTIN_MFENCE:
14552 	emit_insn (gen_sse2_mfence ());
14553 	return 0;
14554     case IX86_BUILTIN_LFENCE:
14555 	emit_insn (gen_sse2_lfence ());
14556 	return 0;
14557 
14558     case IX86_BUILTIN_CLFLUSH:
14559 	arg0 = TREE_VALUE (arglist);
14560 	op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14561 	icode = CODE_FOR_sse2_clflush;
14562 	if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
14563 	    op0 = copy_to_mode_reg (Pmode, op0);
14564 
14565 	emit_insn (gen_sse2_clflush (op0));
14566 	return 0;
14567 
14568     case IX86_BUILTIN_MOVNTPD:
14569       return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
14570     case IX86_BUILTIN_MOVNTDQ:
14571       return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
14572     case IX86_BUILTIN_MOVNTI:
14573       return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);
14574 
14575     case IX86_BUILTIN_LOADDQA:
14576       return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqa, arglist, target, 1);
14577     case IX86_BUILTIN_LOADDQU:
14578       return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, arglist, target, 1);
14579     case IX86_BUILTIN_LOADD:
14580       return ix86_expand_unop_builtin (CODE_FOR_sse2_loadd, arglist, target, 1);
14581 
14582     case IX86_BUILTIN_STOREDQA:
14583       return ix86_expand_store_builtin (CODE_FOR_sse2_movdqa, arglist);
14584     case IX86_BUILTIN_STOREDQU:
14585       return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, arglist);
14586     case IX86_BUILTIN_STORED:
14587       return ix86_expand_store_builtin (CODE_FOR_sse2_stored, arglist);
14588 
14589     case IX86_BUILTIN_MONITOR:
14590       arg0 = TREE_VALUE (arglist);
14591       arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14592       arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14593       op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14594       op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14595       op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14596       if (!REG_P (op0))
14597 	op0 = copy_to_mode_reg (SImode, op0);
14598       if (!REG_P (op1))
14599 	op1 = copy_to_mode_reg (SImode, op1);
14600       if (!REG_P (op2))
14601 	op2 = copy_to_mode_reg (SImode, op2);
14602       emit_insn (gen_monitor (op0, op1, op2));
14603       return 0;
14604 
14605     case IX86_BUILTIN_MWAIT:
14606       arg0 = TREE_VALUE (arglist);
14607       arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14608       op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14609       op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14610       if (!REG_P (op0))
14611 	op0 = copy_to_mode_reg (SImode, op0);
14612       if (!REG_P (op1))
14613 	op1 = copy_to_mode_reg (SImode, op1);
14614       emit_insn (gen_mwait (op0, op1));
14615       return 0;
14616 
14617     case IX86_BUILTIN_LOADDDUP:
14618       return ix86_expand_unop_builtin (CODE_FOR_loadddup, arglist, target, 1);
14619 
14620     case IX86_BUILTIN_LDDQU:
14621       return ix86_expand_unop_builtin (CODE_FOR_lddqu, arglist, target,
14622 				       1);
14623 
14624     default:
14625       break;
14626     }
14627 
14628   for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
14629     if (d->code == fcode)
14630       {
14631 	/* Compares are treated specially.  */
14632 	if (d->icode == CODE_FOR_maskcmpv4sf3
14633 	    || d->icode == CODE_FOR_vmmaskcmpv4sf3
14634 	    || d->icode == CODE_FOR_maskncmpv4sf3
14635 	    || d->icode == CODE_FOR_vmmaskncmpv4sf3
14636 	    || d->icode == CODE_FOR_maskcmpv2df3
14637 	    || d->icode == CODE_FOR_vmmaskcmpv2df3
14638 	    || d->icode == CODE_FOR_maskncmpv2df3
14639 	    || d->icode == CODE_FOR_vmmaskncmpv2df3)
14640 	  return ix86_expand_sse_compare (d, arglist, target);
14641 
14642 	return ix86_expand_binop_builtin (d->icode, arglist, target);
14643       }
14644 
14645   for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
14646     if (d->code == fcode)
14647       return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
14648 
14649   for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
14650     if (d->code == fcode)
14651       return ix86_expand_sse_comi (d, arglist, target);
14652 
14653   /* @@@ Should really do something sensible here.  */
14654   return 0;
14655 }
14656 
14657 /* Store OPERAND to memory after reload is completed.  This means
14658    that we can't easily use assign_stack_local.  */
14659 rtx
14660 ix86_force_to_memory (enum machine_mode mode, rtx operand)
14661 {
14662   rtx result;
14663   if (!reload_completed)
14664     abort ();
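  /* With a red zone we may store below the stack pointer without
     adjusting it first.  */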
14665   if (TARGET_RED_ZONE)
14666     {
14667       result = gen_rtx_MEM (mode,
14668 			    gen_rtx_PLUS (Pmode,
14669 					  stack_pointer_rtx,
14670 					  GEN_INT (-RED_ZONE_SIZE)));
14671       emit_move_insn (result, operand);
14672     }
14673   else if (!TARGET_RED_ZONE && TARGET_64BIT)
14674     {
14675       switch (mode)
14676 	{
14677 	case HImode:
14678 	case SImode:
14679 	  operand = gen_lowpart (DImode, operand);
14680 	  /* FALLTHRU */
14681 	case DImode:
14682 	  emit_insn (
14683 		      gen_rtx_SET (VOIDmode,
14684 				   gen_rtx_MEM (DImode,
14685 						gen_rtx_PRE_DEC (DImode,
14686 							stack_pointer_rtx)),
14687 				   operand));
14688 	  break;
14689 	default:
14690 	  abort ();
14691 	}
14692       result = gen_rtx_MEM (mode, stack_pointer_rtx);
14693     }
14694   else
14695     {
14696       switch (mode)
14697 	{
14698 	case DImode:
14699 	  {
14700 	    rtx operands[2];
14701 	    split_di (&operand, 1, operands, operands + 1);
14702 	    emit_insn (
14703 			gen_rtx_SET (VOIDmode,
14704 				     gen_rtx_MEM (SImode,
14705 						  gen_rtx_PRE_DEC (Pmode,
14706 							stack_pointer_rtx)),
14707 				     operands[1]));
14708 	    emit_insn (
14709 			gen_rtx_SET (VOIDmode,
14710 				     gen_rtx_MEM (SImode,
14711 						  gen_rtx_PRE_DEC (Pmode,
14712 							stack_pointer_rtx)),
14713 				     operands[0]));
14714 	  }
14715 	  break;
14716 	case HImode:
14717 	  /* It is better to store HImodes as SImodes.  */
14718 	  if (!TARGET_PARTIAL_REG_STALL)
14719 	    operand = gen_lowpart (SImode, operand);
14720 	  /* FALLTHRU */
14721 	case SImode:
14722 	  emit_insn (
14723 		      gen_rtx_SET (VOIDmode,
14724 				   gen_rtx_MEM (GET_MODE (operand),
14725 						gen_rtx_PRE_DEC (SImode,
14726 							stack_pointer_rtx)),
14727 				   operand));
14728 	  break;
14729 	default:
14730 	  abort ();
14731 	}
14732       result = gen_rtx_MEM (mode, stack_pointer_rtx);
14733     }
14734   return result;
14735 }
14736 
14737 /* Free the operand from memory.  */
14738 void
14739 ix86_free_from_memory (enum machine_mode mode)
14740 {
14741   if (!TARGET_RED_ZONE)
14742     {
14743       int size;
14744 
14745       if (mode == DImode || TARGET_64BIT)
14746 	size = 8;
14747       else if (mode == HImode && TARGET_PARTIAL_REG_STALL)
14748 	size = 2;
14749       else
14750 	size = 4;
14751       /* Use LEA to deallocate stack space.  In peephole2 it will be converted
14752          to a pop or add instruction if registers are available.  */
14753       emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
14754 			      gen_rtx_PLUS (Pmode, stack_pointer_rtx,
14755 					    GEN_INT (size))));
14756     }
14757 }
14758 
14759 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
14760    QImode must go into class Q_REGS.
14761    Narrow ALL_REGS to GENERAL_REGS.  This allows movsf and
14762    movdf to do mem-to-mem moves through integer regs.  */
14763 enum reg_class
14764 ix86_preferred_reload_class (rtx x, enum reg_class class)
14765 {
14766   if (GET_CODE (x) == CONST_VECTOR && x != CONST0_RTX (GET_MODE (x)))
14767     return NO_REGS;
14768   if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
14769     {
14770       /* SSE can't load any constant directly yet.  */
14771       if (SSE_CLASS_P (class))
14772 	return NO_REGS;
14773       /* Floats can load 0 and 1.  */
14774       if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x))
14775 	{
14776 	  /* Limit class to non-SSE.  Use GENERAL_REGS if possible.  */
14777 	  if (MAYBE_SSE_CLASS_P (class))
14778 	    return (reg_class_subset_p (class, GENERAL_REGS)
14779 		    ? GENERAL_REGS : FLOAT_REGS);
14780 	  else
14781 	    return class;
14782 	}
14783       /* General regs can load everything.  */
14784       if (reg_class_subset_p (class, GENERAL_REGS))
14785 	return GENERAL_REGS;
14786       /* In case we haven't resolved FLOAT or SSE yet, give up.  */
14787       if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
14788 	return NO_REGS;
14789     }
14790   if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
14791     return NO_REGS;
14792   if (GET_MODE (x) == QImode && ! reg_class_subset_p (class, Q_REGS))
14793     return Q_REGS;
14794   return class;
14795 }
14796 
14797 /* If we are copying between general and FP registers, we need a memory
14798    location. The same is true for SSE and MMX registers.
14799 
14800    The macro can't work reliably when one of the CLASSES is a class containing
14801    registers from multiple units (SSE, MMX, integer).  We avoid this by never
14802    combining those units in a single alternative in the machine description.
14803    Ensure that this constraint holds to avoid unexpected surprises.
14804 
14805    When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
14806    enforce these sanity checks.  */
14807 int
14808 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
14809 			      enum machine_mode mode, int strict)
14810 {
14811   if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
14812       || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
14813       || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
14814       || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
14815       || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
14816       || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
14817     {
14818       if (strict)
14819 	abort ();
14820       else
14821 	return 1;
14822     }
14823   return (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)
14824 	  || ((SSE_CLASS_P (class1) != SSE_CLASS_P (class2)
14825 	       || MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
14826 	      && ((mode != SImode && (mode != DImode || !TARGET_64BIT))
14827 		  || (!TARGET_INTER_UNIT_MOVES && !optimize_size))));
14828 }
14829 /* Return the cost of moving data from a register in class CLASS1 to
14830    one in class CLASS2.
14831 
14832    It is not required that the cost always equal 2 when FROM is the same as TO;
14833    on some machines it is expensive to move between registers if they are not
14834    general registers.  */
14835 int
14836 ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
14837 			 enum reg_class class2)
14838 {
14839   /* In case we require secondary memory, compute cost of the store followed
14840      by load.  In order to avoid bad register allocation choices, we need
14841      this to be *at least* as high as the symmetric MEMORY_MOVE_COST.  */
14842 
14843   if (ix86_secondary_memory_needed (class1, class2, mode, 0))
14844     {
14845       int cost = 1;
14846 
14847       cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
14848 		   MEMORY_MOVE_COST (mode, class1, 1));
14849       cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
14850 		   MEMORY_MOVE_COST (mode, class2, 1));
14851 
14852       /* In the case of copying from a general purpose register we may emit
14853          multiple stores followed by a single load, causing a memory size
14854          mismatch stall.  Count this as an arbitrarily high cost of 20.  */
14855       if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
14856 	cost += 20;
14857 
14858       /* In the case of FP/MMX moves, the registers actually overlap, and we
14859 	 have to switch modes in order to treat them differently.  */
14860       if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
14861           || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
14862 	cost += 20;
14863 
14864       return cost;
14865     }
14866 
14867   /* Moves between SSE/MMX and integer unit are expensive.  */
14868   if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
14869       || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
14870     return ix86_cost->mmxsse_to_integer;
14871   if (MAYBE_FLOAT_CLASS_P (class1))
14872     return ix86_cost->fp_move;
14873   if (MAYBE_SSE_CLASS_P (class1))
14874     return ix86_cost->sse_move;
14875   if (MAYBE_MMX_CLASS_P (class1))
14876     return ix86_cost->mmx_move;
14877   return 2;
14878 }
14879 
14880 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE.  */
14881 int
14882 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
14883 {
14884   /* Flags, and only flags, can hold CCmode values.  */
14885   if (CC_REGNO_P (regno))
14886     return GET_MODE_CLASS (mode) == MODE_CC;
14887   if (GET_MODE_CLASS (mode) == MODE_CC
14888       || GET_MODE_CLASS (mode) == MODE_RANDOM
14889       || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
14890     return 0;
14891   if (FP_REGNO_P (regno))
14892     return VALID_FP_MODE_P (mode);
14893   if (SSE_REGNO_P (regno))
14894     return (TARGET_SSE ? VALID_SSE_REG_MODE (mode) : 0);
14895   if (MMX_REGNO_P (regno))
14896     return (TARGET_MMX
14897 	    ? VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode) : 0);
14898   /* We handle both integers and floats in the general purpose registers.
14899      In the future we should be able to handle vector modes as well.  */
14900   if (!VALID_INT_MODE_P (mode) && !VALID_FP_MODE_P (mode))
14901     return 0;
14902   /* Take care with QImode values - they can be in non-QI regs, but then
14903      they do cause partial register stalls.  */
14904   if (regno < 4 || mode != QImode || TARGET_64BIT)
14905     return 1;
14906   return reload_in_progress || reload_completed || !TARGET_PARTIAL_REG_STALL;
14907 }
14908 
14909 /* Return the cost of moving data of mode M between a
14910    register and memory.  A value of 2 is the default; this cost is
14911    relative to those in `REGISTER_MOVE_COST'.
14912 
14913    If moving between registers and memory is more expensive than
14914    between two registers, you should define this macro to express the
14915    relative cost.
14916 
14917    Also model the increased cost of moving QImode registers in
14918    non-Q_REGS classes.
14919  */
14920 int
14921 ix86_memory_move_cost (enum machine_mode mode, enum reg_class class, int in)
14922 {
14923   if (FLOAT_CLASS_P (class))
14924     {
14925       int index;
14926       switch (mode)
14927 	{
14928 	  case SFmode:
14929 	    index = 0;
14930 	    break;
14931 	  case DFmode:
14932 	    index = 1;
14933 	    break;
14934 	  case XFmode:
14935 	    index = 2;
14936 	    break;
14937 	  default:
14938 	    return 100;
14939 	}
14940       return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
14941     }
14942   if (SSE_CLASS_P (class))
14943     {
14944       int index;
14945       switch (GET_MODE_SIZE (mode))
14946 	{
14947 	  case 4:
14948 	    index = 0;
14949 	    break;
14950 	  case 8:
14951 	    index = 1;
14952 	    break;
14953 	  case 16:
14954 	    index = 2;
14955 	    break;
14956 	  default:
14957 	    return 100;
14958 	}
14959       return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
14960     }
14961   if (MMX_CLASS_P (class))
14962     {
14963       int index;
14964       switch (GET_MODE_SIZE (mode))
14965 	{
14966 	  case 4:
14967 	    index = 0;
14968 	    break;
14969 	  case 8:
14970 	    index = 1;
14971 	    break;
14972 	  default:
14973 	    return 100;
14974 	}
14975       return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
14976     }
14977   switch (GET_MODE_SIZE (mode))
14978     {
14979       case 1:
14980 	if (in)
14981 	  return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
14982 		  : ix86_cost->movzbl_load);
14983 	else
14984 	  return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
14985 		  : ix86_cost->int_store[0] + 4);
14986 	break;
14987       case 2:
14988 	return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
14989       default:
14990 	/* Compute the number of 32-bit moves needed.  TFmode is moved as XFmode.  */
14991 	if (mode == TFmode)
14992 	  mode = XFmode;
14993 	return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
14994 		* (((int) GET_MODE_SIZE (mode)
14995 		    + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
14996     }
14997 }
14998 
14999 /* Compute a (partial) cost for rtx X.  Return true if the complete
15000    cost has been computed, and false if subexpressions should be
15001    scanned.  In either case, *TOTAL contains the cost result.  */
15002 
15003 static bool
15004 ix86_rtx_costs (rtx x, int code, int outer_code, int *total)
15005 {
15006   enum machine_mode mode = GET_MODE (x);
15007 
15008   switch (code)
15009     {
15010     case CONST_INT:
15011     case CONST:
15012     case LABEL_REF:
15013     case SYMBOL_REF:
15014       if (TARGET_64BIT && !x86_64_sign_extended_value (x))
15015 	*total = 3;
15016       else if (TARGET_64BIT && !x86_64_zero_extended_value (x))
15017 	*total = 2;
15018       else if (flag_pic && SYMBOLIC_CONST (x)
15019 	       && (!TARGET_64BIT
15020 		   || (GET_CODE (x) != LABEL_REF
15021 		       && (GET_CODE (x) != SYMBOL_REF
15022 		           || !SYMBOL_REF_LOCAL_P (x)))))
15023 	*total = 1;
15024       else
15025 	*total = 0;
15026       return true;
15027 
15028     case CONST_DOUBLE:
15029       if (mode == VOIDmode)
15030 	*total = 0;
15031       else
15032 	switch (standard_80387_constant_p (x))
15033 	  {
15034 	  case 1: /* 0.0 */
15035 	    *total = 1;
15036 	    break;
15037 	  default: /* Other constants */
15038 	    *total = 2;
15039 	    break;
15040 	  case 0:
15041 	  case -1:
15042 	    /* Start with (MEM (SYMBOL_REF)), since that's where
15043 	       it'll probably end up.  Add a penalty for size.  */
15044 	    *total = (COSTS_N_INSNS (1)
15045 		      + (flag_pic != 0 && !TARGET_64BIT)
15046 		      + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
15047 	    break;
15048 	  }
15049       return true;
15050 
15051     case ZERO_EXTEND:
15052       /* The zero extension is often completely free on x86_64, so make
15053 	 it as cheap as possible.  */
15054       if (TARGET_64BIT && mode == DImode
15055 	  && GET_MODE (XEXP (x, 0)) == SImode)
15056 	*total = 1;
15057       else if (TARGET_ZERO_EXTEND_WITH_AND)
15058 	*total = COSTS_N_INSNS (ix86_cost->add);
15059       else
15060 	*total = COSTS_N_INSNS (ix86_cost->movzx);
15061       return false;
15062 
15063     case SIGN_EXTEND:
15064       *total = COSTS_N_INSNS (ix86_cost->movsx);
15065       return false;
15066 
15067     case ASHIFT:
15068       if (GET_CODE (XEXP (x, 1)) == CONST_INT
15069 	  && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
15070 	{
15071 	  HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
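	  /* A shift left by one can be done with an add, and shifts by two
	     or three can be folded into an lea when that is cheaper.  */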
15072 	  if (value == 1)
15073 	    {
15074 	      *total = COSTS_N_INSNS (ix86_cost->add);
15075 	      return false;
15076 	    }
15077 	  if ((value == 2 || value == 3)
15078 	      && !TARGET_DECOMPOSE_LEA
15079 	      && ix86_cost->lea <= ix86_cost->shift_const)
15080 	    {
15081 	      *total = COSTS_N_INSNS (ix86_cost->lea);
15082 	      return false;
15083 	    }
15084 	}
15085       /* FALLTHRU */
15086 
15087     case ROTATE:
15088     case ASHIFTRT:
15089     case LSHIFTRT:
15090     case ROTATERT:
15091       if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
15092 	{
15093 	  if (GET_CODE (XEXP (x, 1)) == CONST_INT)
15094 	    {
15095 	      if (INTVAL (XEXP (x, 1)) > 32)
15096 		*total = COSTS_N_INSNS (ix86_cost->shift_const + 2);
15097 	      else
15098 		*total = COSTS_N_INSNS (ix86_cost->shift_const * 2);
15099 	    }
15100 	  else
15101 	    {
15102 	      if (GET_CODE (XEXP (x, 1)) == AND)
15103 		*total = COSTS_N_INSNS (ix86_cost->shift_var * 2);
15104 	      else
15105 		*total = COSTS_N_INSNS (ix86_cost->shift_var * 6 + 2);
15106 	    }
15107 	}
15108       else
15109 	{
15110 	  if (GET_CODE (XEXP (x, 1)) == CONST_INT)
15111 	    *total = COSTS_N_INSNS (ix86_cost->shift_const);
15112 	  else
15113 	    *total = COSTS_N_INSNS (ix86_cost->shift_var);
15114 	}
15115       return false;
15116 
15117     case MULT:
15118       if (FLOAT_MODE_P (mode))
15119 	*total = COSTS_N_INSNS (ix86_cost->fmul);
15120       else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
15121 	{
15122 	  unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
15123 	  int nbits;
15124 
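	  /* Count the bits up to and including the most significant set bit;
	     the cost model charges more for multiplies by wider constants.  */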
15125 	  for (nbits = 0; value != 0; value >>= 1)
15126 	    nbits++;
15127 
15128 	  *total = COSTS_N_INSNS (ix86_cost->mult_init[MODE_INDEX (mode)]
15129 			          + nbits * ix86_cost->mult_bit);
15130 	}
15131       else
15132 	{
15133 	  /* This is arbitrary */
15134 	  *total = COSTS_N_INSNS (ix86_cost->mult_init[MODE_INDEX (mode)]
15135 			          + 7 * ix86_cost->mult_bit);
15136 	}
15137       return false;
15138 
15139     case DIV:
15140     case UDIV:
15141     case MOD:
15142     case UMOD:
15143       if (FLOAT_MODE_P (mode))
15144 	*total = COSTS_N_INSNS (ix86_cost->fdiv);
15145       else
15146 	*total = COSTS_N_INSNS (ix86_cost->divide[MODE_INDEX (mode)]);
15147       return false;
15148 
15149     case PLUS:
15150       if (FLOAT_MODE_P (mode))
15151 	*total = COSTS_N_INSNS (ix86_cost->fadd);
15152       else if (!TARGET_DECOMPOSE_LEA
15153 	       && GET_MODE_CLASS (mode) == MODE_INT
15154 	       && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
15155 	{
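	  /* The shapes matched below are address computations that a single
	     lea instruction can perform: base plus scaled index plus
	     displacement, and the simpler variants thereof.  */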
15156 	  if (GET_CODE (XEXP (x, 0)) == PLUS
15157 	      && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
15158 	      && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
15159 	      && CONSTANT_P (XEXP (x, 1)))
15160 	    {
15161 	      HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
15162 	      if (val == 2 || val == 4 || val == 8)
15163 		{
15164 		  *total = COSTS_N_INSNS (ix86_cost->lea);
15165 		  *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
15166 		  *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
15167 				      outer_code);
15168 		  *total += rtx_cost (XEXP (x, 1), outer_code);
15169 		  return true;
15170 		}
15171 	    }
15172 	  else if (GET_CODE (XEXP (x, 0)) == MULT
15173 		   && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
15174 	    {
15175 	      HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
15176 	      if (val == 2 || val == 4 || val == 8)
15177 		{
15178 		  *total = COSTS_N_INSNS (ix86_cost->lea);
15179 		  *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
15180 		  *total += rtx_cost (XEXP (x, 1), outer_code);
15181 		  return true;
15182 		}
15183 	    }
15184 	  else if (GET_CODE (XEXP (x, 0)) == PLUS)
15185 	    {
15186 	      *total = COSTS_N_INSNS (ix86_cost->lea);
15187 	      *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
15188 	      *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
15189 	      *total += rtx_cost (XEXP (x, 1), outer_code);
15190 	      return true;
15191 	    }
15192 	}
15193       /* FALLTHRU */
15194 
15195     case MINUS:
15196       if (FLOAT_MODE_P (mode))
15197 	{
15198 	  *total = COSTS_N_INSNS (ix86_cost->fadd);
15199 	  return false;
15200 	}
15201       /* FALLTHRU */
15202 
15203     case AND:
15204     case IOR:
15205     case XOR:
15206       if (!TARGET_64BIT && mode == DImode)
15207 	{
15208 	  *total = (COSTS_N_INSNS (ix86_cost->add) * 2
15209 		    + (rtx_cost (XEXP (x, 0), outer_code)
15210 		       << (GET_MODE (XEXP (x, 0)) != DImode))
15211 		    + (rtx_cost (XEXP (x, 1), outer_code)
15212 	               << (GET_MODE (XEXP (x, 1)) != DImode)));
15213 	  return true;
15214 	}
15215       /* FALLTHRU */
15216 
15217     case NEG:
15218       if (FLOAT_MODE_P (mode))
15219 	{
15220 	  *total = COSTS_N_INSNS (ix86_cost->fchs);
15221 	  return false;
15222 	}
15223       /* FALLTHRU */
15224 
15225     case NOT:
15226       if (!TARGET_64BIT && mode == DImode)
15227 	*total = COSTS_N_INSNS (ix86_cost->add * 2);
15228       else
15229 	*total = COSTS_N_INSNS (ix86_cost->add);
15230       return false;
15231 
15232     case FLOAT_EXTEND:
15233       if (!TARGET_SSE_MATH || !VALID_SSE_REG_MODE (mode))
15234 	*total = 0;
15235       return false;
15236 
15237     case ABS:
15238       if (FLOAT_MODE_P (mode))
15239 	*total = COSTS_N_INSNS (ix86_cost->fabs);
15240       return false;
15241 
15242     case SQRT:
15243       if (FLOAT_MODE_P (mode))
15244 	*total = COSTS_N_INSNS (ix86_cost->fsqrt);
15245       return false;
15246 
15247     case UNSPEC:
15248       if (XINT (x, 1) == UNSPEC_TP)
15249 	*total = 0;
15250       return false;
15251 
15252     default:
15253       return false;
15254     }
15255 }
15256 
15257 #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
15258 static void
15259 ix86_svr3_asm_out_constructor (rtx symbol, int priority ATTRIBUTE_UNUSED)
15260 {
15261   init_section ();
15262   fputs ("\tpushl $", asm_out_file);
15263   assemble_name (asm_out_file, XSTR (symbol, 0));
15264   fputc ('\n', asm_out_file);
15265 }
15266 #endif
15267 
15268 #if TARGET_MACHO
15269 
15270 static int current_machopic_label_num;
15271 
15272 /* Given a symbol name and its associated stub, write out the
15273    definition of the stub.  */
15274 
15275 void
15276 machopic_output_stub (FILE *file, const char *symb, const char *stub)
15277 {
15278   unsigned int length;
15279   char *binder_name, *symbol_name, lazy_ptr_name[32];
15280   int label = ++current_machopic_label_num;
15281 
15282   /* Lose our funky encoding stuff so it doesn't contaminate the stub.  */
15283   symb = (*targetm.strip_name_encoding) (symb);
15284 
15285   length = strlen (stub);
15286   binder_name = alloca (length + 32);
15287   GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
15288 
15289   length = strlen (symb);
15290   symbol_name = alloca (length + 32);
15291   GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
15292 
15293   sprintf (lazy_ptr_name, "L%d$lz", label);
15294 
15295   if (MACHOPIC_PURE)
15296     machopic_picsymbol_stub_section ();
15297   else
15298     machopic_symbol_stub_section ();
15299 
15300   fprintf (file, "%s:\n", stub);
15301   fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
15302 
15303   if (MACHOPIC_PURE)
15304     {
15305       fprintf (file, "\tcall LPC$%d\nLPC$%d:\tpopl %%eax\n", label, label);
15306       fprintf (file, "\tmovl %s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
15307       fprintf (file, "\tjmp %%edx\n");
15308     }
15309   else
15310     fprintf (file, "\tjmp *%s\n", lazy_ptr_name);
15311 
15312   fprintf (file, "%s:\n", binder_name);
15313 
15314   if (MACHOPIC_PURE)
15315     {
15316       fprintf (file, "\tlea %s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
15317       fprintf (file, "\tpushl %%eax\n");
15318     }
15319   else
15320     fprintf (file, "\t pushl $%s\n", lazy_ptr_name);
15321 
15322   fprintf (file, "\tjmp dyld_stub_binding_helper\n");
15323 
15324   machopic_lazy_symbol_ptr_section ();
15325   fprintf (file, "%s:\n", lazy_ptr_name);
15326   fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
15327   fprintf (file, "\t.long %s\n", binder_name);
15328 }
15329 #endif /* TARGET_MACHO */
15330 
15331 /* Order the registers for register allocator.  */
15332 
15333 void
15334 x86_order_regs_for_local_alloc (void)
15335 {
15336    int pos = 0;
15337    int i;
15338 
15339    /* First allocate the local general purpose registers.  */
15340    for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
15341      if (GENERAL_REGNO_P (i) && call_used_regs[i])
15342 	reg_alloc_order [pos++] = i;
15343 
15344    /* Global general purpose registers.  */
15345    for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
15346      if (GENERAL_REGNO_P (i) && !call_used_regs[i])
15347 	reg_alloc_order [pos++] = i;
15348 
15349    /* x87 registers come first in case we are doing FP math
15350       using them.  */
15351    if (!TARGET_SSE_MATH)
15352      for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
15353        reg_alloc_order [pos++] = i;
15354 
15355    /* SSE registers.  */
15356    for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
15357      reg_alloc_order [pos++] = i;
15358    for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
15359      reg_alloc_order [pos++] = i;
15360 
15361    /* x87 registers.  */
15362    if (TARGET_SSE_MATH)
15363      for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
15364        reg_alloc_order [pos++] = i;
15365 
15366    for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
15367      reg_alloc_order [pos++] = i;
15368 
15369    /* Initialize the rest of the array, as we do not allocate some registers
15370       at all.  */
15371    while (pos < FIRST_PSEUDO_REGISTER)
15372      reg_alloc_order [pos++] = 0;
15373 }
15374 
15375 #ifndef TARGET_USE_MS_BITFIELD_LAYOUT
15376 #define TARGET_USE_MS_BITFIELD_LAYOUT 0
15377 #endif
15378 
15379 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
15380    struct attribute_spec.handler.  */
15381 static tree
15382 ix86_handle_struct_attribute (tree *node, tree name,
15383 			      tree args ATTRIBUTE_UNUSED,
15384 			      int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
15385 {
15386   tree *type = NULL;
15387   if (DECL_P (*node))
15388     {
15389       if (TREE_CODE (*node) == TYPE_DECL)
15390 	type = &TREE_TYPE (*node);
15391     }
15392   else
15393     type = node;
15394 
15395   if (!(type && (TREE_CODE (*type) == RECORD_TYPE
15396 		 || TREE_CODE (*type) == UNION_TYPE)))
15397     {
15398       warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
15399       *no_add_attrs = true;
15400     }
15401 
15402   else if ((is_attribute_p ("ms_struct", name)
15403 	    && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
15404 	   || ((is_attribute_p ("gcc_struct", name)
15405 		&& lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
15406     {
15407       warning ("`%s' incompatible attribute ignored",
15408                IDENTIFIER_POINTER (name));
15409       *no_add_attrs = true;
15410     }
15411 
15412   return NULL_TREE;
15413 }
15414 
15415 static bool
15416 ix86_ms_bitfield_layout_p (tree record_type)
15417 {
15418   return (TARGET_USE_MS_BITFIELD_LAYOUT &&
15419 	  !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
15420     || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
15421 }
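
/* For illustration only (hypothetical user code, not part of GCC): the
   attributes handled above are applied to aggregate types, e.g.

     struct __attribute__ ((ms_struct)) S { int a : 3; int b : 13; };
     struct __attribute__ ((gcc_struct)) T { char c; double d; };

   ms_struct requests the MS bitfield layout for that type even when the
   GCC layout is the default, and gcc_struct the reverse; applying both to
   one type triggers the "incompatible attribute" warning above.  */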
15422 
15423 /* Returns an expression indicating where the this parameter is
15424    located on entry to the FUNCTION.  */
15425 
15426 static rtx
15427 x86_this_parameter (tree function)
15428 {
15429   tree type = TREE_TYPE (function);
15430 
15431   if (TARGET_64BIT)
15432     {
15433       int n = aggregate_value_p (TREE_TYPE (type), type) != 0;
15434       return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
15435     }
15436 
15437   if (ix86_function_regparm (type, function) > 0)
15438     {
15439       tree parm;
15440 
15441       parm = TYPE_ARG_TYPES (type);
15442       /* Figure out whether or not the function has a variable number of
15443 	 arguments.  */
15444       for (; parm; parm = TREE_CHAIN (parm))
15445 	if (TREE_VALUE (parm) == void_type_node)
15446 	  break;
15447       /* If not, the this parameter is in the first argument.  */
15448       if (parm)
15449 	{
15450 	  int regno = 0;
15451 	  if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
15452 	    regno = 2;
15453 	  return gen_rtx_REG (SImode, regno);
15454 	}
15455     }
15456 
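  /* Otherwise `this' lives on the stack.  At function entry 0(%esp) holds
     the return address; if the function returns an aggregate in memory, the
     hidden return-slot pointer occupies 4(%esp) and `this' sits above it at
     8(%esp), otherwise `this' itself is at 4(%esp).  */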
15457   if (aggregate_value_p (TREE_TYPE (type), type))
15458     return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
15459   else
15460     return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
15461 }
15462 
15463 /* Determine whether x86_output_mi_thunk can succeed.  */
15464 
15465 static bool
15466 x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED,
15467 			 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
15468 			 HOST_WIDE_INT vcall_offset, tree function)
15469 {
15470   /* 64-bit can handle anything.  */
15471   if (TARGET_64BIT)
15472     return true;
15473 
15474   /* For 32-bit, everything's fine if we have one free register.  */
15475   if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
15476     return true;
15477 
15478   /* Need a free register for vcall_offset.  */
15479   if (vcall_offset)
15480     return false;
15481 
15482   /* Need a free register for GOT references.  */
15483   if (flag_pic && !(*targetm.binds_local_p) (function))
15484     return false;
15485 
15486   /* Otherwise ok.  */
15487   return true;
15488 }
15489 
15490 /* Output the assembler code for a thunk function.  THUNK_DECL is the
15491    declaration for the thunk function itself, FUNCTION is the decl for
15492    the target function.  DELTA is an immediate constant offset to be
15493    added to THIS.  If VCALL_OFFSET is nonzero, the word at
15494    *(*this + vcall_offset) should be added to THIS.  */
15495 
15496 static void
15497 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
15498 		     tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
15499 		     HOST_WIDE_INT vcall_offset, tree function)
15500 {
15501   rtx xops[3];
15502   rtx this = x86_this_parameter (function);
15503   rtx this_reg, tmp;
15504 
15505   /* If VCALL_OFFSET, we'll need THIS in a register.  Might as well
15506      pull it in now and let DELTA benefit.  */
15507   if (REG_P (this))
15508     this_reg = this;
15509   else if (vcall_offset)
15510     {
15511       /* Put the this parameter into %eax.  */
15512       xops[0] = this;
15513       xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
15514       output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
15515     }
15516   else
15517     this_reg = NULL_RTX;
15518 
15519   /* Adjust the this parameter by a fixed constant.  */
15520   if (delta)
15521     {
15522       xops[0] = GEN_INT (delta);
15523       xops[1] = this_reg ? this_reg : this;
15524       if (TARGET_64BIT)
15525 	{
15526 	  if (!x86_64_general_operand (xops[0], DImode))
15527 	    {
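	      /* DELTA does not fit in an addq immediate; use R10 as a
		 scratch register, since in the x86-64 ABI it is
		 call-clobbered and never carries an argument.  */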
15528 	      tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
15529 	      xops[1] = tmp;
15530 	      output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
15531 	      xops[0] = tmp;
15532 	      xops[1] = this;
15533 	    }
15534 	  output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
15535 	}
15536       else
15537 	output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
15538     }
15539 
15540   /* Adjust the this parameter by a value stored in the vtable.  */
15541   if (vcall_offset)
15542     {
15543       if (TARGET_64BIT)
15544 	tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
15545       else
15546 	{
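	  /* Pick a scratch register that does not already carry an argument:
	     with fastcall the first two arguments (including THIS) arrive in
	     ECX and EDX, so fall back to EAX in that case.  */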
15547 	  int tmp_regno = 2 /* ECX */;
15548 	  if (lookup_attribute ("fastcall",
15549 	      TYPE_ATTRIBUTES (TREE_TYPE (function))))
15550 	    tmp_regno = 0 /* EAX */;
15551 	  tmp = gen_rtx_REG (SImode, tmp_regno);
15552 	}
15553 
15554       xops[0] = gen_rtx_MEM (Pmode, this_reg);
15555       xops[1] = tmp;
15556       if (TARGET_64BIT)
15557 	output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
15558       else
15559 	output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
15560 
15561       /* Adjust the this parameter.  */
15562       xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
15563       if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
15564 	{
15565 	  rtx tmp2 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
15566 	  xops[0] = GEN_INT (vcall_offset);
15567 	  xops[1] = tmp2;
15568 	  output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
15569 	  xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
15570 	}
15571       xops[1] = this_reg;
15572       if (TARGET_64BIT)
15573 	output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
15574       else
15575 	output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
15576     }
15577 
15578   /* If necessary, drop THIS back to its stack slot.  */
15579   if (this_reg && this_reg != this)
15580     {
15581       xops[0] = this_reg;
15582       xops[1] = this;
15583       output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
15584     }
15585 
15586   xops[0] = XEXP (DECL_RTL (function), 0);
15587   if (TARGET_64BIT)
15588     {
15589       if (!flag_pic || (*targetm.binds_local_p) (function))
15590 	output_asm_insn ("jmp\t%P0", xops);
15591       else
15592 	{
15593 	  tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
15594 	  tmp = gen_rtx_CONST (Pmode, tmp);
15595 	  tmp = gen_rtx_MEM (QImode, tmp);
15596 	  xops[0] = tmp;
15597 	  output_asm_insn ("jmp\t%A0", xops);
15598 	}
15599     }
15600   else
15601     {
15602       if (!flag_pic || (*targetm.binds_local_p) (function))
15603 	output_asm_insn ("jmp\t%P0", xops);
15604       else
15605 #if TARGET_MACHO
15606 	if (TARGET_MACHO)
15607 	  {
15608 	    const char *ip = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (function));
15609 	    tmp = gen_rtx_SYMBOL_REF (Pmode, machopic_stub_name (ip));
15610 	    tmp = gen_rtx_MEM (QImode, tmp);
15611 	    xops[0] = tmp;
15612 	    output_asm_insn ("jmp\t%0", xops);
15613 	  }
15614 	else
15615 #endif /* TARGET_MACHO */
15616 	{
15617 	  tmp = gen_rtx_REG (SImode, 2 /* ECX */);
15618 	  output_set_got (tmp);
15619 
15620 	  xops[1] = tmp;
15621 	  output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
15622 	  output_asm_insn ("jmp\t{*}%1", xops);
15623 	}
15624     }
15625 }
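
/* For illustration only: for a 32-bit, non-PIC thunk with a small DELTA and
   no VCALL_OFFSET the routine above emits something along the lines of

	addl	$delta, 4(%esp)
	jmp	function

   i.e. `this' is adjusted in its stack slot and control transfers directly
   to the target.  `delta' and `function' stand for the actual values and
   are not literal output.  */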
15626 
15627 static void
15628 x86_file_start (void)
15629 {
15630   default_file_start ();
15631   if (X86_FILE_START_VERSION_DIRECTIVE)
15632     fputs ("\t.version\t\"01.01\"\n", asm_out_file);
15633   if (X86_FILE_START_FLTUSED)
15634     fputs ("\t.global\t__fltused\n", asm_out_file);
15635   if (ix86_asm_dialect == ASM_INTEL)
15636     fputs ("\t.intel_syntax\n", asm_out_file);
15637 }
15638 
15639 int
15640 x86_field_alignment (tree field, int computed)
15641 {
15642   enum machine_mode mode;
15643   tree type = TREE_TYPE (field);
15644 
15645   if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
15646     return computed;
15647   mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
15648 		    ? get_inner_array_type (type) : type);
15649   if (mode == DFmode || mode == DCmode
15650       || GET_MODE_CLASS (mode) == MODE_INT
15651       || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
15652     return MIN (32, computed);
15653   return computed;
15654 }
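
/* For illustration only: under the default 32-bit rules above (no
   -malign-double), a hypothetical

     struct s { char c; double d; };

   places `d' at offset 4 rather than 8, because the natural 64 bit
   alignment of DFmode (and DImode) fields is capped at 32 bits.  */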
15655 
15656 /* Output assembler code to FILE to increment profiler label # LABELNO
15657    for profiling a function entry.  */
15658 void
15659 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
15660 {
15661   if (TARGET_64BIT)
15662     if (flag_pic)
15663       {
15664 #ifndef NO_PROFILE_COUNTERS
15665 	fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
15666 #endif
15667 	fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
15668       }
15669     else
15670       {
15671 #ifndef NO_PROFILE_COUNTERS
15672 	fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno);
15673 #endif
15674 	fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
15675       }
15676   else if (flag_pic)
15677     {
15678 #ifndef NO_PROFILE_COUNTERS
15679       fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
15680 	       LPREFIX, labelno, PROFILE_COUNT_REGISTER);
15681 #endif
15682       fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
15683     }
15684   else
15685     {
15686 #ifndef NO_PROFILE_COUNTERS
15687       fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
15688 	       PROFILE_COUNT_REGISTER);
15689 #endif
15690       fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
15691     }
15692 }
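
/* For illustration only: in the plain 32-bit, non-PIC case the code above
   emits, per instrumented function,

	movl	$<LPREFIX>P<labelno>,%<PROFILE_COUNT_REGISTER>
	call	<MCOUNT_NAME>

   with the counter load omitted when NO_PROFILE_COUNTERS is defined.  The
   bracketed names stand for the corresponding target macros rather than
   literal output.  */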
15693 
15694 /* We don't have exact information about the insn sizes, but we may quite
15695    safely assume that we know about all 1 byte insns and the sizes of
15696    memory addresses.  This is enough to eliminate unnecessary padding in
15697    99% of cases.  */
15698 
15699 static int
15700 min_insn_size (rtx insn)
15701 {
15702   int l = 0;
15703 
15704   if (!INSN_P (insn) || !active_insn_p (insn))
15705     return 0;
15706 
15707   /* Discard the alignments we have emitted, and jump tables.  */
15708   if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
15709       && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
15710     return 0;
15711   if (GET_CODE (insn) == JUMP_INSN
15712       && (GET_CODE (PATTERN (insn)) == ADDR_VEC
15713 	  || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
15714     return 0;
15715 
15716   /* Important case - calls are always 5 bytes.
15717      It is common to have many calls in a row.  */
15718   if (GET_CODE (insn) == CALL_INSN
15719       && symbolic_reference_mentioned_p (PATTERN (insn))
15720       && !SIBLING_CALL_P (insn))
15721     return 5;
15722   if (get_attr_length (insn) <= 1)
15723     return 1;
15724 
15725   /* For normal instructions we may rely on the address sizes: the presence
15726      of a symbolic reference requires at least 4 bytes of encoding.
15727      This is not the case for jumps, where references are PC relative.  */
15728   if (GET_CODE (insn) != JUMP_INSN)
15729     {
15730       l = get_attr_length_address (insn);
15731       if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
15732 	l = 4;
15733     }
15734   if (l)
15735     return 1+l;
15736   else
15737     return 2;
15738 }
15739 
15740 /* The AMD K8 core mispredicts jumps when there are more than 3 jumps in a
15741    16 byte window.  */
15742 
15743 static void
15744 k8_avoid_jump_misspredicts (void)
15745 {
15746   rtx insn, start = get_insns ();
15747   int nbytes = 0, njumps = 0;
15748   int isjump = 0;
15749 
15750   /* Look for all minimal intervals of instructions containing 4 jumps.
15751      The intervals are bounded by START and INSN.  NBYTES is the total
15752      size of the instructions in the interval, including INSN but not
15753      including START.  When NBYTES is smaller than 16 bytes, it is possible
15754      that the ends of START and INSN fall into the same 16 byte window.
15755 
15756      The smallest offset in the window at which INSN can start is the case
15757      where START ends at offset 0.  The offset of INSN is then
15758      NBYTES - sizeof (INSN).  We add a p2align to the 16 byte window with
15759      maxskip of 17 - NBYTES + sizeof (INSN).  */
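
  /* As a worked example with illustrative numbers only: if the instructions
     from just after START up to and including INSN add up to NBYTES = 12
     and INSN itself is 2 bytes, the loop below emits an alignment with a
     maximum skip of 15 - 12 + 2 = 5 bytes, per the padsize computation
     used further down.  */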
15760   for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
15761     {
15762 
15763       nbytes += min_insn_size (insn);
15764       if (rtl_dump_file)
15765         fprintf(rtl_dump_file, "Insn %i estimated to %i bytes\n",
15766 		INSN_UID (insn), min_insn_size (insn));
15767       if ((GET_CODE (insn) == JUMP_INSN
15768 	   && GET_CODE (PATTERN (insn)) != ADDR_VEC
15769 	   && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
15770 	  || GET_CODE (insn) == CALL_INSN)
15771 	njumps++;
15772       else
15773 	continue;
15774 
15775       while (njumps > 3)
15776 	{
15777 	  start = NEXT_INSN (start);
15778 	  if ((GET_CODE (start) == JUMP_INSN
15779 	       && GET_CODE (PATTERN (start)) != ADDR_VEC
15780 	       && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
15781 	      || GET_CODE (start) == CALL_INSN)
15782 	    njumps--, isjump = 1;
15783 	  else
15784 	    isjump = 0;
15785 	  nbytes -= min_insn_size (start);
15786 	}
15787       if (njumps < 0)
15788 	abort ();
15789       if (rtl_dump_file)
15790         fprintf(rtl_dump_file, "Interval %i to %i has %i bytes\n",
15791 		INSN_UID (start), INSN_UID (insn), nbytes);
15792 
15793       if (njumps == 3 && isjump && nbytes < 16)
15794 	{
15795 	  int padsize = 15 - nbytes + min_insn_size (insn);
15796 
15797 	  if (rtl_dump_file)
15798 	    fprintf (rtl_dump_file, "Padding insn %i by %i bytes!\n", INSN_UID (insn), padsize);
15799           emit_insn_before (gen_align (GEN_INT (padsize)), insn);
15800 	}
15801     }
15802 }
15803 
15804 /* Implement machine specific optimizations.
15805    At the moment we implement a single transformation: AMD Athlon works
15806    faster when RET is not the destination of a conditional jump or directly
15807    preceded by another jump instruction.  We avoid the penalty by inserting
15808    a NOP just before such RET instructions.  */
15809 static void
15810 ix86_reorg (void)
15811 {
15812   edge e;
15813 
15814   if (!TARGET_ATHLON_K8 || !optimize || optimize_size)
15815     return;
15816   for (e = EXIT_BLOCK_PTR->pred; e; e = e->pred_next)
15817   {
15818     basic_block bb = e->src;
15819     rtx ret = BB_END (bb);
15820     rtx prev;
15821     bool replace = false;
15822 
15823     if (GET_CODE (ret) != JUMP_INSN || GET_CODE (PATTERN (ret)) != RETURN
15824 	|| !maybe_hot_bb_p (bb))
15825       continue;
15826     for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
15827       if (active_insn_p (prev) || GET_CODE (prev) == CODE_LABEL)
15828 	break;
15829     if (prev && GET_CODE (prev) == CODE_LABEL)
15830       {
15831 	edge e;
15832 	for (e = bb->pred; e; e = e->pred_next)
15833 	  if (EDGE_FREQUENCY (e) && e->src->index >= 0
15834 	      && !(e->flags & EDGE_FALLTHRU))
15835 	    replace = true;
15836       }
15837     if (!replace)
15838       {
15839 	prev = prev_active_insn (ret);
15840 	if (prev
15841 	    && ((GET_CODE (prev) == JUMP_INSN && any_condjump_p (prev))
15842 		|| GET_CODE (prev) == CALL_INSN))
15843 	  replace = true;
15844 	/* Empty functions get a branch mispredict even when the jump destination
15845 	   is not visible to us.  */
15846 	if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
15847 	  replace = true;
15848       }
15849     if (replace)
15850       {
15851         emit_insn_before (gen_return_internal_long (), ret);
15852 	delete_insn (ret);
15853       }
15854   }
15855   k8_avoid_jump_misspredicts ();
15856 }
15857 
15858 /* Return nonzero when a QImode register that must be represented via a REX
15859    prefix is used.  */
15860 bool
15861 x86_extended_QIreg_mentioned_p (rtx insn)
15862 {
15863   int i;
15864   extract_insn_cached (insn);
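  /* In the i386 register numbering AX, DX, CX and BX are hard registers
     0-3; any register operand numbered 4 or above (SI, DI, BP, SP and the
     REX-only registers) has a QImode form that can only be encoded with a
     REX prefix.  */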
15865   for (i = 0; i < recog_data.n_operands; i++)
15866     if (REG_P (recog_data.operand[i])
15867 	&& REGNO (recog_data.operand[i]) >= 4)
15868        return true;
15869   return false;
15870 }
15871 
15872 /* Return nonzero when P points to a register encoded via a REX prefix.
15873    Called via for_each_rtx.  */
15874 static int
15875 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
15876 {
15877    unsigned int regno;
15878    if (!REG_P (*p))
15879      return 0;
15880    regno = REGNO (*p);
15881    return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
15882 }
15883 
15884 /* Return true when INSN mentions a register that must be encoded using a
15885    REX prefix.  */
15886 bool
15887 x86_extended_reg_mentioned_p (rtx insn)
15888 {
15889   return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
15890 }
15891 
15892 /* Generate an unsigned DImode/SImode to FP conversion.  This is the same code
15893    optabs would emit if we didn't have TFmode patterns.  */
15894 
15895 void
15896 x86_emit_floatuns (rtx operands[2])
15897 {
15898   rtx neglab, donelab, i0, i1, f0, in, out;
15899   enum machine_mode mode, inmode;
15900 
15901   inmode = GET_MODE (operands[1]);
15902   if (inmode != SImode
15903       && inmode != DImode)
15904     abort ();
15905 
15906   out = operands[0];
15907   in = force_reg (inmode, operands[1]);
15908   mode = GET_MODE (out);
15909   neglab = gen_label_rtx ();
15910   donelab = gen_label_rtx ();
15911   i1 = gen_reg_rtx (Pmode);
15912   f0 = gen_reg_rtx (mode);
15913 
15914   emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, Pmode, 0, neglab);
15915 
15916   emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
15917   emit_jump_insn (gen_jump (donelab));
15918   emit_barrier ();
15919 
15920   emit_label (neglab);
15921 
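  /* The input has its top bit set, so it is out of range for a signed
     conversion.  Halve it with a logical shift, OR the discarded low bit
     back in so the final rounding is unaffected, convert the result, and
     then double it.  */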
15922   i0 = expand_simple_binop (Pmode, LSHIFTRT, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
15923   i1 = expand_simple_binop (Pmode, AND, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
15924   i0 = expand_simple_binop (Pmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
15925   expand_float (f0, i0, 0);
15926   emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
15927 
15928   emit_label (donelab);
15929 }
15930 
15931 /* Return true if we do not know how to pass TYPE solely in registers.  */
15932 bool
15933 ix86_must_pass_in_stack (enum machine_mode mode, tree type)
15934 {
15935    if (default_must_pass_in_stack (mode, type))
15936      return true;
15937    return (!TARGET_64BIT && type && mode == TImode);
15938 }
15939 
15940 /* Initialize vector TARGET via VALS.  */
15941 void
15942 ix86_expand_vector_init (rtx target, rtx vals)
15943 {
15944   enum machine_mode mode = GET_MODE (target);
15945   int elt_size = GET_MODE_SIZE (GET_MODE_INNER (mode));
15946   int n_elts = (GET_MODE_SIZE (mode) / elt_size);
15947   int i;
15948 
15949   for (i = n_elts - 1; i >= 0; i--)
15950     if (GET_CODE (XVECEXP (vals, 0, i)) != CONST_INT
15951 	&& GET_CODE (XVECEXP (vals, 0, i)) != CONST_DOUBLE)
15952       break;
15953 
15954   /* A few special cases first...
15955      ... constants are best loaded from the constant pool.  */
15956   if (i < 0)
15957     {
15958       emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
15959       return;
15960     }
15961 
15962   /* ... values where only the first field is non-constant are best loaded
15963      from the pool and overwritten via a move later.  */
15964   if (!i)
15965     {
15966       rtx op = simplify_gen_subreg (mode, XVECEXP (vals, 0, 0),
15967 				    GET_MODE_INNER (mode), 0);
15968 
15969       op = force_reg (mode, op);
15970       XVECEXP (vals, 0, 0) = CONST0_RTX (GET_MODE_INNER (mode));
15971       emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
15972       switch (GET_MODE (target))
15973 	{
15974 	  case V2DFmode:
15975 	    emit_insn (gen_sse2_movsd (target, target, op));
15976 	    break;
15977 	  case V4SFmode:
15978 	    emit_insn (gen_sse_movss (target, target, op));
15979 	    break;
15980 	  default:
15981 	    break;
15982 	}
15983       return;
15984     }
15985 
15986   /* And the general case, built up with unpack (interleave) instructions.  */
15987   switch (GET_MODE (target))
15988     {
15989       case V2DFmode:
15990 	{
15991 	  rtx vecop0 =
15992 	    simplify_gen_subreg (V2DFmode, XVECEXP (vals, 0, 0), DFmode, 0);
15993 	  rtx vecop1 =
15994 	    simplify_gen_subreg (V2DFmode, XVECEXP (vals, 0, 1), DFmode, 0);
15995 
15996 	  vecop0 = force_reg (V2DFmode, vecop0);
15997 	  vecop1 = force_reg (V2DFmode, vecop1);
15998 	  emit_insn (gen_sse2_unpcklpd (target, vecop0, vecop1));
15999 	}
16000 	break;
16001       case V4SFmode:
16002 	{
16003 	  rtx vecop0 =
16004 	    simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 0), SFmode, 0);
16005 	  rtx vecop1 =
16006 	    simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 1), SFmode, 0);
16007 	  rtx vecop2 =
16008 	    simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 2), SFmode, 0);
16009 	  rtx vecop3 =
16010 	    simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 3), SFmode, 0);
16011 	  rtx tmp1 = gen_reg_rtx (V4SFmode);
16012 	  rtx tmp2 = gen_reg_rtx (V4SFmode);
16013 
16014 	  vecop0 = force_reg (V4SFmode, vecop0);
16015 	  vecop1 = force_reg (V4SFmode, vecop1);
16016 	  vecop2 = force_reg (V4SFmode, vecop2);
16017 	  vecop3 = force_reg (V4SFmode, vecop3);
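	  /* unpcklps interleaves the low elements of its operands, so
	     combining (1,3) and (0,2) and then interleaving those partial
	     results yields the vector {0, 1, 2, 3} in order.  */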
16018 	  emit_insn (gen_sse_unpcklps (tmp1, vecop1, vecop3));
16019 	  emit_insn (gen_sse_unpcklps (tmp2, vecop0, vecop2));
16020 	  emit_insn (gen_sse_unpcklps (target, tmp2, tmp1));
16021 	}
16022 	break;
16023       default:
16024 	abort ();
16025     }
16026 }
16027 
16028 #include "gt-i386.h"
16029