1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004 Free Software Foundation, Inc.
4
5 This file is part of GNU CC.
6
7 GNU CC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
11
12 GNU CC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GNU CC; see the file COPYING. If not, write to
19 the Free Software Foundation, 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
21
22 #include "config.h"
23 #include "system.h"
24 #include "rtl.h"
25 #include "tree.h"
26 #include "tm_p.h"
27 #include "regs.h"
28 #include "hard-reg-set.h"
29 #include "real.h"
30 #include "insn-config.h"
31 #include "conditions.h"
32 #include "output.h"
33 #include "insn-attr.h"
34 #include "flags.h"
35 #include "except.h"
36 #include "function.h"
37 #include "recog.h"
38 #include "expr.h"
39 #include "optabs.h"
40 #include "toplev.h"
41 #include "basic-block.h"
42 #include "ggc.h"
43 #include "target.h"
44 #include "target-def.h"
45 #include "langhooks.h"
46
47 #ifndef CHECK_STACK_LIMIT
48 #define CHECK_STACK_LIMIT (-1)
49 #endif
50
51 /* Processor costs (relative to an add) */
52 static const
53 struct processor_costs size_cost = { /* costs for tuning for size */
54 2, /* cost of an add instruction */
55 3, /* cost of a lea instruction */
56 2, /* variable shift costs */
57 3, /* constant shift costs */
58 3, /* cost of starting a multiply */
59 0, /* cost of multiply per each bit set */
60 3, /* cost of a divide/mod */
61 3, /* cost of movsx */
62 3, /* cost of movzx */
63 0, /* "large" insn */
64 2, /* MOVE_RATIO */
65 2, /* cost for loading QImode using movzbl */
66 {2, 2, 2}, /* cost of loading integer registers
67 in QImode, HImode and SImode.
68 Relative to reg-reg move (2). */
69 {2, 2, 2}, /* cost of storing integer registers */
70 2, /* cost of reg,reg fld/fst */
71 {2, 2, 2}, /* cost of loading fp registers
72 in SFmode, DFmode and XFmode */
73 {2, 2, 2}, /* cost of storing fp registers */
74 3, /* cost of moving MMX register */
75 {3, 3}, /* cost of loading MMX registers
76 in SImode and DImode */
77 {3, 3}, /* cost of storing MMX registers
78 in SImode and DImode */
79 3, /* cost of moving SSE register */
80 {3, 3, 3}, /* cost of loading SSE registers
81 in SImode, DImode and TImode */
82 {3, 3, 3}, /* cost of storing SSE registers
83 in SImode, DImode and TImode */
84 3, /* MMX or SSE register to integer */
85 0, /* size of prefetch block */
86 0, /* number of parallel prefetches */
87 2, /* cost of FADD and FSUB insns. */
88 2, /* cost of FMUL instruction. */
89 2, /* cost of FDIV instruction. */
90 2, /* cost of FABS instruction. */
91 2, /* cost of FCHS instruction. */
92 2, /* cost of FSQRT instruction. */
93 };
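/* Note that these nearly uniform values make the RTL cost estimates track
   instruction count and size rather than latency, which is presumably the
   bias we want when optimizing for size.  */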
94
95 /* Processor costs (relative to an add) */
96 static const
97 struct processor_costs i386_cost = { /* 386 specific costs */
98 1, /* cost of an add instruction */
99 1, /* cost of a lea instruction */
100 3, /* variable shift costs */
101 2, /* constant shift costs */
102 6, /* cost of starting a multiply */
103 1, /* cost of multiply per each bit set */
104 23, /* cost of a divide/mod */
105 3, /* cost of movsx */
106 2, /* cost of movzx */
107 15, /* "large" insn */
108 3, /* MOVE_RATIO */
109 4, /* cost for loading QImode using movzbl */
110 {2, 4, 2}, /* cost of loading integer registers
111 in QImode, HImode and SImode.
112 Relative to reg-reg move (2). */
113 {2, 4, 2}, /* cost of storing integer registers */
114 2, /* cost of reg,reg fld/fst */
115 {8, 8, 8}, /* cost of loading fp registers
116 in SFmode, DFmode and XFmode */
117 {8, 8, 8}, /* cost of storing fp registers */
118 2, /* cost of moving MMX register */
119 {4, 8}, /* cost of loading MMX registers
120 in SImode and DImode */
121 {4, 8}, /* cost of storing MMX registers
122 in SImode and DImode */
123 2, /* cost of moving SSE register */
124 {4, 8, 16}, /* cost of loading SSE registers
125 in SImode, DImode and TImode */
126 {4, 8, 16}, /* cost of storing SSE registers
127 in SImode, DImode and TImode */
128 3, /* MMX or SSE register to integer */
129 0, /* size of prefetch block */
130 0, /* number of parallel prefetches */
131 23, /* cost of FADD and FSUB insns. */
132 27, /* cost of FMUL instruction. */
133 88, /* cost of FDIV instruction. */
134 22, /* cost of FABS instruction. */
135 24, /* cost of FCHS instruction. */
136 122, /* cost of FSQRT instruction. */
137 };
138
139 static const
140 struct processor_costs i486_cost = { /* 486 specific costs */
141 1, /* cost of an add instruction */
142 1, /* cost of a lea instruction */
143 3, /* variable shift costs */
144 2, /* constant shift costs */
145 12, /* cost of starting a multiply */
146 1, /* cost of multiply per each bit set */
147 40, /* cost of a divide/mod */
148 3, /* cost of movsx */
149 2, /* cost of movzx */
150 15, /* "large" insn */
151 3, /* MOVE_RATIO */
152 4, /* cost for loading QImode using movzbl */
153 {2, 4, 2}, /* cost of loading integer registers
154 in QImode, HImode and SImode.
155 Relative to reg-reg move (2). */
156 {2, 4, 2}, /* cost of storing integer registers */
157 2, /* cost of reg,reg fld/fst */
158 {8, 8, 8}, /* cost of loading fp registers
159 in SFmode, DFmode and XFmode */
160 {8, 8, 8}, /* cost of storing fp registers */
161 2, /* cost of moving MMX register */
162 {4, 8}, /* cost of loading MMX registers
163 in SImode and DImode */
164 {4, 8}, /* cost of storing MMX registers
165 in SImode and DImode */
166 2, /* cost of moving SSE register */
167 {4, 8, 16}, /* cost of loading SSE registers
168 in SImode, DImode and TImode */
169 {4, 8, 16}, /* cost of storing SSE registers
170 in SImode, DImode and TImode */
171 3, /* MMX or SSE register to integer */
172 0, /* size of prefetch block */
173 0, /* number of parallel prefetches */
174 8, /* cost of FADD and FSUB insns. */
175 16, /* cost of FMUL instruction. */
176 73, /* cost of FDIV instruction. */
177 3, /* cost of FABS instruction. */
178 3, /* cost of FCHS instruction. */
179 83, /* cost of FSQRT instruction. */
180 };
181
182 static const
183 struct processor_costs pentium_cost = {
184 1, /* cost of an add instruction */
185 1, /* cost of a lea instruction */
186 4, /* variable shift costs */
187 1, /* constant shift costs */
188 11, /* cost of starting a multiply */
189 0, /* cost of multiply per each bit set */
190 25, /* cost of a divide/mod */
191 3, /* cost of movsx */
192 2, /* cost of movzx */
193 8, /* "large" insn */
194 6, /* MOVE_RATIO */
195 6, /* cost for loading QImode using movzbl */
196 {2, 4, 2}, /* cost of loading integer registers
197 in QImode, HImode and SImode.
198 Relative to reg-reg move (2). */
199 {2, 4, 2}, /* cost of storing integer registers */
200 2, /* cost of reg,reg fld/fst */
201 {2, 2, 6}, /* cost of loading fp registers
202 in SFmode, DFmode and XFmode */
203 {4, 4, 6}, /* cost of storing fp registers */
204 8, /* cost of moving MMX register */
205 {8, 8}, /* cost of loading MMX registers
206 in SImode and DImode */
207 {8, 8}, /* cost of storing MMX registers
208 in SImode and DImode */
209 2, /* cost of moving SSE register */
210 {4, 8, 16}, /* cost of loading SSE registers
211 in SImode, DImode and TImode */
212 {4, 8, 16}, /* cost of storing SSE registers
213 in SImode, DImode and TImode */
214 3, /* MMX or SSE register to integer */
215 0, /* size of prefetch block */
216 0, /* number of parallel prefetches */
217 3, /* cost of FADD and FSUB insns. */
218 3, /* cost of FMUL instruction. */
219 39, /* cost of FDIV instruction. */
220 1, /* cost of FABS instruction. */
221 1, /* cost of FCHS instruction. */
222 70, /* cost of FSQRT instruction. */
223 };
224
225 static const
226 struct processor_costs pentiumpro_cost = {
227 1, /* cost of an add instruction */
228 1, /* cost of a lea instruction */
229 1, /* variable shift costs */
230 1, /* constant shift costs */
231 4, /* cost of starting a multiply */
232 0, /* cost of multiply per each bit set */
233 17, /* cost of a divide/mod */
234 1, /* cost of movsx */
235 1, /* cost of movzx */
236 8, /* "large" insn */
237 6, /* MOVE_RATIO */
238 2, /* cost for loading QImode using movzbl */
239 {4, 4, 4}, /* cost of loading integer registers
240 in QImode, HImode and SImode.
241 Relative to reg-reg move (2). */
242 {2, 2, 2}, /* cost of storing integer registers */
243 2, /* cost of reg,reg fld/fst */
244 {2, 2, 6}, /* cost of loading fp registers
245 in SFmode, DFmode and XFmode */
246 {4, 4, 6}, /* cost of storing fp registers */
247 2, /* cost of moving MMX register */
248 {2, 2}, /* cost of loading MMX registers
249 in SImode and DImode */
250 {2, 2}, /* cost of storing MMX registers
251 in SImode and DImode */
252 2, /* cost of moving SSE register */
253 {2, 2, 8}, /* cost of loading SSE registers
254 in SImode, DImode and TImode */
255 {2, 2, 8}, /* cost of storing SSE registers
256 in SImode, DImode and TImode */
257 3, /* MMX or SSE register to integer */
258 32, /* size of prefetch block */
259 6, /* number of parallel prefetches */
260 3, /* cost of FADD and FSUB insns. */
261 5, /* cost of FMUL instruction. */
262 56, /* cost of FDIV instruction. */
263 2, /* cost of FABS instruction. */
264 2, /* cost of FCHS instruction. */
265 56, /* cost of FSQRT instruction. */
266 };
267
268 static const
269 struct processor_costs k6_cost = {
270 1, /* cost of an add instruction */
271 2, /* cost of a lea instruction */
272 1, /* variable shift costs */
273 1, /* constant shift costs */
274 3, /* cost of starting a multiply */
275 0, /* cost of multiply per each bit set */
276 18, /* cost of a divide/mod */
277 2, /* cost of movsx */
278 2, /* cost of movzx */
279 8, /* "large" insn */
280 4, /* MOVE_RATIO */
281 3, /* cost for loading QImode using movzbl */
282 {4, 5, 4}, /* cost of loading integer registers
283 in QImode, HImode and SImode.
284 Relative to reg-reg move (2). */
285 {2, 3, 2}, /* cost of storing integer registers */
286 4, /* cost of reg,reg fld/fst */
287 {6, 6, 6}, /* cost of loading fp registers
288 in SFmode, DFmode and XFmode */
289 {4, 4, 4}, /* cost of storing fp registers */
290 2, /* cost of moving MMX register */
291 {2, 2}, /* cost of loading MMX registers
292 in SImode and DImode */
293 {2, 2}, /* cost of storing MMX registers
294 in SImode and DImode */
295 2, /* cost of moving SSE register */
296 {2, 2, 8}, /* cost of loading SSE registers
297 in SImode, DImode and TImode */
298 {2, 2, 8}, /* cost of storing SSE registers
299 in SImode, DImode and TImode */
300 6, /* MMX or SSE register to integer */
301 32, /* size of prefetch block */
302 1, /* number of parallel prefetches */
303 2, /* cost of FADD and FSUB insns. */
304 2, /* cost of FMUL instruction. */
305 56, /* cost of FDIV instruction. */
306 2, /* cost of FABS instruction. */
307 2, /* cost of FCHS instruction. */
308 56, /* cost of FSQRT instruction. */
309 };
310
311 static const
312 struct processor_costs athlon_cost = {
313 1, /* cost of an add instruction */
314 2, /* cost of a lea instruction */
315 1, /* variable shift costs */
316 1, /* constant shift costs */
317 5, /* cost of starting a multiply */
318 0, /* cost of multiply per each bit set */
319 42, /* cost of a divide/mod */
320 1, /* cost of movsx */
321 1, /* cost of movzx */
322 8, /* "large" insn */
323 9, /* MOVE_RATIO */
324 4, /* cost for loading QImode using movzbl */
325 {3, 4, 3}, /* cost of loading integer registers
326 in QImode, HImode and SImode.
327 Relative to reg-reg move (2). */
328 {3, 4, 3}, /* cost of storing integer registers */
329 4, /* cost of reg,reg fld/fst */
330 {4, 4, 12}, /* cost of loading fp registers
331 in SFmode, DFmode and XFmode */
332 {6, 6, 8}, /* cost of storing fp registers */
333 2, /* cost of moving MMX register */
334 {4, 4}, /* cost of loading MMX registers
335 in SImode and DImode */
336 {4, 4}, /* cost of storing MMX registers
337 in SImode and DImode */
338 2, /* cost of moving SSE register */
339 {4, 4, 6}, /* cost of loading SSE registers
340 in SImode, DImode and TImode */
341 {4, 4, 5}, /* cost of storing SSE registers
342 in SImode, DImode and TImode */
343 5, /* MMX or SSE register to integer */
344 64, /* size of prefetch block */
345 6, /* number of parallel prefetches */
346 4, /* cost of FADD and FSUB insns. */
347 4, /* cost of FMUL instruction. */
348 24, /* cost of FDIV instruction. */
349 2, /* cost of FABS instruction. */
350 2, /* cost of FCHS instruction. */
351 35, /* cost of FSQRT instruction. */
352 };
353
354 static const
355 struct processor_costs pentium4_cost = {
356 1, /* cost of an add instruction */
357 1, /* cost of a lea instruction */
358 8, /* variable shift costs */
359 8, /* constant shift costs */
360 30, /* cost of starting a multiply */
361 0, /* cost of multiply per each bit set */
362 112, /* cost of a divide/mod */
363 1, /* cost of movsx */
364 1, /* cost of movzx */
365 16, /* "large" insn */
366 6, /* MOVE_RATIO */
367 2, /* cost for loading QImode using movzbl */
368 {4, 5, 4}, /* cost of loading integer registers
369 in QImode, HImode and SImode.
370 Relative to reg-reg move (2). */
371 {2, 3, 2}, /* cost of storing integer registers */
372 2, /* cost of reg,reg fld/fst */
373 {2, 2, 6}, /* cost of loading fp registers
374 in SFmode, DFmode and XFmode */
375 {4, 4, 6}, /* cost of storing fp registers */
376 2, /* cost of moving MMX register */
377 {2, 2}, /* cost of loading MMX registers
378 in SImode and DImode */
379 {2, 2}, /* cost of storing MMX registers
380 in SImode and DImode */
381 12, /* cost of moving SSE register */
382 {12, 12, 12}, /* cost of loading SSE registers
383 in SImode, DImode and TImode */
384 {2, 2, 8}, /* cost of storing SSE registers
385 in SImode, DImode and TImode */
386 10, /* MMX or SSE register to integer */
387 64, /* size of prefetch block */
388 6, /* number of parallel prefetches */
389 5, /* cost of FADD and FSUB insns. */
390 7, /* cost of FMUL instruction. */
391 43, /* cost of FDIV instruction. */
392 2, /* cost of FABS instruction. */
393 2, /* cost of FCHS instruction. */
394 43, /* cost of FSQRT instruction. */
395 };
396
397 const struct processor_costs *ix86_cost = &pentium_cost;
398
399 /* Processor feature/optimization bitmasks. */
400 #define m_386 (1<<PROCESSOR_I386)
401 #define m_486 (1<<PROCESSOR_I486)
402 #define m_PENT (1<<PROCESSOR_PENTIUM)
403 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
404 #define m_K6 (1<<PROCESSOR_K6)
405 #define m_ATHLON (1<<PROCESSOR_ATHLON)
406 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
407
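/* Each x86_* flag below is a bitmask over the PROCESSOR_* values above;
   an optimization is considered profitable for the processor being tuned
   for when its bit is set, tested against (1 << ix86_cpu) (or
   (1 << ix86_arch) for architecture-dependent flags), as in the uses of
   x86_arch_always_fancy_math_387 and x86_3dnow_a in override_options
   below.  */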
408 const int x86_use_leave = m_386 | m_K6 | m_ATHLON;
409 const int x86_push_memory = m_386 | m_K6 | m_ATHLON | m_PENT4;
410 const int x86_zero_extend_with_and = m_486 | m_PENT;
411 const int x86_movx = m_ATHLON | m_PPRO | m_PENT4 /* m_386 | m_K6 */;
412 const int x86_double_with_add = ~m_386;
413 const int x86_use_bit_test = m_386;
414 const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON | m_K6;
415 const int x86_cmove = m_PPRO | m_ATHLON | m_PENT4;
416 const int x86_3dnow_a = m_ATHLON;
417 const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON | m_PENT4;
418 const int x86_branch_hints = m_PENT4;
419 const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4;
420 const int x86_partial_reg_stall = m_PPRO;
421 const int x86_use_loop = m_K6;
422 const int x86_use_fiop = ~(m_PPRO | m_ATHLON | m_PENT);
423 const int x86_use_mov0 = m_K6;
424 const int x86_use_cltd = ~(m_PENT | m_K6);
425 const int x86_read_modify_write = ~m_PENT;
426 const int x86_read_modify = ~(m_PENT | m_PPRO);
427 const int x86_split_long_moves = m_PPRO;
428 const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON;
429 const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
430 const int x86_single_stringop = m_386 | m_PENT4;
431 const int x86_qimode_math = ~(0);
432 const int x86_promote_qi_regs = 0;
433 const int x86_himode_math = ~(m_PPRO);
434 const int x86_promote_hi_regs = m_PPRO;
435 const int x86_sub_esp_4 = m_ATHLON | m_PPRO | m_PENT4;
436 const int x86_sub_esp_8 = m_ATHLON | m_PPRO | m_386 | m_486 | m_PENT4;
437 const int x86_add_esp_4 = m_ATHLON | m_K6 | m_PENT4;
438 const int x86_add_esp_8 = m_ATHLON | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4;
439 const int x86_integer_DFmode_moves = ~(m_ATHLON | m_PENT4 | m_PPRO);
440 const int x86_partial_reg_dependency = m_ATHLON | m_PENT4;
441 const int x86_memory_mismatch_stall = m_ATHLON | m_PENT4;
442 const int x86_accumulate_outgoing_args = m_ATHLON | m_PENT4 | m_PPRO;
443 const int x86_prologue_using_move = m_ATHLON | m_PENT4 | m_PPRO;
444 const int x86_epilogue_using_move = m_ATHLON | m_PENT4 | m_PPRO;
445 const int x86_decompose_lea = m_PENT4;
446 const int x86_shift1 = ~m_486;
447 const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON | m_PENT4;
448
449 /* In case the average insn count for a single function invocation is
450 lower than this constant, emit fast (but longer) prologue and
451 epilogue code. */
452 #define FAST_PROLOGUE_INSN_COUNT 30
453
454 /* Set by prologue expander and used by epilogue expander to determine
455 the style used. */
456 static int use_fast_prologue_epilogue;
457
458 /* Names for the 8-bit (low), 8-bit (high), and 16-bit registers, respectively. */
459 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
460 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
461 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
462
463 /* Array of the smallest class containing reg number REGNO, indexed by
464 REGNO. Used by REGNO_REG_CLASS in i386.h. */
465
466 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
467 {
468 /* ax, dx, cx, bx */
469 AREG, DREG, CREG, BREG,
470 /* si, di, bp, sp */
471 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
472 /* FP registers */
473 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
474 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
475 /* arg pointer */
476 NON_Q_REGS,
477 /* flags, fpsr, dirflag, frame */
478 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
479 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
480 SSE_REGS, SSE_REGS,
481 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
482 MMX_REGS, MMX_REGS,
483 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
484 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
485 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
486 SSE_REGS, SSE_REGS,
487 };
488
489 /* The "default" register map used in 32bit mode. */
490
491 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
492 {
493 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
494 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
495 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
496 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
497 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
498 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
499 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
500 };
501
502 static int const x86_64_int_parameter_registers[6] =
503 {
504 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
505 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
506 };
507
508 static int const x86_64_int_return_registers[4] =
509 {
510 0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
511 };
512
513 /* The "default" register map used in 64bit mode. */
514 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
515 {
516 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
517 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
518 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
519 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
520 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
521 8,9,10,11,12,13,14,15, /* extended integer registers */
522 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
523 };
524
525 /* Define the register numbers to be used in Dwarf debugging information.
526 The SVR4 reference port C compiler uses the following register numbers
527 in its Dwarf output code:
528 0 for %eax (gcc regno = 0)
529 1 for %ecx (gcc regno = 2)
530 2 for %edx (gcc regno = 1)
531 3 for %ebx (gcc regno = 3)
532 4 for %esp (gcc regno = 7)
533 5 for %ebp (gcc regno = 6)
534 6 for %esi (gcc regno = 4)
535 7 for %edi (gcc regno = 5)
536 The following three DWARF register numbers are never generated by
537 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
538 believes these numbers have these meanings.
539 8 for %eip (no gcc equivalent)
540 9 for %eflags (gcc regno = 17)
541 10 for %trapno (no gcc equivalent)
542 It is not at all clear how we should number the FP stack registers
543 for the x86 architecture. If the version of SDB on x86/svr4 were
544 a bit less brain dead with respect to floating-point then we would
545 have a precedent to follow with respect to DWARF register numbers
546 for x86 FP registers, but the SDB on x86/svr4 is so completely
547 broken with respect to FP registers that it is hardly worth thinking
548 of it as something to strive for compatibility with.
549 The version of x86/svr4 SDB I have at the moment does (partially)
550 seem to believe that DWARF register number 11 is associated with
551 the x86 register %st(0), but that's about all. Higher DWARF
552 register numbers don't seem to be associated with anything in
553 particular, and even for DWARF regno 11, SDB only seems to under-
554 stand that it should say that a variable lives in %st(0) (when
555 asked via an `=' command) if we said it was in DWARF regno 11,
556 but SDB still prints garbage when asked for the value of the
557 variable in question (via a `/' command).
558 (Also note that the labels SDB prints for various FP stack regs
559 when doing an `x' command are all wrong.)
560 Note that these problems generally don't affect the native SVR4
561 C compiler because it doesn't allow the use of -O with -g and
562 because when it is *not* optimizing, it allocates a memory
563 location for each floating-point variable, and the memory
564 location is what gets described in the DWARF AT_location
565 attribute for the variable in question.
566 Regardless of the severe mental illness of the x86/svr4 SDB, we
567 do something sensible here and we use the following DWARF
568 register numbers. Note that these are all stack-top-relative
569 numbers.
570 11 for %st(0) (gcc regno = 8)
571 12 for %st(1) (gcc regno = 9)
572 13 for %st(2) (gcc regno = 10)
573 14 for %st(3) (gcc regno = 11)
574 15 for %st(4) (gcc regno = 12)
575 16 for %st(5) (gcc regno = 13)
576 17 for %st(6) (gcc regno = 14)
577 18 for %st(7) (gcc regno = 15)
578 */
579 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
580 {
581 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
582 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
583 -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
584 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
585 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
586 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
587 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
588 };
589
590 /* Test and compare insns in i386.md store the information needed to
591 generate branch and scc insns here. */
592
593 rtx ix86_compare_op0 = NULL_RTX;
594 rtx ix86_compare_op1 = NULL_RTX;
595
596 /* The encoding characters for the four TLS models present in ELF. */
597
598 static char const tls_model_chars[] = " GLil";
599
600 #define MAX_386_STACK_LOCALS 3
601 /* Size of the register save area. */
602 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
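/* With the usual 64-bit parameter-passing limits (REGPARM_MAX == 6 integer
   registers of UNITS_PER_WORD == 8 bytes, SSE_REGPARM_MAX == 8 SSE registers
   of 16 bytes each) this save area works out to 6*8 + 8*16 = 176 bytes.  */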
603
604 /* Define the structure for the machine field in struct function. */
605 struct machine_function GTY(())
606 {
607 rtx stack_locals[(int) MAX_MACHINE_MODE][MAX_386_STACK_LOCALS];
608 const char *some_ld_name;
609 int save_varrargs_registers;
610 int accesses_prev_frame;
611 };
612
613 #define ix86_stack_locals (cfun->machine->stack_locals)
614 #define ix86_save_varrargs_registers (cfun->machine->save_varrargs_registers)
615
616 /* Structure describing stack frame layout.
617 Stack grows downward:
618
619 [arguments]
620 <- ARG_POINTER
621 saved pc
622
623 saved frame pointer if frame_pointer_needed
624 <- HARD_FRAME_POINTER
625 [saved regs]
626
627 [padding1] \
628 )
629 [va_arg registers] (
630 > to_allocate <- FRAME_POINTER
631 [frame] (
632 )
633 [padding2] /
634 */
635 struct ix86_frame
636 {
637 int nregs;
638 int padding1;
639 int va_arg_size;
640 HOST_WIDE_INT frame;
641 int padding2;
642 int outgoing_arguments_size;
643 int red_zone_size;
644
645 HOST_WIDE_INT to_allocate;
646 /* The offsets relative to ARG_POINTER. */
647 HOST_WIDE_INT frame_pointer_offset;
648 HOST_WIDE_INT hard_frame_pointer_offset;
649 HOST_WIDE_INT stack_pointer_offset;
650
651 HOST_WIDE_INT local_size;
652 };
653
654 /* Used to enable/disable debugging features. */
655 const char *ix86_debug_arg_string, *ix86_debug_addr_string;
656 /* Code model option as passed by user. */
657 const char *ix86_cmodel_string;
658 /* Parsed value. */
659 enum cmodel ix86_cmodel;
660 /* Asm dialect. */
661 const char *ix86_asm_string;
662 enum asm_dialect ix86_asm_dialect = ASM_ATT;
663 /* TLS dialect. */
664 const char *ix86_tls_dialect_string;
665 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
666
667 /* Which unit we are generating floating point math for. */
668 enum fpmath_unit ix86_fpmath;
669
670 /* Which cpu are we scheduling for. */
671 enum processor_type ix86_cpu;
672 /* Which instruction set architecture to use. */
673 enum processor_type ix86_arch;
674
675 /* Strings to hold which cpu and instruction set architecture to use. */
676 const char *ix86_cpu_string; /* for -mcpu=<xxx> */
677 const char *ix86_arch_string; /* for -march=<xxx> */
678 const char *ix86_fpmath_string; /* for -mfpmath=<xxx> */
679
680 /* # of registers to use to pass arguments. */
681 const char *ix86_regparm_string;
682
683 /* true if sse prefetch instruction is not NOOP. */
684 int x86_prefetch_sse;
685
686 /* ix86_regparm_string as a number */
687 int ix86_regparm;
688
689 /* Alignment to use for loops and jumps: */
690
691 /* Power of two alignment for loops. */
692 const char *ix86_align_loops_string;
693
694 /* Power of two alignment for non-loop jumps. */
695 const char *ix86_align_jumps_string;
696
697 /* Power of two alignment for stack boundary in bytes. */
698 const char *ix86_preferred_stack_boundary_string;
699
700 /* Preferred alignment for stack boundary in bits. */
701 int ix86_preferred_stack_boundary;
702
703 /* Values 1-5: see jump.c */
704 int ix86_branch_cost;
705 const char *ix86_branch_cost_string;
706
707 /* Power of two alignment for functions. */
708 const char *ix86_align_funcs_string;
709
710 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
711 static char internal_label_prefix[16];
712 static int internal_label_prefix_len;
713
714 static int local_symbolic_operand PARAMS ((rtx, enum machine_mode));
715 static int tls_symbolic_operand_1 PARAMS ((rtx, enum tls_model));
716 static void output_pic_addr_const PARAMS ((FILE *, rtx, int));
717 static void put_condition_code PARAMS ((enum rtx_code, enum machine_mode,
718 int, int, FILE *));
719 static const char *get_some_local_dynamic_name PARAMS ((void));
720 static int get_some_local_dynamic_name_1 PARAMS ((rtx *, void *));
721 static rtx maybe_get_pool_constant PARAMS ((rtx));
722 static rtx ix86_expand_int_compare PARAMS ((enum rtx_code, rtx, rtx));
723 static enum rtx_code ix86_prepare_fp_compare_args PARAMS ((enum rtx_code,
724 rtx *, rtx *));
725 static bool ix86_fixed_condition_code_regs PARAMS ((unsigned int *,
726 unsigned int *));
727 static enum machine_mode ix86_cc_modes_compatible PARAMS ((enum machine_mode,
728 enum machine_mode));
729 static rtx get_thread_pointer PARAMS ((void));
730 static void get_pc_thunk_name PARAMS ((char [32], unsigned int));
731 static rtx gen_push PARAMS ((rtx));
732 static int memory_address_length PARAMS ((rtx addr));
733 static int ix86_flags_dependant PARAMS ((rtx, rtx, enum attr_type));
734 static int ix86_agi_dependant PARAMS ((rtx, rtx, enum attr_type));
735 static enum attr_ppro_uops ix86_safe_ppro_uops PARAMS ((rtx));
736 static void ix86_dump_ppro_packet PARAMS ((FILE *));
737 static void ix86_reorder_insn PARAMS ((rtx *, rtx *));
738 static struct machine_function * ix86_init_machine_status PARAMS ((void));
739 static int ix86_split_to_parts PARAMS ((rtx, rtx *, enum machine_mode));
740 static int ix86_nsaved_regs PARAMS ((void));
741 static void ix86_emit_save_regs PARAMS ((void));
742 static void ix86_emit_save_regs_using_mov PARAMS ((rtx, HOST_WIDE_INT));
743 static void ix86_emit_restore_regs_using_mov PARAMS ((rtx, int, int));
744 static void ix86_output_function_epilogue PARAMS ((FILE *, HOST_WIDE_INT));
745 static void ix86_set_move_mem_attrs_1 PARAMS ((rtx, rtx, rtx, rtx, rtx));
746 static void ix86_sched_reorder_ppro PARAMS ((rtx *, rtx *));
747 static HOST_WIDE_INT ix86_GOT_alias_set PARAMS ((void));
748 static void ix86_adjust_counter PARAMS ((rtx, HOST_WIDE_INT));
749 static rtx ix86_expand_aligntest PARAMS ((rtx, int));
750 static void ix86_expand_strlensi_unroll_1 PARAMS ((rtx, rtx));
751 static int ix86_issue_rate PARAMS ((void));
752 static int ix86_adjust_cost PARAMS ((rtx, rtx, rtx, int));
753 static void ix86_sched_init PARAMS ((FILE *, int, int));
754 static int ix86_sched_reorder PARAMS ((FILE *, int, rtx *, int *, int));
755 static int ix86_variable_issue PARAMS ((FILE *, int, rtx, int));
756 static int ia32_use_dfa_pipeline_interface PARAMS ((void));
757 static int ia32_multipass_dfa_lookahead PARAMS ((void));
758 static void ix86_init_mmx_sse_builtins PARAMS ((void));
759 static rtx x86_this_parameter PARAMS ((tree));
760 static void x86_output_mi_thunk PARAMS ((FILE *, tree, HOST_WIDE_INT,
761 HOST_WIDE_INT, tree));
762 static bool x86_can_output_mi_thunk PARAMS ((tree, HOST_WIDE_INT,
763 HOST_WIDE_INT, tree));
764
765 struct ix86_address
766 {
767 rtx base, index, disp;
768 HOST_WIDE_INT scale;
769 };
770
771 static int ix86_decompose_address PARAMS ((rtx, struct ix86_address *));
772 static bool ix86_cannot_force_const_mem PARAMS ((rtx));
773
774 static void ix86_encode_section_info PARAMS ((tree, int)) ATTRIBUTE_UNUSED;
775 static const char *ix86_strip_name_encoding PARAMS ((const char *))
776 ATTRIBUTE_UNUSED;
777
778 struct builtin_description;
779 static rtx ix86_expand_sse_comi PARAMS ((const struct builtin_description *,
780 tree, rtx));
781 static rtx ix86_expand_sse_compare PARAMS ((const struct builtin_description *,
782 tree, rtx));
783 static rtx ix86_expand_unop1_builtin PARAMS ((enum insn_code, tree, rtx));
784 static rtx ix86_expand_unop_builtin PARAMS ((enum insn_code, tree, rtx, int));
785 static rtx ix86_expand_binop_builtin PARAMS ((enum insn_code, tree, rtx));
786 static rtx ix86_expand_store_builtin PARAMS ((enum insn_code, tree));
787 static rtx safe_vector_operand PARAMS ((rtx, enum machine_mode));
788 static enum rtx_code ix86_fp_compare_code_to_integer PARAMS ((enum rtx_code));
789 static void ix86_fp_comparison_codes PARAMS ((enum rtx_code code,
790 enum rtx_code *,
791 enum rtx_code *,
792 enum rtx_code *));
793 static rtx ix86_expand_fp_compare PARAMS ((enum rtx_code, rtx, rtx, rtx,
794 rtx *, rtx *));
795 static int ix86_fp_comparison_arithmetics_cost PARAMS ((enum rtx_code code));
796 static int ix86_fp_comparison_fcomi_cost PARAMS ((enum rtx_code code));
797 static int ix86_fp_comparison_sahf_cost PARAMS ((enum rtx_code code));
798 static int ix86_fp_comparison_cost PARAMS ((enum rtx_code code));
799 static unsigned int ix86_select_alt_pic_regnum PARAMS ((void));
800 static int ix86_save_reg PARAMS ((unsigned int, int));
801 static void ix86_compute_frame_layout PARAMS ((struct ix86_frame *));
802 static int ix86_comp_type_attributes PARAMS ((tree, tree));
803 static int ix86_fntype_regparm PARAMS ((tree));
804 const struct attribute_spec ix86_attribute_table[];
805 static tree ix86_handle_cdecl_attribute PARAMS ((tree *, tree, tree, int, bool *));
806 static tree ix86_handle_regparm_attribute PARAMS ((tree *, tree, tree, int, bool *));
807 static int ix86_value_regno PARAMS ((enum machine_mode));
808 static bool contains_128bit_aligned_vector_p PARAMS ((tree));
809
810 #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
811 static void ix86_svr3_asm_out_constructor PARAMS ((rtx, int));
812 #endif
813
814 /* Register class used for passing a given 64bit part of the argument.
815 These represent classes as documented by the psABI, with the exception
816 of the SSESF and SSEDF classes, which are basically the SSE class, except that
817 gcc will use an SFmode or DFmode move instead of DImode to avoid reformatting penalties.
818
819 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
820 whenever possible (when the upper half contains only padding).
821 */
822 enum x86_64_reg_class
823 {
824 X86_64_NO_CLASS,
825 X86_64_INTEGER_CLASS,
826 X86_64_INTEGERSI_CLASS,
827 X86_64_SSE_CLASS,
828 X86_64_SSESF_CLASS,
829 X86_64_SSEDF_CLASS,
830 X86_64_SSEUP_CLASS,
831 X86_64_X87_CLASS,
832 X86_64_X87UP_CLASS,
833 X86_64_MEMORY_CLASS
834 };
835 static const char * const x86_64_reg_class_name[] =
836 {"no", "integer", "integerSI", "sse", "sseSF", "sseDF", "sseup", "x87", "x87up", "no"};
837
838 #define MAX_CLASSES 4
839 static int classify_argument PARAMS ((enum machine_mode, tree,
840 enum x86_64_reg_class [MAX_CLASSES],
841 int));
842 static int examine_argument PARAMS ((enum machine_mode, tree, int, int *,
843 int *));
844 static rtx construct_container PARAMS ((enum machine_mode, tree, int, int, int,
845 const int *, int));
846 static enum x86_64_reg_class merge_classes PARAMS ((enum x86_64_reg_class,
847 enum x86_64_reg_class));
848
849 /* Initialize the GCC target structure. */
850 #undef TARGET_ATTRIBUTE_TABLE
851 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
852 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
853 # undef TARGET_MERGE_DECL_ATTRIBUTES
854 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
855 #endif
856
857 #undef TARGET_COMP_TYPE_ATTRIBUTES
858 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
859
860 #undef TARGET_INIT_BUILTINS
861 #define TARGET_INIT_BUILTINS ix86_init_builtins
862
863 #undef TARGET_EXPAND_BUILTIN
864 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
865
866 #undef TARGET_ASM_FUNCTION_EPILOGUE
867 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
868
869 #undef TARGET_ASM_OPEN_PAREN
870 #define TARGET_ASM_OPEN_PAREN ""
871 #undef TARGET_ASM_CLOSE_PAREN
872 #define TARGET_ASM_CLOSE_PAREN ""
873
874 #undef TARGET_ASM_ALIGNED_HI_OP
875 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
876 #undef TARGET_ASM_ALIGNED_SI_OP
877 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
878 #ifdef ASM_QUAD
879 #undef TARGET_ASM_ALIGNED_DI_OP
880 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
881 #endif
882
883 #undef TARGET_ASM_UNALIGNED_HI_OP
884 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
885 #undef TARGET_ASM_UNALIGNED_SI_OP
886 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
887 #undef TARGET_ASM_UNALIGNED_DI_OP
888 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
889
890 #undef TARGET_SCHED_ADJUST_COST
891 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
892 #undef TARGET_SCHED_ISSUE_RATE
893 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
894 #undef TARGET_SCHED_VARIABLE_ISSUE
895 #define TARGET_SCHED_VARIABLE_ISSUE ix86_variable_issue
896 #undef TARGET_SCHED_INIT
897 #define TARGET_SCHED_INIT ix86_sched_init
898 #undef TARGET_SCHED_REORDER
899 #define TARGET_SCHED_REORDER ix86_sched_reorder
900 #undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
901 #define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE \
902 ia32_use_dfa_pipeline_interface
903 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
904 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
905 ia32_multipass_dfa_lookahead
906
907 #ifdef HAVE_AS_TLS
908 #undef TARGET_HAVE_TLS
909 #define TARGET_HAVE_TLS true
910 #endif
911 #undef TARGET_CANNOT_FORCE_CONST_MEM
912 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
913
914 #undef TARGET_ASM_OUTPUT_MI_THUNK
915 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
916 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
917 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
918
919 #undef TARGET_FIXED_CONDITION_CODE_REGS
920 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
921 #undef TARGET_CC_MODES_COMPATIBLE
922 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
923
924 struct gcc_target targetm = TARGET_INITIALIZER;
925
926 /* The svr4 ABI for the i386 says that records and unions are returned
927 in memory. */
928 #ifndef DEFAULT_PCC_STRUCT_RETURN
929 #define DEFAULT_PCC_STRUCT_RETURN 1
930 #endif
931
932 /* Sometimes certain combinations of command options do not make
933 sense on a particular target machine. You can define a macro
934 `OVERRIDE_OPTIONS' to take account of this. This macro, if
935 defined, is executed once just after all the command options have
936 been parsed.
937
938 Don't use this macro to turn on various extra optimizations for
939 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
940
941 void
942 override_options ()
943 {
944 int i;
945 /* Comes from final.c -- no real reason to change it. */
946 #define MAX_CODE_ALIGN 16
947
948 static struct ptt
949 {
950 const struct processor_costs *cost; /* Processor costs */
951 const int target_enable; /* Target flags to enable. */
952 const int target_disable; /* Target flags to disable. */
953 const int align_loop; /* Default alignments. */
954 const int align_loop_max_skip;
955 const int align_jump;
956 const int align_jump_max_skip;
957 const int align_func;
958 const int branch_cost;
959 }
960 const processor_target_table[PROCESSOR_max] =
961 {
962 {&i386_cost, 0, 0, 4, 3, 4, 3, 4, 1},
963 {&i486_cost, 0, 0, 16, 15, 16, 15, 16, 1},
964 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16, 1},
965 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16, 1},
966 {&k6_cost, 0, 0, 32, 7, 32, 7, 32, 1},
967 {&athlon_cost, 0, 0, 16, 7, 64, 7, 16, 1},
968 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0, 1}
969 };
970
971 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
972 static struct pta
973 {
974 const char *const name; /* processor name or nickname. */
975 const enum processor_type processor;
976 const enum pta_flags
977 {
978 PTA_SSE = 1,
979 PTA_SSE2 = 2,
980 PTA_SSE3 = 4,
981 PTA_MMX = 8,
982 PTA_PREFETCH_SSE = 16,
983 PTA_3DNOW = 32,
984 PTA_3DNOW_A = 64
985 } flags;
986 }
987 const processor_alias_table[] =
988 {
989 {"i386", PROCESSOR_I386, 0},
990 {"i486", PROCESSOR_I486, 0},
991 {"i586", PROCESSOR_PENTIUM, 0},
992 {"pentium", PROCESSOR_PENTIUM, 0},
993 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
994 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
995 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
996 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
997 {"i686", PROCESSOR_PENTIUMPRO, 0},
998 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
999 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
1000 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1001 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1002 | PTA_MMX | PTA_PREFETCH_SSE},
1003 {"prescott", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2 | PTA_SSE3
1004 | PTA_MMX | PTA_PREFETCH_SSE},
1005 {"nocona", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2 | PTA_SSE3
1006 | PTA_MMX | PTA_PREFETCH_SSE},
1007 {"k6", PROCESSOR_K6, PTA_MMX},
1008 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1009 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1010 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1011 | PTA_3DNOW_A},
1012 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
1013 | PTA_3DNOW | PTA_3DNOW_A},
1014 {"x86-64", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_SSE},
1015 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1016 | PTA_3DNOW_A | PTA_SSE},
1017 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1018 | PTA_3DNOW_A | PTA_SSE},
1019 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1020 | PTA_3DNOW_A | PTA_SSE},
1021 };
1022
1023 int const pta_size = ARRAY_SIZE (processor_alias_table);
1024
1025 /* By default our XFmode is the 80-bit extended format. If we use
1026 TFmode instead, it's also the 80-bit format, but with padding. */
1027 real_format_for_mode[XFmode - QFmode] = &ieee_extended_intel_96_format;
1028 real_format_for_mode[TFmode - QFmode] = &ieee_extended_intel_128_format;
1029
1030 /* Set the default values for switches whose default depends on TARGET_64BIT
1031 in case they weren't overwritten by command line options. */
1032 if (TARGET_64BIT)
1033 {
1034 if (flag_omit_frame_pointer == 2)
1035 flag_omit_frame_pointer = 1;
1036 if (flag_asynchronous_unwind_tables == 2)
1037 flag_asynchronous_unwind_tables = 1;
1038 if (flag_pcc_struct_return == 2)
1039 flag_pcc_struct_return = 0;
1040 }
1041 else
1042 {
1043 if (flag_omit_frame_pointer == 2)
1044 flag_omit_frame_pointer = 0;
1045 if (flag_asynchronous_unwind_tables == 2)
1046 flag_asynchronous_unwind_tables = 0;
1047 if (flag_pcc_struct_return == 2)
1048 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
1049 }
1050
1051 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1052 SUBTARGET_OVERRIDE_OPTIONS;
1053 #endif
1054
1055 if (!ix86_cpu_string && ix86_arch_string)
1056 ix86_cpu_string = ix86_arch_string;
1057 if (!ix86_cpu_string)
1058 ix86_cpu_string = cpu_names [TARGET_CPU_DEFAULT];
1059 if (!ix86_arch_string)
1060 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
1061
1062 if (ix86_cmodel_string != 0)
1063 {
1064 if (!strcmp (ix86_cmodel_string, "small"))
1065 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1066 else if (flag_pic)
1067 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
1068 else if (!strcmp (ix86_cmodel_string, "32"))
1069 ix86_cmodel = CM_32;
1070 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
1071 ix86_cmodel = CM_KERNEL;
1072 else if (!strcmp (ix86_cmodel_string, "medium") && !flag_pic)
1073 ix86_cmodel = CM_MEDIUM;
1074 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
1075 ix86_cmodel = CM_LARGE;
1076 else
1077 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
1078 }
1079 else
1080 {
1081 ix86_cmodel = CM_32;
1082 if (TARGET_64BIT)
1083 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1084 }
1085 if (ix86_asm_string != 0)
1086 {
1087 if (!strcmp (ix86_asm_string, "intel"))
1088 ix86_asm_dialect = ASM_INTEL;
1089 else if (!strcmp (ix86_asm_string, "att"))
1090 ix86_asm_dialect = ASM_ATT;
1091 else
1092 error ("bad value (%s) for -masm= switch", ix86_asm_string);
1093 }
1094 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
1095 error ("code model `%s' not supported in the %s bit mode",
1096 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
1097 if (ix86_cmodel == CM_LARGE)
1098 sorry ("code model `large' not supported yet");
1099 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
1100 sorry ("%i-bit mode not compiled in",
1101 (target_flags & MASK_64BIT) ? 64 : 32);
1102
1103 for (i = 0; i < pta_size; i++)
1104 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
1105 {
1106 ix86_arch = processor_alias_table[i].processor;
1107 /* Default cpu tuning to the architecture. */
1108 ix86_cpu = ix86_arch;
1109 if (processor_alias_table[i].flags & PTA_MMX
1110 && !(target_flags_explicit & MASK_MMX))
1111 target_flags |= MASK_MMX;
1112 if (processor_alias_table[i].flags & PTA_3DNOW
1113 && !(target_flags_explicit & MASK_3DNOW))
1114 target_flags |= MASK_3DNOW;
1115 if (processor_alias_table[i].flags & PTA_3DNOW_A
1116 && !(target_flags_explicit & MASK_3DNOW_A))
1117 target_flags |= MASK_3DNOW_A;
1118 if (processor_alias_table[i].flags & PTA_SSE
1119 && !(target_flags_explicit & MASK_SSE))
1120 target_flags |= MASK_SSE;
1121 if (processor_alias_table[i].flags & PTA_SSE2
1122 && !(target_flags_explicit & MASK_SSE2))
1123 target_flags |= MASK_SSE2;
1124 if (processor_alias_table[i].flags & PTA_SSE3
1125 && !(target_flags_explicit & MASK_SSE3))
1126 target_flags |= MASK_SSE3;
1127 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1128 x86_prefetch_sse = true;
1129 break;
1130 }
1131
1132 if (i == pta_size)
1133 error ("bad value (%s) for -march= switch", ix86_arch_string);
1134
1135 for (i = 0; i < pta_size; i++)
1136 if (! strcmp (ix86_cpu_string, processor_alias_table[i].name))
1137 {
1138 ix86_cpu = processor_alias_table[i].processor;
1139 break;
1140 }
1141 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1142 x86_prefetch_sse = true;
1143 if (i == pta_size)
1144 error ("bad value (%s) for -mcpu= switch", ix86_cpu_string);
1145
1146 if (optimize_size)
1147 ix86_cost = &size_cost;
1148 else
1149 ix86_cost = processor_target_table[ix86_cpu].cost;
1150 target_flags |= processor_target_table[ix86_cpu].target_enable;
1151 target_flags &= ~processor_target_table[ix86_cpu].target_disable;
1152
1153 /* Arrange to set up i386_stack_locals for all functions. */
1154 init_machine_status = ix86_init_machine_status;
1155
1156 /* Validate -mregparm= value. */
1157 if (ix86_regparm_string)
1158 {
1159 i = atoi (ix86_regparm_string);
1160 if (i < 0 || i > REGPARM_MAX)
1161 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
1162 else
1163 ix86_regparm = i;
1164 }
1165 else
1166 if (TARGET_64BIT)
1167 ix86_regparm = REGPARM_MAX;
1168
1169 /* If the user has provided any of the -malign-* options,
1170 warn and use that value only if -falign-* is not set.
1171 Remove this code in GCC 3.2 or later. */
1172 if (ix86_align_loops_string)
1173 {
1174 warning ("-malign-loops is obsolete, use -falign-loops");
1175 if (align_loops == 0)
1176 {
1177 i = atoi (ix86_align_loops_string);
1178 if (i < 0 || i > MAX_CODE_ALIGN)
1179 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1180 else
1181 align_loops = 1 << i;
1182 }
1183 }
1184
1185 if (ix86_align_jumps_string)
1186 {
1187 warning ("-malign-jumps is obsolete, use -falign-jumps");
1188 if (align_jumps == 0)
1189 {
1190 i = atoi (ix86_align_jumps_string);
1191 if (i < 0 || i > MAX_CODE_ALIGN)
1192 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1193 else
1194 align_jumps = 1 << i;
1195 }
1196 }
1197
1198 if (ix86_align_funcs_string)
1199 {
1200 warning ("-malign-functions is obsolete, use -falign-functions");
1201 if (align_functions == 0)
1202 {
1203 i = atoi (ix86_align_funcs_string);
1204 if (i < 0 || i > MAX_CODE_ALIGN)
1205 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1206 else
1207 align_functions = 1 << i;
1208 }
1209 }
1210
1211 /* Default align_* from the processor table. */
1212 if (align_loops == 0)
1213 {
1214 align_loops = processor_target_table[ix86_cpu].align_loop;
1215 align_loops_max_skip = processor_target_table[ix86_cpu].align_loop_max_skip;
1216 }
1217 if (align_jumps == 0)
1218 {
1219 align_jumps = processor_target_table[ix86_cpu].align_jump;
1220 align_jumps_max_skip = processor_target_table[ix86_cpu].align_jump_max_skip;
1221 }
1222 if (align_functions == 0)
1223 {
1224 align_functions = processor_target_table[ix86_cpu].align_func;
1225 }
1226
1227 /* Validate -mpreferred-stack-boundary= value, or provide default.
1228 The default of 128 bits is for Pentium III's SSE __m128, but we
1229 don't want additional code to keep the stack aligned when
1230 optimizing for code size. */
1231 ix86_preferred_stack_boundary = (optimize_size
1232 ? TARGET_64BIT ? 128 : 32
1233 : 128);
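  /* The option value is the log2 of the alignment in bytes, so e.g.
     -mpreferred-stack-boundary=4 yields (1 << 4) * BITS_PER_UNIT = 128 bits,
     i.e. 16-byte stack alignment.  */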
1234 if (ix86_preferred_stack_boundary_string)
1235 {
1236 i = atoi (ix86_preferred_stack_boundary_string);
1237 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
1238 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
1239 TARGET_64BIT ? 4 : 2);
1240 else
1241 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
1242 }
1243
1244 /* Validate -mbranch-cost= value, or provide default. */
1245 ix86_branch_cost = processor_target_table[ix86_cpu].branch_cost;
1246 if (ix86_branch_cost_string)
1247 {
1248 i = atoi (ix86_branch_cost_string);
1249 if (i < 0 || i > 5)
1250 error ("-mbranch-cost=%d is not between 0 and 5", i);
1251 else
1252 ix86_branch_cost = i;
1253 }
1254
1255 if (ix86_tls_dialect_string)
1256 {
1257 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
1258 ix86_tls_dialect = TLS_DIALECT_GNU;
1259 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
1260 ix86_tls_dialect = TLS_DIALECT_SUN;
1261 else
1262 error ("bad value (%s) for -mtls-dialect= switch",
1263 ix86_tls_dialect_string);
1264 }
1265
1266 /* Keep nonleaf frame pointers. */
1267 if (TARGET_OMIT_LEAF_FRAME_POINTER)
1268 flag_omit_frame_pointer = 1;
1269
1270 /* If we're doing fast math, we don't care about comparison order
1271 wrt NaNs. This lets us use a shorter comparison sequence. */
1272 if (flag_unsafe_math_optimizations)
1273 target_flags &= ~MASK_IEEE_FP;
1274
1275 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
1276 since the insns won't need emulation. */
1277 if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
1278 target_flags &= ~MASK_NO_FANCY_MATH_387;
1279
1280 /* Turn on SSE2 builtins for -msse3. */
1281 if (TARGET_SSE3)
1282 target_flags |= MASK_SSE2;
1283
1284 /* Turn on SSE builtins for -msse2. */
1285 if (TARGET_SSE2)
1286 target_flags |= MASK_SSE;
1287
1288 if (TARGET_64BIT)
1289 {
1290 if (TARGET_ALIGN_DOUBLE)
1291 error ("-malign-double makes no sense in the 64bit mode");
1292 if (TARGET_RTD)
1293 error ("-mrtd calling convention not supported in the 64bit mode");
1294 /* Enable by default the SSE and MMX builtins. */
1295 target_flags |= (MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE);
1296 ix86_fpmath = FPMATH_SSE;
1297 }
1298 else
1299 ix86_fpmath = FPMATH_387;
1300
1301 if (ix86_fpmath_string != 0)
1302 {
1303 if (! strcmp (ix86_fpmath_string, "387"))
1304 ix86_fpmath = FPMATH_387;
1305 else if (! strcmp (ix86_fpmath_string, "sse"))
1306 {
1307 if (!TARGET_SSE)
1308 {
1309 warning ("SSE instruction set disabled, using 387 arithmetics");
1310 ix86_fpmath = FPMATH_387;
1311 }
1312 else
1313 ix86_fpmath = FPMATH_SSE;
1314 }
1315 else if (! strcmp (ix86_fpmath_string, "387,sse")
1316 || ! strcmp (ix86_fpmath_string, "sse,387"))
1317 {
1318 if (!TARGET_SSE)
1319 {
1320 warning ("SSE instruction set disabled, using 387 arithmetics");
1321 ix86_fpmath = FPMATH_387;
1322 }
1323 else if (!TARGET_80387)
1324 {
1325 warning ("387 instruction set disabled, using SSE arithmetics");
1326 ix86_fpmath = FPMATH_SSE;
1327 }
1328 else
1329 ix86_fpmath = FPMATH_SSE | FPMATH_387;
1330 }
1331 else
1332 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
1333 }
1334
1335 /* It makes no sense to ask for just SSE builtins, so MMX is also turned
1336 on by -msse. */
1337 if (TARGET_SSE)
1338 {
1339 target_flags |= MASK_MMX;
1340 x86_prefetch_sse = true;
1341 }
1342
1343 /* If it has 3DNow!, it also has MMX, so MMX is also turned on by -m3dnow. */
1344 if (TARGET_3DNOW)
1345 {
1346 target_flags |= MASK_MMX;
1347 /* If we are targeting the Athlon architecture, enable the 3DNow!/MMX
1348 extensions it adds. */
1349 if (x86_3dnow_a & (1 << ix86_arch))
1350 target_flags |= MASK_3DNOW_A;
1351 }
1352 if ((x86_accumulate_outgoing_args & CPUMASK)
1353 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
1354 && !optimize_size)
1355 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
1356
1357 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
1358 {
1359 char *p;
1360 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
1361 p = strchr (internal_label_prefix, 'X');
1362 internal_label_prefix_len = p - internal_label_prefix;
1363 *p = '\0';
1364 }
1365 }
1366
1367 void
1368 optimization_options (level, size)
1369 int level;
1370 int size ATTRIBUTE_UNUSED;
1371 {
1372 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
1373 make the problem with not enough registers even worse. */
1374 #ifdef INSN_SCHEDULING
1375 if (level > 1)
1376 flag_schedule_insns = 0;
1377 #endif
1378
1379 /* The default values of these switches depend on TARGET_64BIT,
1380 which is not known at this moment. Mark these values with 2 and
1381 let the user override them. In case there is no command line option
1382 specifying them, we will set the defaults in override_options. */
1383 if (optimize >= 1)
1384 flag_omit_frame_pointer = 2;
1385 flag_pcc_struct_return = 2;
1386 flag_asynchronous_unwind_tables = 2;
1387 }
1388
1389 /* Table of valid machine attributes. */
1390 const struct attribute_spec ix86_attribute_table[] =
1391 {
1392 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
1393 /* Stdcall attribute says callee is responsible for popping arguments
1394 if they are not variable. */
1395 { "stdcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1396 /* Cdecl attribute says the callee is a normal C declaration */
1397 { "cdecl", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1398 /* Regparm attribute specifies how many integer arguments are to be
1399 passed in registers. */
1400 { "regparm", 1, 1, false, true, true, ix86_handle_regparm_attribute },
1401 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
1402 { "dllimport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1403 { "dllexport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1404 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
1405 #endif
1406 { NULL, 0, 0, false, false, false, NULL }
1407 };
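/* In source code these attributes are written, for example, as

       int __attribute__ ((stdcall)) f (int, int);
       int __attribute__ ((regparm (3))) g (int, int, int);

   hypothetical declarations shown only to illustrate the syntax.  */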
1408
1409 /* Handle a "cdecl" or "stdcall" attribute;
1410 arguments as in struct attribute_spec.handler. */
1411 static tree
1412 ix86_handle_cdecl_attribute (node, name, args, flags, no_add_attrs)
1413 tree *node;
1414 tree name;
1415 tree args ATTRIBUTE_UNUSED;
1416 int flags ATTRIBUTE_UNUSED;
1417 bool *no_add_attrs;
1418 {
1419 if (TREE_CODE (*node) != FUNCTION_TYPE
1420 && TREE_CODE (*node) != METHOD_TYPE
1421 && TREE_CODE (*node) != FIELD_DECL
1422 && TREE_CODE (*node) != TYPE_DECL)
1423 {
1424 warning ("`%s' attribute only applies to functions",
1425 IDENTIFIER_POINTER (name));
1426 *no_add_attrs = true;
1427 }
1428
1429 if (TARGET_64BIT)
1430 {
1431 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
1432 *no_add_attrs = true;
1433 }
1434
1435 return NULL_TREE;
1436 }
1437
1438 /* Handle a "regparm" attribute;
1439 arguments as in struct attribute_spec.handler. */
1440 static tree
1441 ix86_handle_regparm_attribute (node, name, args, flags, no_add_attrs)
1442 tree *node;
1443 tree name;
1444 tree args;
1445 int flags ATTRIBUTE_UNUSED;
1446 bool *no_add_attrs;
1447 {
1448 if (TREE_CODE (*node) != FUNCTION_TYPE
1449 && TREE_CODE (*node) != METHOD_TYPE
1450 && TREE_CODE (*node) != FIELD_DECL
1451 && TREE_CODE (*node) != TYPE_DECL)
1452 {
1453 warning ("`%s' attribute only applies to functions",
1454 IDENTIFIER_POINTER (name));
1455 *no_add_attrs = true;
1456 }
1457 else
1458 {
1459 tree cst;
1460
1461 cst = TREE_VALUE (args);
1462 if (TREE_CODE (cst) != INTEGER_CST)
1463 {
1464 warning ("`%s' attribute requires an integer constant argument",
1465 IDENTIFIER_POINTER (name));
1466 *no_add_attrs = true;
1467 }
1468 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
1469 {
1470 warning ("argument to `%s' attribute larger than %d",
1471 IDENTIFIER_POINTER (name), REGPARM_MAX);
1472 *no_add_attrs = true;
1473 }
1474 }
1475
1476 return NULL_TREE;
1477 }
1478
1479 /* Return 0 if the attributes for two types are incompatible, 1 if they
1480 are compatible, and 2 if they are nearly compatible (which causes a
1481 warning to be generated). */
1482
1483 static int
1484 ix86_comp_type_attributes (type1, type2)
1485 tree type1;
1486 tree type2;
1487 {
1488 /* Check for mismatch of non-default calling convention. */
1489 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
1490
1491 if (TREE_CODE (type1) != FUNCTION_TYPE)
1492 return 1;
1493
1494 /* Check for mismatched return types (cdecl vs stdcall). */
1495 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
1496 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
1497 return 0;
1498 return 1;
1499 }
1500
1501 /* Return the regparm value for a function with the indicated TYPE. */
1502
1503 static int
1504 ix86_fntype_regparm (type)
1505 tree type;
1506 {
1507 tree attr;
1508
1509 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
1510 if (attr)
1511 return TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1512 else
1513 return ix86_regparm;
1514 }
1515
1516 /* Value is the number of bytes of arguments automatically
1517 popped when returning from a subroutine call.
1518 FUNDECL is the declaration node of the function (as a tree),
1519 FUNTYPE is the data type of the function (as a tree),
1520 or for a library call it is an identifier node for the subroutine name.
1521 SIZE is the number of bytes of arguments passed on the stack.
1522
1523 On the 80386, the RTD insn may be used to pop them if the number
1524 of args is fixed, but if the number is variable then the caller
1525 must pop them all. RTD can't be used for library calls now
1526 because the library is compiled with the Unix compiler.
1527 Use of RTD is a selectable option, since it is incompatible with
1528 standard Unix calling sequences. If the option is not selected,
1529 the caller must always pop the args.
1530
1531 The attribute stdcall is equivalent to RTD on a per module basis. */
1532
1533 int
1534 ix86_return_pops_args (fundecl, funtype, size)
1535 tree fundecl;
1536 tree funtype;
1537 int size;
1538 {
1539 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
1540
1541 /* Cdecl functions override -mrtd, and never pop the stack. */
1542 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
1543
1544 /* Stdcall functions will pop the stack if not variable args. */
1545 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype)))
1546 rtd = 1;
1547
1548 if (rtd
1549 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
1550 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
1551 == void_type_node)))
1552 return size;
1553 }
1554
1555 /* Lose any fake structure return argument if it is passed on the stack. */
1556 if (aggregate_value_p (TREE_TYPE (funtype))
1557 && !TARGET_64BIT)
1558 {
1559 int nregs = ix86_fntype_regparm (funtype);
1560
1561 if (!nregs)
1562 return GET_MODE_SIZE (Pmode);
1563 }
1564
1565 return 0;
1566 }
1567
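/* Illustrative sketch (hypothetical declarations): given

       int __attribute__ ((stdcall)) g (int a, int b);
       int __attribute__ ((cdecl))   h (int a, int b);

   ix86_return_pops_args returns 8 for g (the callee pops both argument
   words, i.e. the function returns with `ret $8'), and 0 for h or for
   any varargs function, where the caller is responsible for popping.  */
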
1568 /* Argument support functions. */
1569
1570 /* Return true when register may be used to pass function parameters. */
1571 bool
1572 ix86_function_arg_regno_p (regno)
1573 int regno;
1574 {
1575 int i;
1576 if (!TARGET_64BIT)
1577 return (regno < REGPARM_MAX
1578 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
1579 if (SSE_REGNO_P (regno) && TARGET_SSE)
1580 return true;
1581 /* RAX is used as hidden argument to va_arg functions. */
1582 if (!regno)
1583 return true;
1584 for (i = 0; i < REGPARM_MAX; i++)
1585 if (regno == x86_64_int_parameter_registers[i])
1586 return true;
1587 return false;
1588 }
1589
1590 /* Initialize a variable CUM of type CUMULATIVE_ARGS
1591 for a call to a function whose data type is FNTYPE.
1592 For a library call, FNTYPE is 0. */
1593
1594 void
1595 init_cumulative_args (cum, fntype, libname)
1596 CUMULATIVE_ARGS *cum; /* Argument info to initialize */
1597 tree fntype; /* tree ptr for function decl */
1598 rtx libname; /* SYMBOL_REF of library name or 0 */
1599 {
1600 static CUMULATIVE_ARGS zero_cum;
1601 tree param, next_param;
1602
1603 if (TARGET_DEBUG_ARG)
1604 {
1605 fprintf (stderr, "\ninit_cumulative_args (");
1606 if (fntype)
1607 fprintf (stderr, "fntype code = %s, ret code = %s",
1608 tree_code_name[(int) TREE_CODE (fntype)],
1609 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
1610 else
1611 fprintf (stderr, "no fntype");
1612
1613 if (libname)
1614 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
1615 }
1616
1617 *cum = zero_cum;
1618
1619 /* Set up the number of registers to use for passing arguments. */
1620 cum->nregs = ix86_regparm;
1621 cum->sse_nregs = SSE_REGPARM_MAX;
1622 if (fntype && !TARGET_64BIT)
1623 {
1624 tree attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (fntype));
1625
1626 if (attr)
1627 cum->nregs = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1628 }
1629 cum->maybe_vaarg = false;
1630
1631   /* Determine if this function has variable arguments.  This is
1632      indicated by the last argument being 'void_type_node' if there
1633      are no variable arguments.  If there are variable arguments, then
1634      we won't pass anything in registers.  */
1635
1636 if (cum->nregs)
1637 {
1638 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
1639 param != 0; param = next_param)
1640 {
1641 next_param = TREE_CHAIN (param);
1642 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
1643 {
1644 if (!TARGET_64BIT)
1645 cum->nregs = 0;
1646 cum->maybe_vaarg = true;
1647 }
1648 }
1649 }
1650 if ((!fntype && !libname)
1651 || (fntype && !TYPE_ARG_TYPES (fntype)))
1652 cum->maybe_vaarg = 1;
1653
1654 if (TARGET_DEBUG_ARG)
1655 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
1656
1657 return;
1658 }
1659
1660 /* x86-64 register passing implementation.  See the x86-64 ABI for details.
1661    The goal of this code is to classify each eightbyte of the incoming
1662    argument by register class and assign registers accordingly.  */
1663
1664 /* Return the union class of CLASS1 and CLASS2.
1665 See the x86-64 PS ABI for details. */
1666
1667 static enum x86_64_reg_class
1668 merge_classes (class1, class2)
1669 enum x86_64_reg_class class1, class2;
1670 {
1671 /* Rule #1: If both classes are equal, this is the resulting class. */
1672 if (class1 == class2)
1673 return class1;
1674
1675 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
1676 the other class. */
1677 if (class1 == X86_64_NO_CLASS)
1678 return class2;
1679 if (class2 == X86_64_NO_CLASS)
1680 return class1;
1681
1682 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
1683 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
1684 return X86_64_MEMORY_CLASS;
1685
1686 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
1687 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
1688 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
1689 return X86_64_INTEGERSI_CLASS;
1690 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
1691 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
1692 return X86_64_INTEGER_CLASS;
1693
1694 /* Rule #5: If one of the classes is X87 or X87UP class, MEMORY is used. */
1695 if (class1 == X86_64_X87_CLASS || class1 == X86_64_X87UP_CLASS
1696 || class2 == X86_64_X87_CLASS || class2 == X86_64_X87UP_CLASS)
1697 return X86_64_MEMORY_CLASS;
1698
1699 /* Rule #6: Otherwise class SSE is used. */
1700 return X86_64_SSE_CLASS;
1701 }
1702
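/* Illustrative sketch (hypothetical aggregate): for

       struct s { float f; int i; };

   both fields land in the same eightbyte.  The float alone would be
   X86_64_SSESF_CLASS and the int X86_64_INTEGERSI_CLASS; rule #4 above
   merges them into X86_64_INTEGERSI_CLASS, so the whole eightbyte is
   passed in an integer register.  */
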
1703 /* Classify the argument of type TYPE and mode MODE.
1704 CLASSES will be filled by the register class used to pass each word
1705 of the operand. The number of words is returned. In case the parameter
1706 should be passed in memory, 0 is returned. As a special case for zero
1707 sized containers, classes[0] will be NO_CLASS and 1 is returned.
1708
1709    BIT_OFFSET is used internally for handling records and specifies the
1710    offset in bits modulo 256 to avoid overflow cases.
1711
1712 See the x86-64 PS ABI for details.
1713 */
1714
1715 static int
1716 classify_argument (mode, type, classes, bit_offset)
1717 enum machine_mode mode;
1718 tree type;
1719 enum x86_64_reg_class classes[MAX_CLASSES];
1720 int bit_offset;
1721 {
1722 int bytes =
1723 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1724 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1725
1726 /* Variable sized entities are always passed/returned in memory. */
1727 if (bytes < 0)
1728 return 0;
1729
1730 if (mode != VOIDmode
1731 && MUST_PASS_IN_STACK (mode, type))
1732 return 0;
1733
1734 if (type && AGGREGATE_TYPE_P (type))
1735 {
1736 int i;
1737 tree field;
1738 enum x86_64_reg_class subclasses[MAX_CLASSES];
1739
1740 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
1741 if (bytes > 16)
1742 return 0;
1743
1744 for (i = 0; i < words; i++)
1745 classes[i] = X86_64_NO_CLASS;
1746
1747       /* Zero sized arrays or structures are NO_CLASS.  We return 0 to
1748 	 signal the memory class, so handle this as a special case.  */
1749 if (!words)
1750 {
1751 classes[0] = X86_64_NO_CLASS;
1752 return 1;
1753 }
1754
1755 /* Classify each field of record and merge classes. */
1756 if (TREE_CODE (type) == RECORD_TYPE)
1757 {
1758 	  /* For classes, first merge in the fields of the base classes.  */
1759 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
1760 {
1761 tree bases = TYPE_BINFO_BASETYPES (type);
1762 int n_bases = TREE_VEC_LENGTH (bases);
1763 int basenum;
1764
1765 for (basenum = 0; basenum < n_bases; ++basenum)
1766 {
1767 tree binfo = TREE_VEC_ELT (bases, basenum);
1768 int num;
1769 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
1770 tree type = BINFO_TYPE (binfo);
1771
1772 num = classify_argument (TYPE_MODE (type),
1773 type, subclasses,
1774 (offset + bit_offset) % 256);
1775 if (!num)
1776 return 0;
1777 for (i = 0; i < num; i++)
1778 {
1779 int pos = (offset + (bit_offset % 64)) / 8 / 8;
1780 classes[i + pos] =
1781 merge_classes (subclasses[i], classes[i + pos]);
1782 }
1783 }
1784 }
1785 	  /* And now merge the fields of the structure.  */
1786 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
1787 {
1788 if (TREE_CODE (field) == FIELD_DECL)
1789 {
1790 int num;
1791
1792 /* Bitfields are always classified as integer. Handle them
1793 early, since later code would consider them to be
1794 misaligned integers. */
1795 if (DECL_BIT_FIELD (field))
1796 {
1797 for (i = int_bit_position (field) / 8 / 8;
1798 i < (int_bit_position (field)
1799 + tree_low_cst (DECL_SIZE (field), 0)
1800 + 63) / 8 / 8; i++)
1801 classes[i] =
1802 merge_classes (X86_64_INTEGER_CLASS,
1803 classes[i]);
1804 }
1805 else
1806 {
1807 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
1808 TREE_TYPE (field), subclasses,
1809 (int_bit_position (field)
1810 + bit_offset) % 256);
1811 if (!num)
1812 return 0;
1813 for (i = 0; i < num; i++)
1814 {
1815 int pos =
1816 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
1817 classes[i + pos] =
1818 merge_classes (subclasses[i], classes[i + pos]);
1819 }
1820 }
1821 }
1822 }
1823 }
1824 /* Arrays are handled as small records. */
1825 else if (TREE_CODE (type) == ARRAY_TYPE)
1826 {
1827 int num;
1828 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
1829 TREE_TYPE (type), subclasses, bit_offset);
1830 if (!num)
1831 return 0;
1832
1833 /* The partial classes are now full classes. */
1834 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
1835 subclasses[0] = X86_64_SSE_CLASS;
1836 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
1837 subclasses[0] = X86_64_INTEGER_CLASS;
1838
1839 for (i = 0; i < words; i++)
1840 classes[i] = subclasses[i % num];
1841 }
1842 /* Unions are similar to RECORD_TYPE but offset is always 0. */
1843 else if (TREE_CODE (type) == UNION_TYPE
1844 || TREE_CODE (type) == QUAL_UNION_TYPE)
1845 {
1846       /* For classes, first merge in the fields of the base classes.  */
1847 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
1848 {
1849 tree bases = TYPE_BINFO_BASETYPES (type);
1850 int n_bases = TREE_VEC_LENGTH (bases);
1851 int basenum;
1852
1853 for (basenum = 0; basenum < n_bases; ++basenum)
1854 {
1855 tree binfo = TREE_VEC_ELT (bases, basenum);
1856 int num;
1857 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
1858 tree type = BINFO_TYPE (binfo);
1859
1860 num = classify_argument (TYPE_MODE (type),
1861 type, subclasses,
1862 (offset + (bit_offset % 64)) % 256);
1863 if (!num)
1864 return 0;
1865 for (i = 0; i < num; i++)
1866 {
1867 int pos = (offset + (bit_offset % 64)) / 8 / 8;
1868 classes[i + pos] =
1869 merge_classes (subclasses[i], classes[i + pos]);
1870 }
1871 }
1872 }
1873 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
1874 {
1875 if (TREE_CODE (field) == FIELD_DECL)
1876 {
1877 int num;
1878 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
1879 TREE_TYPE (field), subclasses,
1880 bit_offset);
1881 if (!num)
1882 return 0;
1883 for (i = 0; i < num; i++)
1884 classes[i] = merge_classes (subclasses[i], classes[i]);
1885 }
1886 }
1887 }
1888 else
1889 abort ();
1890
1891 /* Final merger cleanup. */
1892 for (i = 0; i < words; i++)
1893 {
1894 /* If one class is MEMORY, everything should be passed in
1895 memory. */
1896 if (classes[i] == X86_64_MEMORY_CLASS)
1897 return 0;
1898
1899       /* The X86_64_SSEUP_CLASS should always be preceded by
1900 	 X86_64_SSE_CLASS.  */
1901 if (classes[i] == X86_64_SSEUP_CLASS
1902 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
1903 classes[i] = X86_64_SSE_CLASS;
1904
1905 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
1906 if (classes[i] == X86_64_X87UP_CLASS
1907 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
1908 classes[i] = X86_64_SSE_CLASS;
1909 }
1910 return words;
1911 }
1912
1913   /* Compute the alignment needed.  We align all types to their natural
1914      boundaries, with the exception of XFmode, which is aligned to 128 bits.  */
1915 if (mode != VOIDmode && mode != BLKmode)
1916 {
1917 int mode_alignment = GET_MODE_BITSIZE (mode);
1918
1919 if (mode == XFmode)
1920 mode_alignment = 128;
1921 else if (mode == XCmode)
1922 mode_alignment = 256;
1923 if (COMPLEX_MODE_P (mode))
1924 mode_alignment /= 2;
1925 /* Misaligned fields are always returned in memory. */
1926 if (bit_offset % mode_alignment)
1927 return 0;
1928 }
1929
1930 /* Classification of atomic types. */
1931 switch (mode)
1932 {
1933 case DImode:
1934 case SImode:
1935 case HImode:
1936 case QImode:
1937 case CSImode:
1938 case CHImode:
1939 case CQImode:
1940 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
1941 classes[0] = X86_64_INTEGERSI_CLASS;
1942 else
1943 classes[0] = X86_64_INTEGER_CLASS;
1944 return 1;
1945 case CDImode:
1946 case TImode:
1947 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
1948 return 2;
1949 case CTImode:
1950 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
1951 classes[2] = classes[3] = X86_64_INTEGER_CLASS;
1952 return 4;
1953 case SFmode:
1954 if (!(bit_offset % 64))
1955 classes[0] = X86_64_SSESF_CLASS;
1956 else
1957 classes[0] = X86_64_SSE_CLASS;
1958 return 1;
1959 case DFmode:
1960 classes[0] = X86_64_SSEDF_CLASS;
1961 return 1;
1962 case TFmode:
1963 classes[0] = X86_64_X87_CLASS;
1964 classes[1] = X86_64_X87UP_CLASS;
1965 return 2;
1966 case TCmode:
1967 classes[0] = X86_64_X87_CLASS;
1968 classes[1] = X86_64_X87UP_CLASS;
1969 classes[2] = X86_64_X87_CLASS;
1970 classes[3] = X86_64_X87UP_CLASS;
1971 return 4;
1972 case DCmode:
1973 classes[0] = X86_64_SSEDF_CLASS;
1974 classes[1] = X86_64_SSEDF_CLASS;
1975 return 2;
1976 case SCmode:
1977 classes[0] = X86_64_SSE_CLASS;
1978 return 1;
1979 case V4SFmode:
1980 case V4SImode:
1981 case V16QImode:
1982 case V8HImode:
1983 case V2DFmode:
1984 case V2DImode:
1985 classes[0] = X86_64_SSE_CLASS;
1986 classes[1] = X86_64_SSEUP_CLASS;
1987 return 2;
1988 case V2SFmode:
1989 case V2SImode:
1990 case V4HImode:
1991 case V8QImode:
1992 return 0;
1993 case BLKmode:
1994 case VOIDmode:
1995 return 0;
1996 default:
1997 abort ();
1998 }
1999 }
2000
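/* Illustrative sketch (hypothetical aggregates): classify_argument above
   splits an argument into eightbytes.  For

       struct a { double d; int i; };
       struct b { char c[24]; };

   struct a classifies as an SSE eightbyte followed by an INTEGER
   eightbyte and is passed in one SSE and one integer register, while
   struct b is larger than 16 bytes, so classify_argument returns 0 and
   the argument is passed in memory.  */
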
2001 /* Examine the argument and set the number of registers required in each
2002    class.  Return 0 iff the parameter should be passed in memory.  */
2003 static int
2004 examine_argument (mode, type, in_return, int_nregs, sse_nregs)
2005 enum machine_mode mode;
2006 tree type;
2007 int *int_nregs, *sse_nregs;
2008 int in_return;
2009 {
2010 enum x86_64_reg_class class[MAX_CLASSES];
2011 int n = classify_argument (mode, type, class, 0);
2012
2013 *int_nregs = 0;
2014 *sse_nregs = 0;
2015 if (!n)
2016 return 0;
2017 for (n--; n >= 0; n--)
2018 switch (class[n])
2019 {
2020 case X86_64_INTEGER_CLASS:
2021 case X86_64_INTEGERSI_CLASS:
2022 (*int_nregs)++;
2023 break;
2024 case X86_64_SSE_CLASS:
2025 case X86_64_SSESF_CLASS:
2026 case X86_64_SSEDF_CLASS:
2027 (*sse_nregs)++;
2028 break;
2029 case X86_64_NO_CLASS:
2030 case X86_64_SSEUP_CLASS:
2031 break;
2032 case X86_64_X87_CLASS:
2033 case X86_64_X87UP_CLASS:
2034 if (!in_return)
2035 return 0;
2036 break;
2037 case X86_64_MEMORY_CLASS:
2038 abort ();
2039 }
2040 return 1;
2041 }
2042 /* Construct a container for the argument as used by the GCC calling
2043    interface.  See FUNCTION_ARG for the detailed description.  */
2044 static rtx
2045 construct_container (mode, type, in_return, nintregs, nsseregs, intreg, sse_regno)
2046 enum machine_mode mode;
2047 tree type;
2048 int in_return;
2049 int nintregs, nsseregs;
2050 const int * intreg;
2051 int sse_regno;
2052 {
2053 enum machine_mode tmpmode;
2054 int bytes =
2055 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2056 enum x86_64_reg_class class[MAX_CLASSES];
2057 int n;
2058 int i;
2059 int nexps = 0;
2060 int needed_sseregs, needed_intregs;
2061 rtx exp[MAX_CLASSES];
2062 rtx ret;
2063
2064 n = classify_argument (mode, type, class, 0);
2065 if (TARGET_DEBUG_ARG)
2066 {
2067 if (!n)
2068 fprintf (stderr, "Memory class\n");
2069 else
2070 {
2071 fprintf (stderr, "Classes:");
2072 for (i = 0; i < n; i++)
2073 {
2074 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
2075 }
2076 fprintf (stderr, "\n");
2077 }
2078 }
2079 if (!n)
2080 return NULL;
2081 if (!examine_argument (mode, type, in_return, &needed_intregs, &needed_sseregs))
2082 return NULL;
2083 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
2084 return NULL;
2085
2086   /* First construct simple cases.  Avoid SCmode, since we want to use a
2087      single register to pass this type.  */
2088 if (n == 1 && mode != SCmode)
2089 switch (class[0])
2090 {
2091 case X86_64_INTEGER_CLASS:
2092 case X86_64_INTEGERSI_CLASS:
2093 return gen_rtx_REG (mode, intreg[0]);
2094 case X86_64_SSE_CLASS:
2095 case X86_64_SSESF_CLASS:
2096 case X86_64_SSEDF_CLASS:
2097 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2098 case X86_64_X87_CLASS:
2099 return gen_rtx_REG (mode, FIRST_STACK_REG);
2100 case X86_64_NO_CLASS:
2101 /* Zero sized array, struct or class. */
2102 return NULL;
2103 default:
2104 abort ();
2105 }
2106 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS
2107 && mode != BLKmode)
2108 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2109 if (n == 2
2110 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
2111 return gen_rtx_REG (TFmode, FIRST_STACK_REG);
2112 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
2113 && class[1] == X86_64_INTEGER_CLASS
2114 && (mode == CDImode || mode == TImode)
2115 && intreg[0] + 1 == intreg[1])
2116 return gen_rtx_REG (mode, intreg[0]);
2117 if (n == 4
2118 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS
2119 && class[2] == X86_64_X87_CLASS && class[3] == X86_64_X87UP_CLASS
2120 && mode != BLKmode)
2121 return gen_rtx_REG (TCmode, FIRST_STACK_REG);
2122
2123 /* Otherwise figure out the entries of the PARALLEL. */
2124 for (i = 0; i < n; i++)
2125 {
2126 switch (class[i])
2127 {
2128 case X86_64_NO_CLASS:
2129 break;
2130 case X86_64_INTEGER_CLASS:
2131 case X86_64_INTEGERSI_CLASS:
2132 	  /* Merge TImodes on aligned occasions here too.  */
2133 if (i * 8 + 8 > bytes)
2134 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
2135 else if (class[i] == X86_64_INTEGERSI_CLASS)
2136 tmpmode = SImode;
2137 else
2138 tmpmode = DImode;
2139 /* We've requested 24 bytes we don't have mode for. Use DImode. */
2140 if (tmpmode == BLKmode)
2141 tmpmode = DImode;
2142 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2143 gen_rtx_REG (tmpmode, *intreg),
2144 GEN_INT (i*8));
2145 intreg++;
2146 break;
2147 case X86_64_SSESF_CLASS:
2148 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2149 gen_rtx_REG (SFmode,
2150 SSE_REGNO (sse_regno)),
2151 GEN_INT (i*8));
2152 sse_regno++;
2153 break;
2154 case X86_64_SSEDF_CLASS:
2155 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2156 gen_rtx_REG (DFmode,
2157 SSE_REGNO (sse_regno)),
2158 GEN_INT (i*8));
2159 sse_regno++;
2160 break;
2161 case X86_64_SSE_CLASS:
2162 if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
2163 tmpmode = TImode;
2164 else
2165 tmpmode = DImode;
2166 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2167 gen_rtx_REG (tmpmode,
2168 SSE_REGNO (sse_regno)),
2169 GEN_INT (i*8));
2170 if (tmpmode == TImode)
2171 i++;
2172 sse_regno++;
2173 break;
2174 default:
2175 abort ();
2176 }
2177 }
2178 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
2179 for (i = 0; i < nexps; i++)
2180 XVECEXP (ret, 0, i) = exp [i];
2181 return ret;
2182 }
2183
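/* Illustrative sketch (assumed example, not taken from the ABI text): for a
   hypothetical

       struct a { double d; int i; };

   construct_container builds a PARALLEL roughly of the shape

       (parallel [(expr_list (reg:DF xmm0) (const_int 0))
                  (expr_list (reg:DI di)   (const_int 8))])

   i.e. one EXPR_LIST per eightbyte, pairing the chosen register with the
   byte offset of that eightbyte within the argument.  */
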
2184 /* Update the data in CUM to advance over an argument
2185 of mode MODE and data type TYPE.
2186 (TYPE is null for libcalls where that information may not be available.) */
2187
2188 void
2189 function_arg_advance (cum, mode, type, named)
2190 CUMULATIVE_ARGS *cum; /* current arg information */
2191 enum machine_mode mode; /* current arg mode */
2192 tree type; /* type of the argument or 0 if lib support */
2193 int named; /* whether or not the argument was named */
2194 {
2195 int bytes =
2196 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2197 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2198
2199 if (TARGET_DEBUG_ARG)
2200 fprintf (stderr,
2201 "function_adv (sz=%d, wds=%2d, nregs=%d, mode=%s, named=%d)\n\n",
2202 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2203 if (TARGET_64BIT)
2204 {
2205 int int_nregs, sse_nregs;
2206 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
2207 cum->words += words;
2208 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
2209 {
2210 cum->nregs -= int_nregs;
2211 cum->sse_nregs -= sse_nregs;
2212 cum->regno += int_nregs;
2213 cum->sse_regno += sse_nregs;
2214 }
2215 else
2216 cum->words += words;
2217 }
2218 else
2219 {
2220 if (TARGET_SSE && mode == TImode)
2221 {
2222 cum->sse_words += words;
2223 cum->sse_nregs -= 1;
2224 cum->sse_regno += 1;
2225 if (cum->sse_nregs <= 0)
2226 {
2227 cum->sse_nregs = 0;
2228 cum->sse_regno = 0;
2229 }
2230 }
2231 else
2232 {
2233 cum->words += words;
2234 cum->nregs -= words;
2235 cum->regno += words;
2236
2237 if (cum->nregs <= 0)
2238 {
2239 cum->nregs = 0;
2240 cum->regno = 0;
2241 }
2242 }
2243 }
2244 return;
2245 }
2246
2247 /* Define where to put the arguments to a function.
2248 Value is zero to push the argument on the stack,
2249 or a hard register in which to store the argument.
2250
2251 MODE is the argument's machine mode.
2252 TYPE is the data type of the argument (as a tree).
2253 This is null for libcalls where that information may
2254 not be available.
2255 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2256 the preceding args and about the function being called.
2257 NAMED is nonzero if this argument is a named parameter
2258 (otherwise it is an extra parameter matching an ellipsis). */
2259
2260 rtx
2261 function_arg (cum, mode, type, named)
2262 CUMULATIVE_ARGS *cum; /* current arg information */
2263 enum machine_mode mode; /* current arg mode */
2264 tree type; /* type of the argument or 0 if lib support */
2265 int named; /* != 0 for normal args, == 0 for ... args */
2266 {
2267 rtx ret = NULL_RTX;
2268 int bytes =
2269 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2270 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2271
2272   /* Handle a hidden AL argument containing the number of SSE registers
2273      used by varargs x86-64 functions.  For the i386 ABI just return
2274      constm1_rtx to avoid any AL settings.  */
2275 if (mode == VOIDmode)
2276 {
2277 if (TARGET_64BIT)
2278 return GEN_INT (cum->maybe_vaarg
2279 ? (cum->sse_nregs < 0
2280 ? SSE_REGPARM_MAX
2281 : cum->sse_regno)
2282 : -1);
2283 else
2284 return constm1_rtx;
2285 }
2286 if (TARGET_64BIT)
2287 ret = construct_container (mode, type, 0, cum->nregs, cum->sse_nregs,
2288 &x86_64_int_parameter_registers [cum->regno],
2289 cum->sse_regno);
2290 else
2291 switch (mode)
2292 {
2293 /* For now, pass fp/complex values on the stack. */
2294 default:
2295 break;
2296
2297 case BLKmode:
2298 if (bytes < 0)
2299 break;
2300 /* FALLTHRU */
2301 case DImode:
2302 case SImode:
2303 case HImode:
2304 case QImode:
2305 if (words <= cum->nregs)
2306 ret = gen_rtx_REG (mode, cum->regno);
2307 break;
2308 case TImode:
2309 if (cum->sse_nregs)
2310 ret = gen_rtx_REG (mode, cum->sse_regno);
2311 break;
2312 }
2313
2314 if (TARGET_DEBUG_ARG)
2315 {
2316 fprintf (stderr,
2317 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
2318 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2319
2320 if (ret)
2321 print_simple_rtl (stderr, ret);
2322 else
2323 fprintf (stderr, ", stack");
2324
2325 fprintf (stderr, " )\n");
2326 }
2327
2328 return ret;
2329 }
2330
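/* Illustrative sketch (hypothetical declaration): on the 32-bit ABI with

       int __attribute__ ((regparm (3))) f (int a, int b, long long c);

   A and B each take one of the three argument registers; C needs two
   words but only one register is left, so function_arg above returns
   NULL_RTX for it and it is pushed on the stack instead.  */
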
2331 /* Return true when TYPE should be 128-bit aligned for the 32-bit argument
2332    passing ABI.  */
2333 static bool
2334 contains_128bit_aligned_vector_p (type)
2335 tree type;
2336 {
2337 enum machine_mode mode = TYPE_MODE (type);
2338 if (SSE_REG_MODE_P (mode)
2339 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
2340 return true;
2341 if (TYPE_ALIGN (type) < 128)
2342 return false;
2343
2344 if (AGGREGATE_TYPE_P (type))
2345 {
2346       /* Walk the aggregates recursively.  */
2347 if (TREE_CODE (type) == RECORD_TYPE
2348 || TREE_CODE (type) == UNION_TYPE
2349 || TREE_CODE (type) == QUAL_UNION_TYPE)
2350 {
2351 tree field;
2352
2353 if (TYPE_BINFO (type) != NULL
2354 && TYPE_BINFO_BASETYPES (type) != NULL)
2355 {
2356 tree bases = TYPE_BINFO_BASETYPES (type);
2357 int n_bases = TREE_VEC_LENGTH (bases);
2358 int i;
2359
2360 for (i = 0; i < n_bases; ++i)
2361 {
2362 tree binfo = TREE_VEC_ELT (bases, i);
2363 tree type = BINFO_TYPE (binfo);
2364
2365 if (contains_128bit_aligned_vector_p (type))
2366 return true;
2367 }
2368 }
2369 	  /* And now merge the fields of the structure.  */
2370 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2371 {
2372 if (TREE_CODE (field) == FIELD_DECL
2373 && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
2374 return true;
2375 }
2376 }
2377       /* Just in case some languages pass arrays by value.  */
2378 else if (TREE_CODE (type) == ARRAY_TYPE)
2379 {
2380 if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
2381 return true;
2382 }
2383 else
2384 abort ();
2385 }
2386 return false;
2387 }
2388
2389 /* A C expression that indicates when an argument must be passed by
2390 reference. If nonzero for an argument, a copy of that argument is
2391 made in memory and a pointer to the argument is passed instead of
2392 the argument itself. The pointer is passed in whatever way is
2393 appropriate for passing a pointer to that type. */
2394
2395 int
2396 function_arg_pass_by_reference (cum, mode, type, named)
2397 CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED;
2398 enum machine_mode mode ATTRIBUTE_UNUSED;
2399 tree type;
2400 int named ATTRIBUTE_UNUSED;
2401 {
2402 if (!TARGET_64BIT)
2403 return 0;
2404
2405 if (type && int_size_in_bytes (type) == -1)
2406 {
2407 if (TARGET_DEBUG_ARG)
2408 fprintf (stderr, "function_arg_pass_by_reference\n");
2409 return 1;
2410 }
2411
2412 return 0;
2413 }
2414
2415 /* Gives the alignment boundary, in bits, of an argument with the specified mode
2416 and type. */
2417
2418 int
2419 ix86_function_arg_boundary (mode, type)
2420 enum machine_mode mode;
2421 tree type;
2422 {
2423 int align;
2424 if (type)
2425 align = TYPE_ALIGN (type);
2426 else
2427 align = GET_MODE_ALIGNMENT (mode);
2428 if (align < PARM_BOUNDARY)
2429 align = PARM_BOUNDARY;
2430 if (!TARGET_64BIT)
2431 {
2432 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
2433 make an exception for SSE modes since these require 128bit
2434 alignment.
2435
2436 The handling here differs from field_alignment. ICC aligns MMX
2437 arguments to 4 byte boundaries, while structure fields are aligned
2438 to 8 byte boundaries. */
2439 if (!type)
2440 {
2441 if (!SSE_REG_MODE_P (mode))
2442 align = PARM_BOUNDARY;
2443 }
2444 else
2445 {
2446 if (!contains_128bit_aligned_vector_p (type))
2447 align = PARM_BOUNDARY;
2448 }
2449 if (align != PARM_BOUNDARY && !TARGET_SSE)
2450 abort();
2451 }
2452 if (align > 128)
2453 align = 128;
2454 return align;
2455 }
2456
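/* Illustrative sketch (hypothetical types): on the 32-bit ABI an argument of
   type

       struct s1 { int i; double d; };

   keeps the 4-byte PARM_BOUNDARY, whereas a type containing a 128-bit
   vector member, such as

       struct s2 { __m128 v; };

   is recognized by contains_128bit_aligned_vector_p and gets a 128-bit
   (16-byte) argument boundary from ix86_function_arg_boundary above.  */
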
2457 /* Return true if N is a possible register number of function value. */
2458 bool
2459 ix86_function_value_regno_p (regno)
2460 int regno;
2461 {
2462 if (!TARGET_64BIT)
2463 {
2464 return ((regno) == 0
2465 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
2466 || ((regno) == FIRST_SSE_REG && TARGET_SSE));
2467 }
2468 return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
2469 || ((regno) == FIRST_SSE_REG && TARGET_SSE)
2470 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
2471 }
2472
2473 /* Define how to find the value returned by a function.
2474 VALTYPE is the data type of the value (as a tree).
2475 If the precise function being called is known, FUNC is its FUNCTION_DECL;
2476 otherwise, FUNC is 0. */
2477 rtx
2478 ix86_function_value (valtype)
2479 tree valtype;
2480 {
2481 if (TARGET_64BIT)
2482 {
2483 rtx ret = construct_container (TYPE_MODE (valtype), valtype, 1,
2484 REGPARM_MAX, SSE_REGPARM_MAX,
2485 x86_64_int_return_registers, 0);
2486       /* For zero sized structures, construct_container returns NULL, but we
2487          need to keep the rest of the compiler happy by returning a meaningful value.  */
2488 if (!ret)
2489 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
2490 return ret;
2491 }
2492 else
2493 return gen_rtx_REG (TYPE_MODE (valtype),
2494 ix86_value_regno (TYPE_MODE (valtype)));
2495 }
2496
2497 /* Return nonzero iff TYPE is returned in memory.  */
2498 int
2499 ix86_return_in_memory (type)
2500 tree type;
2501 {
2502 int needed_intregs, needed_sseregs, size;
2503 enum machine_mode mode = TYPE_MODE (type);
2504
2505 if (TARGET_64BIT)
2506 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
2507
2508 if (mode == BLKmode)
2509 return 1;
2510
2511 size = int_size_in_bytes (type);
2512
2513 if (VECTOR_MODE_P (mode) || mode == TImode)
2514 {
2515 /* User-created vectors small enough to fit in EAX. */
2516 if (size < 8)
2517 return 0;
2518
2519 /* MMX/3dNow values are returned on the stack, since we've
2520 got to EMMS/FEMMS before returning. */
2521 if (size == 8)
2522 return 1;
2523
2524 /* SSE values are returned in XMM0. */
2525 /* ??? Except when it doesn't exist? We have a choice of
2526 either (1) being abi incompatible with a -march switch,
2527 or (2) generating an error here. Given no good solution,
2528 I think the safest thing is one warning. The user won't
2529 be able to use -Werror, but... */
2530 if (size == 16)
2531 {
2532 static bool warned;
2533
2534 if (TARGET_SSE)
2535 return 0;
2536
2537 if (!warned)
2538 {
2539 warned = true;
2540 warning ("SSE vector return without SSE enabled changes the ABI");
2541 }
2542 return 1;
2543 }
2544 }
2545
2546 if (mode == TFmode)
2547 return 0;
2548 if (size > 12)
2549 return 1;
2550 return 0;
2551 }
2552
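/* Illustrative sketch (hypothetical type): under the 32-bit rules above, an
   8-byte vector return value such as

       typedef int v2si __attribute__ ((vector_size (8)));

   is forced into memory (the MMX case), while a 16-byte vector type is
   returned in %xmm0 as long as SSE is enabled, and in memory (with a
   one-time ABI warning) otherwise.  */
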
2553 /* Define how to find the value returned by a library function
2554 assuming the value has mode MODE. */
2555 rtx
2556 ix86_libcall_value (mode)
2557 enum machine_mode mode;
2558 {
2559 if (TARGET_64BIT)
2560 {
2561 switch (mode)
2562 {
2563 case SFmode:
2564 case SCmode:
2565 case DFmode:
2566 case DCmode:
2567 return gen_rtx_REG (mode, FIRST_SSE_REG);
2568 case TFmode:
2569 case TCmode:
2570 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
2571 default:
2572 return gen_rtx_REG (mode, 0);
2573 }
2574 }
2575 else
2576 return gen_rtx_REG (mode, ix86_value_regno (mode));
2577 }
2578
2579 /* Given a mode, return the register to use for a return value. */
2580
2581 static int
2582 ix86_value_regno (mode)
2583 enum machine_mode mode;
2584 {
2585 /* Floating point return values in %st(0). */
2586 if (GET_MODE_CLASS (mode) == MODE_FLOAT && TARGET_FLOAT_RETURNS_IN_80387)
2587 return FIRST_FLOAT_REG;
2588 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
2589 we prevent this case when sse is not available. */
2590 if (mode == TImode || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
2591 return FIRST_SSE_REG;
2592 /* Everything else in %eax. */
2593 return 0;
2594 }
2595
2596 /* Create the va_list data type. */
2597
2598 tree
2599 ix86_build_va_list ()
2600 {
2601 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
2602
2603   /* For i386 we use a plain pointer to the argument area.  */
2604 if (!TARGET_64BIT)
2605 return build_pointer_type (char_type_node);
2606
2607 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
2608 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
2609
2610 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
2611 unsigned_type_node);
2612 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
2613 unsigned_type_node);
2614 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
2615 ptr_type_node);
2616 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
2617 ptr_type_node);
2618
2619 DECL_FIELD_CONTEXT (f_gpr) = record;
2620 DECL_FIELD_CONTEXT (f_fpr) = record;
2621 DECL_FIELD_CONTEXT (f_ovf) = record;
2622 DECL_FIELD_CONTEXT (f_sav) = record;
2623
2624 TREE_CHAIN (record) = type_decl;
2625 TYPE_NAME (record) = type_decl;
2626 TYPE_FIELDS (record) = f_gpr;
2627 TREE_CHAIN (f_gpr) = f_fpr;
2628 TREE_CHAIN (f_fpr) = f_ovf;
2629 TREE_CHAIN (f_ovf) = f_sav;
2630
2631 layout_type (record);
2632
2633 /* The correct type is an array type of one element. */
2634 return build_array_type (record, build_index_type (size_zero_node));
2635 }
2636
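/* Illustrative sketch: the record built above corresponds to the C-level
   declaration

       typedef struct __va_list_tag {
         unsigned int gp_offset;
         unsigned int fp_offset;
         void *overflow_arg_area;
         void *reg_save_area;
       } va_list[1];

   i.e. the va_list layout of the x86-64 psABI; on i386 va_list stays a
   plain `char *'.  */
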
2637 /* Perform any actions needed for a function that is receiving a
2638    variable number of arguments.
2639
2640 CUM is as above.
2641
2642 MODE and TYPE are the mode and type of the current parameter.
2643
2644 PRETEND_SIZE is a variable that should be set to the amount of stack
2645 that must be pushed by the prolog to pretend that our caller pushed
2646 it.
2647
2648 Normally, this macro will push all remaining incoming registers on the
2649 stack and set PRETEND_SIZE to the length of the registers pushed. */
2650
2651 void
2652 ix86_setup_incoming_varargs (cum, mode, type, pretend_size, no_rtl)
2653 CUMULATIVE_ARGS *cum;
2654 enum machine_mode mode;
2655 tree type;
2656 int *pretend_size ATTRIBUTE_UNUSED;
2657 int no_rtl;
2658
2659 {
2660 CUMULATIVE_ARGS next_cum;
2661 rtx save_area = NULL_RTX, mem;
2662 rtx label;
2663 rtx label_ref;
2664 rtx tmp_reg;
2665 rtx nsse_reg;
2666 int set;
2667 tree fntype;
2668 int stdarg_p;
2669 int i;
2670
2671 if (!TARGET_64BIT)
2672 return;
2673
2674   /* Indicate that we need to allocate stack space for the varargs save area.  */
2675 ix86_save_varrargs_registers = 1;
2676
2677 cfun->stack_alignment_needed = 128;
2678
2679 fntype = TREE_TYPE (current_function_decl);
2680 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
2681 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
2682 != void_type_node));
2683
2684 /* For varargs, we do not want to skip the dummy va_dcl argument.
2685 For stdargs, we do want to skip the last named argument. */
2686 next_cum = *cum;
2687 if (stdarg_p)
2688 function_arg_advance (&next_cum, mode, type, 1);
2689
2690 if (!no_rtl)
2691 save_area = frame_pointer_rtx;
2692
2693 set = get_varargs_alias_set ();
2694
2695 for (i = next_cum.regno; i < ix86_regparm; i++)
2696 {
2697 mem = gen_rtx_MEM (Pmode,
2698 plus_constant (save_area, i * UNITS_PER_WORD));
2699 set_mem_alias_set (mem, set);
2700 emit_move_insn (mem, gen_rtx_REG (Pmode,
2701 x86_64_int_parameter_registers[i]));
2702 }
2703
2704 if (next_cum.sse_nregs)
2705 {
2706       /* Now emit code to save SSE registers.  The AX parameter contains the
2707 	 number of SSE parameter registers used to call this function.  We use
2708 	 the sse_prologue_save insn template, which produces a computed jump
2709 	 across the SSE saves.  We need some preparation work to get this working.  */
2710
2711 label = gen_label_rtx ();
2712 label_ref = gen_rtx_LABEL_REF (Pmode, label);
2713
2714       /* Compute the address to jump to:
2715          label - eax*4 + nnamed_sse_arguments*4  */
2716 tmp_reg = gen_reg_rtx (Pmode);
2717 nsse_reg = gen_reg_rtx (Pmode);
2718 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
2719 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
2720 gen_rtx_MULT (Pmode, nsse_reg,
2721 GEN_INT (4))));
2722 if (next_cum.sse_regno)
2723 emit_move_insn
2724 (nsse_reg,
2725 gen_rtx_CONST (DImode,
2726 gen_rtx_PLUS (DImode,
2727 label_ref,
2728 GEN_INT (next_cum.sse_regno * 4))));
2729 else
2730 emit_move_insn (nsse_reg, label_ref);
2731 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
2732
2733       /* Compute the address of the memory block we save into.  We always use a
2734 	 pointer pointing 127 bytes past the first byte to store, which is needed
2735 	 to keep the instruction size limited to 4 bytes.  */
2736 tmp_reg = gen_reg_rtx (Pmode);
2737 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
2738 plus_constant (save_area,
2739 8 * REGPARM_MAX + 127)));
2740 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
2741 set_mem_alias_set (mem, set);
2742 set_mem_align (mem, BITS_PER_WORD);
2743
2744 /* And finally do the dirty job! */
2745 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
2746 GEN_INT (next_cum.sse_regno), label));
2747 }
2748
2749 }
2750
2751 /* Implement va_start. */
2752
2753 void
2754 ix86_va_start (valist, nextarg)
2755 tree valist;
2756 rtx nextarg;
2757 {
2758 HOST_WIDE_INT words, n_gpr, n_fpr;
2759 tree f_gpr, f_fpr, f_ovf, f_sav;
2760 tree gpr, fpr, ovf, sav, t;
2761
2762 /* Only 64bit target needs something special. */
2763 if (!TARGET_64BIT)
2764 {
2765 std_expand_builtin_va_start (valist, nextarg);
2766 return;
2767 }
2768
2769 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
2770 f_fpr = TREE_CHAIN (f_gpr);
2771 f_ovf = TREE_CHAIN (f_fpr);
2772 f_sav = TREE_CHAIN (f_ovf);
2773
2774 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
2775 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
2776 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
2777 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
2778 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
2779
2780 /* Count number of gp and fp argument registers used. */
2781 words = current_function_args_info.words;
2782 n_gpr = current_function_args_info.regno;
2783 n_fpr = current_function_args_info.sse_regno;
2784
2785 if (TARGET_DEBUG_ARG)
2786 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
2787 (int) words, (int) n_gpr, (int) n_fpr);
2788
2789 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
2790 build_int_2 (n_gpr * 8, 0));
2791 TREE_SIDE_EFFECTS (t) = 1;
2792 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2793
2794 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
2795 build_int_2 (n_fpr * 16 + 8*REGPARM_MAX, 0));
2796 TREE_SIDE_EFFECTS (t) = 1;
2797 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2798
2799 /* Find the overflow area. */
2800 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
2801 if (words != 0)
2802 t = build (PLUS_EXPR, TREE_TYPE (ovf), t,
2803 build_int_2 (words * UNITS_PER_WORD, 0));
2804 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
2805 TREE_SIDE_EFFECTS (t) = 1;
2806 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2807
2808 /* Find the register save area.
2809      The prologue of the function saves it right above the stack frame.  */
2810 t = make_tree (TREE_TYPE (sav), frame_pointer_rtx);
2811 t = build (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
2812 TREE_SIDE_EFFECTS (t) = 1;
2813 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2814 }
2815
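/* Illustrative sketch (hypothetical prototype): for

       void f (int a, double b, ...);

   one integer and one SSE register are consumed by the named arguments,
   so the assignments above amount to roughly

       gp_offset = 1 * 8;
       fp_offset = 1 * 16 + 8 * REGPARM_MAX;

   with overflow_arg_area pointing at the incoming stack arguments and
   reg_save_area at the block laid out by ix86_setup_incoming_varargs,
   which is what ix86_va_arg below consults.  */
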
2816 /* Implement va_arg. */
2817 rtx
2818 ix86_va_arg (valist, type)
2819 tree valist, type;
2820 {
2821 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
2822 tree f_gpr, f_fpr, f_ovf, f_sav;
2823 tree gpr, fpr, ovf, sav, t;
2824 int size, rsize;
2825 rtx lab_false, lab_over = NULL_RTX;
2826 rtx addr_rtx, r;
2827 rtx container;
2828 int indirect_p = 0;
2829
2830 /* Only 64bit target needs something special. */
2831 if (!TARGET_64BIT)
2832 {
2833 return std_expand_builtin_va_arg (valist, type);
2834 }
2835
2836 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
2837 f_fpr = TREE_CHAIN (f_gpr);
2838 f_ovf = TREE_CHAIN (f_fpr);
2839 f_sav = TREE_CHAIN (f_ovf);
2840
2841 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
2842 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
2843 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
2844 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
2845 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
2846
2847 size = int_size_in_bytes (type);
2848 if (size == -1)
2849 {
2850 /* Passed by reference. */
2851 indirect_p = 1;
2852 type = build_pointer_type (type);
2853 size = int_size_in_bytes (type);
2854 }
2855 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2856
2857 container = construct_container (TYPE_MODE (type), type, 0,
2858 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
2859 /*
2860 * Pull the value out of the saved registers ...
2861 */
2862
2863 addr_rtx = gen_reg_rtx (Pmode);
2864
2865 if (container)
2866 {
2867 rtx int_addr_rtx, sse_addr_rtx;
2868 int needed_intregs, needed_sseregs;
2869 int need_temp;
2870
2871 lab_over = gen_label_rtx ();
2872 lab_false = gen_label_rtx ();
2873
2874 examine_argument (TYPE_MODE (type), type, 0,
2875 &needed_intregs, &needed_sseregs);
2876
2877
2878 need_temp = ((needed_intregs && TYPE_ALIGN (type) > 64)
2879 || TYPE_ALIGN (type) > 128);
2880
2881       /* In case we are passing a structure, verify that it is a consecutive
2882          block in the register save area.  If not, we need to do moves.  */
2883 if (!need_temp && !REG_P (container))
2884 {
2885 	  /* Verify that all registers are strictly consecutive.  */
2886 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
2887 {
2888 int i;
2889
2890 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
2891 {
2892 rtx slot = XVECEXP (container, 0, i);
2893 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
2894 || INTVAL (XEXP (slot, 1)) != i * 16)
2895 need_temp = 1;
2896 }
2897 }
2898 else
2899 {
2900 int i;
2901
2902 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
2903 {
2904 rtx slot = XVECEXP (container, 0, i);
2905 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
2906 || INTVAL (XEXP (slot, 1)) != i * 8)
2907 need_temp = 1;
2908 }
2909 }
2910 }
2911 if (!need_temp)
2912 {
2913 int_addr_rtx = addr_rtx;
2914 sse_addr_rtx = addr_rtx;
2915 }
2916 else
2917 {
2918 int_addr_rtx = gen_reg_rtx (Pmode);
2919 sse_addr_rtx = gen_reg_rtx (Pmode);
2920 }
2921 /* First ensure that we fit completely in registers. */
2922 if (needed_intregs)
2923 {
2924 emit_cmp_and_jump_insns (expand_expr
2925 (gpr, NULL_RTX, SImode, EXPAND_NORMAL),
2926 GEN_INT ((REGPARM_MAX - needed_intregs +
2927 1) * 8), GE, const1_rtx, SImode,
2928 1, lab_false);
2929 }
2930 if (needed_sseregs)
2931 {
2932 emit_cmp_and_jump_insns (expand_expr
2933 (fpr, NULL_RTX, SImode, EXPAND_NORMAL),
2934 GEN_INT ((SSE_REGPARM_MAX -
2935 needed_sseregs + 1) * 16 +
2936 REGPARM_MAX * 8), GE, const1_rtx,
2937 SImode, 1, lab_false);
2938 }
2939
2940 /* Compute index to start of area used for integer regs. */
2941 if (needed_intregs)
2942 {
2943 t = build (PLUS_EXPR, ptr_type_node, sav, gpr);
2944 r = expand_expr (t, int_addr_rtx, Pmode, EXPAND_NORMAL);
2945 if (r != int_addr_rtx)
2946 emit_move_insn (int_addr_rtx, r);
2947 }
2948 if (needed_sseregs)
2949 {
2950 t = build (PLUS_EXPR, ptr_type_node, sav, fpr);
2951 r = expand_expr (t, sse_addr_rtx, Pmode, EXPAND_NORMAL);
2952 if (r != sse_addr_rtx)
2953 emit_move_insn (sse_addr_rtx, r);
2954 }
2955 if (need_temp)
2956 {
2957 int i;
2958 rtx mem;
2959 rtx x;
2960
2961 /* Never use the memory itself, as it has the alias set. */
2962 x = XEXP (assign_temp (type, 0, 1, 0), 0);
2963 mem = gen_rtx_MEM (BLKmode, x);
2964 force_operand (x, addr_rtx);
2965 set_mem_alias_set (mem, get_varargs_alias_set ());
2966 set_mem_align (mem, BITS_PER_UNIT);
2967
2968 for (i = 0; i < XVECLEN (container, 0); i++)
2969 {
2970 rtx slot = XVECEXP (container, 0, i);
2971 rtx reg = XEXP (slot, 0);
2972 enum machine_mode mode = GET_MODE (reg);
2973 rtx src_addr;
2974 rtx src_mem;
2975 int src_offset;
2976 rtx dest_mem;
2977
2978 if (SSE_REGNO_P (REGNO (reg)))
2979 {
2980 src_addr = sse_addr_rtx;
2981 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
2982 }
2983 else
2984 {
2985 src_addr = int_addr_rtx;
2986 src_offset = REGNO (reg) * 8;
2987 }
2988 src_mem = gen_rtx_MEM (mode, src_addr);
2989 set_mem_alias_set (src_mem, get_varargs_alias_set ());
2990 src_mem = adjust_address (src_mem, mode, src_offset);
2991 dest_mem = adjust_address (mem, mode, INTVAL (XEXP (slot, 1)));
2992 emit_move_insn (dest_mem, src_mem);
2993 }
2994 }
2995
2996 if (needed_intregs)
2997 {
2998 t =
2999 build (PLUS_EXPR, TREE_TYPE (gpr), gpr,
3000 build_int_2 (needed_intregs * 8, 0));
3001 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
3002 TREE_SIDE_EFFECTS (t) = 1;
3003 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3004 }
3005 if (needed_sseregs)
3006 {
3007 t =
3008 build (PLUS_EXPR, TREE_TYPE (fpr), fpr,
3009 build_int_2 (needed_sseregs * 16, 0));
3010 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
3011 TREE_SIDE_EFFECTS (t) = 1;
3012 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3013 }
3014
3015 emit_jump_insn (gen_jump (lab_over));
3016 emit_barrier ();
3017 emit_label (lab_false);
3018 }
3019
3020 /* ... otherwise out of the overflow area. */
3021
3022 /* Care for on-stack alignment if needed. */
3023 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64)
3024 t = ovf;
3025 else
3026 {
3027 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
3028 t = build (PLUS_EXPR, TREE_TYPE (ovf), ovf, build_int_2 (align - 1, 0));
3029 t = build (BIT_AND_EXPR, TREE_TYPE (t), t, build_int_2 (-align, -1));
3030 }
3031 t = save_expr (t);
3032
3033 r = expand_expr (t, addr_rtx, Pmode, EXPAND_NORMAL);
3034 if (r != addr_rtx)
3035 emit_move_insn (addr_rtx, r);
3036
3037 t =
3038 build (PLUS_EXPR, TREE_TYPE (t), t,
3039 build_int_2 (rsize * UNITS_PER_WORD, 0));
3040 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
3041 TREE_SIDE_EFFECTS (t) = 1;
3042 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3043
3044 if (container)
3045 emit_label (lab_over);
3046
3047 if (indirect_p)
3048 {
3049 r = gen_rtx_MEM (Pmode, addr_rtx);
3050 set_mem_alias_set (r, get_varargs_alias_set ());
3051 emit_move_insn (addr_rtx, r);
3052 }
3053
3054 return addr_rtx;
3055 }
3056
3057 /* Return nonzero if OP is either a i387 or SSE fp register. */
3058 int
3059 any_fp_register_operand (op, mode)
3060 rtx op;
3061 enum machine_mode mode ATTRIBUTE_UNUSED;
3062 {
3063 return ANY_FP_REG_P (op);
3064 }
3065
3066 /* Return nonzero if OP is an i387 fp register. */
3067 int
3068 fp_register_operand (op, mode)
3069 rtx op;
3070 enum machine_mode mode ATTRIBUTE_UNUSED;
3071 {
3072 return FP_REG_P (op);
3073 }
3074
3075 /* Return nonzero if OP is a non-fp register_operand. */
3076 int
3077 register_and_not_any_fp_reg_operand (op, mode)
3078 rtx op;
3079 enum machine_mode mode;
3080 {
3081 return register_operand (op, mode) && !ANY_FP_REG_P (op);
3082 }
3083
3084 /* Return nonzero if OP is a register operand other than an
3085 i387 fp register. */
3086 int
3087 register_and_not_fp_reg_operand (op, mode)
3088 rtx op;
3089 enum machine_mode mode;
3090 {
3091 return register_operand (op, mode) && !FP_REG_P (op);
3092 }
3093
3094 /* Return nonzero if OP is general operand representable on x86_64. */
3095
3096 int
3097 x86_64_general_operand (op, mode)
3098 rtx op;
3099 enum machine_mode mode;
3100 {
3101 if (!TARGET_64BIT)
3102 return general_operand (op, mode);
3103 if (nonimmediate_operand (op, mode))
3104 return 1;
3105 return x86_64_sign_extended_value (op);
3106 }
3107
3108 /* Return nonzero if OP is general operand representable on x86_64
3109 as either sign extended or zero extended constant. */
3110
3111 int
3112 x86_64_szext_general_operand (op, mode)
3113 rtx op;
3114 enum machine_mode mode;
3115 {
3116 if (!TARGET_64BIT)
3117 return general_operand (op, mode);
3118 if (nonimmediate_operand (op, mode))
3119 return 1;
3120 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
3121 }
3122
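/* Illustrative sketch: the sign/zero distinction above matters for 64-bit
   immediates.  A constant such as -1 or 0x7fffffff fits a sign-extended
   32-bit immediate and already satisfies x86_64_general_operand, while
   0x80000000 only fits as a zero-extended value and is therefore accepted
   by the szext predicates but not by x86_64_immediate_operand below.  */
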
3123 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
3124
3125 int
3126 x86_64_nonmemory_operand (op, mode)
3127 rtx op;
3128 enum machine_mode mode;
3129 {
3130 if (!TARGET_64BIT)
3131 return nonmemory_operand (op, mode);
3132 if (register_operand (op, mode))
3133 return 1;
3134 return x86_64_sign_extended_value (op);
3135 }
3136
3137 /* Return nonzero if OP is nonmemory operand acceptable by movabs patterns. */
3138
3139 int
3140 x86_64_movabs_operand (op, mode)
3141 rtx op;
3142 enum machine_mode mode;
3143 {
3144 if (!TARGET_64BIT || !flag_pic)
3145 return nonmemory_operand (op, mode);
3146 if (register_operand (op, mode) || x86_64_sign_extended_value (op))
3147 return 1;
3148 if (CONSTANT_P (op) && !symbolic_reference_mentioned_p (op))
3149 return 1;
3150 return 0;
3151 }
3152
3153 /* Return nonzero if OPNUM's MEM should be matched
3154 in movabs* patterns. */
3155
3156 int
3157 ix86_check_movabs (insn, opnum)
3158 rtx insn;
3159 int opnum;
3160 {
3161 rtx set, mem;
3162
3163 set = PATTERN (insn);
3164 if (GET_CODE (set) == PARALLEL)
3165 set = XVECEXP (set, 0, 0);
3166 if (GET_CODE (set) != SET)
3167 abort ();
3168 mem = XEXP (set, opnum);
3169 while (GET_CODE (mem) == SUBREG)
3170 mem = SUBREG_REG (mem);
3171 if (GET_CODE (mem) != MEM)
3172 abort ();
3173 return (volatile_ok || !MEM_VOLATILE_P (mem));
3174 }
3175
3176 /* Return nonzero if OP is a nonmemory operand representable on x86_64
   as either a sign extended or zero extended constant.  */
3177
3178 int
3179 x86_64_szext_nonmemory_operand (op, mode)
3180 rtx op;
3181 enum machine_mode mode;
3182 {
3183 if (!TARGET_64BIT)
3184 return nonmemory_operand (op, mode);
3185 if (register_operand (op, mode))
3186 return 1;
3187 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
3188 }
3189
3190 /* Return nonzero if OP is immediate operand representable on x86_64. */
3191
3192 int
3193 x86_64_immediate_operand (op, mode)
3194 rtx op;
3195 enum machine_mode mode;
3196 {
3197 if (!TARGET_64BIT)
3198 return immediate_operand (op, mode);
3199 return x86_64_sign_extended_value (op);
3200 }
3201
3202 /* Return nonzero if OP is an immediate operand representable on x86_64
   as a zero extended constant.  */
3203
3204 int
3205 x86_64_zext_immediate_operand (op, mode)
3206 rtx op;
3207 enum machine_mode mode ATTRIBUTE_UNUSED;
3208 {
3209 return x86_64_zero_extended_value (op);
3210 }
3211
3212 /* Return nonzero if OP is (const_int 1), else return zero. */
3213
3214 int
3215 const_int_1_operand (op, mode)
3216 rtx op;
3217 enum machine_mode mode ATTRIBUTE_UNUSED;
3218 {
3219 return (GET_CODE (op) == CONST_INT && INTVAL (op) == 1);
3220 }
3221
3222 /* Return nonzero if OP is CONST_INT >= 1 and <= 31 (a valid operand
3223 for shift & compare patterns, as shifting by 0 does not change flags),
3224 else return zero. */
3225
3226 int
3227 const_int_1_31_operand (op, mode)
3228 rtx op;
3229 enum machine_mode mode ATTRIBUTE_UNUSED;
3230 {
3231 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 1 && INTVAL (op) <= 31);
3232 }
3233
3234 /* Returns 1 if OP is either a symbol reference or a sum of a symbol
3235 reference and a constant. */
3236
3237 int
3238 symbolic_operand (op, mode)
3239 register rtx op;
3240 enum machine_mode mode ATTRIBUTE_UNUSED;
3241 {
3242 switch (GET_CODE (op))
3243 {
3244 case SYMBOL_REF:
3245 case LABEL_REF:
3246 return 1;
3247
3248 case CONST:
3249 op = XEXP (op, 0);
3250 if (GET_CODE (op) == SYMBOL_REF
3251 || GET_CODE (op) == LABEL_REF
3252 || (GET_CODE (op) == UNSPEC
3253 && (XINT (op, 1) == UNSPEC_GOT
3254 || XINT (op, 1) == UNSPEC_GOTOFF
3255 || XINT (op, 1) == UNSPEC_GOTPCREL)))
3256 return 1;
3257 if (GET_CODE (op) != PLUS
3258 || GET_CODE (XEXP (op, 1)) != CONST_INT)
3259 return 0;
3260
3261 op = XEXP (op, 0);
3262 if (GET_CODE (op) == SYMBOL_REF
3263 || GET_CODE (op) == LABEL_REF)
3264 return 1;
3265 /* Only @GOTOFF gets offsets. */
3266 if (GET_CODE (op) != UNSPEC
3267 || XINT (op, 1) != UNSPEC_GOTOFF)
3268 return 0;
3269
3270 op = XVECEXP (op, 0, 0);
3271 if (GET_CODE (op) == SYMBOL_REF
3272 || GET_CODE (op) == LABEL_REF)
3273 return 1;
3274 return 0;
3275
3276 default:
3277 return 0;
3278 }
3279 }
3280
3281 /* Return true if the operand contains a @GOT or @GOTOFF reference. */
3282
3283 int
3284 pic_symbolic_operand (op, mode)
3285 register rtx op;
3286 enum machine_mode mode ATTRIBUTE_UNUSED;
3287 {
3288 if (GET_CODE (op) != CONST)
3289 return 0;
3290 op = XEXP (op, 0);
3291 if (TARGET_64BIT)
3292 {
3293 if (GET_CODE (XEXP (op, 0)) == UNSPEC)
3294 return 1;
3295 }
3296 else
3297 {
3298 if (GET_CODE (op) == UNSPEC)
3299 return 1;
3300 if (GET_CODE (op) != PLUS
3301 || GET_CODE (XEXP (op, 1)) != CONST_INT)
3302 return 0;
3303 op = XEXP (op, 0);
3304 if (GET_CODE (op) == UNSPEC)
3305 return 1;
3306 }
3307 return 0;
3308 }
3309
3310 /* Return true if OP is a symbolic operand that resolves locally. */
3311
3312 static int
3313 local_symbolic_operand (op, mode)
3314 rtx op;
3315 enum machine_mode mode ATTRIBUTE_UNUSED;
3316 {
3317 if (GET_CODE (op) == CONST
3318 && GET_CODE (XEXP (op, 0)) == PLUS
3319 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
3320 op = XEXP (XEXP (op, 0), 0);
3321
3322 if (GET_CODE (op) == LABEL_REF)
3323 return 1;
3324
3325 if (GET_CODE (op) != SYMBOL_REF)
3326 return 0;
3327
3328 /* These we've been told are local by varasm and encode_section_info
3329 respectively. */
3330 if (CONSTANT_POOL_ADDRESS_P (op) || SYMBOL_REF_FLAG (op))
3331 return 1;
3332
3333 /* There is, however, a not insubstantial body of code in the rest of
3334 the compiler that assumes it can just stick the results of
3335 ASM_GENERATE_INTERNAL_LABEL in a symbol_ref and have done. */
3336 /* ??? This is a hack. Should update the body of the compiler to
3337      always create a DECL and invoke targetm.encode_section_info.  */
3338 if (strncmp (XSTR (op, 0), internal_label_prefix,
3339 internal_label_prefix_len) == 0)
3340 return 1;
3341
3342 return 0;
3343 }
3344
3345 /* Test for various thread-local symbols. See ix86_encode_section_info. */
3346
3347 int
3348 tls_symbolic_operand (op, mode)
3349 register rtx op;
3350 enum machine_mode mode ATTRIBUTE_UNUSED;
3351 {
3352 const char *symbol_str;
3353
3354 if (GET_CODE (op) != SYMBOL_REF)
3355 return 0;
3356 symbol_str = XSTR (op, 0);
3357
3358 if (symbol_str[0] != '%')
3359 return 0;
3360 return strchr (tls_model_chars, symbol_str[1]) - tls_model_chars;
3361 }
3362
3363 static int
3364 tls_symbolic_operand_1 (op, kind)
3365 rtx op;
3366 enum tls_model kind;
3367 {
3368 const char *symbol_str;
3369
3370 if (GET_CODE (op) != SYMBOL_REF)
3371 return 0;
3372 symbol_str = XSTR (op, 0);
3373
3374 return symbol_str[0] == '%' && symbol_str[1] == tls_model_chars[kind];
3375 }
3376
3377 int
3378 global_dynamic_symbolic_operand (op, mode)
3379 register rtx op;
3380 enum machine_mode mode ATTRIBUTE_UNUSED;
3381 {
3382 return tls_symbolic_operand_1 (op, TLS_MODEL_GLOBAL_DYNAMIC);
3383 }
3384
3385 int
3386 local_dynamic_symbolic_operand (op, mode)
3387 register rtx op;
3388 enum machine_mode mode ATTRIBUTE_UNUSED;
3389 {
3390 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_DYNAMIC);
3391 }
3392
3393 int
3394 initial_exec_symbolic_operand (op, mode)
3395 register rtx op;
3396 enum machine_mode mode ATTRIBUTE_UNUSED;
3397 {
3398 return tls_symbolic_operand_1 (op, TLS_MODEL_INITIAL_EXEC);
3399 }
3400
3401 int
3402 local_exec_symbolic_operand (op, mode)
3403 register rtx op;
3404 enum machine_mode mode ATTRIBUTE_UNUSED;
3405 {
3406 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_EXEC);
3407 }
3408
3409 /* Test for a valid operand for a call instruction. Don't allow the
3410 arg pointer register or virtual regs since they may decay into
3411 reg + const, which the patterns can't handle. */
3412
3413 int
3414 call_insn_operand (op, mode)
3415 rtx op;
3416 enum machine_mode mode ATTRIBUTE_UNUSED;
3417 {
3418 /* Disallow indirect through a virtual register. This leads to
3419 compiler aborts when trying to eliminate them. */
3420 if (GET_CODE (op) == REG
3421 && (op == arg_pointer_rtx
3422 || op == frame_pointer_rtx
3423 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
3424 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
3425 return 0;
3426
3427 /* Disallow `call 1234'. Due to varying assembler lameness this
3428 gets either rejected or translated to `call .+1234'. */
3429 if (GET_CODE (op) == CONST_INT)
3430 return 0;
3431
3432 /* Explicitly allow SYMBOL_REF even if pic. */
3433 if (GET_CODE (op) == SYMBOL_REF)
3434 return 1;
3435
3436 /* Otherwise we can allow any general_operand in the address. */
3437 return general_operand (op, Pmode);
3438 }
3439
3440 int
3441 constant_call_address_operand (op, mode)
3442 rtx op;
3443 enum machine_mode mode ATTRIBUTE_UNUSED;
3444 {
3445 if (GET_CODE (op) == CONST
3446 && GET_CODE (XEXP (op, 0)) == PLUS
3447 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
3448 op = XEXP (XEXP (op, 0), 0);
3449 return GET_CODE (op) == SYMBOL_REF;
3450 }
3451
3452 /* Match exactly zero and one. */
3453
3454 int
3455 const0_operand (op, mode)
3456 register rtx op;
3457 enum machine_mode mode;
3458 {
3459 return op == CONST0_RTX (mode);
3460 }
3461
3462 int
3463 const1_operand (op, mode)
3464 register rtx op;
3465 enum machine_mode mode ATTRIBUTE_UNUSED;
3466 {
3467 return op == const1_rtx;
3468 }
3469
3470 /* Match 2, 4, or 8. Used for leal multiplicands. */
3471
3472 int
3473 const248_operand (op, mode)
3474 register rtx op;
3475 enum machine_mode mode ATTRIBUTE_UNUSED;
3476 {
3477 return (GET_CODE (op) == CONST_INT
3478 && (INTVAL (op) == 2 || INTVAL (op) == 4 || INTVAL (op) == 8));
3479 }
3480
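/* For reference, 2, 4 and 8 are exactly the nontrivial scale factors the
   SIB byte can encode (1 being the default); e.g. "leal (%eax,%ebx,4), %ecx"
   computes ecx = eax + ebx*4 in a single instruction, which is why the lea
   patterns use the predicate above for the multiplicand.  */
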
3481 /* True if this is a constant appropriate for an increment or decrement.  */
3482
3483 int
3484 incdec_operand (op, mode)
3485 register rtx op;
3486 enum machine_mode mode ATTRIBUTE_UNUSED;
3487 {
3488 /* On the Pentium 4, the inc and dec operations cause an extra dependency on
3489 the flags register, since the carry flag is not set.  */
3490 if (TARGET_PENTIUM4 && !optimize_size)
3491 return 0;
3492 return op == const1_rtx || op == constm1_rtx;
3493 }
3494
3495 /* Return nonzero if OP is acceptable as operand of DImode shift
3496 expander. */
3497
3498 int
3499 shiftdi_operand (op, mode)
3500 rtx op;
3501 enum machine_mode mode ATTRIBUTE_UNUSED;
3502 {
3503 if (TARGET_64BIT)
3504 return nonimmediate_operand (op, mode);
3505 else
3506 return register_operand (op, mode);
3507 }
3508
3509 /* Return false if this is the stack pointer, or any other fake
3510 register eliminable to the stack pointer. Otherwise, this is
3511 a register operand.
3512
3513 This is used to prevent esp from being used as an index reg,
3514 which would only happen in pathological cases.  */
3515
3516 int
3517 reg_no_sp_operand (op, mode)
3518 register rtx op;
3519 enum machine_mode mode;
3520 {
3521 rtx t = op;
3522 if (GET_CODE (t) == SUBREG)
3523 t = SUBREG_REG (t);
3524 if (t == stack_pointer_rtx || t == arg_pointer_rtx || t == frame_pointer_rtx)
3525 return 0;
3526
3527 return register_operand (op, mode);
3528 }
3529
3530 int
3531 mmx_reg_operand (op, mode)
3532 register rtx op;
3533 enum machine_mode mode ATTRIBUTE_UNUSED;
3534 {
3535 return MMX_REG_P (op);
3536 }
3537
3538 /* Return false if this is any eliminable register. Otherwise
3539 general_operand. */
3540
3541 int
3542 general_no_elim_operand (op, mode)
3543 register rtx op;
3544 enum machine_mode mode;
3545 {
3546 rtx t = op;
3547 if (GET_CODE (t) == SUBREG)
3548 t = SUBREG_REG (t);
3549 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3550 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3551 || t == virtual_stack_dynamic_rtx)
3552 return 0;
3553 if (REG_P (t)
3554 && REGNO (t) >= FIRST_VIRTUAL_REGISTER
3555 && REGNO (t) <= LAST_VIRTUAL_REGISTER)
3556 return 0;
3557
3558 return general_operand (op, mode);
3559 }
3560
3561 /* Return false if this is any eliminable register. Otherwise
3562 register_operand or const_int. */
3563
3564 int
3565 nonmemory_no_elim_operand (op, mode)
3566 register rtx op;
3567 enum machine_mode mode;
3568 {
3569 rtx t = op;
3570 if (GET_CODE (t) == SUBREG)
3571 t = SUBREG_REG (t);
3572 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3573 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3574 || t == virtual_stack_dynamic_rtx)
3575 return 0;
3576
3577 return GET_CODE (op) == CONST_INT || register_operand (op, mode);
3578 }
3579
3580 /* Return false if this is any eliminable register or stack register,
3581 otherwise work like register_operand. */
3582
3583 int
3584 index_register_operand (op, mode)
3585 register rtx op;
3586 enum machine_mode mode;
3587 {
3588 rtx t = op;
3589 if (GET_CODE (t) == SUBREG)
3590 t = SUBREG_REG (t);
3591 if (!REG_P (t))
3592 return 0;
3593 if (t == arg_pointer_rtx
3594 || t == frame_pointer_rtx
3595 || t == virtual_incoming_args_rtx
3596 || t == virtual_stack_vars_rtx
3597 || t == virtual_stack_dynamic_rtx
3598 || REGNO (t) == STACK_POINTER_REGNUM)
3599 return 0;
3600
3601 return general_operand (op, mode);
3602 }
3603
3604 /* Return true if op is a Q_REGS class register. */
3605
3606 int
3607 q_regs_operand (op, mode)
3608 register rtx op;
3609 enum machine_mode mode;
3610 {
3611 if (mode != VOIDmode && GET_MODE (op) != mode)
3612 return 0;
3613 if (GET_CODE (op) == SUBREG)
3614 op = SUBREG_REG (op);
3615 return ANY_QI_REG_P (op);
3616 }
3617
3618 /* Return true if op is the flags register.  */
3619
3620 int
3621 flags_reg_operand (op, mode)
3622 register rtx op;
3623 enum machine_mode mode;
3624 {
3625 if (mode != VOIDmode && GET_MODE (op) != mode)
3626 return 0;
3627 return (GET_CODE (op) == REG
3628 && REGNO (op) == FLAGS_REG
3629 && GET_MODE (op) != VOIDmode);
3630 }
3631
3632 /* Return true if op is a NON_Q_REGS class register. */
3633
3634 int
3635 non_q_regs_operand (op, mode)
3636 register rtx op;
3637 enum machine_mode mode;
3638 {
3639 if (mode != VOIDmode && GET_MODE (op) != mode)
3640 return 0;
3641 if (GET_CODE (op) == SUBREG)
3642 op = SUBREG_REG (op);
3643 return NON_QI_REG_P (op);
3644 }
3645
3646 /* Return 1 when OP is operand acceptable for standard SSE move. */
3647 int
3648 vector_move_operand (op, mode)
3649 rtx op;
3650 enum machine_mode mode;
3651 {
3652 if (nonimmediate_operand (op, mode))
3653 return 1;
3654 if (GET_MODE (op) != mode && mode != VOIDmode)
3655 return 0;
3656 return (op == CONST0_RTX (GET_MODE (op)));
3657 }
3658
3659 /* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS
3660 insns. */
3661 int
3662 sse_comparison_operator (op, mode)
3663 rtx op;
3664 enum machine_mode mode ATTRIBUTE_UNUSED;
3665 {
3666 enum rtx_code code = GET_CODE (op);
3667 switch (code)
3668 {
3669 /* Operations supported directly. */
3670 case EQ:
3671 case LT:
3672 case LE:
3673 case UNORDERED:
3674 case NE:
3675 case UNGE:
3676 case UNGT:
3677 case ORDERED:
3678 return 1;
3679 /* These are equivalent to ones above in non-IEEE comparisons. */
3680 case UNEQ:
3681 case UNLT:
3682 case UNLE:
3683 case LTGT:
3684 case GE:
3685 case GT:
3686 return !TARGET_IEEE_FP;
3687 default:
3688 return 0;
3689 }
3690 }
3691 /* Return 1 if OP is a valid comparison operator in valid mode. */
3692 int
3693 ix86_comparison_operator (op, mode)
3694 register rtx op;
3695 enum machine_mode mode;
3696 {
3697 enum machine_mode inmode;
3698 enum rtx_code code = GET_CODE (op);
3699 if (mode != VOIDmode && GET_MODE (op) != mode)
3700 return 0;
3701 if (GET_RTX_CLASS (code) != '<')
3702 return 0;
3703 inmode = GET_MODE (XEXP (op, 0));
3704
3705 if (inmode == CCFPmode || inmode == CCFPUmode)
3706 {
3707 enum rtx_code second_code, bypass_code;
3708 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3709 return (bypass_code == NIL && second_code == NIL);
3710 }
3711 switch (code)
3712 {
3713 case EQ: case NE:
3714 return 1;
3715 case LT: case GE:
3716 if (inmode == CCmode || inmode == CCGCmode
3717 || inmode == CCGOCmode || inmode == CCNOmode)
3718 return 1;
3719 return 0;
3720 case LTU: case GTU: case LEU: case ORDERED: case UNORDERED: case GEU:
3721 if (inmode == CCmode)
3722 return 1;
3723 return 0;
3724 case GT: case LE:
3725 if (inmode == CCmode || inmode == CCGCmode || inmode == CCNOmode)
3726 return 1;
3727 return 0;
3728 default:
3729 return 0;
3730 }
3731 }
3732
3733 /* Return 1 if OP is a comparison operator that can be issued by fcmov. */
3734
3735 int
3736 fcmov_comparison_operator (op, mode)
3737 register rtx op;
3738 enum machine_mode mode;
3739 {
3740 enum machine_mode inmode;
3741 enum rtx_code code = GET_CODE (op);
3742 if (mode != VOIDmode && GET_MODE (op) != mode)
3743 return 0;
3744 if (GET_RTX_CLASS (code) != '<')
3745 return 0;
3746 inmode = GET_MODE (XEXP (op, 0));
3747 if (inmode == CCFPmode || inmode == CCFPUmode)
3748 {
3749 enum rtx_code second_code, bypass_code;
3750 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3751 if (bypass_code != NIL || second_code != NIL)
3752 return 0;
3753 code = ix86_fp_compare_code_to_integer (code);
3754 }
3755 /* The i387 supports only a limited set of condition codes.  */
3756 switch (code)
3757 {
3758 case LTU: case GTU: case LEU: case GEU:
3759 if (inmode == CCmode || inmode == CCFPmode || inmode == CCFPUmode)
3760 return 1;
3761 return 0;
3762 case ORDERED: case UNORDERED:
3763 case EQ: case NE:
3764 return 1;
3765 default:
3766 return 0;
3767 }
3768 }
3769
3770 /* Return 1 if OP is a binary operator that can be promoted to wider mode. */
3771
3772 int
3773 promotable_binary_operator (op, mode)
3774 register rtx op;
3775 enum machine_mode mode ATTRIBUTE_UNUSED;
3776 {
3777 switch (GET_CODE (op))
3778 {
3779 case MULT:
3780 /* Modern CPUs have the same latency for HImode and SImode multiplies,
3781 but the 386 and 486 do HImode multiplies faster.  */
3782 return ix86_cpu > PROCESSOR_I486;
3783 case PLUS:
3784 case AND:
3785 case IOR:
3786 case XOR:
3787 case ASHIFT:
3788 return 1;
3789 default:
3790 return 0;
3791 }
3792 }
3793
3794 /* Nearly general operand, but accept any const_double, since we wish
3795 to be able to drop them into memory rather than have them get pulled
3796 into registers. */
3797
3798 int
3799 cmp_fp_expander_operand (op, mode)
3800 register rtx op;
3801 enum machine_mode mode;
3802 {
3803 if (mode != VOIDmode && mode != GET_MODE (op))
3804 return 0;
3805 if (GET_CODE (op) == CONST_DOUBLE)
3806 return 1;
3807 return general_operand (op, mode);
3808 }
3809
3810 /* Match an SI or HImode register for a zero_extract. */
3811
3812 int
3813 ext_register_operand (op, mode)
3814 register rtx op;
3815 enum machine_mode mode ATTRIBUTE_UNUSED;
3816 {
3817 int regno;
3818 if ((!TARGET_64BIT || GET_MODE (op) != DImode)
3819 && GET_MODE (op) != SImode && GET_MODE (op) != HImode)
3820 return 0;
3821
3822 if (!register_operand (op, VOIDmode))
3823 return 0;
3824
3825 /* Be careful to accept only registers having upper parts.  */
3826 regno = REG_P (op) ? REGNO (op) : REGNO (SUBREG_REG (op));
3827 return (regno > LAST_VIRTUAL_REGISTER || regno < 4);
3828 }
3829
3830 /* Return 1 if this is a valid binary floating-point operation.
3831 OP is the expression matched, and MODE is its mode. */
3832
3833 int
3834 binary_fp_operator (op, mode)
3835 register rtx op;
3836 enum machine_mode mode;
3837 {
3838 if (mode != VOIDmode && mode != GET_MODE (op))
3839 return 0;
3840
3841 switch (GET_CODE (op))
3842 {
3843 case PLUS:
3844 case MINUS:
3845 case MULT:
3846 case DIV:
3847 return GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT;
3848
3849 default:
3850 return 0;
3851 }
3852 }
3853
3854 int
3855 mult_operator (op, mode)
3856 register rtx op;
3857 enum machine_mode mode ATTRIBUTE_UNUSED;
3858 {
3859 return GET_CODE (op) == MULT;
3860 }
3861
3862 int
3863 div_operator (op, mode)
3864 register rtx op;
3865 enum machine_mode mode ATTRIBUTE_UNUSED;
3866 {
3867 return GET_CODE (op) == DIV;
3868 }
3869
3870 int
3871 arith_or_logical_operator (op, mode)
3872 rtx op;
3873 enum machine_mode mode;
3874 {
3875 return ((mode == VOIDmode || GET_MODE (op) == mode)
3876 && (GET_RTX_CLASS (GET_CODE (op)) == 'c'
3877 || GET_RTX_CLASS (GET_CODE (op)) == '2'));
3878 }
3879
3880 /* Returns 1 if OP is memory operand with a displacement. */
3881
3882 int
3883 memory_displacement_operand (op, mode)
3884 register rtx op;
3885 enum machine_mode mode;
3886 {
3887 struct ix86_address parts;
3888
3889 if (! memory_operand (op, mode))
3890 return 0;
3891
3892 if (! ix86_decompose_address (XEXP (op, 0), &parts))
3893 abort ();
3894
3895 return parts.disp != NULL_RTX;
3896 }
3897
3898 /* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
3899 re-recognize the operand to avoid a copy_to_mode_reg that will fail.
3900
3901 ??? It seems likely that this will only work because cmpsi is an
3902 expander, and no actual insns use this. */
3903
3904 int
3905 cmpsi_operand (op, mode)
3906 rtx op;
3907 enum machine_mode mode;
3908 {
3909 if (nonimmediate_operand (op, mode))
3910 return 1;
3911
3912 if (GET_CODE (op) == AND
3913 && GET_MODE (op) == SImode
3914 && GET_CODE (XEXP (op, 0)) == ZERO_EXTRACT
3915 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
3916 && GET_CODE (XEXP (XEXP (op, 0), 2)) == CONST_INT
3917 && INTVAL (XEXP (XEXP (op, 0), 1)) == 8
3918 && INTVAL (XEXP (XEXP (op, 0), 2)) == 8
3919 && GET_CODE (XEXP (op, 1)) == CONST_INT)
3920 return 1;
3921
3922 return 0;
3923 }
3924
3925 /* Returns 1 if OP is a memory operand that cannot be represented by the
3926 modRM array.  */
3927
3928 int
3929 long_memory_operand (op, mode)
3930 register rtx op;
3931 enum machine_mode mode;
3932 {
3933 if (! memory_operand (op, mode))
3934 return 0;
3935
3936 return memory_address_length (op) != 0;
3937 }
3938
3939 /* Return nonzero if the rtx is known aligned. */
3940
3941 int
3942 aligned_operand (op, mode)
3943 rtx op;
3944 enum machine_mode mode;
3945 {
3946 struct ix86_address parts;
3947
3948 if (!general_operand (op, mode))
3949 return 0;
3950
3951 /* Registers and immediate operands are always "aligned". */
3952 if (GET_CODE (op) != MEM)
3953 return 1;
3954
3955 /* Don't even try to do any aligned optimizations with volatiles. */
3956 if (MEM_VOLATILE_P (op))
3957 return 0;
3958
3959 op = XEXP (op, 0);
3960
3961 /* Pushes and pops are only valid on the stack pointer. */
3962 if (GET_CODE (op) == PRE_DEC
3963 || GET_CODE (op) == POST_INC)
3964 return 1;
3965
3966 /* Decode the address. */
3967 if (! ix86_decompose_address (op, &parts))
3968 abort ();
3969
3970 /* Look for some component that isn't known to be aligned. */
3971 if (parts.index)
3972 {
3973 if (parts.scale < 4
3974 && REGNO_POINTER_ALIGN (REGNO (parts.index)) < 32)
3975 return 0;
3976 }
3977 if (parts.base)
3978 {
3979 if (REGNO_POINTER_ALIGN (REGNO (parts.base)) < 32)
3980 return 0;
3981 }
3982 if (parts.disp)
3983 {
3984 if (GET_CODE (parts.disp) != CONST_INT
3985 || (INTVAL (parts.disp) & 3) != 0)
3986 return 0;
3987 }
3988
3989 /* Didn't find one -- this must be an aligned address. */
3990 return 1;
3991 }
3992
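/* Some examples of what the predicate above decides (a rough sketch,
   assuming the base register is known to be 32-bit aligned):

     (mem:SI (reg))                               accepted
     (mem:SI (plus (reg) (const_int 3)))          rejected: disp & 3 != 0
     (mem:SI (plus (mult (reg) (const_int 4))
                   (reg)))                        accepted: a scale of 4 keeps
                                                  the index contribution
                                                  4-byte aligned
     (mem:SI (plus (mult (reg) (const_int 2))
                   (reg)))                        accepted only if the index
                                                  register itself is known
                                                  to be aligned  */
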
3993 int
3994 compare_operator (op, mode)
3995 rtx op;
3996 enum machine_mode mode ATTRIBUTE_UNUSED;
3997 {
3998 return GET_CODE (op) == COMPARE;
3999 }
4000
4001 /* Return true if the constant is something that can be loaded with
4002 a special instruction. Only handle 0.0 and 1.0; others are less
4003 worthwhile. */
4004
4005 int
4006 standard_80387_constant_p (x)
4007 rtx x;
4008 {
4009 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
4010 return -1;
4011 /* Note that the 80387 has other built-in constants, such as pi, that we
4012 could support as well.  On some machines these are much slower to load
4013 as a standard constant than to load from doubles in memory.  */
4014 if (x == CONST0_RTX (GET_MODE (x)))
4015 return 1;
4016 if (x == CONST1_RTX (GET_MODE (x)))
4017 return 2;
4018 return 0;
4019 }
4020
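/* A note on the return values above: the floating-point move patterns in
   i386.md presumably map a return of 1 to fldz (load +0.0) and 2 to fld1
   (load +1.0).  The 80387 also has fldpi, fldl2e, fldl2t, fldlg2 and fldln2,
   which are deliberately not matched here.  */
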
4021 /* Return 1 if X is an FP constant we can load into an SSE register
4022 without using memory.  */
4023 int
4024 standard_sse_constant_p (x)
4025 rtx x;
4026 {
4027 if (x == const0_rtx)
4028 return 1;
4029 return (x == CONST0_RTX (GET_MODE (x)));
4030 }
4031
4032 /* Returns 1 if OP contains a symbol reference.  */
4033
4034 int
4035 symbolic_reference_mentioned_p (op)
4036 rtx op;
4037 {
4038 register const char *fmt;
4039 register int i;
4040
4041 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
4042 return 1;
4043
4044 fmt = GET_RTX_FORMAT (GET_CODE (op));
4045 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
4046 {
4047 if (fmt[i] == 'E')
4048 {
4049 register int j;
4050
4051 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
4052 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
4053 return 1;
4054 }
4055
4056 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
4057 return 1;
4058 }
4059
4060 return 0;
4061 }
4062
4063 /* Return 1 if it is appropriate to emit `ret' instructions in the
4064 body of a function. Do this only if the epilogue is simple, needing a
4065 couple of insns. Prior to reloading, we can't tell how many registers
4066 must be saved, so return 0 then. Return 0 if there is no frame
4067 marker to de-allocate.
4068
4069 If NON_SAVING_SETJMP is defined and true, then it is not possible
4070 for the epilogue to be simple, so return 0. This is a special case
4071 since NON_SAVING_SETJMP will not cause regs_ever_live to change
4072 until final, but jump_optimize may need to know sooner if a
4073 `return' is OK. */
4074
4075 int
4076 ix86_can_use_return_insn_p ()
4077 {
4078 struct ix86_frame frame;
4079
4080 #ifdef NON_SAVING_SETJMP
4081 if (NON_SAVING_SETJMP && current_function_calls_setjmp)
4082 return 0;
4083 #endif
4084
4085 if (! reload_completed || frame_pointer_needed)
4086 return 0;
4087
4088 /* Don't allow more than 32768 bytes of popped arguments, since that's
4089 all we can do with one instruction.  */
4090 if (current_function_pops_args
4091 && current_function_args_size >= 32768)
4092 return 0;
4093
4094 ix86_compute_frame_layout (&frame);
4095 return frame.to_allocate == 0 && frame.nregs == 0;
4096 }
4097
4098 /* Return 1 if VALUE can be stored in the sign extended immediate field. */
4099 int
4100 x86_64_sign_extended_value (value)
4101 rtx value;
4102 {
4103 switch (GET_CODE (value))
4104 {
4105 /* CONST_DOUBLES never match, since HOST_BITS_PER_WIDE_INT is known
4106 to be at least 32, and thus all acceptable constants are
4107 represented as CONST_INT.  */
4108 case CONST_INT:
4109 if (HOST_BITS_PER_WIDE_INT == 32)
4110 return 1;
4111 else
4112 {
4113 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (value), DImode);
4114 return trunc_int_for_mode (val, SImode) == val;
4115 }
4116 break;
4117
4118 /* For certain code models, the symbolic references are known to fit.
4119 In the CM_SMALL_PIC model we know they fit if they are local to the
4120 shared library.  Don't count TLS SYMBOL_REFs here, since they should
4121 fit only if inside of an UNSPEC handled below.  */
4122 case SYMBOL_REF:
4123 return (ix86_cmodel == CM_SMALL || ix86_cmodel == CM_KERNEL);
4124
4125 /* For certain code models, the code is near as well. */
4126 case LABEL_REF:
4127 return (ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM
4128 || ix86_cmodel == CM_KERNEL);
4129
4130 /* We also may accept the offsetted memory references in certain special
4131 cases. */
4132 case CONST:
4133 if (GET_CODE (XEXP (value, 0)) == UNSPEC)
4134 switch (XINT (XEXP (value, 0), 1))
4135 {
4136 case UNSPEC_GOTPCREL:
4137 case UNSPEC_DTPOFF:
4138 case UNSPEC_GOTNTPOFF:
4139 case UNSPEC_NTPOFF:
4140 return 1;
4141 default:
4142 break;
4143 }
4144 if (GET_CODE (XEXP (value, 0)) == PLUS)
4145 {
4146 rtx op1 = XEXP (XEXP (value, 0), 0);
4147 rtx op2 = XEXP (XEXP (value, 0), 1);
4148 HOST_WIDE_INT offset;
4149
4150 if (ix86_cmodel == CM_LARGE)
4151 return 0;
4152 if (GET_CODE (op2) != CONST_INT)
4153 return 0;
4154 offset = trunc_int_for_mode (INTVAL (op2), DImode);
4155 switch (GET_CODE (op1))
4156 {
4157 case SYMBOL_REF:
4158 /* For CM_SMALL assume that the latest object is 16MB below the
4159 end of the 31-bit boundary.  We may also accept fairly large
4160 negative constants, knowing that all objects are in the positive
4161 half of the address space.  */
4162 if (ix86_cmodel == CM_SMALL
4163 && offset < 16*1024*1024
4164 && trunc_int_for_mode (offset, SImode) == offset)
4165 return 1;
4166 /* For CM_KERNEL we know that all objects reside in the
4167 negative half of the 32-bit address space.  We must not
4168 accept negative offsets, since they may put the address just
4169 outside that range, but we may accept fairly large positive ones.  */
4170 if (ix86_cmodel == CM_KERNEL
4171 && offset > 0
4172 && trunc_int_for_mode (offset, SImode) == offset)
4173 return 1;
4174 break;
4175 case LABEL_REF:
4176 /* These conditions are similar to SYMBOL_REF ones, just the
4177 constraints for code models differ. */
4178 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
4179 && offset < 16*1024*1024
4180 && trunc_int_for_mode (offset, SImode) == offset)
4181 return 1;
4182 if (ix86_cmodel == CM_KERNEL
4183 && offset > 0
4184 && trunc_int_for_mode (offset, SImode) == offset)
4185 return 1;
4186 break;
4187 case UNSPEC:
4188 switch (XINT (op1, 1))
4189 {
4190 case UNSPEC_DTPOFF:
4191 case UNSPEC_NTPOFF:
4192 if (offset > 0
4193 && trunc_int_for_mode (offset, SImode) == offset)
4194 return 1;
4195 }
4196 break;
4197 default:
4198 return 0;
4199 }
4200 }
4201 return 0;
4202 default:
4203 return 0;
4204 }
4205 }
4206
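/* Worked examples for the CONST_INT case above, assuming a 64-bit
   HOST_WIDE_INT:

     0x000000007fffffff   accepted: sign-extending the low 32 bits
                          reproduces the value
     0xffffffff80000000   accepted: the value already is the sign
                          extension of its low 32 bits
     0x0000000080000000   rejected: sign extension of the low 32 bits
                          would yield 0xffffffff80000000  */
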
4207 /* Return 1 if VALUE can be stored in the zero extended immediate field. */
4208 int
4209 x86_64_zero_extended_value (value)
4210 rtx value;
4211 {
4212 switch (GET_CODE (value))
4213 {
4214 case CONST_DOUBLE:
4215 if (HOST_BITS_PER_WIDE_INT == 32)
4216 return (GET_MODE (value) == VOIDmode
4217 && !CONST_DOUBLE_HIGH (value));
4218 else
4219 return 0;
4220 case CONST_INT:
4221 if (HOST_BITS_PER_WIDE_INT == 32)
4222 return INTVAL (value) >= 0;
4223 else
4224 return !(INTVAL (value) & ~(HOST_WIDE_INT) 0xffffffff);
4225 break;
4226
4227 /* For certain code models, the symbolic references are known to fit. */
4228 case SYMBOL_REF:
4229 return ix86_cmodel == CM_SMALL;
4230
4231 /* For certain code models, the code is near as well. */
4232 case LABEL_REF:
4233 return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM;
4234
4235 /* We also may accept the offsetted memory references in certain special
4236 cases. */
4237 case CONST:
4238 if (GET_CODE (XEXP (value, 0)) == PLUS)
4239 {
4240 rtx op1 = XEXP (XEXP (value, 0), 0);
4241 rtx op2 = XEXP (XEXP (value, 0), 1);
4242
4243 if (ix86_cmodel == CM_LARGE)
4244 return 0;
4245 switch (GET_CODE (op1))
4246 {
4247 case SYMBOL_REF:
4248 return 0;
4249 /* For small code model we may accept pretty large positive
4250 offsets, since one bit is available for free. Negative
4251 offsets are limited by the size of NULL pointer area
4252 specified by the ABI. */
4253 if (ix86_cmodel == CM_SMALL
4254 && GET_CODE (op2) == CONST_INT
4255 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
4256 && (trunc_int_for_mode (INTVAL (op2), SImode)
4257 == INTVAL (op2)))
4258 return 1;
4259 /* ??? For the kernel, we may accept adjustment of
4260 -0x10000000, since we know that it will just convert
4261 negative address space to positive, but perhaps this
4262 is not worthwhile. */
4263 break;
4264 case LABEL_REF:
4265 /* These conditions are similar to SYMBOL_REF ones, just the
4266 constraints for code models differ. */
4267 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
4268 && GET_CODE (op2) == CONST_INT
4269 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
4270 && (trunc_int_for_mode (INTVAL (op2), SImode)
4271 == INTVAL (op2)))
4272 return 1;
4273 break;
4274 default:
4275 return 0;
4276 }
4277 }
4278 return 0;
4279 default:
4280 return 0;
4281 }
4282 }
4283
4284 /* Value should be nonzero if functions must have frame pointers.
4285 Zero means the frame pointer need not be set up (and parms may
4286 be accessed via the stack pointer) in functions that seem suitable. */
4287
4288 int
4289 ix86_frame_pointer_required ()
4290 {
4291 /* If we accessed previous frames, then the generated code expects
4292 to be able to access the saved ebp value in our frame. */
4293 if (cfun->machine->accesses_prev_frame)
4294 return 1;
4295
4296 /* Several x86 OSes need a frame pointer for other reasons,
4297 usually pertaining to setjmp.  */
4298 if (SUBTARGET_FRAME_POINTER_REQUIRED)
4299 return 1;
4300
4301 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
4302 the frame pointer by default. Turn it back on now if we've not
4303 got a leaf function. */
4304 if (TARGET_OMIT_LEAF_FRAME_POINTER
4305 && (!current_function_is_leaf))
4306 return 1;
4307
4308 if (current_function_profile)
4309 return 1;
4310
4311 return 0;
4312 }
4313
4314 /* Record that the current function accesses previous call frames. */
4315
4316 void
4317 ix86_setup_frame_addresses ()
4318 {
4319 cfun->machine->accesses_prev_frame = 1;
4320 }
4321
4322 #if defined(HAVE_GAS_HIDDEN) && (defined(SUPPORTS_ONE_ONLY) && SUPPORTS_ONE_ONLY)
4323 # define USE_HIDDEN_LINKONCE 1
4324 #else
4325 # define USE_HIDDEN_LINKONCE 0
4326 #endif
4327
4328 static int pic_labels_used;
4329
4330 /* Fills in the label name that should be used for a pc thunk for
4331 the given register. */
4332
4333 static void
4334 get_pc_thunk_name (name, regno)
4335 char name[32];
4336 unsigned int regno;
4337 {
4338 if (USE_HIDDEN_LINKONCE)
4339 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
4340 else
4341 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
4342 }
4343
4344
4345 /* This function generates code for -fpic that loads %ebx with
4346 the return address of the caller and then returns. */
4347
4348 void
4349 ix86_asm_file_end (file)
4350 FILE *file;
4351 {
4352 rtx xops[2];
4353 int regno;
4354
4355 for (regno = 0; regno < 8; ++regno)
4356 {
4357 char name[32];
4358
4359 if (! ((pic_labels_used >> regno) & 1))
4360 continue;
4361
4362 get_pc_thunk_name (name, regno);
4363
4364 if (USE_HIDDEN_LINKONCE)
4365 {
4366 tree decl;
4367
4368 decl = build_decl (FUNCTION_DECL, get_identifier (name),
4369 error_mark_node);
4370 TREE_PUBLIC (decl) = 1;
4371 TREE_STATIC (decl) = 1;
4372 DECL_ONE_ONLY (decl) = 1;
4373
4374 (*targetm.asm_out.unique_section) (decl, 0);
4375 named_section (decl, NULL, 0);
4376
4377 (*targetm.asm_out.globalize_label) (file, name);
4378 fputs ("\t.hidden\t", file);
4379 assemble_name (file, name);
4380 fputc ('\n', file);
4381 ASM_DECLARE_FUNCTION_NAME (file, name, decl);
4382 }
4383 else
4384 {
4385 text_section ();
4386 ASM_OUTPUT_LABEL (file, name);
4387 }
4388
4389 xops[0] = gen_rtx_REG (SImode, regno);
4390 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
4391 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
4392 output_asm_insn ("ret", xops);
4393 }
4394
4395 #ifdef SUBTARGET_FILE_END
4396 SUBTARGET_FILE_END (file);
4397 #endif
4398 }
4399
4400 /* Emit code for the SET_GOT patterns. */
4401
4402 const char *
4403 output_set_got (dest)
4404 rtx dest;
4405 {
4406 rtx xops[3];
4407
4408 xops[0] = dest;
4409 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
4410
4411 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
4412 {
4413 xops[2] = gen_rtx_LABEL_REF (Pmode, gen_label_rtx ());
4414
4415 if (!flag_pic)
4416 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
4417 else
4418 output_asm_insn ("call\t%a2", xops);
4419
4420 #if TARGET_MACHO
4421 /* Output the "canonical" label name ("Lxx$pb") here too. This
4422 is what will be referred to by the Mach-O PIC subsystem. */
4423 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
4424 #endif
4425 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L",
4426 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
4427
4428 if (flag_pic)
4429 output_asm_insn ("pop{l}\t%0", xops);
4430 }
4431 else
4432 {
4433 char name[32];
4434 get_pc_thunk_name (name, REGNO (dest));
4435 pic_labels_used |= 1 << REGNO (dest);
4436
4437 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
4438 xops[2] = gen_rtx_MEM (QImode, xops[2]);
4439 output_asm_insn ("call\t%X2", xops);
4440 }
4441
4442 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
4443 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
4444 else if (!TARGET_MACHO)
4445 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %a1+(.-%a2)}", xops);
4446
4447 return "";
4448 }
4449
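/* A sketch of the code emitted by output_set_got above for 32-bit PIC,
   with %ebx as the destination (label and thunk names are illustrative):

     !TARGET_DEEP_BRANCH_PREDICTION:
        call    .L2
     .L2:
        popl    %ebx
        addl    $_GLOBAL_OFFSET_TABLE_+[.-.L2], %ebx

     TARGET_DEEP_BRANCH_PREDICTION:
        call    __i686.get_pc_thunk.bx
        addl    $_GLOBAL_OFFSET_TABLE_, %ebx

   where the thunk, emitted by ix86_asm_file_end above, is simply
   "movl (%esp), %ebx; ret".  */
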
4450 /* Generate a "push" pattern for input ARG.  */
4451
4452 static rtx
4453 gen_push (arg)
4454 rtx arg;
4455 {
4456 return gen_rtx_SET (VOIDmode,
4457 gen_rtx_MEM (Pmode,
4458 gen_rtx_PRE_DEC (Pmode,
4459 stack_pointer_rtx)),
4460 arg);
4461 }
4462
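/* On the 32-bit target (Pmode == SImode) the helper above produces RTL of
   the form

     (set (mem:SI (pre_dec:SI (reg:SI esp)))
          (reg:SI <arg>))

   which matches the push insn patterns and assembles to a single pushl.  */
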
4463 /* Return >= 0 if there is an unused call-clobbered register available
4464 for the entire function. */
4465
4466 static unsigned int
4467 ix86_select_alt_pic_regnum ()
4468 {
4469 if (current_function_is_leaf && !current_function_profile)
4470 {
4471 int i;
4472 for (i = 2; i >= 0; --i)
4473 if (!regs_ever_live[i])
4474 return i;
4475 }
4476
4477 return INVALID_REGNUM;
4478 }
4479
4480 /* Return 1 if we need to save REGNO. */
4481 static int
4482 ix86_save_reg (regno, maybe_eh_return)
4483 unsigned int regno;
4484 int maybe_eh_return;
4485 {
4486 if (pic_offset_table_rtx
4487 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
4488 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
4489 || current_function_profile
4490 || current_function_calls_eh_return
4491 || current_function_uses_const_pool))
4492 {
4493 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
4494 return 0;
4495 return 1;
4496 }
4497
4498 if (current_function_calls_eh_return && maybe_eh_return)
4499 {
4500 unsigned i;
4501 for (i = 0; ; i++)
4502 {
4503 unsigned test = EH_RETURN_DATA_REGNO (i);
4504 if (test == INVALID_REGNUM)
4505 break;
4506 if (test == regno)
4507 return 1;
4508 }
4509 }
4510
4511 return (regs_ever_live[regno]
4512 && !call_used_regs[regno]
4513 && !fixed_regs[regno]
4514 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
4515 }
4516
4517 /* Return number of registers to be saved on the stack. */
4518
4519 static int
4520 ix86_nsaved_regs ()
4521 {
4522 int nregs = 0;
4523 int regno;
4524
4525 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4526 if (ix86_save_reg (regno, true))
4527 nregs++;
4528 return nregs;
4529 }
4530
4531 /* Return the offset between two registers, one to be eliminated, and the other
4532 its replacement, at the start of a routine. */
4533
4534 HOST_WIDE_INT
4535 ix86_initial_elimination_offset (from, to)
4536 int from;
4537 int to;
4538 {
4539 struct ix86_frame frame;
4540 ix86_compute_frame_layout (&frame);
4541
4542 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4543 return frame.hard_frame_pointer_offset;
4544 else if (from == FRAME_POINTER_REGNUM
4545 && to == HARD_FRAME_POINTER_REGNUM)
4546 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
4547 else
4548 {
4549 if (to != STACK_POINTER_REGNUM)
4550 abort ();
4551 else if (from == ARG_POINTER_REGNUM)
4552 return frame.stack_pointer_offset;
4553 else if (from != FRAME_POINTER_REGNUM)
4554 abort ();
4555 else
4556 return frame.stack_pointer_offset - frame.frame_pointer_offset;
4557 }
4558 }
4559
4560 /* Fill the ix86_frame structure describing the frame of the current function.  */
4561
4562 static void
4563 ix86_compute_frame_layout (frame)
4564 struct ix86_frame *frame;
4565 {
4566 HOST_WIDE_INT total_size;
4567 int stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
4568 int offset;
4569 int preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
4570 HOST_WIDE_INT size = get_frame_size ();
4571
4572 frame->local_size = size;
4573 frame->nregs = ix86_nsaved_regs ();
4574 total_size = size;
4575
4576 /* Skip return address and saved base pointer. */
4577 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
4578
4579 frame->hard_frame_pointer_offset = offset;
4580
4581 /* Do some sanity checking of stack_alignment_needed and
4582 preferred_alignment, since the i386 port is the only one using these
4583 features, and they may break easily.  */
4584
4585 if (size && !stack_alignment_needed)
4586 abort ();
4587 if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
4588 abort ();
4589 if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
4590 abort ();
4591 if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
4592 abort ();
4593
4594 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
4595 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
4596
4597 /* Register save area */
4598 offset += frame->nregs * UNITS_PER_WORD;
4599
4600 /* Va-arg area */
4601 if (ix86_save_varrargs_registers)
4602 {
4603 offset += X86_64_VARARGS_SIZE;
4604 frame->va_arg_size = X86_64_VARARGS_SIZE;
4605 }
4606 else
4607 frame->va_arg_size = 0;
4608
4609 /* Align start of frame for local function. */
4610 frame->padding1 = ((offset + stack_alignment_needed - 1)
4611 & -stack_alignment_needed) - offset;
4612
4613 offset += frame->padding1;
4614
4615 /* Frame pointer points here. */
4616 frame->frame_pointer_offset = offset;
4617
4618 offset += size;
4619
4620 /* Add the outgoing arguments area.  This can be skipped if we eliminated
4621 all the function calls as dead code.
4622 Skipping is, however, impossible when the function calls alloca: the
4623 alloca expander assumes that the last current_function_outgoing_args_size
4624 bytes of the stack frame are unused.  */
4625 if (ACCUMULATE_OUTGOING_ARGS
4626 && (!current_function_is_leaf || current_function_calls_alloca))
4627 {
4628 offset += current_function_outgoing_args_size;
4629 frame->outgoing_arguments_size = current_function_outgoing_args_size;
4630 }
4631 else
4632 frame->outgoing_arguments_size = 0;
4633
4634 /* Align stack boundary. Only needed if we're calling another function
4635 or using alloca. */
4636 if (!current_function_is_leaf || current_function_calls_alloca)
4637 frame->padding2 = ((offset + preferred_alignment - 1)
4638 & -preferred_alignment) - offset;
4639 else
4640 frame->padding2 = 0;
4641
4642 offset += frame->padding2;
4643
4644 /* We've reached end of stack frame. */
4645 frame->stack_pointer_offset = offset;
4646
4647 /* Size prologue needs to allocate. */
4648 frame->to_allocate =
4649 (size + frame->padding1 + frame->padding2
4650 + frame->outgoing_arguments_size + frame->va_arg_size);
4651
4652 if (TARGET_64BIT && TARGET_RED_ZONE && current_function_sp_is_unchanging
4653 && current_function_is_leaf)
4654 {
4655 frame->red_zone_size = frame->to_allocate;
4656 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
4657 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
4658 }
4659 else
4660 frame->red_zone_size = 0;
4661 frame->to_allocate -= frame->red_zone_size;
4662 frame->stack_pointer_offset -= frame->red_zone_size;
4663 #if 0
4664 fprintf (stderr, "nregs: %i\n", frame->nregs);
4665 fprintf (stderr, "size: %i\n", size);
4666 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
4667 fprintf (stderr, "padding1: %i\n", frame->padding1);
4668 fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
4669 fprintf (stderr, "padding2: %i\n", frame->padding2);
4670 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
4671 fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
4672 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
4673 fprintf (stderr, "hard_frame_pointer_offset: %i\n",
4674 frame->hard_frame_pointer_offset);
4675 fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
4676 #endif
4677 }
4678
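/* A rough sketch of the layout computed above, from higher towards lower
   addresses; the named offsets are distances measured down from the
   argument pointer to the respective boundary:

        incoming arguments
        return address
        saved %ebp (if frame_pointer_needed)       <- hard_frame_pointer_offset
        saved registers (nregs words)
        va-arg register save area (64-bit varargs only)
        padding1 (align the locals)                <- frame_pointer_offset
        local variables (get_frame_size ())
        outgoing arguments (if ACCUMULATE_OUTGOING_ARGS)
        padding2 (align the call boundary)         <- stack_pointer_offset

   For 64-bit leaf functions the bottom of this area may be left in the red
   zone, in which case to_allocate and stack_pointer_offset shrink
   accordingly.  */
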
4679 /* Emit code to save registers in the prologue. */
4680
4681 static void
4682 ix86_emit_save_regs ()
4683 {
4684 register int regno;
4685 rtx insn;
4686
4687 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4688 if (ix86_save_reg (regno, true))
4689 {
4690 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
4691 RTX_FRAME_RELATED_P (insn) = 1;
4692 }
4693 }
4694
4695 /* Emit code to save registers using MOV insns.  The first register
4696 is saved at POINTER + OFFSET.  */
4697 static void
4698 ix86_emit_save_regs_using_mov (pointer, offset)
4699 rtx pointer;
4700 HOST_WIDE_INT offset;
4701 {
4702 int regno;
4703 rtx insn;
4704
4705 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4706 if (ix86_save_reg (regno, true))
4707 {
4708 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
4709 Pmode, offset),
4710 gen_rtx_REG (Pmode, regno));
4711 RTX_FRAME_RELATED_P (insn) = 1;
4712 offset += UNITS_PER_WORD;
4713 }
4714 }
4715
4716 /* Expand the prologue into a bunch of separate insns. */
4717
4718 void
4719 ix86_expand_prologue ()
4720 {
4721 rtx insn;
4722 bool pic_reg_used;
4723 struct ix86_frame frame;
4724 int use_mov = 0;
4725 HOST_WIDE_INT allocate;
4726
4727 if (!optimize_size)
4728 {
4729 use_fast_prologue_epilogue
4730 = !expensive_function_p (FAST_PROLOGUE_INSN_COUNT);
4731 if (TARGET_PROLOGUE_USING_MOVE)
4732 use_mov = use_fast_prologue_epilogue;
4733 }
4734 ix86_compute_frame_layout (&frame);
4735
4736 if (warn_stack_larger_than && frame.local_size > stack_larger_than_size)
4737 warning ("stack usage is %d bytes", frame.local_size);
4738
4739 /* Note: AT&T enter does NOT have reversed args. Enter is probably
4740 slower on all targets. Also sdb doesn't like it. */
4741
4742 if (frame_pointer_needed)
4743 {
4744 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
4745 RTX_FRAME_RELATED_P (insn) = 1;
4746
4747 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
4748 RTX_FRAME_RELATED_P (insn) = 1;
4749 }
4750
4751 allocate = frame.to_allocate;
4752 /* In the case of a single register and an empty frame, a push is
4753 equivalent to the mov+add sequence.  */
4754 if (allocate == 0 && frame.nregs <= 1)
4755 use_mov = 0;
4756
4757 if (!use_mov)
4758 ix86_emit_save_regs ();
4759 else
4760 allocate += frame.nregs * UNITS_PER_WORD;
4761
4762 if (allocate == 0)
4763 ;
4764 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
4765 {
4766 insn = emit_insn (gen_pro_epilogue_adjust_stack
4767 (stack_pointer_rtx, stack_pointer_rtx,
4768 GEN_INT (-allocate)));
4769 RTX_FRAME_RELATED_P (insn) = 1;
4770 }
4771 else
4772 {
4773 /* ??? Is this only valid for Win32? */
4774
4775 rtx arg0, sym;
4776
4777 if (TARGET_64BIT)
4778 abort ();
4779
4780 arg0 = gen_rtx_REG (SImode, 0);
4781 emit_move_insn (arg0, GEN_INT (allocate));
4782
4783 sym = gen_rtx_MEM (FUNCTION_MODE,
4784 gen_rtx_SYMBOL_REF (Pmode, "_alloca"));
4785 insn = emit_call_insn (gen_call (sym, const0_rtx, constm1_rtx));
4786
4787 CALL_INSN_FUNCTION_USAGE (insn)
4788 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_USE (VOIDmode, arg0),
4789 CALL_INSN_FUNCTION_USAGE (insn));
4790
4791 /* Don't allow scheduling pass to move insns across __alloca
4792 call. */
4793 emit_insn (gen_blockage (const0_rtx));
4794 }
4795 if (use_mov)
4796 {
4797 if (!frame_pointer_needed || !frame.to_allocate)
4798 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
4799 else
4800 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
4801 -frame.nregs * UNITS_PER_WORD);
4802 }
4803
4804 #ifdef SUBTARGET_PROLOGUE
4805 SUBTARGET_PROLOGUE;
4806 #endif
4807
4808 pic_reg_used = false;
4809 if (pic_offset_table_rtx
4810 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
4811 || current_function_profile))
4812 {
4813 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
4814
4815 if (alt_pic_reg_used != INVALID_REGNUM)
4816 REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
4817
4818 pic_reg_used = true;
4819 }
4820
4821 if (pic_reg_used)
4822 {
4823 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
4824
4825 /* Even with accurate pre-reload life analysis, we can wind up
4826 deleting all references to the pic register after reload.
4827 Consider if cross-jumping unifies two sides of a branch
4828 controlled by a comparison vs the only read from a global.
4829 In that case, allow the set_got to be deleted, though we're
4830 too late to do anything about the ebx save in the prologue. */
4831 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
4832 }
4833
4834 /* Prevent function calls from being scheduled before the call to mcount.
4835 In the pic_reg_used case, make sure that the got load isn't deleted.  */
4836 if (current_function_profile)
4837 emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
4838 }
4839
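/* For a typical 32-bit function with a frame pointer and without the mov
   strategy, the insns emitted above assemble to something like (register
   choice and sizes depend on the frame layout computed earlier):

        pushl   %ebp
        movl    %esp, %ebp
        pushl   %ebx                    (callee-saved registers, if any)
        pushl   %esi
        subl    $<to_allocate>, %esp    (omitted when to_allocate == 0)  */
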
4840 /* Emit code to restore saved registers using MOV insns. First register
4841 is restored from POINTER + OFFSET. */
4842 static void
4843 ix86_emit_restore_regs_using_mov (pointer, offset, maybe_eh_return)
4844 rtx pointer;
4845 int offset;
4846 int maybe_eh_return;
4847 {
4848 int regno;
4849
4850 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4851 if (ix86_save_reg (regno, maybe_eh_return))
4852 {
4853 emit_move_insn (gen_rtx_REG (Pmode, regno),
4854 adjust_address (gen_rtx_MEM (Pmode, pointer),
4855 Pmode, offset));
4856 offset += UNITS_PER_WORD;
4857 }
4858 }
4859
4860 /* Restore function stack, frame, and registers. */
4861
4862 void
4863 ix86_expand_epilogue (style)
4864 int style;
4865 {
4866 int regno;
4867 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
4868 struct ix86_frame frame;
4869 HOST_WIDE_INT offset;
4870
4871 ix86_compute_frame_layout (&frame);
4872
4873 /* Calculate start of saved registers relative to ebp. Special care
4874 must be taken for the normal return case of a function using
4875 eh_return: the eax and edx registers are marked as saved, but not
4876 restored along this path. */
4877 offset = frame.nregs;
4878 if (current_function_calls_eh_return && style != 2)
4879 offset -= 2;
4880 offset *= -UNITS_PER_WORD;
4881
4882 /* If we're only restoring one register and sp is not valid, then use a
4883 move instruction to restore the register, since it's less work than
4884 reloading sp and popping the register.
4885
4886 The default code results in a stack adjustment using an add/lea
4887 instruction, while this code results in a LEAVE instruction (or its
4888 discrete equivalent), so it is profitable in some other cases as
4889 well, especially when there are no registers to restore.  We also use
4890 this code when TARGET_USE_LEAVE is set and there is exactly one
4891 register to pop.  This heuristic may need some tuning in the future.  */
4892 if ((!sp_valid && frame.nregs <= 1)
4893 || (TARGET_EPILOGUE_USING_MOVE
4894 && use_fast_prologue_epilogue
4895 && (frame.nregs > 1 || frame.to_allocate))
4896 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
4897 || (frame_pointer_needed && TARGET_USE_LEAVE
4898 && use_fast_prologue_epilogue && frame.nregs == 1)
4899 || current_function_calls_eh_return)
4900 {
4901 /* Restore registers. We can use ebp or esp to address the memory
4902 locations. If both are available, default to ebp, since offsets
4903 are known to be small.  The only exception is esp pointing directly to
4904 the end of the block of saved registers, where we may simplify the
4905 addressing mode.  */
4906
4907 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
4908 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
4909 frame.to_allocate, style == 2);
4910 else
4911 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
4912 offset, style == 2);
4913
4914 /* eh_return epilogues need %ecx added to the stack pointer. */
4915 if (style == 2)
4916 {
4917 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
4918
4919 if (frame_pointer_needed)
4920 {
4921 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
4922 tmp = plus_constant (tmp, UNITS_PER_WORD);
4923 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
4924
4925 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
4926 emit_move_insn (hard_frame_pointer_rtx, tmp);
4927
4928 emit_insn (gen_pro_epilogue_adjust_stack
4929 (stack_pointer_rtx, sa, const0_rtx));
4930 }
4931 else
4932 {
4933 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
4934 tmp = plus_constant (tmp, (frame.to_allocate
4935 + frame.nregs * UNITS_PER_WORD));
4936 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
4937 }
4938 }
4939 else if (!frame_pointer_needed)
4940 emit_insn (gen_pro_epilogue_adjust_stack
4941 (stack_pointer_rtx, stack_pointer_rtx,
4942 GEN_INT (frame.to_allocate
4943 + frame.nregs * UNITS_PER_WORD)));
4944 /* If not an i386, mov & pop is faster than "leave". */
4945 else if (TARGET_USE_LEAVE || optimize_size || !use_fast_prologue_epilogue)
4946 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
4947 else
4948 {
4949 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
4950 hard_frame_pointer_rtx,
4951 const0_rtx));
4952 if (TARGET_64BIT)
4953 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
4954 else
4955 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
4956 }
4957 }
4958 else
4959 {
4960 /* First step is to deallocate the stack frame so that we can
4961 pop the registers. */
4962 if (!sp_valid)
4963 {
4964 if (!frame_pointer_needed)
4965 abort ();
4966 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
4967 hard_frame_pointer_rtx,
4968 GEN_INT (offset)));
4969 }
4970 else if (frame.to_allocate)
4971 emit_insn (gen_pro_epilogue_adjust_stack
4972 (stack_pointer_rtx, stack_pointer_rtx,
4973 GEN_INT (frame.to_allocate)));
4974
4975 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4976 if (ix86_save_reg (regno, false))
4977 {
4978 if (TARGET_64BIT)
4979 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
4980 else
4981 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
4982 }
4983 if (frame_pointer_needed)
4984 {
4985 /* Leave results in shorter dependency chains on CPUs that are
4986 able to grok it fast. */
4987 if (TARGET_USE_LEAVE)
4988 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
4989 else if (TARGET_64BIT)
4990 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
4991 else
4992 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
4993 }
4994 }
4995
4996 /* Sibcall epilogues don't want a return instruction. */
4997 if (style == 0)
4998 return;
4999
5000 if (current_function_pops_args && current_function_args_size)
5001 {
5002 rtx popc = GEN_INT (current_function_pops_args);
5003
5004 /* i386 can only pop 64K bytes. If asked to pop more, pop
5005 return address, do explicit add, and jump indirectly to the
5006 caller. */
5007
5008 if (current_function_pops_args >= 65536)
5009 {
5010 rtx ecx = gen_rtx_REG (SImode, 2);
5011
5012 /* There is no "pascal" calling convention in the 64-bit ABI.  */
5013 if (TARGET_64BIT)
5014 abort ();
5015
5016 emit_insn (gen_popsi1 (ecx));
5017 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
5018 emit_jump_insn (gen_return_indirect_internal (ecx));
5019 }
5020 else
5021 emit_jump_insn (gen_return_pop_internal (popc));
5022 }
5023 else
5024 emit_jump_insn (gen_return_internal ());
5025 }
5026
5027 /* Reset from the function's potential modifications. */
5028
5029 static void
5030 ix86_output_function_epilogue (file, size)
5031 FILE *file ATTRIBUTE_UNUSED;
5032 HOST_WIDE_INT size ATTRIBUTE_UNUSED;
5033 {
5034 if (pic_offset_table_rtx)
5035 REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
5036 }
5037
5038 /* Extract the parts of an RTL expression that is a valid memory address
5039 for an instruction. Return 0 if the structure of the address is
5040 grossly off. Return -1 if the address contains ASHIFT, so it is not
5041 strictly valid, but is still used for computing the length of a lea instruction.
5042 */
5043
5044 static int
5045 ix86_decompose_address (addr, out)
5046 register rtx addr;
5047 struct ix86_address *out;
5048 {
5049 rtx base = NULL_RTX;
5050 rtx index = NULL_RTX;
5051 rtx disp = NULL_RTX;
5052 HOST_WIDE_INT scale = 1;
5053 rtx scale_rtx = NULL_RTX;
5054 int retval = 1;
5055
5056 if (GET_CODE (addr) == REG || GET_CODE (addr) == SUBREG)
5057 base = addr;
5058 else if (GET_CODE (addr) == PLUS)
5059 {
5060 rtx op0 = XEXP (addr, 0);
5061 rtx op1 = XEXP (addr, 1);
5062 enum rtx_code code0 = GET_CODE (op0);
5063 enum rtx_code code1 = GET_CODE (op1);
5064
5065 if (code0 == REG || code0 == SUBREG)
5066 {
5067 if (code1 == REG || code1 == SUBREG)
5068 index = op0, base = op1; /* index + base */
5069 else
5070 base = op0, disp = op1; /* base + displacement */
5071 }
5072 else if (code0 == MULT)
5073 {
5074 index = XEXP (op0, 0);
5075 scale_rtx = XEXP (op0, 1);
5076 if (code1 == REG || code1 == SUBREG)
5077 base = op1; /* index*scale + base */
5078 else
5079 disp = op1; /* index*scale + disp */
5080 }
5081 else if (code0 == PLUS && GET_CODE (XEXP (op0, 0)) == MULT)
5082 {
5083 index = XEXP (XEXP (op0, 0), 0); /* index*scale + base + disp */
5084 scale_rtx = XEXP (XEXP (op0, 0), 1);
5085 base = XEXP (op0, 1);
5086 disp = op1;
5087 }
5088 else if (code0 == PLUS)
5089 {
5090 index = XEXP (op0, 0); /* index + base + disp */
5091 base = XEXP (op0, 1);
5092 disp = op1;
5093 }
5094 else
5095 return 0;
5096 }
5097 else if (GET_CODE (addr) == MULT)
5098 {
5099 index = XEXP (addr, 0); /* index*scale */
5100 scale_rtx = XEXP (addr, 1);
5101 }
5102 else if (GET_CODE (addr) == ASHIFT)
5103 {
5104 rtx tmp;
5105
5106 /* We're called for lea too, which implements ashift on occasion. */
5107 index = XEXP (addr, 0);
5108 tmp = XEXP (addr, 1);
5109 if (GET_CODE (tmp) != CONST_INT)
5110 return 0;
5111 scale = INTVAL (tmp);
5112 if ((unsigned HOST_WIDE_INT) scale > 3)
5113 return 0;
5114 scale = 1 << scale;
5115 retval = -1;
5116 }
5117 else
5118 disp = addr; /* displacement */
5119
5120 /* Extract the integral value of scale. */
5121 if (scale_rtx)
5122 {
5123 if (GET_CODE (scale_rtx) != CONST_INT)
5124 return 0;
5125 scale = INTVAL (scale_rtx);
5126 }
5127
5128 /* Allow the arg pointer and stack pointer as an index if there is no scaling.  */
5129 if (base && index && scale == 1
5130 && (index == arg_pointer_rtx || index == frame_pointer_rtx
5131 || index == stack_pointer_rtx))
5132 {
5133 rtx tmp = base;
5134 base = index;
5135 index = tmp;
5136 }
5137
5138 /* Special case: %ebp cannot be encoded as a base without a displacement. */
5139 if ((base == hard_frame_pointer_rtx
5140 || base == frame_pointer_rtx
5141 || base == arg_pointer_rtx) && !disp)
5142 disp = const0_rtx;
5143
5144 /* Special case: on K6, [%esi] makes the instruction vector decoded.
5145 Avoid this by transforming to [%esi+0]. */
5146 if (ix86_cpu == PROCESSOR_K6 && !optimize_size
5147 && base && !index && !disp
5148 && REG_P (base)
5149 && REGNO_REG_CLASS (REGNO (base)) == SIREG)
5150 disp = const0_rtx;
5151
5152 /* Special case: encode reg+reg instead of reg*2. */
5153 if (!base && index && scale && scale == 2)
5154 base = index, scale = 1;
5155
5156 /* Special case: scaling cannot be encoded without base or displacement. */
5157 if (!base && !disp && index && scale != 1)
5158 disp = const0_rtx;
5159
5160 out->base = base;
5161 out->index = index;
5162 out->disp = disp;
5163 out->scale = scale;
5164
5165 return retval;
5166 }
5167
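/* A disabled sketch of how a scaled-index address decomposes.  The hard
   register numbers (2 = %ecx, 3 = %ebx on IA-32) and the constants are just
   examples; any base/index register pair would do.  */
#if 0
static void
example_decompose_address ()
{
  struct ix86_address parts;
  rtx index = gen_rtx_REG (SImode, 2);
  rtx base = gen_rtx_REG (SImode, 3);
  rtx addr
    = gen_rtx_PLUS (SImode,
		    gen_rtx_PLUS (SImode,
				  gen_rtx_MULT (SImode, index, GEN_INT (4)),
				  base),
		    GEN_INT (12));

  if (ix86_decompose_address (addr, &parts) != 1)
    abort ();
  /* Now parts.base == base, parts.index == index, parts.scale == 4 and
     parts.disp is (const_int 12), i.e. the operand "12(%ebx,%ecx,4)".  */
}
#endif
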
5168 /* Return the cost of the memory address X.
5169 For the i386, it is better to use a complex address than to let gcc
5170 copy the address into a register and make a new pseudo.  But not if
5171 the address requires two registers - that would mean more pseudos
5172 with longer lifetimes.  */
5173 int
5174 ix86_address_cost (x)
5175 rtx x;
5176 {
5177 struct ix86_address parts;
5178 int cost = 1;
5179
5180 if (!ix86_decompose_address (x, &parts))
5181 abort ();
5182
5183 /* More complex memory references are better. */
5184 if (parts.disp && parts.disp != const0_rtx)
5185 cost--;
5186
5187 /* Attempt to minimize number of registers in the address. */
5188 if ((parts.base
5189 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
5190 || (parts.index
5191 && (!REG_P (parts.index)
5192 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
5193 cost++;
5194
5195 if (parts.base
5196 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
5197 && parts.index
5198 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
5199 && parts.base != parts.index)
5200 cost++;
5201
5202 /* The AMD K6 doesn't like addresses with the ModR/M byte set to
5203 00_xxx_100b, since its predecode logic can't detect the length of such
5204 instructions and decoding degenerates to the vector decoder.  Increase
5205 the cost of such addresses here.  The penalty is at least 2 cycles.  It
5206 may be worthwhile to split such addresses or even refuse them altogether.
5207
5208 Following addressing modes are affected:
5209 [base+scale*index]
5210 [scale*index+disp]
5211 [base+index]
5212
5213 The first and last cases may be avoidable by explicitly coding the zero in
5214 the memory address, but I don't have an AMD K6 machine handy to check
5215 this theory.  */
5216
5217 if (TARGET_K6
5218 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
5219 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
5220 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
5221 cost += 10;
5222
5223 return cost;
5224 }
5225
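/* Worked examples of the cost computed above ("pseudo" meaning a register
   with REGNO >= FIRST_PSEUDO_REGISTER):

     (mem:SI (plus (reg %ebp) (const_int 8)))   cost 0: base cost of 1, minus 1
                                                for the nonzero displacement
     (mem:SI (reg pseudo))                      cost 2: plus 1 because the base
                                                is still a pseudo
     (mem:SI (plus (reg pseudo1)
                   (reg pseudo2)))              cost 3: plus 1 more because base
                                                and index are distinct pseudos

   The K6-specific penalty of 10 comes on top of these.  */
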
5226 /* If X is a machine specific address (i.e. a symbol or label being
5227 referenced as a displacement from the GOT implemented using an
5228 UNSPEC), then return the base term. Otherwise return X. */
5229
5230 rtx
5231 ix86_find_base_term (x)
5232 rtx x;
5233 {
5234 rtx term;
5235
5236 if (TARGET_64BIT)
5237 {
5238 if (GET_CODE (x) != CONST)
5239 return x;
5240 term = XEXP (x, 0);
5241 if (GET_CODE (term) == PLUS
5242 && (GET_CODE (XEXP (term, 1)) == CONST_INT
5243 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
5244 term = XEXP (term, 0);
5245 if (GET_CODE (term) != UNSPEC
5246 || XINT (term, 1) != UNSPEC_GOTPCREL)
5247 return x;
5248
5249 term = XVECEXP (term, 0, 0);
5250
5251 if (GET_CODE (term) != SYMBOL_REF
5252 && GET_CODE (term) != LABEL_REF)
5253 return x;
5254
5255 return term;
5256 }
5257
5258 if (GET_CODE (x) != PLUS
5259 || XEXP (x, 0) != pic_offset_table_rtx
5260 || GET_CODE (XEXP (x, 1)) != CONST)
5261 return x;
5262
5263 term = XEXP (XEXP (x, 1), 0);
5264
5265 if (GET_CODE (term) == PLUS && GET_CODE (XEXP (term, 1)) == CONST_INT)
5266 term = XEXP (term, 0);
5267
5268 if (GET_CODE (term) != UNSPEC
5269 || XINT (term, 1) != UNSPEC_GOTOFF)
5270 return x;
5271
5272 term = XVECEXP (term, 0, 0);
5273
5274 if (GET_CODE (term) != SYMBOL_REF
5275 && GET_CODE (term) != LABEL_REF)
5276 return x;
5277
5278 return term;
5279 }
5280
5281 /* Determine if a given RTX is a valid constant. We already know this
5282 satisfies CONSTANT_P. */
5283
5284 bool
5285 legitimate_constant_p (x)
5286 rtx x;
5287 {
5288 rtx inner;
5289
5290 switch (GET_CODE (x))
5291 {
5292 case SYMBOL_REF:
5293 /* TLS symbols are not constant. */
5294 if (tls_symbolic_operand (x, Pmode))
5295 return false;
5296 break;
5297
5298 case CONST:
5299 inner = XEXP (x, 0);
5300
5301 /* Offsets of TLS symbols are never valid.
5302 Discourage CSE from creating them. */
5303 if (GET_CODE (inner) == PLUS
5304 && tls_symbolic_operand (XEXP (inner, 0), Pmode))
5305 return false;
5306
5307 /* Only some unspecs are valid as "constants". */
5308 if (GET_CODE (inner) == UNSPEC)
5309 switch (XINT (inner, 1))
5310 {
5311 case UNSPEC_TPOFF:
5312 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
5313 default:
5314 return false;
5315 }
5316 break;
5317
5318 default:
5319 break;
5320 }
5321
5322 /* Otherwise we handle everything else in the move patterns. */
5323 return true;
5324 }
5325
5326 /* Determine if it's legal to put X into the constant pool. This
5327 is not possible for the address of thread-local symbols, which
5328 is checked above. */
5329
5330 static bool
5331 ix86_cannot_force_const_mem (x)
5332 rtx x;
5333 {
5334 return !legitimate_constant_p (x);
5335 }
5336
5337 /* Determine if a given RTX is a valid constant address. */
5338
5339 bool
5340 constant_address_p (x)
5341 rtx x;
5342 {
5343 switch (GET_CODE (x))
5344 {
5345 case LABEL_REF:
5346 case CONST_INT:
5347 return true;
5348
5349 case CONST_DOUBLE:
5350 return TARGET_64BIT;
5351
5352 case CONST:
5353 /* For Mach-O, really believe the CONST. */
5354 if (TARGET_MACHO)
5355 return true;
5356 /* Otherwise fall through. */
5357 case SYMBOL_REF:
5358 return !flag_pic && legitimate_constant_p (x);
5359
5360 default:
5361 return false;
5362 }
5363 }
5364
5365 /* Nonzero if the constant value X is a legitimate general operand
5366 when generating PIC code. It is given that flag_pic is on and
5367 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
5368
5369 bool
5370 legitimate_pic_operand_p (x)
5371 rtx x;
5372 {
5373 rtx inner;
5374
5375 switch (GET_CODE (x))
5376 {
5377 case CONST:
5378 inner = XEXP (x, 0);
5379
5380 /* Only some unspecs are valid as "constants". */
5381 if (GET_CODE (inner) == UNSPEC)
5382 switch (XINT (inner, 1))
5383 {
5384 case UNSPEC_TPOFF:
5385 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
5386 default:
5387 return false;
5388 }
5389 /* FALLTHRU */
5390
5391 case SYMBOL_REF:
5392 case LABEL_REF:
5393 return legitimate_pic_address_disp_p (x);
5394
5395 default:
5396 return true;
5397 }
5398 }
5399
5400 /* Determine if a given CONST RTX is a valid memory displacement
5401 in PIC mode. */
5402
5403 int
5404 legitimate_pic_address_disp_p (disp)
5405 register rtx disp;
5406 {
5407 bool saw_plus;
5408
5409 /* In 64bit mode we can allow direct addresses of symbols and labels
5410 when they are not dynamic symbols. */
5411 if (TARGET_64BIT)
5412 {
5413 /* TLS references should always be enclosed in UNSPEC. */
5414 if (tls_symbolic_operand (disp, GET_MODE (disp)))
5415 return 0;
5416 if (GET_CODE (disp) == SYMBOL_REF
5417 && ix86_cmodel == CM_SMALL_PIC
5418 && (CONSTANT_POOL_ADDRESS_P (disp)
5419 || SYMBOL_REF_FLAG (disp)))
5420 return 1;
5421 if (GET_CODE (disp) == LABEL_REF)
5422 return 1;
5423 if (GET_CODE (disp) == CONST
5424 && GET_CODE (XEXP (disp, 0)) == PLUS
5425 && ((GET_CODE (XEXP (XEXP (disp, 0), 0)) == SYMBOL_REF
5426 && ix86_cmodel == CM_SMALL_PIC
5427 && (CONSTANT_POOL_ADDRESS_P (XEXP (XEXP (disp, 0), 0))
5428 || SYMBOL_REF_FLAG (XEXP (XEXP (disp, 0), 0))))
5429 || GET_CODE (XEXP (XEXP (disp, 0), 0)) == LABEL_REF)
5430 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT
5431 && INTVAL (XEXP (XEXP (disp, 0), 1)) < 16*1024*1024
5432 && INTVAL (XEXP (XEXP (disp, 0), 1)) >= -16*1024*1024)
5433 return 1;
5434 }
5435 if (GET_CODE (disp) != CONST)
5436 return 0;
5437 disp = XEXP (disp, 0);
5438
5439 if (TARGET_64BIT)
5440 {
5441 /* We cannot safely allow PLUS expressions. This limits the allowed
5442 distance of GOT table references, but we should not need these anyway. */
5443 if (GET_CODE (disp) != UNSPEC
5444 || XINT (disp, 1) != UNSPEC_GOTPCREL)
5445 return 0;
5446
5447 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
5448 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
5449 return 0;
5450 return 1;
5451 }
5452
5453 saw_plus = false;
5454 if (GET_CODE (disp) == PLUS)
5455 {
5456 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
5457 return 0;
5458 disp = XEXP (disp, 0);
5459 saw_plus = true;
5460 }
5461
5462 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O. */
5463 if (TARGET_MACHO && GET_CODE (disp) == MINUS)
5464 {
5465 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
5466 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
5467 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
5468 {
5469 const char *sym_name = XSTR (XEXP (disp, 1), 0);
5470 if (strstr (sym_name, "$pb") != 0)
5471 return 1;
5472 }
5473 }
5474
5475 if (GET_CODE (disp) != UNSPEC)
5476 return 0;
5477
5478 switch (XINT (disp, 1))
5479 {
5480 case UNSPEC_GOT:
5481 if (saw_plus)
5482 return false;
5483 return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
5484 case UNSPEC_GOTOFF:
5485 return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5486 case UNSPEC_GOTTPOFF:
5487 case UNSPEC_GOTNTPOFF:
5488 case UNSPEC_INDNTPOFF:
5489 if (saw_plus)
5490 return false;
5491 return initial_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5492 case UNSPEC_NTPOFF:
5493 return local_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5494 case UNSPEC_DTPOFF:
5495 return local_dynamic_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5496 }
5497
5498 return 0;
5499 }
5500
5501 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
5502 memory address for an instruction. The MODE argument is the machine mode
5503 for the MEM expression that wants to use this address.
5504
5505 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
5506 convert common non-canonical forms to canonical form so that they will
5507 be recognized. */
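/* As an illustrative sketch (assuming AT&T syntax): an address such as
8(%eax,%ebx,4) decomposes into parts.base = %eax, parts.index = %ebx,
parts.scale = 4 and parts.disp = (const_int 8), corresponding roughly to
the canonical RTL
(plus (plus (mult (reg ebx) (const_int 4)) (reg eax)) (const_int 8)). */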
5508
5509 int
5510 legitimate_address_p (mode, addr, strict)
5511 enum machine_mode mode;
5512 register rtx addr;
5513 int strict;
5514 {
5515 struct ix86_address parts;
5516 rtx base, index, disp;
5517 HOST_WIDE_INT scale;
5518 const char *reason = NULL;
5519 rtx reason_rtx = NULL_RTX;
5520
5521 if (TARGET_DEBUG_ADDR)
5522 {
5523 fprintf (stderr,
5524 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
5525 GET_MODE_NAME (mode), strict);
5526 debug_rtx (addr);
5527 }
5528
5529 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_TP)
5530 {
5531 if (TARGET_DEBUG_ADDR)
5532 fprintf (stderr, "Success.\n");
5533 return TRUE;
5534 }
5535
5536 if (ix86_decompose_address (addr, &parts) <= 0)
5537 {
5538 reason = "decomposition failed";
5539 goto report_error;
5540 }
5541
5542 base = parts.base;
5543 index = parts.index;
5544 disp = parts.disp;
5545 scale = parts.scale;
5546
5547 /* Validate base register.
5548
5549 Don't allow SUBREGs here; they can lead to spill failures when the base
5550 is one word out of a two-word structure, which is represented internally
5551 as a DImode int. */
5552
5553 if (base)
5554 {
5555 reason_rtx = base;
5556
5557 if (GET_CODE (base) != REG)
5558 {
5559 reason = "base is not a register";
5560 goto report_error;
5561 }
5562
5563 if (GET_MODE (base) != Pmode)
5564 {
5565 reason = "base is not in Pmode";
5566 goto report_error;
5567 }
5568
5569 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (base))
5570 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (base)))
5571 {
5572 reason = "base is not valid";
5573 goto report_error;
5574 }
5575 }
5576
5577 /* Validate index register.
5578
5579 Don't allow SUBREGs here; they can lead to spill failures when the index
5580 is one word out of a two-word structure, which is represented internally
5581 as a DImode int. */
5582
5583 if (index)
5584 {
5585 reason_rtx = index;
5586
5587 if (GET_CODE (index) != REG)
5588 {
5589 reason = "index is not a register";
5590 goto report_error;
5591 }
5592
5593 if (GET_MODE (index) != Pmode)
5594 {
5595 reason = "index is not in Pmode";
5596 goto report_error;
5597 }
5598
5599 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (index))
5600 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (index)))
5601 {
5602 reason = "index is not valid";
5603 goto report_error;
5604 }
5605 }
5606
5607 /* Validate scale factor. */
5608 if (scale != 1)
5609 {
5610 reason_rtx = GEN_INT (scale);
5611 if (!index)
5612 {
5613 reason = "scale without index";
5614 goto report_error;
5615 }
5616
5617 if (scale != 2 && scale != 4 && scale != 8)
5618 {
5619 reason = "scale is not a valid multiplier";
5620 goto report_error;
5621 }
5622 }
5623
5624 /* Validate displacement. */
5625 if (disp)
5626 {
5627 reason_rtx = disp;
5628
5629 if (GET_CODE (disp) == CONST
5630 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
5631 switch (XINT (XEXP (disp, 0), 1))
5632 {
5633 case UNSPEC_GOT:
5634 case UNSPEC_GOTOFF:
5635 case UNSPEC_GOTPCREL:
5636 if (!flag_pic)
5637 abort ();
5638 goto is_legitimate_pic;
5639
5640 case UNSPEC_GOTTPOFF:
5641 case UNSPEC_GOTNTPOFF:
5642 case UNSPEC_INDNTPOFF:
5643 case UNSPEC_NTPOFF:
5644 case UNSPEC_DTPOFF:
5645 break;
5646
5647 default:
5648 reason = "invalid address unspec";
5649 goto report_error;
5650 }
5651
5652 else if (flag_pic && (SYMBOLIC_CONST (disp)
5653 #if TARGET_MACHO
5654 && !machopic_operand_p (disp)
5655 #endif
5656 ))
5657 {
5658 is_legitimate_pic:
5659 if (TARGET_64BIT && (index || base))
5660 {
5661 /* foo@dtpoff(%rX) is ok. */
5662 if (GET_CODE (disp) != CONST
5663 || GET_CODE (XEXP (disp, 0)) != PLUS
5664 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
5665 || GET_CODE (XEXP (XEXP (disp, 0), 1)) != CONST_INT
5666 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
5667 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
5668 {
5669 reason = "non-constant pic memory reference";
5670 goto report_error;
5671 }
5672 }
5673 else if (! legitimate_pic_address_disp_p (disp))
5674 {
5675 reason = "displacement is an invalid pic construct";
5676 goto report_error;
5677 }
5678
5679 /* This code used to verify that a symbolic pic displacement
5680 includes the pic_offset_table_rtx register.
5681
5682 While this is a good idea, these constructs may unfortunately
5683 be created by the "adds using lea" optimization for incorrect
5684 code like:
5685
5686 int a;
5687 int foo(int i)
5688 {
5689 return *(&a+i);
5690 }
5691
5692 This code is nonsensical, but results in addressing the
5693 GOT table with a pic_offset_table_rtx base. We can't
5694 easily refuse it, since it gets matched by the
5695 "addsi3" pattern, which later gets split to an lea when the
5696 output register differs from the input. While this
5697 could be handled by a separate addsi pattern for this case
5698 that never results in an lea, disabling this test seems to be
5699 the easier and correct fix for the crash. */
5700 }
5701 else if (!CONSTANT_ADDRESS_P (disp))
5702 {
5703 reason = "displacement is not constant";
5704 goto report_error;
5705 }
5706 else if (TARGET_64BIT && !x86_64_sign_extended_value (disp))
5707 {
5708 reason = "displacement is out of range";
5709 goto report_error;
5710 }
5711 else if (!TARGET_64BIT && GET_CODE (disp) == CONST_DOUBLE)
5712 {
5713 reason = "displacement is a const_double";
5714 goto report_error;
5715 }
5716 }
5717
5718 /* Everything looks valid. */
5719 if (TARGET_DEBUG_ADDR)
5720 fprintf (stderr, "Success.\n");
5721 return TRUE;
5722
5723 report_error:
5724 if (TARGET_DEBUG_ADDR)
5725 {
5726 fprintf (stderr, "Error: %s\n", reason);
5727 debug_rtx (reason_rtx);
5728 }
5729 return FALSE;
5730 }
5731
5732 /* Return a unique alias set for the GOT. */
5733
5734 static HOST_WIDE_INT
5735 ix86_GOT_alias_set ()
5736 {
5737 static HOST_WIDE_INT set = -1;
5738 if (set == -1)
5739 set = new_alias_set ();
5740 return set;
5741 }
5742
5743 /* Return a legitimate reference for ORIG (an address) using the
5744 register REG. If REG is 0, a new pseudo is generated.
5745
5746 There are two types of references that must be handled:
5747
5748 1. Global data references must load the address from the GOT, via
5749 the PIC reg. An insn is emitted to do this load, and the reg is
5750 returned.
5751
5752 2. Static data references, constant pool addresses, and code labels
5753 compute the address as an offset from the GOT, whose base is in
5754 the PIC reg. Static data objects have SYMBOL_REF_FLAG set to
5755 differentiate them from global data objects. The returned
5756 address is the PIC reg + an unspec constant.
5757
5758 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
5759 reg also appears in the address. */
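/* As a rough illustration (i386, AT&T syntax): a global `extern int x'
is reached by loading its address from the GOT,
movl x@GOT(%ebx), %reg
while a static object or a label is addressed as an offset from the
PIC base,
leal x@GOTOFF(%ebx), %reg  */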
5760
5761 rtx
5762 legitimize_pic_address (orig, reg)
5763 rtx orig;
5764 rtx reg;
5765 {
5766 rtx addr = orig;
5767 rtx new = orig;
5768 rtx base;
5769
5770 #if TARGET_MACHO
5771 if (reg == 0)
5772 reg = gen_reg_rtx (Pmode);
5773 /* Use the generic Mach-O PIC machinery. */
5774 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
5775 #endif
5776
5777 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
5778 new = addr;
5779 else if (!TARGET_64BIT && local_symbolic_operand (addr, Pmode))
5780 {
5781 /* This symbol may be referenced via a displacement from the PIC
5782 base address (@GOTOFF). */
5783
5784 if (reload_in_progress)
5785 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5786 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
5787 new = gen_rtx_CONST (Pmode, new);
5788 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5789
5790 if (reg != 0)
5791 {
5792 emit_move_insn (reg, new);
5793 new = reg;
5794 }
5795 }
5796 else if (GET_CODE (addr) == SYMBOL_REF)
5797 {
5798 if (TARGET_64BIT)
5799 {
5800 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
5801 new = gen_rtx_CONST (Pmode, new);
5802 new = gen_rtx_MEM (Pmode, new);
5803 RTX_UNCHANGING_P (new) = 1;
5804 set_mem_alias_set (new, ix86_GOT_alias_set ());
5805
5806 if (reg == 0)
5807 reg = gen_reg_rtx (Pmode);
5808 /* Use gen_movsi directly; otherwise the address is loaded
5809 into a register for CSE. We don't want to CSE these addresses;
5810 instead we CSE the addresses loaded from the GOT table, so skip this. */
5811 emit_insn (gen_movsi (reg, new));
5812 new = reg;
5813 }
5814 else
5815 {
5816 /* This symbol must be referenced via a load from the
5817 Global Offset Table (@GOT). */
5818
5819 if (reload_in_progress)
5820 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5821 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
5822 new = gen_rtx_CONST (Pmode, new);
5823 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5824 new = gen_rtx_MEM (Pmode, new);
5825 RTX_UNCHANGING_P (new) = 1;
5826 set_mem_alias_set (new, ix86_GOT_alias_set ());
5827
5828 if (reg == 0)
5829 reg = gen_reg_rtx (Pmode);
5830 emit_move_insn (reg, new);
5831 new = reg;
5832 }
5833 }
5834 else
5835 {
5836 if (GET_CODE (addr) == CONST)
5837 {
5838 addr = XEXP (addr, 0);
5839
5840 /* We must match stuff we generated before. Assume the only
5841 unspecs that can get here are ours. Not that we could do
5842 anything with them anyway... */
5843 if (GET_CODE (addr) == UNSPEC
5844 || (GET_CODE (addr) == PLUS
5845 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
5846 return orig;
5847 if (GET_CODE (addr) != PLUS)
5848 abort ();
5849 }
5850 if (GET_CODE (addr) == PLUS)
5851 {
5852 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
5853
5854 /* Check first to see if this is a constant offset from a @GOTOFF
5855 symbol reference. */
5856 if (local_symbolic_operand (op0, Pmode)
5857 && GET_CODE (op1) == CONST_INT)
5858 {
5859 if (!TARGET_64BIT)
5860 {
5861 if (reload_in_progress)
5862 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5863 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
5864 UNSPEC_GOTOFF);
5865 new = gen_rtx_PLUS (Pmode, new, op1);
5866 new = gen_rtx_CONST (Pmode, new);
5867 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5868
5869 if (reg != 0)
5870 {
5871 emit_move_insn (reg, new);
5872 new = reg;
5873 }
5874 }
5875 else
5876 {
5877 if (INTVAL (op1) < -16*1024*1024
5878 || INTVAL (op1) >= 16*1024*1024)
5879 new = gen_rtx_PLUS (Pmode, op0, force_reg (Pmode, op1));
5880 }
5881 }
5882 else
5883 {
5884 base = legitimize_pic_address (XEXP (addr, 0), reg);
5885 new = legitimize_pic_address (XEXP (addr, 1),
5886 base == reg ? NULL_RTX : reg);
5887
5888 if (GET_CODE (new) == CONST_INT)
5889 new = plus_constant (base, INTVAL (new));
5890 else
5891 {
5892 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
5893 {
5894 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
5895 new = XEXP (new, 1);
5896 }
5897 new = gen_rtx_PLUS (Pmode, base, new);
5898 }
5899 }
5900 }
5901 }
5902 return new;
5903 }
5904
5905 static void
5906 ix86_encode_section_info (decl, first)
5907 tree decl;
5908 int first ATTRIBUTE_UNUSED;
5909 {
5910 bool local_p = (*targetm.binds_local_p) (decl);
5911 rtx rtl, symbol;
5912
5913 rtl = DECL_P (decl) ? DECL_RTL (decl) : TREE_CST_RTL (decl);
5914 if (GET_CODE (rtl) != MEM)
5915 return;
5916 symbol = XEXP (rtl, 0);
5917 if (GET_CODE (symbol) != SYMBOL_REF)
5918 return;
5919
5920 /* For basic x86, if using PIC, mark a SYMBOL_REF for a non-global
5921 symbol so that we may access it directly in the GOT. */
5922
5923 if (flag_pic)
5924 SYMBOL_REF_FLAG (symbol) = local_p;
5925
5926 /* For ELF, encode thread-local data with %[GLil] for "global dynamic",
5927 "local dynamic", "initial exec" or "local exec" TLS models
5928 respectively. */
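/* For example, assuming the local exec model maps to 'l' in
tls_model_chars, a thread-local symbol "foo" is encoded below as
"%lfoo"; ix86_strip_name_encoding undoes this when the name is
printed. */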
5929
5930 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL (decl))
5931 {
5932 const char *symbol_str;
5933 char *newstr;
5934 size_t len;
5935 enum tls_model kind = decl_tls_model (decl);
5936
5937 if (TARGET_64BIT && ! flag_pic)
5938 {
5939 /* x86-64 doesn't allow non-pic code for shared libraries,
5940 so don't generate GD/LD TLS models for non-pic code. */
5941 switch (kind)
5942 {
5943 case TLS_MODEL_GLOBAL_DYNAMIC:
5944 kind = TLS_MODEL_INITIAL_EXEC; break;
5945 case TLS_MODEL_LOCAL_DYNAMIC:
5946 kind = TLS_MODEL_LOCAL_EXEC; break;
5947 default:
5948 break;
5949 }
5950 }
5951
5952 symbol_str = XSTR (symbol, 0);
5953
5954 if (symbol_str[0] == '%')
5955 {
5956 if (symbol_str[1] == tls_model_chars[kind])
5957 return;
5958 symbol_str += 2;
5959 }
5960 len = strlen (symbol_str) + 1;
5961 newstr = alloca (len + 2);
5962
5963 newstr[0] = '%';
5964 newstr[1] = tls_model_chars[kind];
5965 memcpy (newstr + 2, symbol_str, len);
5966
5967 XSTR (symbol, 0) = ggc_alloc_string (newstr, len + 2 - 1);
5968 }
5969 }
5970
5971 /* Undo the above when printing symbol names. */
5972
5973 static const char *
5974 ix86_strip_name_encoding (str)
5975 const char *str;
5976 {
5977 if (str[0] == '%')
5978 str += 2;
5979 if (str [0] == '*')
5980 str += 1;
5981 return str;
5982 }
5983
5984 /* Load the thread pointer into a register. */
5985
5986 static rtx
5987 get_thread_pointer ()
5988 {
5989 rtx tp;
5990
5991 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
5992 tp = gen_rtx_MEM (Pmode, tp);
5993 RTX_UNCHANGING_P (tp) = 1;
5994 set_mem_alias_set (tp, ix86_GOT_alias_set ());
5995 tp = force_reg (Pmode, tp);
5996
5997 return tp;
5998 }
5999
6000 /* Try machine-dependent ways of modifying an illegitimate address
6001 to be legitimate. If we find one, return the new, valid address.
6002 This macro is used in only one place: `memory_address' in explow.c.
6003
6004 OLDX is the address as it was before break_out_memory_refs was called.
6005 In some cases it is useful to look at this to decide what needs to be done.
6006
6007 MODE and WIN are passed so that this macro can use
6008 GO_IF_LEGITIMATE_ADDRESS.
6009
6010 It is always safe for this macro to do nothing. It exists to recognize
6011 opportunities to optimize the output.
6012
6013 For the 80386, we handle X+REG by loading X into a register R and
6014 using R+REG. R will go in a general reg and indexing will be used.
6015 However, if REG is a broken-out memory address or multiplication,
6016 nothing needs to be done because REG can certainly go in a general reg.
6017
6018 When -fpic is used, special handling is needed for symbolic references.
6019 See comments by legitimize_pic_address in i386.c for details. */
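/* An illustrative sketch of the X+REG case: given an address such as
(plus (reg A) E), where E is some non-register expression, E may be
forced into a fresh pseudo T below so that the final address has the
simple base+index form (plus (reg A) T). */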
6020
6021 rtx
6022 legitimize_address (x, oldx, mode)
6023 register rtx x;
6024 register rtx oldx ATTRIBUTE_UNUSED;
6025 enum machine_mode mode;
6026 {
6027 int changed = 0;
6028 unsigned log;
6029
6030 if (TARGET_DEBUG_ADDR)
6031 {
6032 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
6033 GET_MODE_NAME (mode));
6034 debug_rtx (x);
6035 }
6036
6037 log = tls_symbolic_operand (x, mode);
6038 if (log)
6039 {
6040 rtx dest, base, off, pic;
6041 int type;
6042
6043 switch (log)
6044 {
6045 case TLS_MODEL_GLOBAL_DYNAMIC:
6046 dest = gen_reg_rtx (Pmode);
6047 if (TARGET_64BIT)
6048 {
6049 rtx rax = gen_rtx_REG (Pmode, 0), insns;
6050
6051 start_sequence ();
6052 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
6053 insns = get_insns ();
6054 end_sequence ();
6055
6056 emit_libcall_block (insns, dest, rax, x);
6057 }
6058 else
6059 emit_insn (gen_tls_global_dynamic_32 (dest, x));
6060 break;
6061
6062 case TLS_MODEL_LOCAL_DYNAMIC:
6063 base = gen_reg_rtx (Pmode);
6064 if (TARGET_64BIT)
6065 {
6066 rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
6067
6068 start_sequence ();
6069 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
6070 insns = get_insns ();
6071 end_sequence ();
6072
6073 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
6074 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
6075 emit_libcall_block (insns, base, rax, note);
6076 }
6077 else
6078 emit_insn (gen_tls_local_dynamic_base_32 (base));
6079
6080 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
6081 off = gen_rtx_CONST (Pmode, off);
6082
6083 return gen_rtx_PLUS (Pmode, base, off);
6084
6085 case TLS_MODEL_INITIAL_EXEC:
6086 if (TARGET_64BIT)
6087 {
6088 pic = NULL;
6089 type = UNSPEC_GOTNTPOFF;
6090 }
6091 else if (flag_pic)
6092 {
6093 if (reload_in_progress)
6094 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6095 pic = pic_offset_table_rtx;
6096 type = TARGET_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
6097 }
6098 else if (!TARGET_GNU_TLS)
6099 {
6100 pic = gen_reg_rtx (Pmode);
6101 emit_insn (gen_set_got (pic));
6102 type = UNSPEC_GOTTPOFF;
6103 }
6104 else
6105 {
6106 pic = NULL;
6107 type = UNSPEC_INDNTPOFF;
6108 }
6109
6110 base = get_thread_pointer ();
6111
6112 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
6113 off = gen_rtx_CONST (Pmode, off);
6114 if (pic)
6115 off = gen_rtx_PLUS (Pmode, pic, off);
6116 off = gen_rtx_MEM (Pmode, off);
6117 RTX_UNCHANGING_P (off) = 1;
6118 set_mem_alias_set (off, ix86_GOT_alias_set ());
6119 dest = gen_reg_rtx (Pmode);
6120
6121 if (TARGET_64BIT || TARGET_GNU_TLS)
6122 {
6123 emit_move_insn (dest, off);
6124 return gen_rtx_PLUS (Pmode, base, dest);
6125 }
6126 else
6127 emit_insn (gen_subsi3 (dest, base, off));
6128 break;
6129
6130 case TLS_MODEL_LOCAL_EXEC:
6131 base = get_thread_pointer ();
6132
6133 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
6134 (TARGET_64BIT || TARGET_GNU_TLS)
6135 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
6136 off = gen_rtx_CONST (Pmode, off);
6137
6138 if (TARGET_64BIT || TARGET_GNU_TLS)
6139 return gen_rtx_PLUS (Pmode, base, off);
6140 else
6141 {
6142 dest = gen_reg_rtx (Pmode);
6143 emit_insn (gen_subsi3 (dest, base, off));
6144 }
6145 break;
6146
6147 default:
6148 abort ();
6149 }
6150
6151 return dest;
6152 }
6153
6154 if (flag_pic && SYMBOLIC_CONST (x))
6155 return legitimize_pic_address (x, 0);
6156
6157 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
6158 if (GET_CODE (x) == ASHIFT
6159 && GET_CODE (XEXP (x, 1)) == CONST_INT
6160 && (log = (unsigned) exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
6161 {
6162 changed = 1;
6163 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
6164 GEN_INT (1 << log));
6165 }
6166
6167 if (GET_CODE (x) == PLUS)
6168 {
6169 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
6170
6171 if (GET_CODE (XEXP (x, 0)) == ASHIFT
6172 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
6173 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
6174 {
6175 changed = 1;
6176 XEXP (x, 0) = gen_rtx_MULT (Pmode,
6177 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
6178 GEN_INT (1 << log));
6179 }
6180
6181 if (GET_CODE (XEXP (x, 1)) == ASHIFT
6182 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
6183 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
6184 {
6185 changed = 1;
6186 XEXP (x, 1) = gen_rtx_MULT (Pmode,
6187 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
6188 GEN_INT (1 << log));
6189 }
6190
6191 /* Put multiply first if it isn't already. */
6192 if (GET_CODE (XEXP (x, 1)) == MULT)
6193 {
6194 rtx tmp = XEXP (x, 0);
6195 XEXP (x, 0) = XEXP (x, 1);
6196 XEXP (x, 1) = tmp;
6197 changed = 1;
6198 }
6199
6200 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
6201 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
6202 created by virtual register instantiation, register elimination, and
6203 similar optimizations. */
6204 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
6205 {
6206 changed = 1;
6207 x = gen_rtx_PLUS (Pmode,
6208 gen_rtx_PLUS (Pmode, XEXP (x, 0),
6209 XEXP (XEXP (x, 1), 0)),
6210 XEXP (XEXP (x, 1), 1));
6211 }
6212
6213 /* Canonicalize
6214 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
6215 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
6216 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
6217 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
6218 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
6219 && CONSTANT_P (XEXP (x, 1)))
6220 {
6221 rtx constant;
6222 rtx other = NULL_RTX;
6223
6224 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6225 {
6226 constant = XEXP (x, 1);
6227 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
6228 }
6229 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
6230 {
6231 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
6232 other = XEXP (x, 1);
6233 }
6234 else
6235 constant = 0;
6236
6237 if (constant)
6238 {
6239 changed = 1;
6240 x = gen_rtx_PLUS (Pmode,
6241 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
6242 XEXP (XEXP (XEXP (x, 0), 1), 0)),
6243 plus_constant (other, INTVAL (constant)));
6244 }
6245 }
6246
6247 if (changed && legitimate_address_p (mode, x, FALSE))
6248 return x;
6249
6250 if (GET_CODE (XEXP (x, 0)) == MULT)
6251 {
6252 changed = 1;
6253 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
6254 }
6255
6256 if (GET_CODE (XEXP (x, 1)) == MULT)
6257 {
6258 changed = 1;
6259 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
6260 }
6261
6262 if (changed
6263 && GET_CODE (XEXP (x, 1)) == REG
6264 && GET_CODE (XEXP (x, 0)) == REG)
6265 return x;
6266
6267 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
6268 {
6269 changed = 1;
6270 x = legitimize_pic_address (x, 0);
6271 }
6272
6273 if (changed && legitimate_address_p (mode, x, FALSE))
6274 return x;
6275
6276 if (GET_CODE (XEXP (x, 0)) == REG)
6277 {
6278 register rtx temp = gen_reg_rtx (Pmode);
6279 register rtx val = force_operand (XEXP (x, 1), temp);
6280 if (val != temp)
6281 emit_move_insn (temp, val);
6282
6283 XEXP (x, 1) = temp;
6284 return x;
6285 }
6286
6287 else if (GET_CODE (XEXP (x, 1)) == REG)
6288 {
6289 register rtx temp = gen_reg_rtx (Pmode);
6290 register rtx val = force_operand (XEXP (x, 0), temp);
6291 if (val != temp)
6292 emit_move_insn (temp, val);
6293
6294 XEXP (x, 0) = temp;
6295 return x;
6296 }
6297 }
6298
6299 return x;
6300 }
6301
6302 /* Print an integer constant expression in assembler syntax. Addition
6303 and subtraction are the only arithmetic that may appear in these
6304 expressions. FILE is the stdio stream to write to, X is the rtx, and
6305 CODE is the operand print code from the output string. */
6306
6307 static void
6308 output_pic_addr_const (file, x, code)
6309 FILE *file;
6310 rtx x;
6311 int code;
6312 {
6313 char buf[256];
6314
6315 switch (GET_CODE (x))
6316 {
6317 case PC:
6318 if (flag_pic)
6319 putc ('.', file);
6320 else
6321 abort ();
6322 break;
6323
6324 case SYMBOL_REF:
6325 assemble_name (file, XSTR (x, 0));
6326 if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_FLAG (x))
6327 fputs ("@PLT", file);
6328 break;
6329
6330 case LABEL_REF:
6331 x = XEXP (x, 0);
6332 /* FALLTHRU */
6333 case CODE_LABEL:
6334 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
6335 assemble_name (asm_out_file, buf);
6336 break;
6337
6338 case CONST_INT:
6339 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
6340 break;
6341
6342 case CONST:
6343 /* This used to output parentheses around the expression,
6344 but that does not work on the 386 (either ATT or BSD assembler). */
6345 output_pic_addr_const (file, XEXP (x, 0), code);
6346 break;
6347
6348 case CONST_DOUBLE:
6349 if (GET_MODE (x) == VOIDmode)
6350 {
6351 /* We can use %d if the number is <32 bits and positive. */
6352 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
6353 fprintf (file, "0x%lx%08lx",
6354 (unsigned long) CONST_DOUBLE_HIGH (x),
6355 (unsigned long) CONST_DOUBLE_LOW (x));
6356 else
6357 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
6358 }
6359 else
6360 /* We can't handle floating point constants;
6361 PRINT_OPERAND must handle them. */
6362 output_operand_lossage ("floating constant misused");
6363 break;
6364
6365 case PLUS:
6366 /* Some assemblers need integer constants to appear first. */
6367 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
6368 {
6369 output_pic_addr_const (file, XEXP (x, 0), code);
6370 putc ('+', file);
6371 output_pic_addr_const (file, XEXP (x, 1), code);
6372 }
6373 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6374 {
6375 output_pic_addr_const (file, XEXP (x, 1), code);
6376 putc ('+', file);
6377 output_pic_addr_const (file, XEXP (x, 0), code);
6378 }
6379 else
6380 abort ();
6381 break;
6382
6383 case MINUS:
6384 if (!TARGET_MACHO)
6385 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
6386 output_pic_addr_const (file, XEXP (x, 0), code);
6387 putc ('-', file);
6388 output_pic_addr_const (file, XEXP (x, 1), code);
6389 if (!TARGET_MACHO)
6390 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
6391 break;
6392
6393 case UNSPEC:
6394 if (XVECLEN (x, 0) != 1)
6395 abort ();
6396 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
6397 switch (XINT (x, 1))
6398 {
6399 case UNSPEC_GOT:
6400 fputs ("@GOT", file);
6401 break;
6402 case UNSPEC_GOTOFF:
6403 fputs ("@GOTOFF", file);
6404 break;
6405 case UNSPEC_GOTPCREL:
6406 fputs ("@GOTPCREL(%rip)", file);
6407 break;
6408 case UNSPEC_GOTTPOFF:
6409 /* FIXME: This might be @TPOFF in Sun ld too. */
6410 fputs ("@GOTTPOFF", file);
6411 break;
6412 case UNSPEC_TPOFF:
6413 fputs ("@TPOFF", file);
6414 break;
6415 case UNSPEC_NTPOFF:
6416 if (TARGET_64BIT)
6417 fputs ("@TPOFF", file);
6418 else
6419 fputs ("@NTPOFF", file);
6420 break;
6421 case UNSPEC_DTPOFF:
6422 fputs ("@DTPOFF", file);
6423 break;
6424 case UNSPEC_GOTNTPOFF:
6425 if (TARGET_64BIT)
6426 fputs ("@GOTTPOFF(%rip)", file);
6427 else
6428 fputs ("@GOTNTPOFF", file);
6429 break;
6430 case UNSPEC_INDNTPOFF:
6431 fputs ("@INDNTPOFF", file);
6432 break;
6433 default:
6434 output_operand_lossage ("invalid UNSPEC as operand");
6435 break;
6436 }
6437 break;
6438
6439 default:
6440 output_operand_lossage ("invalid expression as operand");
6441 }
6442 }
6443
6444 /* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
6445 We need to handle our special PIC relocations. */
6446
6447 void
6448 i386_dwarf_output_addr_const (file, x)
6449 FILE *file;
6450 rtx x;
6451 {
6452 #ifdef ASM_QUAD
6453 fprintf (file, "%s", TARGET_64BIT ? ASM_QUAD : ASM_LONG);
6454 #else
6455 if (TARGET_64BIT)
6456 abort ();
6457 fprintf (file, "%s", ASM_LONG);
6458 #endif
6459 if (flag_pic)
6460 output_pic_addr_const (file, x, '\0');
6461 else
6462 output_addr_const (file, x);
6463 fputc ('\n', file);
6464 }
6465
6466 /* This is called from dwarf2out.c via ASM_OUTPUT_DWARF_DTPREL.
6467 We need to emit DTP-relative relocations. */
6468
6469 void
6470 i386_output_dwarf_dtprel (file, size, x)
6471 FILE *file;
6472 int size;
6473 rtx x;
6474 {
6475 fputs (ASM_LONG, file);
6476 output_addr_const (file, x);
6477 fputs ("@DTPOFF", file);
6478 switch (size)
6479 {
6480 case 4:
6481 break;
6482 case 8:
6483 fputs (", 0", file);
6484 break;
6485 default:
6486 abort ();
6487 }
6488 }
6489
6490 /* In the name of slightly smaller debug output, and to cater to
6491 general assembler lossage, recognize PIC+GOTOFF and turn it back
6492 into a direct symbol reference. */
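/* A sketch of the transformation: an address such as
(plus (reg ebx) (const (unspec [(symbol_ref "x")] UNSPEC_GOTOFF)))
seen outside of a MEM is turned back into (symbol_ref "x"). */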
6493
6494 rtx
6495 i386_simplify_dwarf_addr (orig_x)
6496 rtx orig_x;
6497 {
6498 rtx x = orig_x, y;
6499
6500 if (GET_CODE (x) == MEM)
6501 x = XEXP (x, 0);
6502
6503 if (TARGET_64BIT)
6504 {
6505 if (GET_CODE (x) != CONST
6506 || GET_CODE (XEXP (x, 0)) != UNSPEC
6507 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
6508 || GET_CODE (orig_x) != MEM)
6509 return orig_x;
6510 return XVECEXP (XEXP (x, 0), 0, 0);
6511 }
6512
6513 if (GET_CODE (x) != PLUS
6514 || GET_CODE (XEXP (x, 1)) != CONST)
6515 return orig_x;
6516
6517 if (GET_CODE (XEXP (x, 0)) == REG
6518 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
6519 /* %ebx + GOT/GOTOFF */
6520 y = NULL;
6521 else if (GET_CODE (XEXP (x, 0)) == PLUS)
6522 {
6523 /* %ebx + %reg * scale + GOT/GOTOFF */
6524 y = XEXP (x, 0);
6525 if (GET_CODE (XEXP (y, 0)) == REG
6526 && REGNO (XEXP (y, 0)) == PIC_OFFSET_TABLE_REGNUM)
6527 y = XEXP (y, 1);
6528 else if (GET_CODE (XEXP (y, 1)) == REG
6529 && REGNO (XEXP (y, 1)) == PIC_OFFSET_TABLE_REGNUM)
6530 y = XEXP (y, 0);
6531 else
6532 return orig_x;
6533 if (GET_CODE (y) != REG
6534 && GET_CODE (y) != MULT
6535 && GET_CODE (y) != ASHIFT)
6536 return orig_x;
6537 }
6538 else
6539 return orig_x;
6540
6541 x = XEXP (XEXP (x, 1), 0);
6542 if (GET_CODE (x) == UNSPEC
6543 && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6544 || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
6545 {
6546 if (y)
6547 return gen_rtx_PLUS (Pmode, y, XVECEXP (x, 0, 0));
6548 return XVECEXP (x, 0, 0);
6549 }
6550
6551 if (GET_CODE (x) == PLUS
6552 && GET_CODE (XEXP (x, 0)) == UNSPEC
6553 && GET_CODE (XEXP (x, 1)) == CONST_INT
6554 && ((XINT (XEXP (x, 0), 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6555 || (XINT (XEXP (x, 0), 1) == UNSPEC_GOTOFF
6556 && GET_CODE (orig_x) != MEM)))
6557 {
6558 x = gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
6559 if (y)
6560 return gen_rtx_PLUS (Pmode, y, x);
6561 return x;
6562 }
6563
6564 return orig_x;
6565 }
6566
6567 static void
6568 put_condition_code (code, mode, reverse, fp, file)
6569 enum rtx_code code;
6570 enum machine_mode mode;
6571 int reverse, fp;
6572 FILE *file;
6573 {
6574 const char *suffix;
6575
6576 if (mode == CCFPmode || mode == CCFPUmode)
6577 {
6578 enum rtx_code second_code, bypass_code;
6579 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
6580 if (bypass_code != NIL || second_code != NIL)
6581 abort ();
6582 code = ix86_fp_compare_code_to_integer (code);
6583 mode = CCmode;
6584 }
6585 if (reverse)
6586 code = reverse_condition (code);
6587
6588 switch (code)
6589 {
6590 case EQ:
6591 suffix = "e";
6592 break;
6593 case NE:
6594 suffix = "ne";
6595 break;
6596 case GT:
6597 if (mode != CCmode && mode != CCNOmode && mode != CCGCmode)
6598 abort ();
6599 suffix = "g";
6600 break;
6601 case GTU:
6602 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
6603 Those same assemblers have the same but opposite lossage on cmov. */
6604 if (mode != CCmode)
6605 abort ();
6606 suffix = fp ? "nbe" : "a";
6607 break;
6608 case LT:
6609 if (mode == CCNOmode || mode == CCGOCmode)
6610 suffix = "s";
6611 else if (mode == CCmode || mode == CCGCmode)
6612 suffix = "l";
6613 else
6614 abort ();
6615 break;
6616 case LTU:
6617 if (mode != CCmode)
6618 abort ();
6619 suffix = "b";
6620 break;
6621 case GE:
6622 if (mode == CCNOmode || mode == CCGOCmode)
6623 suffix = "ns";
6624 else if (mode == CCmode || mode == CCGCmode)
6625 suffix = "ge";
6626 else
6627 abort ();
6628 break;
6629 case GEU:
6630 /* ??? As above. */
6631 if (mode != CCmode)
6632 abort ();
6633 suffix = fp ? "nb" : "ae";
6634 break;
6635 case LE:
6636 if (mode != CCmode && mode != CCGCmode && mode != CCNOmode)
6637 abort ();
6638 suffix = "le";
6639 break;
6640 case LEU:
6641 if (mode != CCmode)
6642 abort ();
6643 suffix = "be";
6644 break;
6645 case UNORDERED:
6646 suffix = fp ? "u" : "p";
6647 break;
6648 case ORDERED:
6649 suffix = fp ? "nu" : "np";
6650 break;
6651 default:
6652 abort ();
6653 }
6654 fputs (suffix, file);
6655 }
6656
6657 void
6658 print_reg (x, code, file)
6659 rtx x;
6660 int code;
6661 FILE *file;
6662 {
6663 if (REGNO (x) == ARG_POINTER_REGNUM
6664 || REGNO (x) == FRAME_POINTER_REGNUM
6665 || REGNO (x) == FLAGS_REG
6666 || REGNO (x) == FPSR_REG)
6667 abort ();
6668
6669 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
6670 putc ('%', file);
6671
6672 if (code == 'w' || MMX_REG_P (x))
6673 code = 2;
6674 else if (code == 'b')
6675 code = 1;
6676 else if (code == 'k')
6677 code = 4;
6678 else if (code == 'q')
6679 code = 8;
6680 else if (code == 'y')
6681 code = 3;
6682 else if (code == 'h')
6683 code = 0;
6684 else
6685 code = GET_MODE_SIZE (GET_MODE (x));
6686
6687 /* Irritatingly, the AMD extended registers use a different naming convention
6688 from the normal registers. */
6689 if (REX_INT_REG_P (x))
6690 {
6691 if (!TARGET_64BIT)
6692 abort ();
6693 switch (code)
6694 {
6695 case 0:
6696 error ("extended registers have no high halves");
6697 break;
6698 case 1:
6699 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
6700 break;
6701 case 2:
6702 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
6703 break;
6704 case 4:
6705 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
6706 break;
6707 case 8:
6708 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
6709 break;
6710 default:
6711 error ("unsupported operand size for extended register");
6712 break;
6713 }
6714 return;
6715 }
6716 switch (code)
6717 {
6718 case 3:
6719 if (STACK_TOP_P (x))
6720 {
6721 fputs ("st(0)", file);
6722 break;
6723 }
6724 /* FALLTHRU */
6725 case 8:
6726 case 4:
6727 case 12:
6728 if (! ANY_FP_REG_P (x))
6729 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
6730 /* FALLTHRU */
6731 case 16:
6732 case 2:
6733 fputs (hi_reg_name[REGNO (x)], file);
6734 break;
6735 case 1:
6736 fputs (qi_reg_name[REGNO (x)], file);
6737 break;
6738 case 0:
6739 fputs (qi_high_reg_name[REGNO (x)], file);
6740 break;
6741 default:
6742 abort ();
6743 }
6744 }
6745
6746 /* Locate some local-dynamic symbol still in use by this function
6747 so that we can print its name in some tls_local_dynamic_base
6748 pattern. */
6749
6750 static const char *
6751 get_some_local_dynamic_name ()
6752 {
6753 rtx insn;
6754
6755 if (cfun->machine->some_ld_name)
6756 return cfun->machine->some_ld_name;
6757
6758 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
6759 if (INSN_P (insn)
6760 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
6761 return cfun->machine->some_ld_name;
6762
6763 abort ();
6764 }
6765
6766 static int
6767 get_some_local_dynamic_name_1 (px, data)
6768 rtx *px;
6769 void *data ATTRIBUTE_UNUSED;
6770 {
6771 rtx x = *px;
6772
6773 if (GET_CODE (x) == SYMBOL_REF
6774 && local_dynamic_symbolic_operand (x, Pmode))
6775 {
6776 cfun->machine->some_ld_name = XSTR (x, 0);
6777 return 1;
6778 }
6779
6780 return 0;
6781 }
6782
6783 /* Meaning of CODE:
6784 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
6785 C -- print opcode suffix for set/cmov insn.
6786 c -- like C, but print reversed condition
6787 F,f -- likewise, but for floating-point.
6788 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
6789 otherwise nothing
6790 R -- print the prefix for register names.
6791 z -- print the opcode suffix for the size of the current operand.
6792 * -- print a star (in certain assembler syntax)
6793 A -- print an absolute memory reference.
6794 w -- print the operand as if it's a "word" (HImode) even if it isn't.
6795 s -- print a shift double count, followed by the assembler's argument
6796 delimiter.
6797 b -- print the QImode name of the register for the indicated operand.
6798 %b0 would print %al if operands[0] is reg 0.
6799 w -- likewise, print the HImode name of the register.
6800 k -- likewise, print the SImode name of the register.
6801 q -- likewise, print the DImode name of the register.
6802 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
6803 y -- print "st(0)" instead of "st" as a register.
6804 D -- print condition for SSE cmp instruction.
6805 P -- if PIC, print an @PLT suffix.
6806 X -- don't print any sort of PIC '@' suffix for a symbol.
6807 & -- print some in-use local-dynamic symbol name.
6808 */
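/* A hypothetical example of how these codes expand: with operands[0]
an SImode general register, the template "mov%z0\t{%1, %0|%0, %1}"
prints the "movl" mnemonic in AT&T syntax, and "%k1" prints the SImode
name of the register in operands[1], e.g. %ecx. */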
6809
6810 void
6811 print_operand (file, x, code)
6812 FILE *file;
6813 rtx x;
6814 int code;
6815 {
6816 if (code)
6817 {
6818 switch (code)
6819 {
6820 case '*':
6821 if (ASSEMBLER_DIALECT == ASM_ATT)
6822 putc ('*', file);
6823 return;
6824
6825 case '&':
6826 assemble_name (file, get_some_local_dynamic_name ());
6827 return;
6828
6829 case 'A':
6830 if (ASSEMBLER_DIALECT == ASM_ATT)
6831 putc ('*', file);
6832 else if (ASSEMBLER_DIALECT == ASM_INTEL)
6833 {
6834 /* Intel syntax. For absolute addresses, registers should not
6835 be surrounded by brackets. */
6836 if (GET_CODE (x) != REG)
6837 {
6838 putc ('[', file);
6839 PRINT_OPERAND (file, x, 0);
6840 putc (']', file);
6841 return;
6842 }
6843 }
6844 else
6845 abort ();
6846
6847 PRINT_OPERAND (file, x, 0);
6848 return;
6849
6850
6851 case 'L':
6852 if (ASSEMBLER_DIALECT == ASM_ATT)
6853 putc ('l', file);
6854 return;
6855
6856 case 'W':
6857 if (ASSEMBLER_DIALECT == ASM_ATT)
6858 putc ('w', file);
6859 return;
6860
6861 case 'B':
6862 if (ASSEMBLER_DIALECT == ASM_ATT)
6863 putc ('b', file);
6864 return;
6865
6866 case 'Q':
6867 if (ASSEMBLER_DIALECT == ASM_ATT)
6868 putc ('l', file);
6869 return;
6870
6871 case 'S':
6872 if (ASSEMBLER_DIALECT == ASM_ATT)
6873 putc ('s', file);
6874 return;
6875
6876 case 'T':
6877 if (ASSEMBLER_DIALECT == ASM_ATT)
6878 putc ('t', file);
6879 return;
6880
6881 case 'z':
6882 /* 387 opcodes don't get size suffixes if the operands are
6883 registers. */
6884 if (STACK_REG_P (x))
6885 return;
6886
6887 /* Likewise if using Intel opcodes. */
6888 if (ASSEMBLER_DIALECT == ASM_INTEL)
6889 return;
6890
6891 /* Derive the opcode suffix from the size of the operand. */
6892 switch (GET_MODE_SIZE (GET_MODE (x)))
6893 {
6894 case 2:
6895 #ifdef HAVE_GAS_FILDS_FISTS
6896 putc ('s', file);
6897 #endif
6898 return;
6899
6900 case 4:
6901 if (GET_MODE (x) == SFmode)
6902 {
6903 putc ('s', file);
6904 return;
6905 }
6906 else
6907 putc ('l', file);
6908 return;
6909
6910 case 12:
6911 case 16:
6912 putc ('t', file);
6913 return;
6914
6915 case 8:
6916 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
6917 {
6918 #ifdef GAS_MNEMONICS
6919 putc ('q', file);
6920 #else
6921 putc ('l', file);
6922 putc ('l', file);
6923 #endif
6924 }
6925 else
6926 putc ('l', file);
6927 return;
6928
6929 default:
6930 abort ();
6931 }
6932
6933 case 'b':
6934 case 'w':
6935 case 'k':
6936 case 'q':
6937 case 'h':
6938 case 'y':
6939 case 'X':
6940 case 'P':
6941 break;
6942
6943 case 's':
6944 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
6945 {
6946 PRINT_OPERAND (file, x, 0);
6947 putc (',', file);
6948 }
6949 return;
6950
6951 case 'D':
6952 /* Little bit of braindamage here. The SSE compare instructions
6953 use completely different names for the comparisons than the
6954 fp conditional moves do. */
6955 switch (GET_CODE (x))
6956 {
6957 case EQ:
6958 case UNEQ:
6959 fputs ("eq", file);
6960 break;
6961 case LT:
6962 case UNLT:
6963 fputs ("lt", file);
6964 break;
6965 case LE:
6966 case UNLE:
6967 fputs ("le", file);
6968 break;
6969 case UNORDERED:
6970 fputs ("unord", file);
6971 break;
6972 case NE:
6973 case LTGT:
6974 fputs ("neq", file);
6975 break;
6976 case UNGE:
6977 case GE:
6978 fputs ("nlt", file);
6979 break;
6980 case UNGT:
6981 case GT:
6982 fputs ("nle", file);
6983 break;
6984 case ORDERED:
6985 fputs ("ord", file);
6986 break;
6987 default:
6988 abort ();
6989 break;
6990 }
6991 return;
6992 case 'O':
6993 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
6994 if (ASSEMBLER_DIALECT == ASM_ATT)
6995 {
6996 switch (GET_MODE (x))
6997 {
6998 case HImode: putc ('w', file); break;
6999 case SImode:
7000 case SFmode: putc ('l', file); break;
7001 case DImode:
7002 case DFmode: putc ('q', file); break;
7003 default: abort ();
7004 }
7005 putc ('.', file);
7006 }
7007 #endif
7008 return;
7009 case 'C':
7010 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
7011 return;
7012 case 'F':
7013 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7014 if (ASSEMBLER_DIALECT == ASM_ATT)
7015 putc ('.', file);
7016 #endif
7017 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
7018 return;
7019
7020 /* Like above, but reverse condition */
7021 case 'c':
7022 /* Check to see if argument to %c is really a constant
7023 and not a condition code which needs to be reversed. */
7024 if (GET_RTX_CLASS (GET_CODE (x)) != '<')
7025 {
7026 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
7027 return;
7028 }
7029 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
7030 return;
7031 case 'f':
7032 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7033 if (ASSEMBLER_DIALECT == ASM_ATT)
7034 putc ('.', file);
7035 #endif
7036 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
7037 return;
7038 case '+':
7039 {
7040 rtx x;
7041
7042 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
7043 return;
7044
7045 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
7046 if (x)
7047 {
7048 int pred_val = INTVAL (XEXP (x, 0));
7049
7050 if (pred_val < REG_BR_PROB_BASE * 45 / 100
7051 || pred_val > REG_BR_PROB_BASE * 55 / 100)
7052 {
7053 int taken = pred_val > REG_BR_PROB_BASE / 2;
7054 int cputaken = final_forward_branch_p (current_output_insn) == 0;
7055
7056 /* Emit hints only when the default branch prediction
7057 heuristics would fail. */
7058 if (taken != cputaken)
7059 {
7060 /* We use 3e (DS) prefix for taken branches and
7061 2e (CS) prefix for not taken branches. */
7062 if (taken)
7063 fputs ("ds ; ", file);
7064 else
7065 fputs ("cs ; ", file);
7066 }
7067 }
7068 }
7069 return;
7070 }
7071 default:
7072 output_operand_lossage ("invalid operand code `%c'", code);
7073 }
7074 }
7075
7076 if (GET_CODE (x) == REG)
7077 {
7078 PRINT_REG (x, code, file);
7079 }
7080
7081 else if (GET_CODE (x) == MEM)
7082 {
7083 /* No `byte ptr' prefix for call instructions. */
7084 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
7085 {
7086 const char * size;
7087 switch (GET_MODE_SIZE (GET_MODE (x)))
7088 {
7089 case 1: size = "BYTE"; break;
7090 case 2: size = "WORD"; break;
7091 case 4: size = "DWORD"; break;
7092 case 8: size = "QWORD"; break;
7093 case 12: size = "XWORD"; break;
7094 case 16: size = "XMMWORD"; break;
7095 default:
7096 abort ();
7097 }
7098
7099 /* Check for explicit size override (codes 'b', 'w' and 'k') */
7100 if (code == 'b')
7101 size = "BYTE";
7102 else if (code == 'w')
7103 size = "WORD";
7104 else if (code == 'k')
7105 size = "DWORD";
7106
7107 fputs (size, file);
7108 fputs (" PTR ", file);
7109 }
7110
7111 x = XEXP (x, 0);
7112 if (flag_pic && CONSTANT_ADDRESS_P (x))
7113 output_pic_addr_const (file, x, code);
7114 /* Avoid (%rip) for call operands. */
7115 else if (CONSTANT_ADDRESS_P (x) && code == 'P'
7116 && GET_CODE (x) != CONST_INT)
7117 output_addr_const (file, x);
7118 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
7119 output_operand_lossage ("invalid constraints for operand");
7120 else
7121 output_address (x);
7122 }
7123
7124 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
7125 {
7126 REAL_VALUE_TYPE r;
7127 long l;
7128
7129 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7130 REAL_VALUE_TO_TARGET_SINGLE (r, l);
7131
7132 if (ASSEMBLER_DIALECT == ASM_ATT)
7133 putc ('$', file);
7134 fprintf (file, "0x%lx", l);
7135 }
7136
7137 /* These float cases don't actually occur as immediate operands. */
7138 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
7139 {
7140 char dstr[30];
7141
7142 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
7143 fprintf (file, "%s", dstr);
7144 }
7145
7146 else if (GET_CODE (x) == CONST_DOUBLE
7147 && (GET_MODE (x) == XFmode || GET_MODE (x) == TFmode))
7148 {
7149 char dstr[30];
7150
7151 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
7152 fprintf (file, "%s", dstr);
7153 }
7154
7155 else
7156 {
7157 if (code != 'P')
7158 {
7159 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
7160 {
7161 if (ASSEMBLER_DIALECT == ASM_ATT)
7162 putc ('$', file);
7163 }
7164 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
7165 || GET_CODE (x) == LABEL_REF)
7166 {
7167 if (ASSEMBLER_DIALECT == ASM_ATT)
7168 putc ('$', file);
7169 else
7170 fputs ("OFFSET FLAT:", file);
7171 }
7172 }
7173 if (GET_CODE (x) == CONST_INT)
7174 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
7175 else if (flag_pic)
7176 output_pic_addr_const (file, x, code);
7177 else
7178 output_addr_const (file, x);
7179 }
7180 }
7181
7182 /* Print a memory operand whose address is ADDR. */
7183
7184 void
7185 print_operand_address (file, addr)
7186 FILE *file;
7187 register rtx addr;
7188 {
7189 struct ix86_address parts;
7190 rtx base, index, disp;
7191 int scale;
7192
7193 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_TP)
7194 {
7195 if (ASSEMBLER_DIALECT == ASM_INTEL)
7196 fputs ("DWORD PTR ", file);
7197 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
7198 putc ('%', file);
7199 if (TARGET_64BIT)
7200 fputs ("fs:0", file);
7201 else
7202 fputs ("gs:0", file);
7203 return;
7204 }
7205
7206 if (! ix86_decompose_address (addr, &parts))
7207 abort ();
7208
7209 base = parts.base;
7210 index = parts.index;
7211 disp = parts.disp;
7212 scale = parts.scale;
7213
7214 if (!base && !index)
7215 {
7216 /* A displacement-only address requires special attention. */
7217
7218 if (GET_CODE (disp) == CONST_INT)
7219 {
7220 if (ASSEMBLER_DIALECT == ASM_INTEL)
7221 {
7222 if (USER_LABEL_PREFIX[0] == 0)
7223 putc ('%', file);
7224 fputs ("ds:", file);
7225 }
7226 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (addr));
7227 }
7228 else if (flag_pic)
7229 output_pic_addr_const (file, addr, 0);
7230 else
7231 output_addr_const (file, addr);
7232
7233 /* Use the one byte shorter RIP-relative addressing for 64-bit mode. */
7234 if (TARGET_64BIT
7235 && ((GET_CODE (addr) == SYMBOL_REF
7236 && ! tls_symbolic_operand (addr, GET_MODE (addr)))
7237 || GET_CODE (addr) == LABEL_REF
7238 || (GET_CODE (addr) == CONST
7239 && GET_CODE (XEXP (addr, 0)) == PLUS
7240 && (GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
7241 || GET_CODE (XEXP (XEXP (addr, 0), 0)) == LABEL_REF)
7242 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)))
7243 fputs ("(%rip)", file);
7244 }
7245 else
7246 {
7247 if (ASSEMBLER_DIALECT == ASM_ATT)
7248 {
7249 if (disp)
7250 {
7251 if (flag_pic)
7252 output_pic_addr_const (file, disp, 0);
7253 else if (GET_CODE (disp) == LABEL_REF)
7254 output_asm_label (disp);
7255 else
7256 output_addr_const (file, disp);
7257 }
7258
7259 putc ('(', file);
7260 if (base)
7261 PRINT_REG (base, 0, file);
7262 if (index)
7263 {
7264 putc (',', file);
7265 PRINT_REG (index, 0, file);
7266 if (scale != 1)
7267 fprintf (file, ",%d", scale);
7268 }
7269 putc (')', file);
7270 }
7271 else
7272 {
7273 rtx offset = NULL_RTX;
7274
7275 if (disp)
7276 {
7277 /* Pull out the offset of a symbol; print any symbol itself. */
7278 if (GET_CODE (disp) == CONST
7279 && GET_CODE (XEXP (disp, 0)) == PLUS
7280 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
7281 {
7282 offset = XEXP (XEXP (disp, 0), 1);
7283 disp = gen_rtx_CONST (VOIDmode,
7284 XEXP (XEXP (disp, 0), 0));
7285 }
7286
7287 if (flag_pic)
7288 output_pic_addr_const (file, disp, 0);
7289 else if (GET_CODE (disp) == LABEL_REF)
7290 output_asm_label (disp);
7291 else if (GET_CODE (disp) == CONST_INT)
7292 offset = disp;
7293 else
7294 output_addr_const (file, disp);
7295 }
7296
7297 putc ('[', file);
7298 if (base)
7299 {
7300 PRINT_REG (base, 0, file);
7301 if (offset)
7302 {
7303 if (INTVAL (offset) >= 0)
7304 putc ('+', file);
7305 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
7306 }
7307 }
7308 else if (offset)
7309 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
7310 else
7311 putc ('0', file);
7312
7313 if (index)
7314 {
7315 putc ('+', file);
7316 PRINT_REG (index, 0, file);
7317 if (scale != 1)
7318 fprintf (file, "*%d", scale);
7319 }
7320 putc (']', file);
7321 }
7322 }
7323 }
7324
7325 bool
7326 output_addr_const_extra (file, x)
7327 FILE *file;
7328 rtx x;
7329 {
7330 rtx op;
7331
7332 if (GET_CODE (x) != UNSPEC)
7333 return false;
7334
7335 op = XVECEXP (x, 0, 0);
7336 switch (XINT (x, 1))
7337 {
7338 case UNSPEC_GOTTPOFF:
7339 output_addr_const (file, op);
7340 /* FIXME: This might be @TPOFF in Sun ld. */
7341 fputs ("@GOTTPOFF", file);
7342 break;
7343 case UNSPEC_TPOFF:
7344 output_addr_const (file, op);
7345 fputs ("@TPOFF", file);
7346 break;
7347 case UNSPEC_NTPOFF:
7348 output_addr_const (file, op);
7349 if (TARGET_64BIT)
7350 fputs ("@TPOFF", file);
7351 else
7352 fputs ("@NTPOFF", file);
7353 break;
7354 case UNSPEC_DTPOFF:
7355 output_addr_const (file, op);
7356 fputs ("@DTPOFF", file);
7357 break;
7358 case UNSPEC_GOTNTPOFF:
7359 output_addr_const (file, op);
7360 if (TARGET_64BIT)
7361 fputs ("@GOTTPOFF(%rip)", file);
7362 else
7363 fputs ("@GOTNTPOFF", file);
7364 break;
7365 case UNSPEC_INDNTPOFF:
7366 output_addr_const (file, op);
7367 fputs ("@INDNTPOFF", file);
7368 break;
7369
7370 default:
7371 return false;
7372 }
7373
7374 return true;
7375 }
7376
7377 /* Split one or more DImode RTL references into pairs of SImode
7378 references. The RTL can be REG, offsettable MEM, integer constant, or
7379 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
7380 split and "num" is its length. lo_half and hi_half are output arrays
7381 that parallel "operands". */
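/* A minimal usage sketch (hypothetical caller):
rtx lo[2], hi[2];
split_di (operands, 2, lo, hi);
after which lo[i] and hi[i] hold the low and high SImode halves of
operands[i]. */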
7382
7383 void
7384 split_di (operands, num, lo_half, hi_half)
7385 rtx operands[];
7386 int num;
7387 rtx lo_half[], hi_half[];
7388 {
7389 while (num--)
7390 {
7391 rtx op = operands[num];
7392
7393 /* simplify_subreg refuses to split volatile memory addresses,
7394 but we still have to handle them. */
7395 if (GET_CODE (op) == MEM)
7396 {
7397 lo_half[num] = adjust_address (op, SImode, 0);
7398 hi_half[num] = adjust_address (op, SImode, 4);
7399 }
7400 else
7401 {
7402 lo_half[num] = simplify_gen_subreg (SImode, op,
7403 GET_MODE (op) == VOIDmode
7404 ? DImode : GET_MODE (op), 0);
7405 hi_half[num] = simplify_gen_subreg (SImode, op,
7406 GET_MODE (op) == VOIDmode
7407 ? DImode : GET_MODE (op), 4);
7408 }
7409 }
7410 }
7411 /* Split one or more TImode RTL references into pairs of SImode
7412 references. The RTL can be REG, offsettable MEM, integer constant, or
7413 CONST_DOUBLE. "operands" is a pointer to an array of TImode RTL to
7414 split and "num" is its length. lo_half and hi_half are output arrays
7415 that parallel "operands". */
7416
7417 void
7418 split_ti (operands, num, lo_half, hi_half)
7419 rtx operands[];
7420 int num;
7421 rtx lo_half[], hi_half[];
7422 {
7423 while (num--)
7424 {
7425 rtx op = operands[num];
7426
7427 /* simplify_subreg refuses to split volatile memory addresses, but we
7428 still have to handle them. */
7429 if (GET_CODE (op) == MEM)
7430 {
7431 lo_half[num] = adjust_address (op, DImode, 0);
7432 hi_half[num] = adjust_address (op, DImode, 8);
7433 }
7434 else
7435 {
7436 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
7437 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
7438 }
7439 }
7440 }
7441
7442 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
7443 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
7444 is the expression of the binary operation. The output may either be
7445 emitted here, or returned to the caller, like all output_* functions.
7446
7447 There is no guarantee that the operands are the same mode, as they
7448 might be within FLOAT or FLOAT_EXTEND expressions. */
7449
7450 #ifndef SYSV386_COMPAT
7451 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
7452 wants to fix the assemblers because that causes incompatibility
7453 with gcc. No-one wants to fix gcc because that causes
7454 incompatibility with assemblers... You can use the option of
7455 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
7456 #define SYSV386_COMPAT 1
7457 #endif
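/* A note on the templates below: output_asm_insn expands the {att|intel}
braces according to the assembler dialect in use, and %z emits an
operand-size suffix derived from the operand's mode.  So, for example,
the SSE path for an SFmode PLUS produces "addss {%2, %0|%0, %2}",
i.e. "addss %xmm1, %xmm0" in AT&T syntax for a typical register
allocation.  */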
7458
7459 const char *
7460 output_387_binary_op (insn, operands)
7461 rtx insn;
7462 rtx *operands;
7463 {
7464 static char buf[30];
7465 const char *p;
7466 const char *ssep;
7467 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]) | SSE_REG_P (operands[2]);
7468
7469 #ifdef ENABLE_CHECKING
7470 /* Even if we do not want to check the inputs, this documents the input
7471 constraints, which helps in understanding the following code. */
7472 if (STACK_REG_P (operands[0])
7473 && ((REG_P (operands[1])
7474 && REGNO (operands[0]) == REGNO (operands[1])
7475 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
7476 || (REG_P (operands[2])
7477 && REGNO (operands[0]) == REGNO (operands[2])
7478 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
7479 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
7480 ; /* ok */
7481 else if (!is_sse)
7482 abort ();
7483 #endif
7484
7485 switch (GET_CODE (operands[3]))
7486 {
7487 case PLUS:
7488 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7489 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7490 p = "fiadd";
7491 else
7492 p = "fadd";
7493 ssep = "add";
7494 break;
7495
7496 case MINUS:
7497 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7498 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7499 p = "fisub";
7500 else
7501 p = "fsub";
7502 ssep = "sub";
7503 break;
7504
7505 case MULT:
7506 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7507 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7508 p = "fimul";
7509 else
7510 p = "fmul";
7511 ssep = "mul";
7512 break;
7513
7514 case DIV:
7515 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7516 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7517 p = "fidiv";
7518 else
7519 p = "fdiv";
7520 ssep = "div";
7521 break;
7522
7523 default:
7524 abort ();
7525 }
7526
7527 if (is_sse)
7528 {
7529 strlcpy (buf, ssep, sizeof buf);
7530 if (GET_MODE (operands[0]) == SFmode)
7531 strlcat (buf, "ss\t{%2, %0|%0, %2}", sizeof buf);
7532 else
7533 strlcat (buf, "sd\t{%2, %0|%0, %2}", sizeof buf);
7534 return buf;
7535 }
7536 strlcpy (buf, p, sizeof buf);
7537
7538 switch (GET_CODE (operands[3]))
7539 {
7540 case MULT:
7541 case PLUS:
7542 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
7543 {
7544 rtx temp = operands[2];
7545 operands[2] = operands[1];
7546 operands[1] = temp;
7547 }
7548
7549 /* We now know operands[0] == operands[1]. */
7550
7551 if (GET_CODE (operands[2]) == MEM)
7552 {
7553 p = "%z2\t%2";
7554 break;
7555 }
7556
7557 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7558 {
7559 if (STACK_TOP_P (operands[0]))
7560 /* How is it that we are storing to a dead operand[2]?
7561 Well, presumably operands[1] is dead too. We can't
7562 store the result to st(0) as st(0) gets popped on this
7563 instruction. Instead store to operands[2] (which I
7564 think has to be st(1)). st(1) will be popped later.
7565 gcc <= 2.8.1 didn't have this check and generated
7566 assembly code that the Unixware assembler rejected. */
7567 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7568 else
7569 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7570 break;
7571 }
7572
7573 if (STACK_TOP_P (operands[0]))
7574 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
7575 else
7576 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
7577 break;
7578
7579 case MINUS:
7580 case DIV:
7581 if (GET_CODE (operands[1]) == MEM)
7582 {
7583 p = "r%z1\t%1";
7584 break;
7585 }
7586
7587 if (GET_CODE (operands[2]) == MEM)
7588 {
7589 p = "%z2\t%2";
7590 break;
7591 }
7592
7593 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7594 {
7595 #if SYSV386_COMPAT
7596 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
7597 derived assemblers, confusingly reverse the direction of
7598 the operation for fsub{r} and fdiv{r} when the
7599 destination register is not st(0). The Intel assembler
7600 doesn't have this brain damage. Read !SYSV386_COMPAT to
7601 figure out what the hardware really does. */
7602 if (STACK_TOP_P (operands[0]))
7603 p = "{p\t%0, %2|rp\t%2, %0}";
7604 else
7605 p = "{rp\t%2, %0|p\t%0, %2}";
7606 #else
7607 if (STACK_TOP_P (operands[0]))
7608 /* As above for fmul/fadd, we can't store to st(0). */
7609 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7610 else
7611 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7612 #endif
7613 break;
7614 }
7615
7616 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
7617 {
7618 #if SYSV386_COMPAT
7619 if (STACK_TOP_P (operands[0]))
7620 p = "{rp\t%0, %1|p\t%1, %0}";
7621 else
7622 p = "{p\t%1, %0|rp\t%0, %1}";
7623 #else
7624 if (STACK_TOP_P (operands[0]))
7625 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
7626 else
7627 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
7628 #endif
7629 break;
7630 }
7631
7632 if (STACK_TOP_P (operands[0]))
7633 {
7634 if (STACK_TOP_P (operands[1]))
7635 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
7636 else
7637 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
7638 break;
7639 }
7640 else if (STACK_TOP_P (operands[1]))
7641 {
7642 #if SYSV386_COMPAT
7643 p = "{\t%1, %0|r\t%0, %1}";
7644 #else
7645 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
7646 #endif
7647 }
7648 else
7649 {
7650 #if SYSV386_COMPAT
7651 p = "{r\t%2, %0|\t%0, %2}";
7652 #else
7653 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
7654 #endif
7655 }
7656 break;
7657
7658 default:
7659 abort ();
7660 }
7661
7662 strlcat (buf, p, sizeof buf);
7663 return buf;
7664 }
7665
7666 /* Output code to initialize the control word copies used by the
7667 trunc?f?i patterns. NORMAL is set to the current control word, while
7668 ROUND_DOWN is set to a control word that rounds toward zero (truncates). */
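/* Concretely, OR-ing 0xc00 into the control word copy sets the x87
rounding-control field (bits 10 and 11) to 11b, i.e. round toward zero,
which is what the truncating conversions need; the movsi_insv_1 variant
presumably writes the same bits while avoiding the 16-bit operand-size
prefix of the iorhi3 form.  */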
7669 void
7670 emit_i387_cw_initialization (normal, round_down)
7671 rtx normal, round_down;
7672 {
7673 rtx reg = gen_reg_rtx (HImode);
7674
7675 emit_insn (gen_x86_fnstcw_1 (normal));
7676 emit_move_insn (reg, normal);
7677 if (!TARGET_PARTIAL_REG_STALL && !optimize_size
7678 && !TARGET_64BIT)
7679 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
7680 else
7681 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0xc00)));
7682 emit_move_insn (round_down, reg);
7683 }
7684
7685 /* Output code for INSN to convert a float to a signed int. OPERANDS
7686 are the insn operands. The output may be [HSD]Imode and the input
7687 operand may be [SDX]Fmode. */
7688
7689 const char *
7690 output_fix_trunc (insn, operands)
7691 rtx insn;
7692 rtx *operands;
7693 {
7694 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
7695 int dimode_p = GET_MODE (operands[0]) == DImode;
7696
7697 /* Jump through a hoop or two for DImode, since the hardware has no
7698 non-popping instruction. We used to do this a different way, but
7699 that was somewhat fragile and broke with post-reload splitters. */
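/* The net effect for DImode when the input is still live is the sequence
"fld %y1; fldcw %3; fistp%z0 %0; fldcw %2": duplicate st(0), switch to
the truncating control word, store-and-pop the copy, and restore the
original control word.  */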
7700 if (dimode_p && !stack_top_dies)
7701 output_asm_insn ("fld\t%y1", operands);
7702
7703 if (!STACK_TOP_P (operands[1]))
7704 abort ();
7705
7706 if (GET_CODE (operands[0]) != MEM)
7707 abort ();
7708
7709 output_asm_insn ("fldcw\t%3", operands);
7710 if (stack_top_dies || dimode_p)
7711 output_asm_insn ("fistp%z0\t%0", operands);
7712 else
7713 output_asm_insn ("fist%z0\t%0", operands);
7714 output_asm_insn ("fldcw\t%2", operands);
7715
7716 return "";
7717 }
7718
7719 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
7720 should be used and 2 when fnstsw should be used. UNORDERED_P is true
7721 when fucom should be used. */
7722
7723 const char *
7724 output_fp_compare (insn, operands, eflags_p, unordered_p)
7725 rtx insn;
7726 rtx *operands;
7727 int eflags_p, unordered_p;
7728 {
7729 int stack_top_dies;
7730 rtx cmp_op0 = operands[0];
7731 rtx cmp_op1 = operands[1];
7732 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]);
7733
7734 if (eflags_p == 2)
7735 {
7736 cmp_op0 = cmp_op1;
7737 cmp_op1 = operands[2];
7738 }
7739 if (is_sse)
7740 {
7741 if (GET_MODE (operands[0]) == SFmode)
7742 if (unordered_p)
7743 return "ucomiss\t{%1, %0|%0, %1}";
7744 else
7745 return "comiss\t{%1, %0|%0, %1}";
7746 else
7747 if (unordered_p)
7748 return "ucomisd\t{%1, %0|%0, %1}";
7749 else
7750 return "comisd\t{%1, %0|%0, %1}";
7751 }
7752
7753 if (! STACK_TOP_P (cmp_op0))
7754 abort ();
7755
7756 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
7757
7758 if (STACK_REG_P (cmp_op1)
7759 && stack_top_dies
7760 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
7761 && REGNO (cmp_op1) != FIRST_STACK_REG)
7762 {
7763 /* If both the top of the 387 stack and the other operand (also a
7764 stack register) die, then this must be an `fcompp' float
7765 compare. */
7766
7767 if (eflags_p == 1)
7768 {
7769 /* There is no double popping fcomi variant. Fortunately,
7770 eflags is immune from the fstp's cc clobbering. */
7771 if (unordered_p)
7772 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
7773 else
7774 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
7775 return "fstp\t%y0";
7776 }
7777 else
7778 {
7779 if (eflags_p == 2)
7780 {
7781 if (unordered_p)
7782 return "fucompp\n\tfnstsw\t%0";
7783 else
7784 return "fcompp\n\tfnstsw\t%0";
7785 }
7786 else
7787 {
7788 if (unordered_p)
7789 return "fucompp";
7790 else
7791 return "fcompp";
7792 }
7793 }
7794 }
7795 else
7796 {
7797 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
7798
7799 static const char * const alt[24] =
7800 {
7801 "fcom%z1\t%y1",
7802 "fcomp%z1\t%y1",
7803 "fucom%z1\t%y1",
7804 "fucomp%z1\t%y1",
7805
7806 "ficom%z1\t%y1",
7807 "ficomp%z1\t%y1",
7808 NULL,
7809 NULL,
7810
7811 "fcomi\t{%y1, %0|%0, %y1}",
7812 "fcomip\t{%y1, %0|%0, %y1}",
7813 "fucomi\t{%y1, %0|%0, %y1}",
7814 "fucomip\t{%y1, %0|%0, %y1}",
7815
7816 NULL,
7817 NULL,
7818 NULL,
7819 NULL,
7820
7821 "fcom%z2\t%y2\n\tfnstsw\t%0",
7822 "fcomp%z2\t%y2\n\tfnstsw\t%0",
7823 "fucom%z2\t%y2\n\tfnstsw\t%0",
7824 "fucomp%z2\t%y2\n\tfnstsw\t%0",
7825
7826 "ficom%z2\t%y2\n\tfnstsw\t%0",
7827 "ficomp%z2\t%y2\n\tfnstsw\t%0",
7828 NULL,
7829 NULL
7830 };
7831
7832 int mask;
7833 const char *ret;
7834
7835 mask = eflags_p << 3;
7836 mask |= (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT) << 2;
7837 mask |= unordered_p << 1;
7838 mask |= stack_top_dies;
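/* For example, an fcomi-style unordered compare (eflags_p == 1) of two
FP registers where the stack top dies gives
mask = (1 << 3) | (0 << 2) | (1 << 1) | 1 = 11, selecting
"fucomip\t{%y1, %0|%0, %y1}" above.  */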
7839
7840 if (mask >= 24)
7841 abort ();
7842 ret = alt[mask];
7843 if (ret == NULL)
7844 abort ();
7845
7846 return ret;
7847 }
7848 }
7849
7850 void
7851 ix86_output_addr_vec_elt (file, value)
7852 FILE *file;
7853 int value;
7854 {
7855 const char *directive = ASM_LONG;
7856
7857 if (TARGET_64BIT)
7858 {
7859 #ifdef ASM_QUAD
7860 directive = ASM_QUAD;
7861 #else
7862 abort ();
7863 #endif
7864 }
7865
7866 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
7867 }
7868
7869 void
7870 ix86_output_addr_diff_elt (file, value, rel)
7871 FILE *file;
7872 int value, rel;
7873 {
7874 if (TARGET_64BIT)
7875 fprintf (file, "%s%s%d-%s%d\n",
7876 ASM_LONG, LPREFIX, value, LPREFIX, rel);
7877 else if (HAVE_AS_GOTOFF_IN_DATA)
7878 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
7879 #if TARGET_MACHO
7880 else if (TARGET_MACHO)
7881 fprintf (file, "%s%s%d-%s\n", ASM_LONG, LPREFIX, value,
7882 machopic_function_base_name () + 1);
7883 #endif
7884 else
7885 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
7886 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
7887 }
7888
7889 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
7890 for the target. */
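/* For a 32-bit register this is the difference between the 2-byte
"xorl %eax, %eax" and the 5-byte "movl $0, %eax"; the xor form is
smaller but clobbers the flags, hence the explicit CLOBBER added
below when it is used.  */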
7891
7892 void
7893 ix86_expand_clear (dest)
7894 rtx dest;
7895 {
7896 rtx tmp;
7897
7898 /* We play register width games, which are only valid after reload. */
7899 if (!reload_completed)
7900 abort ();
7901
7902 /* Avoid HImode and its attendant prefix byte. */
7903 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
7904 dest = gen_rtx_REG (SImode, REGNO (dest));
7905
7906 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
7907
7908 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
7909 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
7910 {
7911 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
7912 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
7913 }
7914
7915 emit_insn (tmp);
7916 }
7917
7918 /* X is an unchanging MEM. If it is a constant pool reference, return
7919 the constant pool rtx, else NULL. */
7920
7921 static rtx
7922 maybe_get_pool_constant (x)
7923 rtx x;
7924 {
7925 x = XEXP (x, 0);
7926
7927 if (flag_pic && ! TARGET_64BIT)
7928 {
7929 if (GET_CODE (x) != PLUS)
7930 return NULL_RTX;
7931 if (XEXP (x, 0) != pic_offset_table_rtx)
7932 return NULL_RTX;
7933 x = XEXP (x, 1);
7934 if (GET_CODE (x) != CONST)
7935 return NULL_RTX;
7936 x = XEXP (x, 0);
7937 if (GET_CODE (x) != UNSPEC)
7938 return NULL_RTX;
7939 if (XINT (x, 1) != UNSPEC_GOTOFF)
7940 return NULL_RTX;
7941 x = XVECEXP (x, 0, 0);
7942 }
7943
7944 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
7945 return get_pool_constant (x);
7946
7947 return NULL_RTX;
7948 }
7949
7950 void
7951 ix86_expand_move (mode, operands)
7952 enum machine_mode mode;
7953 rtx operands[];
7954 {
7955 int strict = (reload_in_progress || reload_completed);
7956 rtx insn, op0, op1, tmp;
7957
7958 op0 = operands[0];
7959 op1 = operands[1];
7960
7961 if (tls_symbolic_operand (op1, Pmode))
7962 {
7963 op1 = legitimize_address (op1, op1, VOIDmode);
7964 if (GET_CODE (op0) == MEM)
7965 {
7966 tmp = gen_reg_rtx (mode);
7967 emit_insn (gen_rtx_SET (VOIDmode, tmp, op1));
7968 op1 = tmp;
7969 }
7970 }
7971 else if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
7972 {
7973 #if TARGET_MACHO
7974 if (MACHOPIC_PURE)
7975 {
7976 rtx temp = ((reload_in_progress
7977 || ((op0 && GET_CODE (op0) == REG)
7978 && mode == Pmode))
7979 ? op0 : gen_reg_rtx (Pmode));
7980 op1 = machopic_indirect_data_reference (op1, temp);
7981 op1 = machopic_legitimize_pic_address (op1, mode,
7982 temp == op1 ? 0 : temp);
7983 }
7984 else
7985 {
7986 if (MACHOPIC_INDIRECT)
7987 op1 = machopic_indirect_data_reference (op1, 0);
7988 }
7989 if (op0 != op1)
7990 {
7991 insn = gen_rtx_SET (VOIDmode, op0, op1);
7992 emit_insn (insn);
7993 }
7994 return;
7995 #endif /* TARGET_MACHO */
7996 if (GET_CODE (op0) == MEM)
7997 op1 = force_reg (Pmode, op1);
7998 else
7999 {
8000 rtx temp = op0;
8001 if (GET_CODE (temp) != REG)
8002 temp = gen_reg_rtx (Pmode);
8003 temp = legitimize_pic_address (op1, temp);
8004 if (temp == op0)
8005 return;
8006 op1 = temp;
8007 }
8008 }
8009 else
8010 {
8011 if (GET_CODE (op0) == MEM
8012 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
8013 || !push_operand (op0, mode))
8014 && GET_CODE (op1) == MEM)
8015 op1 = force_reg (mode, op1);
8016
8017 if (push_operand (op0, mode)
8018 && ! general_no_elim_operand (op1, mode))
8019 op1 = copy_to_mode_reg (mode, op1);
8020
8021 /* Force large constants in 64-bit compilation into a register
8022 so that they get CSEd. */
8023 if (TARGET_64BIT && mode == DImode
8024 && immediate_operand (op1, mode)
8025 && !x86_64_zero_extended_value (op1)
8026 && !register_operand (op0, mode)
8027 && optimize && !reload_completed && !reload_in_progress)
8028 op1 = copy_to_mode_reg (mode, op1);
8029
8030 if (FLOAT_MODE_P (mode))
8031 {
8032 /* If we are loading a floating point constant to a register,
8033 force the value to memory now, since we'll get better code
8034 out the back end. */
8035
8036 if (strict)
8037 ;
8038 else if (GET_CODE (op1) == CONST_DOUBLE)
8039 {
8040 op1 = validize_mem (force_const_mem (mode, op1));
8041 if (!register_operand (op0, mode))
8042 {
8043 rtx temp = gen_reg_rtx (mode);
8044 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
8045 emit_move_insn (op0, temp);
8046 return;
8047 }
8048 }
8049 }
8050 }
8051
8052 insn = gen_rtx_SET (VOIDmode, op0, op1);
8053
8054 emit_insn (insn);
8055 }
8056
8057 void
8058 ix86_expand_vector_move (mode, operands)
8059 enum machine_mode mode;
8060 rtx operands[];
8061 {
8062 /* Force constants other than zero into memory. We do not know how
8063 the instructions used to build constants modify the upper 64 bits
8064 of the register; once we have that information we may be able
8065 to handle some of them more efficiently. */
8066 if ((reload_in_progress | reload_completed) == 0
8067 && register_operand (operands[0], mode)
8068 && CONSTANT_P (operands[1]) && operands[1] != CONST0_RTX (mode))
8069 {
8070 operands[1] = force_const_mem (mode, operands[1]);
8071 emit_move_insn (operands[0], operands[1]);
8072 return;
8073 }
8074
8075 /* Make operand1 a register if it isn't already. */
8076 if (!no_new_pseudos
8077 && !register_operand (operands[0], mode)
8078 && !register_operand (operands[1], mode))
8079 {
8080 rtx temp = force_reg (GET_MODE (operands[1]), operands[1]);
8081 emit_move_insn (operands[0], temp);
8082 return;
8083 }
8084
8085 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
8086 }
8087
8088 /* Attempt to expand a binary operator. Make the expansion closer to the
8089 actual machine than just general_operand, which would allow 3 separate
8090 memory references (one output, two inputs) in a single insn. */
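/* The point is that an x86 ALU instruction is two-address and can
reference memory in at most one place, so at most one of the three
operands may stay in memory, and a memory destination must be the same
location as one of the sources (e.g. "addl %eax, (%ecx)" computes
*ecx += eax).  Everything else has to go through registers.  */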
8091
8092 void
8093 ix86_expand_binary_operator (code, mode, operands)
8094 enum rtx_code code;
8095 enum machine_mode mode;
8096 rtx operands[];
8097 {
8098 int matching_memory;
8099 rtx src1, src2, dst, op, clob;
8100
8101 dst = operands[0];
8102 src1 = operands[1];
8103 src2 = operands[2];
8104
8105 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
8106 if (GET_RTX_CLASS (code) == 'c'
8107 && (rtx_equal_p (dst, src2)
8108 || immediate_operand (src1, mode)))
8109 {
8110 rtx temp = src1;
8111 src1 = src2;
8112 src2 = temp;
8113 }
8114
8115 /* If the destination is memory, and we do not have matching source
8116 operands, do things in registers. */
8117 matching_memory = 0;
8118 if (GET_CODE (dst) == MEM)
8119 {
8120 if (rtx_equal_p (dst, src1))
8121 matching_memory = 1;
8122 else if (GET_RTX_CLASS (code) == 'c'
8123 && rtx_equal_p (dst, src2))
8124 matching_memory = 2;
8125 else
8126 dst = gen_reg_rtx (mode);
8127 }
8128
8129 /* Both source operands cannot be in memory. */
8130 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
8131 {
8132 if (matching_memory != 2)
8133 src2 = force_reg (mode, src2);
8134 else
8135 src1 = force_reg (mode, src1);
8136 }
8137
8138 /* If the operation is not commutative, source 1 cannot be a constant
8139 or non-matching memory. */
8140 if ((CONSTANT_P (src1)
8141 || (!matching_memory && GET_CODE (src1) == MEM))
8142 && GET_RTX_CLASS (code) != 'c')
8143 src1 = force_reg (mode, src1);
8144
8145 /* If optimizing, copy to regs to improve CSE */
8146 if (optimize && ! no_new_pseudos)
8147 {
8148 if (GET_CODE (dst) == MEM)
8149 dst = gen_reg_rtx (mode);
8150 if (GET_CODE (src1) == MEM)
8151 src1 = force_reg (mode, src1);
8152 if (GET_CODE (src2) == MEM)
8153 src2 = force_reg (mode, src2);
8154 }
8155
8156 /* Emit the instruction. */
8157
8158 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
8159 if (reload_in_progress)
8160 {
8161 /* Reload doesn't know about the flags register, and doesn't know that
8162 it doesn't want to clobber it. We can only do this with PLUS. */
8163 if (code != PLUS)
8164 abort ();
8165 emit_insn (op);
8166 }
8167 else
8168 {
8169 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
8170 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
8171 }
8172
8173 /* Fix up the destination if needed. */
8174 if (dst != operands[0])
8175 emit_move_insn (operands[0], dst);
8176 }
8177
8178 /* Return TRUE or FALSE depending on whether the binary operator meets the
8179 appropriate constraints. */
8180
8181 int
8182 ix86_binary_operator_ok (code, mode, operands)
8183 enum rtx_code code;
8184 enum machine_mode mode ATTRIBUTE_UNUSED;
8185 rtx operands[3];
8186 {
8187 /* Both source operands cannot be in memory. */
8188 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
8189 return 0;
8190 /* If the operation is not commutative, source 1 cannot be a constant. */
8191 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != 'c')
8192 return 0;
8193 /* If the destination is memory, we must have a matching source operand. */
8194 if (GET_CODE (operands[0]) == MEM
8195 && ! (rtx_equal_p (operands[0], operands[1])
8196 || (GET_RTX_CLASS (code) == 'c'
8197 && rtx_equal_p (operands[0], operands[2]))))
8198 return 0;
8199 /* If the operation is not commutative and source 1 is memory, we must
8200 have a matching destination. */
8201 if (GET_CODE (operands[1]) == MEM
8202 && GET_RTX_CLASS (code) != 'c'
8203 && ! rtx_equal_p (operands[0], operands[1]))
8204 return 0;
8205 return 1;
8206 }
8207
8208 /* Attempt to expand a unary operator. Make the expansion closer to the
8209 actual machine than just general_operand, which would allow 2 separate
8210 memory references (one output, one input) in a single insn. */
8211
8212 void
8213 ix86_expand_unary_operator (code, mode, operands)
8214 enum rtx_code code;
8215 enum machine_mode mode;
8216 rtx operands[];
8217 {
8218 int matching_memory;
8219 rtx src, dst, op, clob;
8220
8221 dst = operands[0];
8222 src = operands[1];
8223
8224 /* If the destination is memory, and we do not have matching source
8225 operands, do things in registers. */
8226 matching_memory = 0;
8227 if (GET_CODE (dst) == MEM)
8228 {
8229 if (rtx_equal_p (dst, src))
8230 matching_memory = 1;
8231 else
8232 dst = gen_reg_rtx (mode);
8233 }
8234
8235 /* When source operand is memory, destination must match. */
8236 if (!matching_memory && GET_CODE (src) == MEM)
8237 src = force_reg (mode, src);
8238
8239 /* If optimizing, copy to regs to improve CSE */
8240 if (optimize && ! no_new_pseudos)
8241 {
8242 if (GET_CODE (dst) == MEM)
8243 dst = gen_reg_rtx (mode);
8244 if (GET_CODE (src) == MEM)
8245 src = force_reg (mode, src);
8246 }
8247
8248 /* Emit the instruction. */
8249
8250 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
8251 if (reload_in_progress || code == NOT)
8252 {
8253 /* Reload doesn't know about the flags register, and doesn't know that
8254 it doesn't want to clobber it. */
8255 if (code != NOT)
8256 abort ();
8257 emit_insn (op);
8258 }
8259 else
8260 {
8261 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
8262 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
8263 }
8264
8265 /* Fix up the destination if needed. */
8266 if (dst != operands[0])
8267 emit_move_insn (operands[0], dst);
8268 }
8269
8270 /* Return TRUE or FALSE depending on whether the unary operator meets the
8271 appropriate constraints. */
8272
8273 int
8274 ix86_unary_operator_ok (code, mode, operands)
8275 enum rtx_code code ATTRIBUTE_UNUSED;
8276 enum machine_mode mode ATTRIBUTE_UNUSED;
8277 rtx operands[2] ATTRIBUTE_UNUSED;
8278 {
8279 /* If one of operands is memory, source and destination must match. */
8280 if ((GET_CODE (operands[0]) == MEM
8281 || GET_CODE (operands[1]) == MEM)
8282 && ! rtx_equal_p (operands[0], operands[1]))
8283 return FALSE;
8284 return TRUE;
8285 }
8286
8287 /* Return TRUE or FALSE depending on whether the first SET in INSN
8288 has source and destination with matching CC modes, and whether the
8289 CC mode is at least as constrained as REQ_MODE. */
8290
8291 int
8292 ix86_match_ccmode (insn, req_mode)
8293 rtx insn;
8294 enum machine_mode req_mode;
8295 {
8296 rtx set;
8297 enum machine_mode set_mode;
8298
8299 set = PATTERN (insn);
8300 if (GET_CODE (set) == PARALLEL)
8301 set = XVECEXP (set, 0, 0);
8302 if (GET_CODE (set) != SET)
8303 abort ();
8304 if (GET_CODE (SET_SRC (set)) != COMPARE)
8305 abort ();
8306
8307 set_mode = GET_MODE (SET_DEST (set));
8308 switch (set_mode)
8309 {
8310 case CCNOmode:
8311 if (req_mode != CCNOmode
8312 && (req_mode != CCmode
8313 || XEXP (SET_SRC (set), 1) != const0_rtx))
8314 return 0;
8315 break;
8316 case CCmode:
8317 if (req_mode == CCGCmode)
8318 return 0;
8319 /* FALLTHRU */
8320 case CCGCmode:
8321 if (req_mode == CCGOCmode || req_mode == CCNOmode)
8322 return 0;
8323 /* FALLTHRU */
8324 case CCGOCmode:
8325 if (req_mode == CCZmode)
8326 return 0;
8327 /* FALLTHRU */
8328 case CCZmode:
8329 break;
8330
8331 default:
8332 abort ();
8333 }
8334
8335 return (GET_MODE (SET_SRC (set)) == set_mode);
8336 }
8337
8338 /* Generate insn patterns to do an integer compare of OPERANDS. */
8339
8340 static rtx
8341 ix86_expand_int_compare (code, op0, op1)
8342 enum rtx_code code;
8343 rtx op0, op1;
8344 {
8345 enum machine_mode cmpmode;
8346 rtx tmp, flags;
8347
8348 cmpmode = SELECT_CC_MODE (code, op0, op1);
8349 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
8350
8351 /* This is very simple, but making the interface the same as in the
8352 FP case makes the rest of the code easier. */
8353 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
8354 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
8355
8356 /* Return the test that should be put into the flags user, i.e.
8357 the bcc, scc, or cmov instruction. */
8358 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
8359 }
8360
8361 /* Figure out whether to use ordered or unordered fp comparisons.
8362 Return the appropriate mode to use. */
8363
8364 enum machine_mode
8365 ix86_fp_compare_mode (code)
8366 enum rtx_code code ATTRIBUTE_UNUSED;
8367 {
8368 /* ??? In order to make all comparisons reversible, we do all comparisons
8369 non-trapping when compiling for IEEE. Once gcc is able to distinguish
8370 all forms of trapping and nontrapping comparisons, we can make inequality
8371 comparisons trapping again, since it results in better code when using
8372 FCOM based compares. */
8373 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
8374 }
8375
8376 enum machine_mode
8377 ix86_cc_mode (code, op0, op1)
8378 enum rtx_code code;
8379 rtx op0, op1;
8380 {
8381 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
8382 return ix86_fp_compare_mode (code);
8383 switch (code)
8384 {
8385 /* Only zero flag is needed. */
8386 case EQ: /* ZF=0 */
8387 case NE: /* ZF!=0 */
8388 return CCZmode;
8389 /* Codes needing carry flag. */
8390 case GEU: /* CF=0 */
8391 case GTU: /* CF=0 & ZF=0 */
8392 case LTU: /* CF=1 */
8393 case LEU: /* CF=1 | ZF=1 */
8394 return CCmode;
8395 /* Codes possibly doable only with sign flag when
8396 comparing against zero. */
8397 case GE: /* SF=OF or SF=0 */
8398 case LT: /* SF<>OF or SF=1 */
8399 if (op1 == const0_rtx)
8400 return CCGOCmode;
8401 else
8402 /* For other cases Carry flag is not required. */
8403 return CCGCmode;
8404 /* Codes doable only with the sign flag when comparing
8405 against zero, but we lack a jump instruction for it,
8406 so we need to use relational tests against the overflow
8407 flag, which thus needs to be zero. */
8408 case GT: /* ZF=0 & SF=OF */
8409 case LE: /* ZF=1 | SF<>OF */
8410 if (op1 == const0_rtx)
8411 return CCNOmode;
8412 else
8413 return CCGCmode;
8414 /* The strcmp pattern does (use flags), and combine may ask us for the
8415 proper mode. */
8416 case USE:
8417 return CCmode;
8418 default:
8419 abort ();
8420 }
8421 }
8422
8423 /* Return the fixed registers used for condition codes. */
8424
8425 static bool
8426 ix86_fixed_condition_code_regs (p1, p2)
8427 unsigned int *p1;
8428 unsigned int *p2;
8429 {
8430 *p1 = FLAGS_REG;
8431 *p2 = FPSR_REG;
8432 return true;
8433 }
8434
8435 /* If two condition code modes are compatible, return a condition code
8436 mode which is compatible with both. Otherwise, return
8437 VOIDmode. */
8438
8439 static enum machine_mode
8440 ix86_cc_modes_compatible (m1, m2)
8441 enum machine_mode m1;
8442 enum machine_mode m2;
8443 {
8444 if (m1 == m2)
8445 return m1;
8446
8447 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
8448 return VOIDmode;
8449
8450 if ((m1 == CCGCmode && m2 == CCGOCmode)
8451 || (m1 == CCGOCmode && m2 == CCGCmode))
8452 return CCGCmode;
8453
8454 switch (m1)
8455 {
8456 default:
8457 abort ();
8458
8459 case CCmode:
8460 case CCGCmode:
8461 case CCGOCmode:
8462 case CCNOmode:
8463 case CCZmode:
8464 switch (m2)
8465 {
8466 default:
8467 return VOIDmode;
8468
8469 case CCmode:
8470 case CCGCmode:
8471 case CCGOCmode:
8472 case CCNOmode:
8473 case CCZmode:
8474 return CCmode;
8475 }
8476
8477 case CCFPmode:
8478 case CCFPUmode:
8479 /* These are only compatible with themselves, which we already
8480 checked above. */
8481 return VOIDmode;
8482 }
8483 }
8484
8485 /* Return true if we should use an FCOMI instruction for this fp comparison. */
8486
8487 int
8488 ix86_use_fcomi_compare (code)
8489 enum rtx_code code ATTRIBUTE_UNUSED;
8490 {
8491 enum rtx_code swapped_code = swap_condition (code);
8492 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
8493 || (ix86_fp_comparison_cost (swapped_code)
8494 == ix86_fp_comparison_fcomi_cost (swapped_code)));
8495 }
8496
8497 /* Swap, force into registers, or otherwise massage the two operands
8498 to a fp comparison. The operands are updated in place; the new
8499 comparison code is returned. */
8500
8501 static enum rtx_code
8502 ix86_prepare_fp_compare_args (code, pop0, pop1)
8503 enum rtx_code code;
8504 rtx *pop0, *pop1;
8505 {
8506 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
8507 rtx op0 = *pop0, op1 = *pop1;
8508 enum machine_mode op_mode = GET_MODE (op0);
8509 int is_sse = SSE_REG_P (op0) | SSE_REG_P (op1);
8510
8511 /* All of the unordered compare instructions only work on registers.
8512 The same is true of the XFmode compare instructions. The same is
8513 true of the fcomi compare instructions. */
8514
8515 if (!is_sse
8516 && (fpcmp_mode == CCFPUmode
8517 || op_mode == XFmode
8518 || op_mode == TFmode
8519 || ix86_use_fcomi_compare (code)))
8520 {
8521 op0 = force_reg (op_mode, op0);
8522 op1 = force_reg (op_mode, op1);
8523 }
8524 else
8525 {
8526 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
8527 things around if they appear profitable, otherwise force op0
8528 into a register. */
8529
8530 if (standard_80387_constant_p (op0) == 0
8531 || (GET_CODE (op0) == MEM
8532 && ! (standard_80387_constant_p (op1) == 0
8533 || GET_CODE (op1) == MEM)))
8534 {
8535 rtx tmp;
8536 tmp = op0, op0 = op1, op1 = tmp;
8537 code = swap_condition (code);
8538 }
8539
8540 if (GET_CODE (op0) != REG)
8541 op0 = force_reg (op_mode, op0);
8542
8543 if (CONSTANT_P (op1))
8544 {
8545 if (standard_80387_constant_p (op1))
8546 op1 = force_reg (op_mode, op1);
8547 else
8548 op1 = validize_mem (force_const_mem (op_mode, op1));
8549 }
8550 }
8551
8552 /* Try to rearrange the comparison to make it cheaper. */
8553 if (ix86_fp_comparison_cost (code)
8554 > ix86_fp_comparison_cost (swap_condition (code))
8555 && (GET_CODE (op1) == REG || !no_new_pseudos))
8556 {
8557 rtx tmp;
8558 tmp = op0, op0 = op1, op1 = tmp;
8559 code = swap_condition (code);
8560 if (GET_CODE (op0) != REG)
8561 op0 = force_reg (op_mode, op0);
8562 }
8563
8564 *pop0 = op0;
8565 *pop1 = op1;
8566 return code;
8567 }
8568
8569 /* Convert the comparison codes we use to represent FP comparisons to the
8570 integer code that will result in a proper branch. Return UNKNOWN if no
8571 such code is available. */
8572 static enum rtx_code
8573 ix86_fp_compare_code_to_integer (code)
8574 enum rtx_code code;
8575 {
8576 switch (code)
8577 {
8578 case GT:
8579 return GTU;
8580 case GE:
8581 return GEU;
8582 case ORDERED:
8583 case UNORDERED:
8584 return code;
8585 break;
8586 case UNEQ:
8587 return EQ;
8588 break;
8589 case UNLT:
8590 return LTU;
8591 break;
8592 case UNLE:
8593 return LEU;
8594 break;
8595 case LTGT:
8596 return NE;
8597 break;
8598 default:
8599 return UNKNOWN;
8600 }
8601 }
8602
8603 /* Split comparison code CODE into comparisons we can do using branch
8604 instructions. BYPASS_CODE is the comparison code for a branch that will
8605 branch around FIRST_CODE and SECOND_CODE. If one of the branches
8606 is not required, its value is set to NIL.
8607 We never require more than two branches. */
8608 static void
8609 ix86_fp_comparison_codes (code, bypass_code, first_code, second_code)
8610 enum rtx_code code, *bypass_code, *first_code, *second_code;
8611 {
8612 *first_code = code;
8613 *bypass_code = NIL;
8614 *second_code = NIL;
8615
8616 /* The fcomi comparison sets flags as follows:
8617
8618 cmp ZF PF CF
8619 > 0 0 0
8620 < 0 0 1
8621 = 1 0 0
8622 un 1 1 1 */
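/* For instance, plain LT cannot be tested directly: "jb" would also be
taken for unordered operands (CF=1 there too), so LT below becomes UNLT
guarded by a bypass branch on UNORDERED (PF=1) that jumps around it.
NE is the opposite case: it must be true for unordered operands, so it
becomes LTGT plus a second branch on UNORDERED.  */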
8623
8624 switch (code)
8625 {
8626 case GT: /* GTU - CF=0 & ZF=0 */
8627 case GE: /* GEU - CF=0 */
8628 case ORDERED: /* PF=0 */
8629 case UNORDERED: /* PF=1 */
8630 case UNEQ: /* EQ - ZF=1 */
8631 case UNLT: /* LTU - CF=1 */
8632 case UNLE: /* LEU - CF=1 | ZF=1 */
8633 case LTGT: /* EQ - ZF=0 */
8634 break;
8635 case LT: /* LTU - CF=1 - fails on unordered */
8636 *first_code = UNLT;
8637 *bypass_code = UNORDERED;
8638 break;
8639 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
8640 *first_code = UNLE;
8641 *bypass_code = UNORDERED;
8642 break;
8643 case EQ: /* EQ - ZF=1 - fails on unordered */
8644 *first_code = UNEQ;
8645 *bypass_code = UNORDERED;
8646 break;
8647 case NE: /* NE - ZF=0 - fails on unordered */
8648 *first_code = LTGT;
8649 *second_code = UNORDERED;
8650 break;
8651 case UNGE: /* GEU - CF=0 - fails on unordered */
8652 *first_code = GE;
8653 *second_code = UNORDERED;
8654 break;
8655 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
8656 *first_code = GT;
8657 *second_code = UNORDERED;
8658 break;
8659 default:
8660 abort ();
8661 }
8662 if (!TARGET_IEEE_FP)
8663 {
8664 *second_code = NIL;
8665 *bypass_code = NIL;
8666 }
8667 }
8668
8669 /* Return the cost of a comparison done with fcom + arithmetic operations on AX.
8670 All following functions use the number of instructions as a cost metric.
8671 In the future this should be tweaked to compute bytes for optimize_size and
8672 take into account the performance of various instructions on various CPUs. */
8673 static int
8674 ix86_fp_comparison_arithmetics_cost (code)
8675 enum rtx_code code;
8676 {
8677 if (!TARGET_IEEE_FP)
8678 return 4;
8679 /* The cost of code output by ix86_expand_fp_compare. */
8680 switch (code)
8681 {
8682 case UNLE:
8683 case UNLT:
8684 case LTGT:
8685 case GT:
8686 case GE:
8687 case UNORDERED:
8688 case ORDERED:
8689 case UNEQ:
8690 return 4;
8691 break;
8692 case LT:
8693 case NE:
8694 case EQ:
8695 case UNGE:
8696 return 5;
8697 break;
8698 case LE:
8699 case UNGT:
8700 return 6;
8701 break;
8702 default:
8703 abort ();
8704 }
8705 }
8706
8707 /* Return cost of comparison done using fcomi operation.
8708 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8709 static int
8710 ix86_fp_comparison_fcomi_cost (code)
8711 enum rtx_code code;
8712 {
8713 enum rtx_code bypass_code, first_code, second_code;
8714 /* Return an arbitrarily high cost when the instruction is not supported - this
8715 prevents gcc from using it. */
8716 if (!TARGET_CMOVE)
8717 return 1024;
8718 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8719 return (bypass_code != NIL || second_code != NIL) + 2;
8720 }
8721
8722 /* Return cost of comparison done using sahf operation.
8723 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8724 static int
8725 ix86_fp_comparison_sahf_cost (code)
8726 enum rtx_code code;
8727 {
8728 enum rtx_code bypass_code, first_code, second_code;
8729 /* Return an arbitrarily high cost when the instruction is not preferred - this
8730 keeps gcc from using it. */
8731 if (!TARGET_USE_SAHF && !optimize_size)
8732 return 1024;
8733 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8734 return (bypass_code != NIL || second_code != NIL) + 3;
8735 }
8736
8737 /* Compute cost of the comparison done using any method.
8738 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8739 static int
8740 ix86_fp_comparison_cost (code)
8741 enum rtx_code code;
8742 {
8743 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
8744 int min;
8745
8746 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
8747 sahf_cost = ix86_fp_comparison_sahf_cost (code);
8748
8749 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
8750 if (min > sahf_cost)
8751 min = sahf_cost;
8752 if (min > fcomi_cost)
8753 min = fcomi_cost;
8754 return min;
8755 }
8756
8757 /* Generate insn patterns to do a floating point compare of OPERANDS. */
8758
8759 static rtx
8760 ix86_expand_fp_compare (code, op0, op1, scratch, second_test, bypass_test)
8761 enum rtx_code code;
8762 rtx op0, op1, scratch;
8763 rtx *second_test;
8764 rtx *bypass_test;
8765 {
8766 enum machine_mode fpcmp_mode, intcmp_mode;
8767 rtx tmp, tmp2;
8768 int cost = ix86_fp_comparison_cost (code);
8769 enum rtx_code bypass_code, first_code, second_code;
8770
8771 fpcmp_mode = ix86_fp_compare_mode (code);
8772 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
8773
8774 if (second_test)
8775 *second_test = NULL_RTX;
8776 if (bypass_test)
8777 *bypass_test = NULL_RTX;
8778
8779 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8780
8781 /* Do fcomi/sahf based test when profitable. */
8782 if ((bypass_code == NIL || bypass_test)
8783 && (second_code == NIL || second_test)
8784 && ix86_fp_comparison_arithmetics_cost (code) > cost)
8785 {
8786 if (TARGET_CMOVE)
8787 {
8788 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8789 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
8790 tmp);
8791 emit_insn (tmp);
8792 }
8793 else
8794 {
8795 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8796 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
8797 if (!scratch)
8798 scratch = gen_reg_rtx (HImode);
8799 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
8800 emit_insn (gen_x86_sahf_1 (scratch));
8801 }
8802
8803 /* The FP codes work out to act like unsigned. */
8804 intcmp_mode = fpcmp_mode;
8805 code = first_code;
8806 if (bypass_code != NIL)
8807 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
8808 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8809 const0_rtx);
8810 if (second_code != NIL)
8811 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
8812 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8813 const0_rtx);
8814 }
8815 else
8816 {
8817 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
8818 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8819 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
8820 if (!scratch)
8821 scratch = gen_reg_rtx (HImode);
8822 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
8823
8824 /* In the unordered case, we have to check C2 for NaN's, which
8825 doesn't happen to work out to anything nice combination-wise.
8826 So do some bit twiddling on the value we've got in AH to come
8827 up with an appropriate set of condition codes. */
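/* For reference: after fnstsw the FPU condition codes C0, C2 and C3 sit
in bits 0, 2 and 6 of the status word's high byte, which is the byte the
tests below operate on.  Hence the magic masks: 0x01 tests C0, 0x04
tests C2 (set for unordered/NaN results), 0x40 tests C3, 0x45 tests all
three at once, and so on.  */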
8828
8829 intcmp_mode = CCNOmode;
8830 switch (code)
8831 {
8832 case GT:
8833 case UNGT:
8834 if (code == GT || !TARGET_IEEE_FP)
8835 {
8836 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
8837 code = EQ;
8838 }
8839 else
8840 {
8841 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8842 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
8843 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
8844 intcmp_mode = CCmode;
8845 code = GEU;
8846 }
8847 break;
8848 case LT:
8849 case UNLT:
8850 if (code == LT && TARGET_IEEE_FP)
8851 {
8852 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8853 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
8854 intcmp_mode = CCmode;
8855 code = EQ;
8856 }
8857 else
8858 {
8859 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
8860 code = NE;
8861 }
8862 break;
8863 case GE:
8864 case UNGE:
8865 if (code == GE || !TARGET_IEEE_FP)
8866 {
8867 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
8868 code = EQ;
8869 }
8870 else
8871 {
8872 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8873 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
8874 GEN_INT (0x01)));
8875 code = NE;
8876 }
8877 break;
8878 case LE:
8879 case UNLE:
8880 if (code == LE && TARGET_IEEE_FP)
8881 {
8882 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8883 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
8884 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
8885 intcmp_mode = CCmode;
8886 code = LTU;
8887 }
8888 else
8889 {
8890 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
8891 code = NE;
8892 }
8893 break;
8894 case EQ:
8895 case UNEQ:
8896 if (code == EQ && TARGET_IEEE_FP)
8897 {
8898 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8899 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
8900 intcmp_mode = CCmode;
8901 code = EQ;
8902 }
8903 else
8904 {
8905 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
8906 code = NE;
8907 break;
8908 }
8909 break;
8910 case NE:
8911 case LTGT:
8912 if (code == NE && TARGET_IEEE_FP)
8913 {
8914 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8915 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
8916 GEN_INT (0x40)));
8917 code = NE;
8918 }
8919 else
8920 {
8921 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
8922 code = EQ;
8923 }
8924 break;
8925
8926 case UNORDERED:
8927 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
8928 code = NE;
8929 break;
8930 case ORDERED:
8931 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
8932 code = EQ;
8933 break;
8934
8935 default:
8936 abort ();
8937 }
8938 }
8939
8940 /* Return the test that should be put into the flags user, i.e.
8941 the bcc, scc, or cmov instruction. */
8942 return gen_rtx_fmt_ee (code, VOIDmode,
8943 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8944 const0_rtx);
8945 }
8946
8947 rtx
8948 ix86_expand_compare (code, second_test, bypass_test)
8949 enum rtx_code code;
8950 rtx *second_test, *bypass_test;
8951 {
8952 rtx op0, op1, ret;
8953 op0 = ix86_compare_op0;
8954 op1 = ix86_compare_op1;
8955
8956 if (second_test)
8957 *second_test = NULL_RTX;
8958 if (bypass_test)
8959 *bypass_test = NULL_RTX;
8960
8961 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
8962 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
8963 second_test, bypass_test);
8964 else
8965 ret = ix86_expand_int_compare (code, op0, op1);
8966
8967 return ret;
8968 }
8969
8970 /* Return true if the CODE will result in nontrivial jump sequence. */
8971 bool
8972 ix86_fp_jump_nontrivial_p (code)
8973 enum rtx_code code;
8974 {
8975 enum rtx_code bypass_code, first_code, second_code;
8976 if (!TARGET_CMOVE)
8977 return true;
8978 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8979 return bypass_code != NIL || second_code != NIL;
8980 }
8981
8982 void
8983 ix86_expand_branch (code, label)
8984 enum rtx_code code;
8985 rtx label;
8986 {
8987 rtx tmp;
8988
8989 switch (GET_MODE (ix86_compare_op0))
8990 {
8991 case QImode:
8992 case HImode:
8993 case SImode:
8994 simple:
8995 tmp = ix86_expand_compare (code, NULL, NULL);
8996 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
8997 gen_rtx_LABEL_REF (VOIDmode, label),
8998 pc_rtx);
8999 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
9000 return;
9001
9002 case SFmode:
9003 case DFmode:
9004 case XFmode:
9005 case TFmode:
9006 {
9007 rtvec vec;
9008 int use_fcomi;
9009 enum rtx_code bypass_code, first_code, second_code;
9010
9011 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
9012 &ix86_compare_op1);
9013
9014 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9015
9016 /* Check whether we will use the natural sequence with one jump. If
9017 so, we can expand the jump early. Otherwise delay expansion by
9018 creating a compound insn so as not to confuse the optimizers. */
9019 if (bypass_code == NIL && second_code == NIL
9020 && TARGET_CMOVE)
9021 {
9022 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
9023 gen_rtx_LABEL_REF (VOIDmode, label),
9024 pc_rtx, NULL_RTX);
9025 }
9026 else
9027 {
9028 tmp = gen_rtx_fmt_ee (code, VOIDmode,
9029 ix86_compare_op0, ix86_compare_op1);
9030 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
9031 gen_rtx_LABEL_REF (VOIDmode, label),
9032 pc_rtx);
9033 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
9034
9035 use_fcomi = ix86_use_fcomi_compare (code);
9036 vec = rtvec_alloc (3 + !use_fcomi);
9037 RTVEC_ELT (vec, 0) = tmp;
9038 RTVEC_ELT (vec, 1)
9039 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
9040 RTVEC_ELT (vec, 2)
9041 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
9042 if (! use_fcomi)
9043 RTVEC_ELT (vec, 3)
9044 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
9045
9046 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
9047 }
9048 return;
9049 }
9050
9051 case DImode:
9052 if (TARGET_64BIT)
9053 goto simple;
9054 /* Expand DImode branch into multiple compare+branch. */
9055 {
9056 rtx lo[2], hi[2], label2;
9057 enum rtx_code code1, code2, code3;
9058
9059 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
9060 {
9061 tmp = ix86_compare_op0;
9062 ix86_compare_op0 = ix86_compare_op1;
9063 ix86_compare_op1 = tmp;
9064 code = swap_condition (code);
9065 }
9066 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
9067 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
9068
9069 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
9070 avoid two branches. This costs one extra insn, so disable when
9071 optimizing for size. */
9072
9073 if ((code == EQ || code == NE)
9074 && (!optimize_size
9075 || hi[1] == const0_rtx || lo[1] == const0_rtx))
9076 {
9077 rtx xor0, xor1;
9078
9079 xor1 = hi[0];
9080 if (hi[1] != const0_rtx)
9081 xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
9082 NULL_RTX, 0, OPTAB_WIDEN);
9083
9084 xor0 = lo[0];
9085 if (lo[1] != const0_rtx)
9086 xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
9087 NULL_RTX, 0, OPTAB_WIDEN);
9088
9089 tmp = expand_binop (SImode, ior_optab, xor1, xor0,
9090 NULL_RTX, 0, OPTAB_WIDEN);
9091
9092 ix86_compare_op0 = tmp;
9093 ix86_compare_op1 = const0_rtx;
9094 ix86_expand_branch (code, label);
9095 return;
9096 }
9097
9098 /* Otherwise, if we are doing a less-than or greater-or-equal comparison,
9099 op1 is a constant, and its low word is zero, then we can just
9100 examine the high word. */
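/* This is valid because the low word of op1 is zero: when the high words
differ they alone decide the comparison, and when they are equal,
lo(op0) (an unsigned quantity) can never be less than zero, so only the
high-word comparison matters.  */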
9101
9102 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
9103 switch (code)
9104 {
9105 case LT: case LTU: case GE: case GEU:
9106 ix86_compare_op0 = hi[0];
9107 ix86_compare_op1 = hi[1];
9108 ix86_expand_branch (code, label);
9109 return;
9110 default:
9111 break;
9112 }
9113
9114 /* Otherwise, we need two or three jumps. */
9115
9116 label2 = gen_label_rtx ();
9117
9118 code1 = code;
9119 code2 = swap_condition (code);
9120 code3 = unsigned_condition (code);
9121
9122 switch (code)
9123 {
9124 case LT: case GT: case LTU: case GTU:
9125 break;
9126
9127 case LE: code1 = LT; code2 = GT; break;
9128 case GE: code1 = GT; code2 = LT; break;
9129 case LEU: code1 = LTU; code2 = GTU; break;
9130 case GEU: code1 = GTU; code2 = LTU; break;
9131
9132 case EQ: code1 = NIL; code2 = NE; break;
9133 case NE: code2 = NIL; break;
9134
9135 default:
9136 abort ();
9137 }
9138
9139 /*
9140 * a < b =>
9141 * if (hi(a) < hi(b)) goto true;
9142 * if (hi(a) > hi(b)) goto false;
9143 * if (lo(a) < lo(b)) goto true;
9144 * false:
9145 */
9146
9147 ix86_compare_op0 = hi[0];
9148 ix86_compare_op1 = hi[1];
9149
9150 if (code1 != NIL)
9151 ix86_expand_branch (code1, label);
9152 if (code2 != NIL)
9153 ix86_expand_branch (code2, label2);
9154
9155 ix86_compare_op0 = lo[0];
9156 ix86_compare_op1 = lo[1];
9157 ix86_expand_branch (code3, label);
9158
9159 if (code2 != NIL)
9160 emit_label (label2);
9161 return;
9162 }
9163
9164 default:
9165 abort ();
9166 }
9167 }
9168
9169 /* Split branch based on floating point condition. */
9170 void
9171 ix86_split_fp_branch (code, op1, op2, target1, target2, tmp)
9172 enum rtx_code code;
9173 rtx op1, op2, target1, target2, tmp;
9174 {
9175 rtx second, bypass;
9176 rtx label = NULL_RTX;
9177 rtx condition;
9178 int bypass_probability = -1, second_probability = -1, probability = -1;
9179 rtx i;
9180
9181 if (target2 != pc_rtx)
9182 {
9183 rtx tmp = target2;
9184 code = reverse_condition_maybe_unordered (code);
9185 target2 = target1;
9186 target1 = tmp;
9187 }
9188
9189 condition = ix86_expand_fp_compare (code, op1, op2,
9190 tmp, &second, &bypass);
9191
9192 if (split_branch_probability >= 0)
9193 {
9194 /* Distribute the probabilities across the jumps.
9195 Assume that BYPASS and SECOND always test
9196 for UNORDERED. */
9197 probability = split_branch_probability;
9198
9199 /* A value of 1 is low enough that the probability does not need
9200 to be updated. Later we may run some experiments and see
9201 if unordered values are more frequent in practice. */
9202 if (bypass)
9203 bypass_probability = 1;
9204 if (second)
9205 second_probability = 1;
9206 }
9207 if (bypass != NULL_RTX)
9208 {
9209 label = gen_label_rtx ();
9210 i = emit_jump_insn (gen_rtx_SET
9211 (VOIDmode, pc_rtx,
9212 gen_rtx_IF_THEN_ELSE (VOIDmode,
9213 bypass,
9214 gen_rtx_LABEL_REF (VOIDmode,
9215 label),
9216 pc_rtx)));
9217 if (bypass_probability >= 0)
9218 REG_NOTES (i)
9219 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9220 GEN_INT (bypass_probability),
9221 REG_NOTES (i));
9222 }
9223 i = emit_jump_insn (gen_rtx_SET
9224 (VOIDmode, pc_rtx,
9225 gen_rtx_IF_THEN_ELSE (VOIDmode,
9226 condition, target1, target2)));
9227 if (probability >= 0)
9228 REG_NOTES (i)
9229 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9230 GEN_INT (probability),
9231 REG_NOTES (i));
9232 if (second != NULL_RTX)
9233 {
9234 i = emit_jump_insn (gen_rtx_SET
9235 (VOIDmode, pc_rtx,
9236 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
9237 target2)));
9238 if (second_probability >= 0)
9239 REG_NOTES (i)
9240 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9241 GEN_INT (second_probability),
9242 REG_NOTES (i));
9243 }
9244 if (label != NULL_RTX)
9245 emit_label (label);
9246 }
9247
9248 int
9249 ix86_expand_setcc (code, dest)
9250 enum rtx_code code;
9251 rtx dest;
9252 {
9253 rtx ret, tmp, tmpreg;
9254 rtx second_test, bypass_test;
9255
9256 if (GET_MODE (ix86_compare_op0) == DImode
9257 && !TARGET_64BIT)
9258 return 0; /* FAIL */
9259
9260 if (GET_MODE (dest) != QImode)
9261 abort ();
9262
9263 ret = ix86_expand_compare (code, &second_test, &bypass_test);
9264 PUT_MODE (ret, QImode);
9265
9266 tmp = dest;
9267 tmpreg = dest;
9268
9269 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
9270 if (bypass_test || second_test)
9271 {
9272 rtx test = second_test;
9273 int bypass = 0;
9274 rtx tmp2 = gen_reg_rtx (QImode);
9275 if (bypass_test)
9276 {
9277 if (second_test)
9278 abort ();
9279 test = bypass_test;
9280 bypass = 1;
9281 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
9282 }
9283 PUT_MODE (test, QImode);
9284 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
9285
9286 if (bypass)
9287 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
9288 else
9289 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
9290 }
9291
9292 return 1; /* DONE */
9293 }
9294
9295 int
9296 ix86_expand_int_movcc (operands)
9297 rtx operands[];
9298 {
9299 enum rtx_code code = GET_CODE (operands[1]), compare_code;
9300 rtx compare_seq, compare_op;
9301 rtx second_test, bypass_test;
9302 enum machine_mode mode = GET_MODE (operands[0]);
9303
9304 /* When the compare code is not LTU or GEU, we can not use the sbbl case.
9305 If the comparison is done with an immediate, we can convert it to LTU or
9306 GEU by altering the integer constant. */
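/* For example, the unsigned test "x <= 4" (LEU) becomes "x < 5" (LTU),
and GTU becomes GEU, by adding one to the constant; this is why
op1 == -1 (and 0x7fffffff for a DImode compare on x86-64, where the
immediate is 32-bit signed) is excluded below.  */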
9307
9308 if ((code == LEU || code == GTU)
9309 && GET_CODE (ix86_compare_op1) == CONST_INT
9310 && mode != HImode
9311 && INTVAL (ix86_compare_op1) != -1
9312 /* For x86-64, the immediate field in the instruction is 32-bit
9313 signed, so we can't increment a DImode value above 0x7fffffff. */
9314 && (!TARGET_64BIT
9315 || GET_MODE (ix86_compare_op0) != DImode
9316 || INTVAL (ix86_compare_op1) != 0x7fffffff)
9317 && GET_CODE (operands[2]) == CONST_INT
9318 && GET_CODE (operands[3]) == CONST_INT)
9319 {
9320 if (code == LEU)
9321 code = LTU;
9322 else
9323 code = GEU;
9324 ix86_compare_op1 = gen_int_mode (INTVAL (ix86_compare_op1) + 1,
9325 GET_MODE (ix86_compare_op0));
9326 }
9327
9328 start_sequence ();
9329 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9330 compare_seq = get_insns ();
9331 end_sequence ();
9332
9333 compare_code = GET_CODE (compare_op);
9334
9335 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
9336 HImode insns, we'd be swallowed in word prefix ops. */
9337
9338 if (mode != HImode
9339 && (mode != DImode || TARGET_64BIT)
9340 && GET_CODE (operands[2]) == CONST_INT
9341 && GET_CODE (operands[3]) == CONST_INT)
9342 {
9343 rtx out = operands[0];
9344 HOST_WIDE_INT ct = INTVAL (operands[2]);
9345 HOST_WIDE_INT cf = INTVAL (operands[3]);
9346 HOST_WIDE_INT diff;
9347
9348 if ((compare_code == LTU || compare_code == GEU)
9349 && !second_test && !bypass_test)
9350 {
9351 /* Detect overlap between destination and compare sources. */
9352 rtx tmp = out;
9353
9354 /* To simplify rest of code, restrict to the GEU case. */
9355 if (compare_code == LTU)
9356 {
9357 HOST_WIDE_INT tmp = ct;
9358 ct = cf;
9359 cf = tmp;
9360 compare_code = reverse_condition (compare_code);
9361 code = reverse_condition (code);
9362 }
9363 diff = ct - cf;
9364
9365 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
9366 || reg_overlap_mentioned_p (out, ix86_compare_op1))
9367 tmp = gen_reg_rtx (mode);
9368
9369 emit_insn (compare_seq);
9370 if (mode == DImode)
9371 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp));
9372 else
9373 emit_insn (gen_x86_movsicc_0_m1 (tmp));
9374
9375 if (diff == 1)
9376 {
9377 /*
9378 * cmpl op0,op1
9379 * sbbl dest,dest
9380 * [addl dest, ct]
9381 *
9382 * Size 5 - 8.
9383 */
9384 if (ct)
9385 tmp = expand_simple_binop (mode, PLUS,
9386 tmp, GEN_INT (ct),
9387 tmp, 1, OPTAB_DIRECT);
9388 }
9389 else if (cf == -1)
9390 {
9391 /*
9392 * cmpl op0,op1
9393 * sbbl dest,dest
9394 * orl $ct, dest
9395 *
9396 * Size 8.
9397 */
9398 tmp = expand_simple_binop (mode, IOR,
9399 tmp, GEN_INT (ct),
9400 tmp, 1, OPTAB_DIRECT);
9401 }
9402 else if (diff == -1 && ct)
9403 {
9404 /*
9405 * cmpl op0,op1
9406 * sbbl dest,dest
9407 * notl dest
9408 * [addl dest, cf]
9409 *
9410 * Size 8 - 11.
9411 */
9412 tmp = expand_simple_unop (mode, NOT, tmp, tmp, 1);
9413 if (cf)
9414 tmp = expand_simple_binop (mode, PLUS,
9415 tmp, GEN_INT (cf),
9416 tmp, 1, OPTAB_DIRECT);
9417 }
9418 else
9419 {
9420 /*
9421 * cmpl op0,op1
9422 * sbbl dest,dest
9423 * [notl dest]
9424 * andl cf - ct, dest
9425 * [addl dest, ct]
9426 *
9427 * Size 8 - 11.
9428 */
9429
9430 if (cf == 0)
9431 {
9432 cf = ct;
9433 ct = 0;
9434 tmp = expand_simple_unop (mode, NOT, tmp, tmp, 1);
9435 }
9436
9437 tmp = expand_simple_binop (mode, AND,
9438 tmp,
9439 gen_int_mode (cf - ct, mode),
9440 tmp, 1, OPTAB_DIRECT);
9441 if (ct)
9442 tmp = expand_simple_binop (mode, PLUS,
9443 tmp, GEN_INT (ct),
9444 tmp, 1, OPTAB_DIRECT);
9445 }
9446
9447 if (tmp != out)
9448 emit_move_insn (out, tmp);
9449
9450 return 1; /* DONE */
9451 }
9452
9453 diff = ct - cf;
9454 if (diff < 0)
9455 {
9456 HOST_WIDE_INT tmp;
9457 tmp = ct, ct = cf, cf = tmp;
9458 diff = -diff;
9459 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
9460 {
9461 /* We may be reversing an unordered compare to a normal compare, which
9462 is not valid in general (we may convert a non-trapping condition
9463 into a trapping one), but on i386 we currently emit all
9464 comparisons unordered. */
9465 compare_code = reverse_condition_maybe_unordered (compare_code);
9466 code = reverse_condition_maybe_unordered (code);
9467 }
9468 else
9469 {
9470 compare_code = reverse_condition (compare_code);
9471 code = reverse_condition (code);
9472 }
9473 }
9474
9475 compare_code = NIL;
9476 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
9477 && GET_CODE (ix86_compare_op1) == CONST_INT)
9478 {
9479 if (ix86_compare_op1 == const0_rtx
9480 && (code == LT || code == GE))
9481 compare_code = code;
9482 else if (ix86_compare_op1 == constm1_rtx)
9483 {
9484 if (code == LE)
9485 compare_code = LT;
9486 else if (code == GT)
9487 compare_code = GE;
9488 }
9489 }
9490
9491 /* Optimize dest = (op0 < 0) ? -1 : cf. */
9492 if (compare_code != NIL
9493 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
9494 && (cf == -1 || ct == -1))
9495 {
9496 	  /* If the lea code below could be used, only optimize
9497 	     if it results in a 2-insn sequence.  */
9498
9499 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
9500 || diff == 3 || diff == 5 || diff == 9)
9501 || (compare_code == LT && ct == -1)
9502 || (compare_code == GE && cf == -1))
9503 {
9504 /*
9505 * notl op1 (if necessary)
9506 * sarl $31, op1
9507 * orl cf, op1
9508 */
9509 if (ct != -1)
9510 {
9511 cf = ct;
9512 ct = -1;
9513 code = reverse_condition (code);
9514 }
9515
9516 out = emit_store_flag (out, code, ix86_compare_op0,
9517 ix86_compare_op1, VOIDmode, 0, -1);
9518
9519 out = expand_simple_binop (mode, IOR,
9520 out, GEN_INT (cf),
9521 out, 1, OPTAB_DIRECT);
9522 if (out != operands[0])
9523 emit_move_insn (operands[0], out);
9524
9525 return 1; /* DONE */
9526 }
9527 }
9528
9529 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
9530 || diff == 3 || diff == 5 || diff == 9)
9531 && (mode != DImode || x86_64_sign_extended_value (GEN_INT (cf))))
9532 {
9533 /*
9534 * xorl dest,dest
9535 * cmpl op1,op2
9536 * setcc dest
9537 * lea cf(dest*(ct-cf)),dest
9538 *
9539 * Size 14.
9540 *
9541 * This also catches the degenerate setcc-only case.
9542 */
9543
9544 rtx tmp;
9545 int nops;
9546
9547 out = emit_store_flag (out, code, ix86_compare_op0,
9548 ix86_compare_op1, VOIDmode, 0, 1);
9549
9550 nops = 0;
9551 /* On x86_64 the lea instruction operates on Pmode, so we need
9552 	 to get the arithmetic done in the proper mode to match.  */
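      /* DIFF is one of 1, 2, 3, 4, 5, 8 or 9; apart from the trivial
	 DIFF == 1 case, OUT * DIFF is built as OUT * (DIFF & ~1), plus OUT
	 when DIFF is odd, which lea can encode as index * scale plus base.  */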
9553 if (diff == 1)
9554 tmp = copy_rtx (out);
9555 else
9556 {
9557 rtx out1;
9558 out1 = copy_rtx (out);
9559 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
9560 nops++;
9561 if (diff & 1)
9562 {
9563 tmp = gen_rtx_PLUS (mode, tmp, out1);
9564 nops++;
9565 }
9566 }
9567 if (cf != 0)
9568 {
9569 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
9570 nops++;
9571 }
9572 if (tmp != out
9573 && (GET_CODE (tmp) != SUBREG || SUBREG_REG (tmp) != out))
9574 {
9575 if (nops == 1)
9576 out = force_operand (tmp, copy_rtx (out));
9577 else
9578 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
9579 }
9580 if (out != operands[0])
9581 emit_move_insn (operands[0], copy_rtx (out));
9582
9583 return 1; /* DONE */
9584 }
9585
9586 /*
9587 * General case: Jumpful:
9588 * xorl dest,dest cmpl op1, op2
9589 * cmpl op1, op2 movl ct, dest
9590 * setcc dest jcc 1f
9591 * decl dest movl cf, dest
9592 * andl (cf-ct),dest 1:
9593 * addl ct,dest
9594 *
9595 * Size 20. Size 14.
9596 *
9597 * This is reasonably steep, but branch mispredict costs are
9598 * high on modern cpus, so consider failing only if optimizing
9599 * for space.
9600 *
9601 * %%% Parameterize branch_cost on the tuning architecture, then
9602 * use that. The 80386 couldn't care less about mispredicts.
9603 */
9604
9605 if (!optimize_size && !TARGET_CMOVE)
9606 {
9607 if (cf == 0)
9608 {
9609 cf = ct;
9610 ct = 0;
9611 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
9612 	    /* We may be reversing an unordered compare to a normal compare,
9613 	       which is not valid in general (we may convert a non-trapping
9614 	       condition to a trapping one); however, on i386 we currently
9615 	       emit all comparisons unordered.  */
9616 code = reverse_condition_maybe_unordered (code);
9617 else
9618 {
9619 code = reverse_condition (code);
9620 if (compare_code != NIL)
9621 compare_code = reverse_condition (compare_code);
9622 }
9623 }
9624
9625 if (compare_code != NIL)
9626 {
9627 /* notl op1 (if needed)
9628 sarl $31, op1
9629 andl (cf-ct), op1
9630 addl ct, op1
9631
9632 For x < 0 (resp. x <= -1) there will be no notl,
9633 so if possible swap the constants to get rid of the
9634 complement.
9635 True/false will be -1/0 while code below (store flag
9636 followed by decrement) is 0/-1, so the constants need
9637 to be exchanged once more. */
9638
9639 if (compare_code == GE || !cf)
9640 {
9641 code = reverse_condition (code);
9642 compare_code = LT;
9643 }
9644 else
9645 {
9646 HOST_WIDE_INT tmp = cf;
9647 cf = ct;
9648 ct = tmp;
9649 }
9650
9651 out = emit_store_flag (out, code, ix86_compare_op0,
9652 ix86_compare_op1, VOIDmode, 0, -1);
9653 }
9654 else
9655 {
9656 out = emit_store_flag (out, code, ix86_compare_op0,
9657 ix86_compare_op1, VOIDmode, 0, 1);
9658
9659 out = expand_simple_binop (mode, PLUS, out, constm1_rtx,
9660 out, 1, OPTAB_DIRECT);
9661 }
9662
9663 out = expand_simple_binop (mode, AND, out,
9664 gen_int_mode (cf - ct, mode),
9665 out, 1, OPTAB_DIRECT);
9666 if (ct)
9667 out = expand_simple_binop (mode, PLUS, out, GEN_INT (ct),
9668 out, 1, OPTAB_DIRECT);
9669 if (out != operands[0])
9670 emit_move_insn (operands[0], out);
9671
9672 return 1; /* DONE */
9673 }
9674 }
9675
9676 if (!TARGET_CMOVE)
9677 {
9678       /* Try a few more things with specific constants and a variable.  */
9679
9680 optab op;
9681 rtx var, orig_out, out, tmp;
9682
9683 if (optimize_size)
9684 return 0; /* FAIL */
9685
9686 /* If one of the two operands is an interesting constant, load a
9687 constant with the above and mask it in with a logical operation. */
9688
9689 if (GET_CODE (operands[2]) == CONST_INT)
9690 {
9691 var = operands[3];
9692 if (INTVAL (operands[2]) == 0)
9693 operands[3] = constm1_rtx, op = and_optab;
9694 else if (INTVAL (operands[2]) == -1)
9695 operands[3] = const0_rtx, op = ior_optab;
9696 else
9697 return 0; /* FAIL */
9698 }
9699 else if (GET_CODE (operands[3]) == CONST_INT)
9700 {
9701 var = operands[2];
9702 if (INTVAL (operands[3]) == 0)
9703 operands[2] = constm1_rtx, op = and_optab;
9704 else if (INTVAL (operands[3]) == -1)
9705 operands[2] = const0_rtx, op = ior_optab;
9706 else
9707 return 0; /* FAIL */
9708 }
9709 else
9710 return 0; /* FAIL */
9711
9712 orig_out = operands[0];
9713 tmp = gen_reg_rtx (mode);
9714 operands[0] = tmp;
9715
9716 /* Recurse to get the constant loaded. */
9717 if (ix86_expand_int_movcc (operands) == 0)
9718 return 0; /* FAIL */
9719
9720 /* Mask in the interesting variable. */
9721 out = expand_binop (mode, op, var, tmp, orig_out, 0,
9722 OPTAB_WIDEN);
9723 if (out != orig_out)
9724 emit_move_insn (orig_out, out);
9725
9726 return 1; /* DONE */
9727 }
9728
9729 /*
9730 * For comparison with above,
9731 *
9732 * movl cf,dest
9733 * movl ct,tmp
9734 * cmpl op1,op2
9735 * cmovcc tmp,dest
9736 *
9737 * Size 15.
9738 */
9739
9740 if (! nonimmediate_operand (operands[2], mode))
9741 operands[2] = force_reg (mode, operands[2]);
9742 if (! nonimmediate_operand (operands[3], mode))
9743 operands[3] = force_reg (mode, operands[3]);
9744
9745 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
9746 {
9747 rtx tmp = gen_reg_rtx (mode);
9748 emit_move_insn (tmp, operands[3]);
9749 operands[3] = tmp;
9750 }
9751 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
9752 {
9753 rtx tmp = gen_reg_rtx (mode);
9754 emit_move_insn (tmp, operands[2]);
9755 operands[2] = tmp;
9756 }
9757 if (! register_operand (operands[2], VOIDmode)
9758 && ! register_operand (operands[3], VOIDmode))
9759 operands[2] = force_reg (mode, operands[2]);
9760
9761 emit_insn (compare_seq);
9762 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9763 gen_rtx_IF_THEN_ELSE (mode,
9764 compare_op, operands[2],
9765 operands[3])));
9766 if (bypass_test)
9767 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9768 gen_rtx_IF_THEN_ELSE (mode,
9769 bypass_test,
9770 operands[3],
9771 operands[0])));
9772 if (second_test)
9773 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9774 gen_rtx_IF_THEN_ELSE (mode,
9775 second_test,
9776 operands[2],
9777 operands[0])));
9778
9779 return 1; /* DONE */
9780 }
9781
9782 int
9783 ix86_expand_fp_movcc (operands)
9784 rtx operands[];
9785 {
9786 enum rtx_code code;
9787 rtx tmp;
9788 rtx compare_op, second_test, bypass_test;
9789
9790 /* For SF/DFmode conditional moves based on comparisons
9791      in the same mode, we may want to use SSE min/max instructions.  */
9792 if (((TARGET_SSE_MATH && GET_MODE (operands[0]) == SFmode)
9793 || (TARGET_SSE2 && TARGET_SSE_MATH && GET_MODE (operands[0]) == DFmode))
9794 && GET_MODE (ix86_compare_op0) == GET_MODE (operands[0])
9795       /* The SSE comparisons do not support the LTGT/UNEQ pair.  */
9796 && (!TARGET_IEEE_FP
9797 || (GET_CODE (operands[1]) != LTGT && GET_CODE (operands[1]) != UNEQ))
9798 /* We may be called from the post-reload splitter. */
9799 && (!REG_P (operands[0])
9800 || SSE_REG_P (operands[0])
9801 || REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER))
9802 {
9803 rtx op0 = ix86_compare_op0, op1 = ix86_compare_op1;
9804 code = GET_CODE (operands[1]);
9805
9806 /* See if we have (cross) match between comparison operands and
9807 conditional move operands. */
9808 if (rtx_equal_p (operands[2], op1))
9809 {
9810 rtx tmp = op0;
9811 op0 = op1;
9812 op1 = tmp;
9813 code = reverse_condition_maybe_unordered (code);
9814 }
9815 if (rtx_equal_p (operands[2], op0) && rtx_equal_p (operands[3], op1))
9816 {
9817 /* Check for min operation. */
9818 if (code == LT)
9819 {
9820 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
9821 if (memory_operand (op0, VOIDmode))
9822 op0 = force_reg (GET_MODE (operands[0]), op0);
9823 if (GET_MODE (operands[0]) == SFmode)
9824 emit_insn (gen_minsf3 (operands[0], op0, op1));
9825 else
9826 emit_insn (gen_mindf3 (operands[0], op0, op1));
9827 return 1;
9828 }
9829 /* Check for max operation. */
9830 if (code == GT)
9831 {
9832 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
9833 if (memory_operand (op0, VOIDmode))
9834 op0 = force_reg (GET_MODE (operands[0]), op0);
9835 if (GET_MODE (operands[0]) == SFmode)
9836 emit_insn (gen_maxsf3 (operands[0], op0, op1));
9837 else
9838 emit_insn (gen_maxdf3 (operands[0], op0, op1));
9839 return 1;
9840 }
9841 }
9842       /* Arrange for the condition to be an sse_comparison_operator.  When
9843          not in IEEE mode, also try to canonicalize the destination operand
9844          to be first in the comparison - this helps reload avoid extra
9845          moves.  */
9846 if (!sse_comparison_operator (operands[1], VOIDmode)
9847 || (rtx_equal_p (operands[0], ix86_compare_op1) && !TARGET_IEEE_FP))
9848 {
9849 rtx tmp = ix86_compare_op0;
9850 ix86_compare_op0 = ix86_compare_op1;
9851 ix86_compare_op1 = tmp;
9852 operands[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands[1])),
9853 VOIDmode, ix86_compare_op0,
9854 ix86_compare_op1);
9855 }
9856       /* Similarly, try to make the result the first operand of the
9857          conditional move.  We also don't support the NE comparison on SSE,
9858          so try to avoid it.  */
9859 if ((rtx_equal_p (operands[0], operands[3])
9860 && (!TARGET_IEEE_FP || GET_CODE (operands[1]) != EQ))
9861 || (GET_CODE (operands[1]) == NE && TARGET_IEEE_FP))
9862 {
9863 rtx tmp = operands[2];
9864 operands[2] = operands[3];
9865 operands[3] = tmp;
9866 operands[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
9867 (GET_CODE (operands[1])),
9868 VOIDmode, ix86_compare_op0,
9869 ix86_compare_op1);
9870 }
9871 if (GET_MODE (operands[0]) == SFmode)
9872 emit_insn (gen_sse_movsfcc (operands[0], operands[1],
9873 operands[2], operands[3],
9874 ix86_compare_op0, ix86_compare_op1));
9875 else
9876 emit_insn (gen_sse_movdfcc (operands[0], operands[1],
9877 operands[2], operands[3],
9878 ix86_compare_op0, ix86_compare_op1));
9879 return 1;
9880 }
9881
9882 /* The floating point conditional move instructions don't directly
9883 support conditions resulting from a signed integer comparison. */
9884
9885 code = GET_CODE (operands[1]);
9886 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9887
9888 /* The floating point conditional move instructions don't directly
9889 support signed integer comparisons. */
9890
9891 if (!fcmov_comparison_operator (compare_op, VOIDmode))
9892 {
9893 if (second_test != NULL || bypass_test != NULL)
9894 abort ();
9895 tmp = gen_reg_rtx (QImode);
9896 ix86_expand_setcc (code, tmp);
9897 code = NE;
9898 ix86_compare_op0 = tmp;
9899 ix86_compare_op1 = const0_rtx;
9900 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9901 }
9902 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
9903 {
9904 tmp = gen_reg_rtx (GET_MODE (operands[0]));
9905 emit_move_insn (tmp, operands[3]);
9906 operands[3] = tmp;
9907 }
9908 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
9909 {
9910 tmp = gen_reg_rtx (GET_MODE (operands[0]));
9911 emit_move_insn (tmp, operands[2]);
9912 operands[2] = tmp;
9913 }
9914
9915 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9916 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
9917 compare_op,
9918 operands[2],
9919 operands[3])));
9920 if (bypass_test)
9921 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9922 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
9923 bypass_test,
9924 operands[3],
9925 operands[0])));
9926 if (second_test)
9927 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9928 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
9929 second_test,
9930 operands[2],
9931 operands[0])));
9932
9933 return 1;
9934 }
9935
9936 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
9937    works for floating point parameters and non-offsettable memories.
9938    For pushes, it returns just stack offsets; the values will be saved
9939    in the right order.  At most three parts are generated.  */
9940
9941 static int
9942 ix86_split_to_parts (operand, parts, mode)
9943 rtx operand;
9944 rtx *parts;
9945 enum machine_mode mode;
9946 {
9947 int size;
9948
9949 if (!TARGET_64BIT)
9950 size = mode == TFmode ? 3 : (GET_MODE_SIZE (mode) / 4);
9951 else
9952 size = (GET_MODE_SIZE (mode) + 4) / 8;
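  /* On 32-bit targets this gives 2 SImode parts for DImode and DFmode and
     3 for XFmode and TFmode; on 64-bit targets it gives 2 DImode-sized
     parts for the 12- and 16-byte modes.  */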
9953
9954 if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
9955 abort ();
9956 if (size < 2 || size > 3)
9957 abort ();
9958
9959 /* Optimize constant pool reference to immediates. This is used by fp
9960      moves, which force all constants to memory to allow combining.  */
9961 if (GET_CODE (operand) == MEM && RTX_UNCHANGING_P (operand))
9962 {
9963 rtx tmp = maybe_get_pool_constant (operand);
9964 if (tmp)
9965 operand = tmp;
9966 }
9967
9968 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
9969 {
9970       /* The only non-offsettable memories we handle are pushes.  */
9971 if (! push_operand (operand, VOIDmode))
9972 abort ();
9973
9974 operand = copy_rtx (operand);
9975 PUT_MODE (operand, Pmode);
9976 parts[0] = parts[1] = parts[2] = operand;
9977 }
9978 else if (!TARGET_64BIT)
9979 {
9980 if (mode == DImode)
9981 split_di (&operand, 1, &parts[0], &parts[1]);
9982 else
9983 {
9984 if (REG_P (operand))
9985 {
9986 if (!reload_completed)
9987 abort ();
9988 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
9989 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
9990 if (size == 3)
9991 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
9992 }
9993 else if (offsettable_memref_p (operand))
9994 {
9995 operand = adjust_address (operand, SImode, 0);
9996 parts[0] = operand;
9997 parts[1] = adjust_address (operand, SImode, 4);
9998 if (size == 3)
9999 parts[2] = adjust_address (operand, SImode, 8);
10000 }
10001 else if (GET_CODE (operand) == CONST_DOUBLE)
10002 {
10003 REAL_VALUE_TYPE r;
10004 long l[4];
10005
10006 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
10007 switch (mode)
10008 {
10009 case XFmode:
10010 case TFmode:
10011 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
10012 parts[2] = gen_int_mode (l[2], SImode);
10013 break;
10014 case DFmode:
10015 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
10016 break;
10017 default:
10018 abort ();
10019 }
10020 parts[1] = gen_int_mode (l[1], SImode);
10021 parts[0] = gen_int_mode (l[0], SImode);
10022 }
10023 else
10024 abort ();
10025 }
10026 }
10027 else
10028 {
10029 if (mode == TImode)
10030 split_ti (&operand, 1, &parts[0], &parts[1]);
10031 if (mode == XFmode || mode == TFmode)
10032 {
10033 if (REG_P (operand))
10034 {
10035 if (!reload_completed)
10036 abort ();
10037 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
10038 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
10039 }
10040 else if (offsettable_memref_p (operand))
10041 {
10042 operand = adjust_address (operand, DImode, 0);
10043 parts[0] = operand;
10044 parts[1] = adjust_address (operand, SImode, 8);
10045 }
10046 else if (GET_CODE (operand) == CONST_DOUBLE)
10047 {
10048 REAL_VALUE_TYPE r;
10049 long l[3];
10050
10051 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
10052 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
10053 /* Do not use shift by 32 to avoid warning on 32bit systems. */
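	    /* ((HOST_WIDE_INT) 2 << 31) - 1 is 0xffffffff and
	       (l[1] << 31) << 1 is l[1] << 32, each written without a
	       direct 32-bit shift.  */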
10054 if (HOST_BITS_PER_WIDE_INT >= 64)
10055 parts[0]
10056 = gen_int_mode
10057 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
10058 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
10059 DImode);
10060 else
10061 parts[0] = immed_double_const (l[0], l[1], DImode);
10062 parts[1] = gen_int_mode (l[2], SImode);
10063 }
10064 else
10065 abort ();
10066 }
10067 }
10068
10069 return size;
10070 }
10071
10072 /* Emit insns to perform a move or push of DI, DF, and XF values.
10073    All required insns are emitted directly.  Operands 2-4 are filled with
10074    the destination parts in the correct order; operands 5-7 with the
10075    source parts.  */
10076
10077 void
10078 ix86_split_long_move (operands)
10079 rtx operands[];
10080 {
10081 rtx part[2][3];
10082 int nparts;
10083 int push = 0;
10084 int collisions = 0;
10085 enum machine_mode mode = GET_MODE (operands[0]);
10086
10087   /* The DFmode expanders may ask us to move a double.
10088      For a 64-bit target this is a single move.  By hiding that fact
10089      here we simplify the i386.md splitters.  */
10090 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
10091 {
10092 /* Optimize constant pool reference to immediates. This is used by
10093 fp moves, that force all constants to memory to allow combining. */
10094
10095 if (GET_CODE (operands[1]) == MEM
10096 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
10097 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
10098 operands[1] = get_pool_constant (XEXP (operands[1], 0));
10099 if (push_operand (operands[0], VOIDmode))
10100 {
10101 operands[0] = copy_rtx (operands[0]);
10102 PUT_MODE (operands[0], Pmode);
10103 }
10104 else
10105 operands[0] = gen_lowpart (DImode, operands[0]);
10106 operands[1] = gen_lowpart (DImode, operands[1]);
10107 emit_move_insn (operands[0], operands[1]);
10108 return;
10109 }
10110
10111 /* The only non-offsettable memory we handle is push. */
10112 if (push_operand (operands[0], VOIDmode))
10113 push = 1;
10114 else if (GET_CODE (operands[0]) == MEM
10115 && ! offsettable_memref_p (operands[0]))
10116 abort ();
10117
10118 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
10119 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
10120
10121   /* When emitting a push, take care of source operands on the stack.  */
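  /* Each push below moves the stack pointer, so when the source is
     addressed through it, re-basing a lower part on the address of the
     part above it compensates for the push that precedes its move.  */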
10122 if (push && GET_CODE (operands[1]) == MEM
10123 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
10124 {
10125 if (nparts == 3)
10126 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
10127 XEXP (part[1][2], 0));
10128 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
10129 XEXP (part[1][1], 0));
10130 }
10131
10132   /* We need to do the copy in the right order in case an address register
10133 of the source overlaps the destination. */
10134 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
10135 {
10136 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
10137 collisions++;
10138 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
10139 collisions++;
10140 if (nparts == 3
10141 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
10142 collisions++;
10143
10144 /* Collision in the middle part can be handled by reordering. */
10145 if (collisions == 1 && nparts == 3
10146 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
10147 {
10148 rtx tmp;
10149 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
10150 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
10151 }
10152
10153       /* If there are more collisions, we can't handle them by reordering.
10154 Do an lea to the last part and use only one colliding move. */
10155 else if (collisions > 1)
10156 {
10157 rtx base;
10158
10159 collisions = 1;
10160
10161 base = part[0][nparts - 1];
10162
10163 /* Handle the case when the last part isn't valid for lea.
10164 Happens in 64-bit mode storing the 12-byte XFmode. */
10165 if (GET_MODE (base) != Pmode)
10166 base = gen_rtx_REG (Pmode, REGNO (base));
10167
10168 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
10169 part[1][0] = replace_equiv_address (part[1][0], base);
10170 part[1][1] = replace_equiv_address (part[1][1],
10171 plus_constant (base, UNITS_PER_WORD));
10172 if (nparts == 3)
10173 part[1][2] = replace_equiv_address (part[1][2],
10174 plus_constant (base, 8));
10175 }
10176 }
10177
10178 if (push)
10179 {
10180 if (!TARGET_64BIT)
10181 {
10182 if (nparts == 3)
10183 {
10184 	      /* We use only the first 12 bytes of the TFmode value, but for
10185 	         pushing we are required to adjust the stack as if we were
10186 	         pushing a real 16-byte value.  */
10187 if (mode == TFmode && !TARGET_64BIT)
10188 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
10189 GEN_INT (-4)));
10190 emit_move_insn (part[0][2], part[1][2]);
10191 }
10192 }
10193 else
10194 {
10195 	  /* In 64-bit mode we don't have a 32-bit push available.  If this is
10196 	     a register, it is OK - we will just use the larger counterpart.  We
10197 	     also retype memory - this comes from an attempt to avoid the REX
10198 	     prefix when moving the second half of a TFmode value.  */
10199 if (GET_MODE (part[1][1]) == SImode)
10200 {
10201 if (GET_CODE (part[1][1]) == MEM)
10202 part[1][1] = adjust_address (part[1][1], DImode, 0);
10203 else if (REG_P (part[1][1]))
10204 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
10205 else
10206 abort ();
10207 if (GET_MODE (part[1][0]) == SImode)
10208 part[1][0] = part[1][1];
10209 }
10210 }
10211 emit_move_insn (part[0][1], part[1][1]);
10212 emit_move_insn (part[0][0], part[1][0]);
10213 return;
10214 }
10215
10216   /* Choose the correct order so we do not overwrite the source before it is copied.  */
10217 if ((REG_P (part[0][0])
10218 && REG_P (part[1][1])
10219 && (REGNO (part[0][0]) == REGNO (part[1][1])
10220 || (nparts == 3
10221 && REGNO (part[0][0]) == REGNO (part[1][2]))))
10222 || (collisions > 0
10223 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
10224 {
10225 if (nparts == 3)
10226 {
10227 operands[2] = part[0][2];
10228 operands[3] = part[0][1];
10229 operands[4] = part[0][0];
10230 operands[5] = part[1][2];
10231 operands[6] = part[1][1];
10232 operands[7] = part[1][0];
10233 }
10234 else
10235 {
10236 operands[2] = part[0][1];
10237 operands[3] = part[0][0];
10238 operands[5] = part[1][1];
10239 operands[6] = part[1][0];
10240 }
10241 }
10242 else
10243 {
10244 if (nparts == 3)
10245 {
10246 operands[2] = part[0][0];
10247 operands[3] = part[0][1];
10248 operands[4] = part[0][2];
10249 operands[5] = part[1][0];
10250 operands[6] = part[1][1];
10251 operands[7] = part[1][2];
10252 }
10253 else
10254 {
10255 operands[2] = part[0][0];
10256 operands[3] = part[0][1];
10257 operands[5] = part[1][0];
10258 operands[6] = part[1][1];
10259 }
10260 }
10261 emit_move_insn (operands[2], operands[5]);
10262 emit_move_insn (operands[3], operands[6]);
10263 if (nparts == 3)
10264 emit_move_insn (operands[4], operands[7]);
10265
10266 return;
10267 }
10268
10269 void
10270 ix86_split_ashldi (operands, scratch)
10271 rtx *operands, scratch;
10272 {
10273 rtx low[2], high[2];
10274 int count;
10275
10276 if (GET_CODE (operands[2]) == CONST_INT)
10277 {
10278 split_di (operands, 2, low, high);
10279 count = INTVAL (operands[2]) & 63;
10280
10281 if (count >= 32)
10282 {
10283 emit_move_insn (high[0], low[1]);
10284 emit_move_insn (low[0], const0_rtx);
10285
10286 if (count > 32)
10287 emit_insn (gen_ashlsi3 (high[0], high[0], GEN_INT (count - 32)));
10288 }
10289 else
10290 {
10291 if (!rtx_equal_p (operands[0], operands[1]))
10292 emit_move_insn (operands[0], operands[1]);
10293 emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
10294 emit_insn (gen_ashlsi3 (low[0], low[0], GEN_INT (count)));
10295 }
10296 }
10297 else
10298 {
10299 if (!rtx_equal_p (operands[0], operands[1]))
10300 emit_move_insn (operands[0], operands[1]);
10301
10302 split_di (operands, 1, low, high);
10303
10304 emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
10305 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
10306
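      /* The shld/sal pair above only honours the low 5 bits of the count, so
	 gen_x86_shift_adj_1/_2 below additionally handle variable counts of
	 32..63 by moving the low word into the high word and clearing the
	 low word when bit 5 of the count is set.  */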
10307 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
10308 {
10309 if (! no_new_pseudos)
10310 scratch = force_reg (SImode, const0_rtx);
10311 else
10312 emit_move_insn (scratch, const0_rtx);
10313
10314 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2],
10315 scratch));
10316 }
10317 else
10318 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
10319 }
10320 }
10321
10322 void
10323 ix86_split_ashrdi (operands, scratch)
10324 rtx *operands, scratch;
10325 {
10326 rtx low[2], high[2];
10327 int count;
10328
10329 if (GET_CODE (operands[2]) == CONST_INT)
10330 {
10331 split_di (operands, 2, low, high);
10332 count = INTVAL (operands[2]) & 63;
10333
10334 if (count >= 32)
10335 {
10336 emit_move_insn (low[0], high[1]);
10337
10338 if (! reload_completed)
10339 emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31)));
10340 else
10341 {
10342 emit_move_insn (high[0], low[0]);
10343 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
10344 }
10345
10346 if (count > 32)
10347 emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
10348 }
10349 else
10350 {
10351 if (!rtx_equal_p (operands[0], operands[1]))
10352 emit_move_insn (operands[0], operands[1]);
10353 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
10354 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
10355 }
10356 }
10357 else
10358 {
10359 if (!rtx_equal_p (operands[0], operands[1]))
10360 emit_move_insn (operands[0], operands[1]);
10361
10362 split_di (operands, 1, low, high);
10363
10364 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
10365 emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));
10366
10367 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
10368 {
10369 if (! no_new_pseudos)
10370 scratch = gen_reg_rtx (SImode);
10371 emit_move_insn (scratch, high[0]);
10372 emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
10373 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
10374 scratch));
10375 }
10376 else
10377 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
10378 }
10379 }
10380
10381 void
10382 ix86_split_lshrdi (operands, scratch)
10383 rtx *operands, scratch;
10384 {
10385 rtx low[2], high[2];
10386 int count;
10387
10388 if (GET_CODE (operands[2]) == CONST_INT)
10389 {
10390 split_di (operands, 2, low, high);
10391 count = INTVAL (operands[2]) & 63;
10392
10393 if (count >= 32)
10394 {
10395 emit_move_insn (low[0], high[1]);
10396 emit_move_insn (high[0], const0_rtx);
10397
10398 if (count > 32)
10399 emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
10400 }
10401 else
10402 {
10403 if (!rtx_equal_p (operands[0], operands[1]))
10404 emit_move_insn (operands[0], operands[1]);
10405 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
10406 emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
10407 }
10408 }
10409 else
10410 {
10411 if (!rtx_equal_p (operands[0], operands[1]))
10412 emit_move_insn (operands[0], operands[1]);
10413
10414 split_di (operands, 1, low, high);
10415
10416 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
10417 emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));
10418
10419 /* Heh. By reversing the arguments, we can reuse this pattern. */
10420 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
10421 {
10422 if (! no_new_pseudos)
10423 scratch = force_reg (SImode, const0_rtx);
10424 else
10425 emit_move_insn (scratch, const0_rtx);
10426
10427 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
10428 scratch));
10429 }
10430 else
10431 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
10432 }
10433 }
10434
10435 /* Helper function for the string operations below.  Test whether VARIABLE
10436    is aligned to VALUE bytes; if so, jump to the label that is returned.  */
10437 static rtx
10438 ix86_expand_aligntest (variable, value)
10439 rtx variable;
10440 int value;
10441 {
10442 rtx label = gen_label_rtx ();
10443 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
10444 if (GET_MODE (variable) == DImode)
10445 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
10446 else
10447 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
10448 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
10449 1, label);
10450 return label;
10451 }
10452
10453 /* Decrease COUNTREG by VALUE.  */
10454 static void
10455 ix86_adjust_counter (countreg, value)
10456 rtx countreg;
10457 HOST_WIDE_INT value;
10458 {
10459 if (GET_MODE (countreg) == DImode)
10460 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
10461 else
10462 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
10463 }
10464
10465 /* Zero-extend a possibly-SImode EXP to a Pmode register.  */
10466 rtx
10467 ix86_zero_extend_to_Pmode (exp)
10468 rtx exp;
10469 {
10470 rtx r;
10471 if (GET_MODE (exp) == VOIDmode)
10472 return force_reg (Pmode, exp);
10473 if (GET_MODE (exp) == Pmode)
10474 return copy_to_mode_reg (Pmode, exp);
10475 r = gen_reg_rtx (Pmode);
10476 emit_insn (gen_zero_extendsidi2 (r, exp));
10477 return r;
10478 }
10479
10480 /* Expand string move (memcpy) operation. Use i386 string operations when
10481 profitable. expand_clrstr contains similar code. */
10482 int
10483 ix86_expand_movstr (dst, src, count_exp, align_exp)
10484 rtx dst, src, count_exp, align_exp;
10485 {
10486 rtx srcreg, destreg, countreg;
10487 enum machine_mode counter_mode;
10488 HOST_WIDE_INT align = 0;
10489 unsigned HOST_WIDE_INT count = 0;
10490 rtx insns;
10491
10492 start_sequence ();
10493
10494 if (GET_CODE (align_exp) == CONST_INT)
10495 align = INTVAL (align_exp);
10496
10497 /* This simple hack avoids all inlining code and simplifies code below. */
10498 if (!TARGET_ALIGN_STRINGOPS)
10499 align = 64;
10500
10501 if (GET_CODE (count_exp) == CONST_INT)
10502 count = INTVAL (count_exp);
10503
10504   /* Figure out the proper mode for the counter.  For 32 bits it is
10505      always SImode; for 64 bits use SImode when possible, otherwise DImode.
10506      Set count to the number of bytes copied when known at compile time.  */
10507 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
10508 || x86_64_zero_extended_value (count_exp))
10509 counter_mode = SImode;
10510 else
10511 counter_mode = DImode;
10512
10513 if (counter_mode != SImode && counter_mode != DImode)
10514 abort ();
10515
10516 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
10517 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
10518
10519 emit_insn (gen_cld ());
10520
10521 /* When optimizing for size emit simple rep ; movsb instruction for
10522 counts not divisible by 4. */
10523
10524 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
10525 {
10526 countreg = ix86_zero_extend_to_Pmode (count_exp);
10527 if (TARGET_64BIT)
10528 emit_insn (gen_rep_movqi_rex64 (destreg, srcreg, countreg,
10529 destreg, srcreg, countreg));
10530 else
10531 emit_insn (gen_rep_movqi (destreg, srcreg, countreg,
10532 destreg, srcreg, countreg));
10533 }
10534
10535 /* For constant aligned (or small unaligned) copies use rep movsl
10536 followed by code copying the rest. For PentiumPro ensure 8 byte
10537 alignment to allow rep movsl acceleration. */
10538
10539 else if (count != 0
10540 && (align >= 8
10541 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
10542 || optimize_size || count < (unsigned int) 64))
10543 {
10544 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
10545 if (count & ~(size - 1))
10546 {
10547 countreg = copy_to_mode_reg (counter_mode,
10548 GEN_INT ((count >> (size == 4 ? 2 : 3))
10549 & (TARGET_64BIT ? -1 : 0x3fffffff)));
10550 countreg = ix86_zero_extend_to_Pmode (countreg);
10551 if (size == 4)
10552 {
10553 if (TARGET_64BIT)
10554 emit_insn (gen_rep_movsi_rex64 (destreg, srcreg, countreg,
10555 destreg, srcreg, countreg));
10556 else
10557 emit_insn (gen_rep_movsi (destreg, srcreg, countreg,
10558 destreg, srcreg, countreg));
10559 }
10560 else
10561 emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg,
10562 destreg, srcreg, countreg));
10563 }
10564 if (size == 8 && (count & 0x04))
10565 emit_insn (gen_strmovsi (destreg, srcreg));
10566 if (count & 0x02)
10567 emit_insn (gen_strmovhi (destreg, srcreg));
10568 if (count & 0x01)
10569 emit_insn (gen_strmovqi (destreg, srcreg));
10570 }
10571 /* The generic code based on the glibc implementation:
10572 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
10573 allowing accelerated copying there)
10574 - copy the data using rep movsl
10575 - copy the rest. */
10576 else
10577 {
10578 rtx countreg2;
10579 rtx label = NULL;
10580 int desired_alignment = (TARGET_PENTIUMPRO
10581 && (count == 0 || count >= (unsigned int) 260)
10582 ? 8 : UNITS_PER_WORD);
10583
10584 /* In case we don't know anything about the alignment, default to
10585          the library version, since it is usually equally fast and results in
10586 shorter code. */
10587 if (!TARGET_INLINE_ALL_STRINGOPS && align < UNITS_PER_WORD)
10588 {
10589 end_sequence ();
10590 return 0;
10591 }
10592
10593 if (TARGET_SINGLE_STRINGOP)
10594 emit_insn (gen_cld ());
10595
10596 countreg2 = gen_reg_rtx (Pmode);
10597 countreg = copy_to_mode_reg (counter_mode, count_exp);
10598
10599       /* We don't use loops to align the destination or to copy parts
10600          smaller than 4 bytes, because gcc is able to optimize such code
10601          better (when the destination or the count really is aligned, gcc
10602          is often able to predict the branches) and also it is friendlier
10603          to hardware branch prediction.
10604
10605          Using loops is beneficial for the generic case, because we can
10606          handle small counts using the loops.  Many CPUs (such as Athlon)
10607          have large REP prefix setup costs.
10608
10609          This is quite costly.  Maybe we can revisit this decision later
10610          or add some customizability to this code.  */
10611
10612 if (count == 0 && align < desired_alignment)
10613 {
10614 label = gen_label_rtx ();
10615 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
10616 LEU, 0, counter_mode, 1, label);
10617 }
10618 if (align <= 1)
10619 {
10620 rtx label = ix86_expand_aligntest (destreg, 1);
10621 emit_insn (gen_strmovqi (destreg, srcreg));
10622 ix86_adjust_counter (countreg, 1);
10623 emit_label (label);
10624 LABEL_NUSES (label) = 1;
10625 }
10626 if (align <= 2)
10627 {
10628 rtx label = ix86_expand_aligntest (destreg, 2);
10629 emit_insn (gen_strmovhi (destreg, srcreg));
10630 ix86_adjust_counter (countreg, 2);
10631 emit_label (label);
10632 LABEL_NUSES (label) = 1;
10633 }
10634 if (align <= 4 && desired_alignment > 4)
10635 {
10636 rtx label = ix86_expand_aligntest (destreg, 4);
10637 emit_insn (gen_strmovsi (destreg, srcreg));
10638 ix86_adjust_counter (countreg, 4);
10639 emit_label (label);
10640 LABEL_NUSES (label) = 1;
10641 }
10642
10643 if (label && desired_alignment > 4 && !TARGET_64BIT)
10644 {
10645 emit_label (label);
10646 LABEL_NUSES (label) = 1;
10647 label = NULL_RTX;
10648 }
10649 if (!TARGET_SINGLE_STRINGOP)
10650 emit_insn (gen_cld ());
10651 if (TARGET_64BIT)
10652 {
10653 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
10654 GEN_INT (3)));
10655 emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg2,
10656 destreg, srcreg, countreg2));
10657 }
10658 else
10659 {
10660 emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
10661 emit_insn (gen_rep_movsi (destreg, srcreg, countreg2,
10662 destreg, srcreg, countreg2));
10663 }
10664
10665 if (label)
10666 {
10667 emit_label (label);
10668 LABEL_NUSES (label) = 1;
10669 }
10670 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
10671 emit_insn (gen_strmovsi (destreg, srcreg));
10672 if ((align <= 4 || count == 0) && TARGET_64BIT)
10673 {
10674 rtx label = ix86_expand_aligntest (countreg, 4);
10675 emit_insn (gen_strmovsi (destreg, srcreg));
10676 emit_label (label);
10677 LABEL_NUSES (label) = 1;
10678 }
10679 if (align > 2 && count != 0 && (count & 2))
10680 emit_insn (gen_strmovhi (destreg, srcreg));
10681 if (align <= 2 || count == 0)
10682 {
10683 rtx label = ix86_expand_aligntest (countreg, 2);
10684 emit_insn (gen_strmovhi (destreg, srcreg));
10685 emit_label (label);
10686 LABEL_NUSES (label) = 1;
10687 }
10688 if (align > 1 && count != 0 && (count & 1))
10689 emit_insn (gen_strmovqi (destreg, srcreg));
10690 if (align <= 1 || count == 0)
10691 {
10692 rtx label = ix86_expand_aligntest (countreg, 1);
10693 emit_insn (gen_strmovqi (destreg, srcreg));
10694 emit_label (label);
10695 LABEL_NUSES (label) = 1;
10696 }
10697 }
10698
10699 insns = get_insns ();
10700 end_sequence ();
10701
10702 ix86_set_move_mem_attrs (insns, dst, src, destreg, srcreg);
10703 emit_insn (insns);
10704 return 1;
10705 }
10706
10707 /* Expand string clear operation (bzero). Use i386 string operations when
10708 profitable. expand_movstr contains similar code. */
10709 int
10710 ix86_expand_clrstr (src, count_exp, align_exp)
10711 rtx src, count_exp, align_exp;
10712 {
10713 rtx destreg, zeroreg, countreg;
10714 enum machine_mode counter_mode;
10715 HOST_WIDE_INT align = 0;
10716 unsigned HOST_WIDE_INT count = 0;
10717
10718 if (GET_CODE (align_exp) == CONST_INT)
10719 align = INTVAL (align_exp);
10720
10721 /* This simple hack avoids all inlining code and simplifies code below. */
10722 if (!TARGET_ALIGN_STRINGOPS)
10723 align = 32;
10724
10725 if (GET_CODE (count_exp) == CONST_INT)
10726 count = INTVAL (count_exp);
10727   /* Figure out the proper mode for the counter.  For 32 bits it is
10728      always SImode; for 64 bits use SImode when possible, otherwise DImode.
10729      Set count to the number of bytes cleared when known at compile time.  */
10730 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
10731 || x86_64_zero_extended_value (count_exp))
10732 counter_mode = SImode;
10733 else
10734 counter_mode = DImode;
10735
10736 destreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
10737
10738 emit_insn (gen_cld ());
10739
10740   /* When optimizing for size, emit a simple rep ; stosb instruction for
10741      counts not divisible by 4.  */
10742
10743 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
10744 {
10745 countreg = ix86_zero_extend_to_Pmode (count_exp);
10746 zeroreg = copy_to_mode_reg (QImode, const0_rtx);
10747 if (TARGET_64BIT)
10748 emit_insn (gen_rep_stosqi_rex64 (destreg, countreg, zeroreg,
10749 destreg, countreg));
10750 else
10751 emit_insn (gen_rep_stosqi (destreg, countreg, zeroreg,
10752 destreg, countreg));
10753 }
10754 else if (count != 0
10755 && (align >= 8
10756 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
10757 || optimize_size || count < (unsigned int) 64))
10758 {
10759 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
10760 zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
10761 if (count & ~(size - 1))
10762 {
10763 countreg = copy_to_mode_reg (counter_mode,
10764 GEN_INT ((count >> (size == 4 ? 2 : 3))
10765 & (TARGET_64BIT ? -1 : 0x3fffffff)));
10766 countreg = ix86_zero_extend_to_Pmode (countreg);
10767 if (size == 4)
10768 {
10769 if (TARGET_64BIT)
10770 emit_insn (gen_rep_stossi_rex64 (destreg, countreg, zeroreg,
10771 destreg, countreg));
10772 else
10773 emit_insn (gen_rep_stossi (destreg, countreg, zeroreg,
10774 destreg, countreg));
10775 }
10776 else
10777 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg, zeroreg,
10778 destreg, countreg));
10779 }
10780 if (size == 8 && (count & 0x04))
10781 emit_insn (gen_strsetsi (destreg,
10782 gen_rtx_SUBREG (SImode, zeroreg, 0)));
10783 if (count & 0x02)
10784 emit_insn (gen_strsethi (destreg,
10785 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10786 if (count & 0x01)
10787 emit_insn (gen_strsetqi (destreg,
10788 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10789 }
10790 else
10791 {
10792 rtx countreg2;
10793 rtx label = NULL;
10794 /* Compute desired alignment of the string operation. */
10795 int desired_alignment = (TARGET_PENTIUMPRO
10796 && (count == 0 || count >= (unsigned int) 260)
10797 ? 8 : UNITS_PER_WORD);
10798
10799 /* In case we don't know anything about the alignment, default to
10800          the library version, since it is usually equally fast and results in
10801 shorter code. */
10802 if (!TARGET_INLINE_ALL_STRINGOPS && align < UNITS_PER_WORD)
10803 return 0;
10804
10805 if (TARGET_SINGLE_STRINGOP)
10806 emit_insn (gen_cld ());
10807
10808 countreg2 = gen_reg_rtx (Pmode);
10809 countreg = copy_to_mode_reg (counter_mode, count_exp);
10810 zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
10811
10812 if (count == 0 && align < desired_alignment)
10813 {
10814 label = gen_label_rtx ();
10815 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
10816 LEU, 0, counter_mode, 1, label);
10817 }
10818 if (align <= 1)
10819 {
10820 rtx label = ix86_expand_aligntest (destreg, 1);
10821 emit_insn (gen_strsetqi (destreg,
10822 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10823 ix86_adjust_counter (countreg, 1);
10824 emit_label (label);
10825 LABEL_NUSES (label) = 1;
10826 }
10827 if (align <= 2)
10828 {
10829 rtx label = ix86_expand_aligntest (destreg, 2);
10830 emit_insn (gen_strsethi (destreg,
10831 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10832 ix86_adjust_counter (countreg, 2);
10833 emit_label (label);
10834 LABEL_NUSES (label) = 1;
10835 }
10836 if (align <= 4 && desired_alignment > 4)
10837 {
10838 rtx label = ix86_expand_aligntest (destreg, 4);
10839 emit_insn (gen_strsetsi (destreg, (TARGET_64BIT
10840 ? gen_rtx_SUBREG (SImode, zeroreg, 0)
10841 : zeroreg)));
10842 ix86_adjust_counter (countreg, 4);
10843 emit_label (label);
10844 LABEL_NUSES (label) = 1;
10845 }
10846
10847 if (label && desired_alignment > 4 && !TARGET_64BIT)
10848 {
10849 emit_label (label);
10850 LABEL_NUSES (label) = 1;
10851 label = NULL_RTX;
10852 }
10853
10854 if (!TARGET_SINGLE_STRINGOP)
10855 emit_insn (gen_cld ());
10856 if (TARGET_64BIT)
10857 {
10858 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
10859 GEN_INT (3)));
10860 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg2, zeroreg,
10861 destreg, countreg2));
10862 }
10863 else
10864 {
10865 emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
10866 emit_insn (gen_rep_stossi (destreg, countreg2, zeroreg,
10867 destreg, countreg2));
10868 }
10869 if (label)
10870 {
10871 emit_label (label);
10872 LABEL_NUSES (label) = 1;
10873 }
10874
10875 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
10876 emit_insn (gen_strsetsi (destreg,
10877 gen_rtx_SUBREG (SImode, zeroreg, 0)));
10878 if (TARGET_64BIT && (align <= 4 || count == 0))
10879 {
10880 rtx label = ix86_expand_aligntest (countreg, 4);
10881 emit_insn (gen_strsetsi (destreg,
10882 gen_rtx_SUBREG (SImode, zeroreg, 0)));
10883 emit_label (label);
10884 LABEL_NUSES (label) = 1;
10885 }
10886 if (align > 2 && count != 0 && (count & 2))
10887 emit_insn (gen_strsethi (destreg,
10888 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10889 if (align <= 2 || count == 0)
10890 {
10891 rtx label = ix86_expand_aligntest (countreg, 2);
10892 emit_insn (gen_strsethi (destreg,
10893 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10894 emit_label (label);
10895 LABEL_NUSES (label) = 1;
10896 }
10897 if (align > 1 && count != 0 && (count & 1))
10898 emit_insn (gen_strsetqi (destreg,
10899 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10900 if (align <= 1 || count == 0)
10901 {
10902 rtx label = ix86_expand_aligntest (countreg, 1);
10903 emit_insn (gen_strsetqi (destreg,
10904 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10905 emit_label (label);
10906 LABEL_NUSES (label) = 1;
10907 }
10908 }
10909 return 1;
10910 }
10911 /* Expand strlen. */
10912 int
10913 ix86_expand_strlen (out, src, eoschar, align)
10914 rtx out, src, eoschar, align;
10915 {
10916 rtx addr, scratch1, scratch2, scratch3, scratch4;
10917
10918   /* The generic case of the strlen expander is long.  Avoid expanding
10919      it unless TARGET_INLINE_ALL_STRINGOPS.  */
10920
10921 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
10922 && !TARGET_INLINE_ALL_STRINGOPS
10923 && !optimize_size
10924 && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
10925 return 0;
10926
10927 addr = force_reg (Pmode, XEXP (src, 0));
10928 scratch1 = gen_reg_rtx (Pmode);
10929
10930 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
10931 && !optimize_size)
10932 {
10933 /* Well it seems that some optimizer does not combine a call like
10934 foo(strlen(bar), strlen(bar));
10935          when the move and the subtraction are done here.  It does calculate
10936 the length just once when these instructions are done inside of
10937 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
10938 often used and I use one fewer register for the lifetime of
10939 output_strlen_unroll() this is better. */
10940
10941 emit_move_insn (out, addr);
10942
10943 ix86_expand_strlensi_unroll_1 (out, align);
10944
10945 /* strlensi_unroll_1 returns the address of the zero at the end of
10946 the string, like memchr(), so compute the length by subtracting
10947 the start address. */
10948 if (TARGET_64BIT)
10949 emit_insn (gen_subdi3 (out, out, addr));
10950 else
10951 emit_insn (gen_subsi3 (out, out, addr));
10952 }
10953 else
10954 {
10955 scratch2 = gen_reg_rtx (Pmode);
10956 scratch3 = gen_reg_rtx (Pmode);
10957 scratch4 = force_reg (Pmode, constm1_rtx);
10958
10959 emit_move_insn (scratch3, addr);
10960 eoschar = force_reg (QImode, eoschar);
10961
10962 emit_insn (gen_cld ());
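      /* After the repnz scasb in the patterns below, the count register,
	 which started at -1, holds -(length + 2); the one's complement
	 minus one computed afterwards recovers the length.  */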
10963 if (TARGET_64BIT)
10964 {
10965 emit_insn (gen_strlenqi_rex_1 (scratch1, scratch3, eoschar,
10966 align, scratch4, scratch3));
10967 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
10968 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
10969 }
10970 else
10971 {
10972 emit_insn (gen_strlenqi_1 (scratch1, scratch3, eoschar,
10973 align, scratch4, scratch3));
10974 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
10975 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
10976 }
10977 }
10978 return 1;
10979 }
10980
10981 /* Expand the appropriate insns for doing strlen if not just doing
10982 repnz; scasb
10983
10984 out = result, initialized with the start address
10985 align_rtx = alignment of the address.
10986    scratch = scratch register, initialized with the start address when
10987 not aligned, otherwise undefined
10988
10989    This is just the body.  It needs the initializations mentioned above
10990    and some address computation at the end.  These things are done in i386.md.  */
10991
10992 static void
10993 ix86_expand_strlensi_unroll_1 (out, align_rtx)
10994 rtx out, align_rtx;
10995 {
10996 int align;
10997 rtx tmp;
10998 rtx align_2_label = NULL_RTX;
10999 rtx align_3_label = NULL_RTX;
11000 rtx align_4_label = gen_label_rtx ();
11001 rtx end_0_label = gen_label_rtx ();
11002 rtx mem;
11003 rtx tmpreg = gen_reg_rtx (SImode);
11004 rtx scratch = gen_reg_rtx (SImode);
11005
11006 align = 0;
11007 if (GET_CODE (align_rtx) == CONST_INT)
11008 align = INTVAL (align_rtx);
11009
11010 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
11011
11012 /* Is there a known alignment and is it less than 4? */
11013 if (align < 4)
11014 {
11015 rtx scratch1 = gen_reg_rtx (Pmode);
11016 emit_move_insn (scratch1, out);
11017 /* Is there a known alignment and is it not 2? */
11018 if (align != 2)
11019 {
11020 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
11021 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
11022
11023 /* Leave just the 3 lower bits. */
11024 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
11025 NULL_RTX, 0, OPTAB_WIDEN);
11026
11027 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
11028 Pmode, 1, align_4_label);
11029 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), EQ, NULL,
11030 Pmode, 1, align_2_label);
11031 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), GTU, NULL,
11032 Pmode, 1, align_3_label);
11033 }
11034 else
11035 {
11036 /* Since the alignment is 2, we have to check 2 or 0 bytes;
11037 	     check whether it is aligned to 4 bytes.  */
11038
11039 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (2),
11040 NULL_RTX, 0, OPTAB_WIDEN);
11041
11042 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
11043 Pmode, 1, align_4_label);
11044 }
11045
11046 mem = gen_rtx_MEM (QImode, out);
11047
11048 /* Now compare the bytes. */
11049
11050       /* Compare the first n unaligned bytes on a byte-by-byte basis.  */
11051 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
11052 QImode, 1, end_0_label);
11053
11054 /* Increment the address. */
11055 if (TARGET_64BIT)
11056 emit_insn (gen_adddi3 (out, out, const1_rtx));
11057 else
11058 emit_insn (gen_addsi3 (out, out, const1_rtx));
11059
11060 /* Not needed with an alignment of 2 */
11061 if (align != 2)
11062 {
11063 emit_label (align_2_label);
11064
11065 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
11066 end_0_label);
11067
11068 if (TARGET_64BIT)
11069 emit_insn (gen_adddi3 (out, out, const1_rtx));
11070 else
11071 emit_insn (gen_addsi3 (out, out, const1_rtx));
11072
11073 emit_label (align_3_label);
11074 }
11075
11076 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
11077 end_0_label);
11078
11079 if (TARGET_64BIT)
11080 emit_insn (gen_adddi3 (out, out, const1_rtx));
11081 else
11082 emit_insn (gen_addsi3 (out, out, const1_rtx));
11083 }
11084
11085   /* Generate a loop to check 4 bytes at a time.  It is not a good idea
11086      to align this loop; doing so only makes the program larger and does
11087      not speed it up.  */
11088 emit_label (align_4_label);
11089
11090 mem = gen_rtx_MEM (SImode, out);
11091 emit_move_insn (scratch, mem);
11092 if (TARGET_64BIT)
11093 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
11094 else
11095 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
11096
11097   /* This formula yields a nonzero result iff one of the bytes is zero.
11098      This saves three branches inside the loop and many cycles.  */
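  /* The value computed below is
	(scratch - 0x01010101) & ~scratch & 0x80808080;
     its 0x80 bits are exact for every byte up to and including the first
     zero byte of SCRATCH, which is why the code further down can scan
     TMPREG from the low end to locate the terminating byte.  */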
11099
11100 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
11101 emit_insn (gen_one_cmplsi2 (scratch, scratch));
11102 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
11103 emit_insn (gen_andsi3 (tmpreg, tmpreg,
11104 gen_int_mode (0x80808080, SImode)));
11105 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
11106 align_4_label);
11107
11108 if (TARGET_CMOVE)
11109 {
11110 rtx reg = gen_reg_rtx (SImode);
11111 rtx reg2 = gen_reg_rtx (Pmode);
11112 emit_move_insn (reg, tmpreg);
11113 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
11114
11115 /* If zero is not in the first two bytes, move two bytes forward. */
11116 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
11117 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11118 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
11119 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
11120 gen_rtx_IF_THEN_ELSE (SImode, tmp,
11121 reg,
11122 tmpreg)));
11123 /* Emit lea manually to avoid clobbering of flags. */
11124 emit_insn (gen_rtx_SET (SImode, reg2,
11125 gen_rtx_PLUS (Pmode, out, GEN_INT (2))));
11126
11127 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11128 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
11129 emit_insn (gen_rtx_SET (VOIDmode, out,
11130 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
11131 reg2,
11132 out)));
11133
11134 }
11135 else
11136 {
11137 rtx end_2_label = gen_label_rtx ();
11138 /* Is zero in the first two bytes? */
11139
11140 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
11141 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11142 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
11143 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
11144 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
11145 pc_rtx);
11146 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
11147 JUMP_LABEL (tmp) = end_2_label;
11148
11149 /* Not in the first two. Move two bytes forward. */
11150 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
11151 if (TARGET_64BIT)
11152 emit_insn (gen_adddi3 (out, out, GEN_INT (2)));
11153 else
11154 emit_insn (gen_addsi3 (out, out, GEN_INT (2)));
11155
11156 emit_label (end_2_label);
11157
11158 }
11159
11160 /* Avoid branch in fixing the byte. */
11161 tmpreg = gen_lowpart (QImode, tmpreg);
11162 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
11163 if (TARGET_64BIT)
11164 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3)));
11165 else
11166 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3)));
11167
11168 emit_label (end_0_label);
11169 }
11170
11171 void
11172 ix86_expand_call (retval, fnaddr, callarg1, callarg2, pop)
11173 rtx retval, fnaddr, callarg1, callarg2, pop;
11174 {
11175 rtx use = NULL, call;
11176
11177 if (pop == const0_rtx)
11178 pop = NULL;
11179 if (TARGET_64BIT && pop)
11180 abort ();
11181
11182 #if TARGET_MACHO
11183 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
11184 fnaddr = machopic_indirect_call_target (fnaddr);
11185 #else
11186 /* Static functions and indirect calls don't need the pic register. */
11187 if (! TARGET_64BIT && flag_pic
11188 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
11189 && ! SYMBOL_REF_FLAG (XEXP (fnaddr, 0)))
11190 use_reg (&use, pic_offset_table_rtx);
11191
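  /* In the 64-bit ABI, %al is expected to hold an upper bound on the number
     of SSE registers used for a varargs call; CALLARG2 carries that count,
     and a negative value presumably means no such hint is needed.  */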
11192 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
11193 {
11194 rtx al = gen_rtx_REG (QImode, 0);
11195 emit_move_insn (al, callarg2);
11196 use_reg (&use, al);
11197 }
11198 #endif /* TARGET_MACHO */
11199
11200 if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
11201 {
11202 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
11203 fnaddr = gen_rtx_MEM (QImode, fnaddr);
11204 }
11205
11206 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
11207 if (retval)
11208 call = gen_rtx_SET (VOIDmode, retval, call);
11209 if (pop)
11210 {
11211 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
11212 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
11213 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
11214 }
11215
11216 call = emit_call_insn (call);
11217 if (use)
11218 CALL_INSN_FUNCTION_USAGE (call) = use;
11219 }
11220
11221
11222 /* Clear stack slot assignments remembered from previous functions.
11223 This is called from INIT_EXPANDERS once before RTL is emitted for each
11224 function. */
11225
11226 static struct machine_function *
11227 ix86_init_machine_status ()
11228 {
11229 return ggc_alloc_cleared (sizeof (struct machine_function));
11230 }
11231
11232 /* Return a MEM corresponding to a stack slot with mode MODE.
11233 Allocate a new slot if necessary.
11234
11235 The RTL for a function can have several slots available: N is
11236 which slot to use. */
11237
11238 rtx
11239 assign_386_stack_local (mode, n)
11240 enum machine_mode mode;
11241 int n;
11242 {
11243 if (n < 0 || n >= MAX_386_STACK_LOCALS)
11244 abort ();
11245
11246 if (ix86_stack_locals[(int) mode][n] == NULL_RTX)
11247 ix86_stack_locals[(int) mode][n]
11248 = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
11249
11250 return ix86_stack_locals[(int) mode][n];
11251 }
11252
11253 /* Construct the SYMBOL_REF for the tls_get_addr function. */
11254
11255 static GTY(()) rtx ix86_tls_symbol;
11256 rtx
11257 ix86_tls_get_addr ()
11258 {
11259
11260 if (!ix86_tls_symbol)
11261 {
11262 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
11263 (TARGET_GNU_TLS && !TARGET_64BIT)
11264 ? "___tls_get_addr"
11265 : "__tls_get_addr");
11266 }
11267
11268 return ix86_tls_symbol;
11269 }
11270
11271 /* Calculate the length of the memory address in the instruction
11272 encoding. Does not include the one-byte modrm, opcode, or prefix. */
11273
11274 static int
11275 memory_address_length (addr)
11276 rtx addr;
11277 {
11278 struct ix86_address parts;
11279 rtx base, index, disp;
11280 int len;
11281
11282 if (GET_CODE (addr) == PRE_DEC
11283 || GET_CODE (addr) == POST_INC
11284 || GET_CODE (addr) == PRE_MODIFY
11285 || GET_CODE (addr) == POST_MODIFY)
11286 return 0;
11287
11288 if (! ix86_decompose_address (addr, &parts))
11289 abort ();
11290
11291 base = parts.base;
11292 index = parts.index;
11293 disp = parts.disp;
11294 len = 0;
11295
11296 /* Rule of thumb:
11297 - esp as the base always wants an index,
11298 - ebp as the base always wants a displacement. */
11299
11300 /* Register Indirect. */
11301 if (base && !index && !disp)
11302 {
11303 /* esp (for its index) and ebp (for its displacement) need
11304 the two-byte modrm form. */
11305 if (addr == stack_pointer_rtx
11306 || addr == arg_pointer_rtx
11307 || addr == frame_pointer_rtx
11308 || addr == hard_frame_pointer_rtx)
11309 len = 1;
11310 }
11311
11312 /* Direct Addressing. */
11313 else if (disp && !base && !index)
11314 len = 4;
11315
11316 else
11317 {
11318 /* Find the length of the displacement constant. */
11319 if (disp)
11320 {
11321 if (GET_CODE (disp) == CONST_INT
11322 && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K')
11323 && base)
11324 len = 1;
11325 else
11326 len = 4;
11327 }
11328 /* ebp always wants a displacement. */
11329 else if (base == hard_frame_pointer_rtx)
11330 len = 1;
11331
11332 /* An index requires the two-byte modrm form... */
11333 if (index
11334 /* ...like esp, which always wants an index. */
11335 || base == stack_pointer_rtx
11336 || base == arg_pointer_rtx
11337 || base == frame_pointer_rtx)
11338 len += 1;
11339 }
11340
11341 return len;
11342 }
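
/* As an illustration of the lengths computed above (ModRM byte, opcode and
   prefixes excluded):  (%eax) -> 0;  (%esp) -> 1 (SIB byte);  (%ebp) -> 1
   (disp8);  foo -> 4 (disp32);  8(%eax) -> 1 (disp8);  8(%eax,%ebx,4) -> 2
   (SIB byte + disp8);  foo(,%eax,4) -> 5 (SIB byte + disp32).  */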
11343
11344 /* Compute default value for "length_immediate" attribute. When SHORTFORM
11345 is set, expect that the insn has an 8-bit immediate alternative. */
11346 int
11347 ix86_attr_length_immediate_default (insn, shortform)
11348 rtx insn;
11349 int shortform;
11350 {
11351 int len = 0;
11352 int i;
11353 extract_insn_cached (insn);
11354 for (i = recog_data.n_operands - 1; i >= 0; --i)
11355 if (CONSTANT_P (recog_data.operand[i]))
11356 {
11357 if (len)
11358 abort ();
11359 if (shortform
11360 && GET_CODE (recog_data.operand[i]) == CONST_INT
11361 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
11362 len = 1;
11363 else
11364 {
11365 switch (get_attr_mode (insn))
11366 {
11367 case MODE_QI:
11368 len+=1;
11369 break;
11370 case MODE_HI:
11371 len+=2;
11372 break;
11373 case MODE_SI:
11374 len+=4;
11375 break;
11376 /* Immediates for DImode instructions are encoded as 32-bit sign-extended values. */
11377 case MODE_DI:
11378 len+=4;
11379 break;
11380 default:
11381 fatal_insn ("unknown insn mode", insn);
11382 }
11383 }
11384 }
11385 return len;
11386 }
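
/* For example, "addl $1000000, %eax" carries a 4-byte immediate (MODE_SI),
   while with SHORTFORM set "addl $8, %eax" can use the sign-extended imm8
   alternative and counts only 1 byte.  */
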
11387 /* Compute default value for "length_address" attribute. */
11388 int
11389 ix86_attr_length_address_default (insn)
11390 rtx insn;
11391 {
11392 int i;
11393
11394 if (get_attr_type (insn) == TYPE_LEA)
11395 {
11396 rtx set = PATTERN (insn);
11397 if (GET_CODE (set) == SET)
11398 ;
11399 else if (GET_CODE (set) == PARALLEL
11400 && GET_CODE (XVECEXP (set, 0, 0)) == SET)
11401 set = XVECEXP (set, 0, 0);
11402 else
11403 {
11404 #ifdef ENABLE_CHECKING
11405 abort ();
11406 #endif
11407 return 0;
11408 }
11409
11410 return memory_address_length (SET_SRC (set));
11411 }
11412
11413 extract_insn_cached (insn);
11414 for (i = recog_data.n_operands - 1; i >= 0; --i)
11415 if (GET_CODE (recog_data.operand[i]) == MEM)
11416 {
11417 return memory_address_length (XEXP (recog_data.operand[i], 0));
11419 }
11420 return 0;
11421 }
11422
11423 /* Return the maximum number of instructions a cpu can issue. */
11424
11425 static int
11426 ix86_issue_rate ()
11427 {
11428 switch (ix86_cpu)
11429 {
11430 case PROCESSOR_PENTIUM:
11431 case PROCESSOR_K6:
11432 return 2;
11433
11434 case PROCESSOR_PENTIUMPRO:
11435 case PROCESSOR_PENTIUM4:
11436 case PROCESSOR_ATHLON:
11437 return 3;
11438
11439 default:
11440 return 1;
11441 }
11442 }
11443
11444 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
11445 by DEP_INSN and nothing else set by DEP_INSN. */
11446
11447 static int
11448 ix86_flags_dependant (insn, dep_insn, insn_type)
11449 rtx insn, dep_insn;
11450 enum attr_type insn_type;
11451 {
11452 rtx set, set2;
11453
11454 /* Simplify the test for uninteresting insns. */
11455 if (insn_type != TYPE_SETCC
11456 && insn_type != TYPE_ICMOV
11457 && insn_type != TYPE_FCMOV
11458 && insn_type != TYPE_IBR)
11459 return 0;
11460
11461 if ((set = single_set (dep_insn)) != 0)
11462 {
11463 set = SET_DEST (set);
11464 set2 = NULL_RTX;
11465 }
11466 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
11467 && XVECLEN (PATTERN (dep_insn), 0) == 2
11468 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
11469 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
11470 {
11471 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
11472 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
11473 }
11474 else
11475 return 0;
11476
11477 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
11478 return 0;
11479
11480 /* This test is true if the dependent insn reads the flags but
11481 not any other potentially set register. */
11482 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
11483 return 0;
11484
11485 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
11486 return 0;
11487
11488 return 1;
11489 }
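
/* E.g. a "cmpl" followed by a conditional jump: the jump (TYPE_IBR) reads
   only the flags produced by the compare, so the pair is reported as
   flags-dependent and the Pentium pairing logic below drops the dependence
   cost to 0.  */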
11490
11491 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
11492 address with operands set by DEP_INSN. */
11493
11494 static int
11495 ix86_agi_dependant (insn, dep_insn, insn_type)
11496 rtx insn, dep_insn;
11497 enum attr_type insn_type;
11498 {
11499 rtx addr;
11500
11501 if (insn_type == TYPE_LEA
11502 && TARGET_PENTIUM)
11503 {
11504 addr = PATTERN (insn);
11505 if (GET_CODE (addr) == SET)
11506 ;
11507 else if (GET_CODE (addr) == PARALLEL
11508 && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
11509 addr = XVECEXP (addr, 0, 0);
11510 else
11511 abort ();
11512 addr = SET_SRC (addr);
11513 }
11514 else
11515 {
11516 int i;
11517 extract_insn_cached (insn);
11518 for (i = recog_data.n_operands - 1; i >= 0; --i)
11519 if (GET_CODE (recog_data.operand[i]) == MEM)
11520 {
11521 addr = XEXP (recog_data.operand[i], 0);
11522 goto found;
11523 }
11524 return 0;
11525 found:;
11526 }
11527
11528 return modified_in_p (addr, dep_insn);
11529 }
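
/* E.g. on the Pentium, "addl $4, %ebx" immediately followed by
   "movl (%ebx), %eax" hits the address generation interlock: the load's
   address register was modified by the previous insn, so ix86_adjust_cost
   adds a cycle of latency below.  */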
11530
11531 static int
11532 ix86_adjust_cost (insn, link, dep_insn, cost)
11533 rtx insn, link, dep_insn;
11534 int cost;
11535 {
11536 enum attr_type insn_type, dep_insn_type;
11537 enum attr_memory memory, dep_memory;
11538 rtx set, set2;
11539 int dep_insn_code_number;
11540
11541 /* Anti and output dependencies have zero cost on all CPUs. */
11542 if (REG_NOTE_KIND (link) != 0)
11543 return 0;
11544
11545 dep_insn_code_number = recog_memoized (dep_insn);
11546
11547 /* If we can't recognize the insns, we can't really do anything. */
11548 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
11549 return cost;
11550
11551 insn_type = get_attr_type (insn);
11552 dep_insn_type = get_attr_type (dep_insn);
11553
11554 switch (ix86_cpu)
11555 {
11556 case PROCESSOR_PENTIUM:
11557 /* Address Generation Interlock adds a cycle of latency. */
11558 if (ix86_agi_dependant (insn, dep_insn, insn_type))
11559 cost += 1;
11560
11561 /* ??? Compares pair with jump/setcc. */
11562 if (ix86_flags_dependant (insn, dep_insn, insn_type))
11563 cost = 0;
11564
11565 /* Floating point stores require the value to be ready one cycle earlier. */
11566 if (insn_type == TYPE_FMOV
11567 && get_attr_memory (insn) == MEMORY_STORE
11568 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11569 cost += 1;
11570 break;
11571
11572 case PROCESSOR_PENTIUMPRO:
11573 memory = get_attr_memory (insn);
11574 dep_memory = get_attr_memory (dep_insn);
11575
11576 /* Since we can't represent delayed latencies of load+operation,
11577 increase the cost here for non-imov insns. */
11578 if (dep_insn_type != TYPE_IMOV
11579 && dep_insn_type != TYPE_FMOV
11580 && (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH))
11581 cost += 1;
11582
11583 /* INT->FP conversion is expensive. */
11584 if (get_attr_fp_int_src (dep_insn))
11585 cost += 5;
11586
11587 /* There is one cycle extra latency between an FP op and a store. */
11588 if (insn_type == TYPE_FMOV
11589 && (set = single_set (dep_insn)) != NULL_RTX
11590 && (set2 = single_set (insn)) != NULL_RTX
11591 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
11592 && GET_CODE (SET_DEST (set2)) == MEM)
11593 cost += 1;
11594
11595 /* Show the ability of the reorder buffer to hide the latency of a load
11596 by executing it in parallel with the previous instruction when that
11597 instruction is not needed to compute the address. */
11598 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
11599 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11600 {
11601 /* Claim moves take one cycle, as the core can issue one load at a
11602 time and the next load can start a cycle later. */
11603 if (dep_insn_type == TYPE_IMOV
11604 || dep_insn_type == TYPE_FMOV)
11605 cost = 1;
11606 else if (cost > 1)
11607 cost--;
11608 }
11609 break;
11610
11611 case PROCESSOR_K6:
11612 memory = get_attr_memory (insn);
11613 dep_memory = get_attr_memory (dep_insn);
11614 /* The esp dependency is resolved before the instruction is really
11615 finished. */
11616 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
11617 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
11618 return 1;
11619
11620 /* Since we can't represent delayed latencies of load+operation,
11621 increase the cost here for non-imov insns. */
11622 if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
11623 cost += (dep_insn_type != TYPE_IMOV) ? 2 : 1;
11624
11625 /* INT->FP conversion is expensive. */
11626 if (get_attr_fp_int_src (dep_insn))
11627 cost += 5;
11628
11629 /* Show the ability of the reorder buffer to hide the latency of a load
11630 by executing it in parallel with the previous instruction when that
11631 instruction is not needed to compute the address. */
11632 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
11633 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11634 {
11635 /* Claim moves take one cycle, as the core can issue one load at a
11636 time and the next load can start a cycle later. */
11637 if (dep_insn_type == TYPE_IMOV
11638 || dep_insn_type == TYPE_FMOV)
11639 cost = 1;
11640 else if (cost > 2)
11641 cost -= 2;
11642 else
11643 cost = 1;
11644 }
11645 break;
11646
11647 case PROCESSOR_ATHLON:
11648 memory = get_attr_memory (insn);
11649 dep_memory = get_attr_memory (dep_insn);
11650
11651 if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
11652 {
11653 if (dep_insn_type == TYPE_IMOV || dep_insn_type == TYPE_FMOV)
11654 cost += 2;
11655 else
11656 cost += 3;
11657 }
11658 /* Show the ability of the reorder buffer to hide the latency of a load
11659 by executing it in parallel with the previous instruction when that
11660 instruction is not needed to compute the address. */
11661 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
11662 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11663 {
11664 /* Claim moves take one cycle, as the core can issue one load at a
11665 time and the next load can start a cycle later. */
11666 if (dep_insn_type == TYPE_IMOV
11667 || dep_insn_type == TYPE_FMOV)
11668 cost = 0;
11669 else if (cost >= 3)
11670 cost -= 3;
11671 else
11672 cost = 0;
11673 }
11674 break;

11675 default:
11676 break;
11677 }
11678
11679 return cost;
11680 }
11681
11682 static union
11683 {
11684 struct ppro_sched_data
11685 {
11686 rtx decode[3];
11687 int issued_this_cycle;
11688 } ppro;
11689 } ix86_sched_data;
11690
11691 static enum attr_ppro_uops
11692 ix86_safe_ppro_uops (insn)
11693 rtx insn;
11694 {
11695 if (recog_memoized (insn) >= 0)
11696 return get_attr_ppro_uops (insn);
11697 else
11698 return PPRO_UOPS_MANY;
11699 }
11700
11701 static void
11702 ix86_dump_ppro_packet (dump)
11703 FILE *dump;
11704 {
11705 if (ix86_sched_data.ppro.decode[0])
11706 {
11707 fprintf (dump, "PPRO packet: %d",
11708 INSN_UID (ix86_sched_data.ppro.decode[0]));
11709 if (ix86_sched_data.ppro.decode[1])
11710 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[1]));
11711 if (ix86_sched_data.ppro.decode[2])
11712 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[2]));
11713 fputc ('\n', dump);
11714 }
11715 }
11716
11717 /* We're beginning a new block. Initialize data structures as necessary. */
11718
11719 static void
11720 ix86_sched_init (dump, sched_verbose, veclen)
11721 FILE *dump ATTRIBUTE_UNUSED;
11722 int sched_verbose ATTRIBUTE_UNUSED;
11723 int veclen ATTRIBUTE_UNUSED;
11724 {
11725 memset (&ix86_sched_data, 0, sizeof (ix86_sched_data));
11726 }
11727
11728 /* Shift INSN to SLOT, and shift everything else down. */
11729
11730 static void
11731 ix86_reorder_insn (insnp, slot)
11732 rtx *insnp, *slot;
11733 {
11734 if (insnp != slot)
11735 {
11736 rtx insn = *insnp;
11737 do
11738 insnp[0] = insnp[1];
11739 while (++insnp != slot);
11740 *insnp = insn;
11741 }
11742 }
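
/* E.g. with the ready queue {A, B, C, D}, where the last slot holds the
   highest priority insn, moving A into that slot yields {B, C, D, A}:
   the intervening entries are shifted down one place and A becomes the
   new head of the queue.  */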
11743
11744 static void
11745 ix86_sched_reorder_ppro (ready, e_ready)
11746 rtx *ready;
11747 rtx *e_ready;
11748 {
11749 rtx decode[3];
11750 enum attr_ppro_uops cur_uops;
11751 int issued_this_cycle;
11752 rtx *insnp;
11753 int i;
11754
11755 /* At this point .ppro.decode contains the state of the three
11756 decoders from last "cycle". That is, those insns that were
11757 actually independent. But here we're scheduling for the
11758 decoder, and we may find things that are decodable in the
11759 same cycle. */
11760
11761 memcpy (decode, ix86_sched_data.ppro.decode, sizeof (decode));
11762 issued_this_cycle = 0;
11763
11764 insnp = e_ready;
11765 cur_uops = ix86_safe_ppro_uops (*insnp);
11766
11767 /* If the decoders are empty, and we have a complex insn at the
11768 head of the priority queue, let it issue without complaint. */
11769 if (decode[0] == NULL)
11770 {
11771 if (cur_uops == PPRO_UOPS_MANY)
11772 {
11773 decode[0] = *insnp;
11774 goto ppro_done;
11775 }
11776
11777 /* Otherwise, search for a 2-4 uop insn to issue. */
11778 while (cur_uops != PPRO_UOPS_FEW)
11779 {
11780 if (insnp == ready)
11781 break;
11782 cur_uops = ix86_safe_ppro_uops (*--insnp);
11783 }
11784
11785 /* If so, move it to the head of the line. */
11786 if (cur_uops == PPRO_UOPS_FEW)
11787 ix86_reorder_insn (insnp, e_ready);
11788
11789 /* Issue the head of the queue. */
11790 issued_this_cycle = 1;
11791 decode[0] = *e_ready--;
11792 }
11793
11794 /* Look for simple insns to fill in the other two slots. */
11795 for (i = 1; i < 3; ++i)
11796 if (decode[i] == NULL)
11797 {
11798 if (ready > e_ready)
11799 goto ppro_done;
11800
11801 insnp = e_ready;
11802 cur_uops = ix86_safe_ppro_uops (*insnp);
11803 while (cur_uops != PPRO_UOPS_ONE)
11804 {
11805 if (insnp == ready)
11806 break;
11807 cur_uops = ix86_safe_ppro_uops (*--insnp);
11808 }
11809
11810 /* Found one. Move it to the head of the queue and issue it. */
11811 if (cur_uops == PPRO_UOPS_ONE)
11812 {
11813 ix86_reorder_insn (insnp, e_ready);
11814 decode[i] = *e_ready--;
11815 issued_this_cycle++;
11816 continue;
11817 }
11818
11819 /* ??? Didn't find one. Ideally, here we would do a lazy split
11820 of 2-uop insns, issue one and queue the other. */
11821 }
11822
11823 ppro_done:
11824 if (issued_this_cycle == 0)
11825 issued_this_cycle = 1;
11826 ix86_sched_data.ppro.issued_this_cycle = issued_this_cycle;
11827 }
11828
11829 /* We are about to begin issuing insns for this clock cycle.
11830 Override the default sort algorithm to better slot instructions. */
11831 static int
11832 ix86_sched_reorder (dump, sched_verbose, ready, n_readyp, clock_var)
11833 FILE *dump ATTRIBUTE_UNUSED;
11834 int sched_verbose ATTRIBUTE_UNUSED;
11835 rtx *ready;
11836 int *n_readyp;
11837 int clock_var ATTRIBUTE_UNUSED;
11838 {
11839 int n_ready = *n_readyp;
11840 rtx *e_ready = ready + n_ready - 1;
11841
11842 /* Make sure to go ahead and initialize key items in
11843 ix86_sched_data if we are not going to bother trying to
11844 reorder the ready queue. */
11845 if (n_ready < 2)
11846 {
11847 ix86_sched_data.ppro.issued_this_cycle = 1;
11848 goto out;
11849 }
11850
11851 switch (ix86_cpu)
11852 {
11853 default:
11854 break;
11855
11856 case PROCESSOR_PENTIUMPRO:
11857 ix86_sched_reorder_ppro (ready, e_ready);
11858 break;
11859 }
11860
11861 out:
11862 return ix86_issue_rate ();
11863 }
11864
11865 /* We are about to issue INSN. Return the number of insns left on the
11866 ready queue that can be issued this cycle. */
11867
11868 static int
11869 ix86_variable_issue (dump, sched_verbose, insn, can_issue_more)
11870 FILE *dump;
11871 int sched_verbose;
11872 rtx insn;
11873 int can_issue_more;
11874 {
11875 int i;
11876 switch (ix86_cpu)
11877 {
11878 default:
11879 return can_issue_more - 1;
11880
11881 case PROCESSOR_PENTIUMPRO:
11882 {
11883 enum attr_ppro_uops uops = ix86_safe_ppro_uops (insn);
11884
11885 if (uops == PPRO_UOPS_MANY)
11886 {
11887 if (sched_verbose)
11888 ix86_dump_ppro_packet (dump);
11889 ix86_sched_data.ppro.decode[0] = insn;
11890 ix86_sched_data.ppro.decode[1] = NULL;
11891 ix86_sched_data.ppro.decode[2] = NULL;
11892 if (sched_verbose)
11893 ix86_dump_ppro_packet (dump);
11894 ix86_sched_data.ppro.decode[0] = NULL;
11895 }
11896 else if (uops == PPRO_UOPS_FEW)
11897 {
11898 if (sched_verbose)
11899 ix86_dump_ppro_packet (dump);
11900 ix86_sched_data.ppro.decode[0] = insn;
11901 ix86_sched_data.ppro.decode[1] = NULL;
11902 ix86_sched_data.ppro.decode[2] = NULL;
11903 }
11904 else
11905 {
11906 for (i = 0; i < 3; ++i)
11907 if (ix86_sched_data.ppro.decode[i] == NULL)
11908 {
11909 ix86_sched_data.ppro.decode[i] = insn;
11910 break;
11911 }
11912 if (i == 3)
11913 abort ();
11914 if (i == 2)
11915 {
11916 if (sched_verbose)
11917 ix86_dump_ppro_packet (dump);
11918 ix86_sched_data.ppro.decode[0] = NULL;
11919 ix86_sched_data.ppro.decode[1] = NULL;
11920 ix86_sched_data.ppro.decode[2] = NULL;
11921 }
11922 }
11923 }
11924 return --ix86_sched_data.ppro.issued_this_cycle;
11925 }
11926 }
11927
11928 static int
11929 ia32_use_dfa_pipeline_interface ()
11930 {
11931 if (ix86_cpu == PROCESSOR_PENTIUM)
11932 return 1;
11933 return 0;
11934 }
11935
11936 /* How many alternative schedules to try. This should be as wide as the
11937 scheduling freedom in the DFA, but no wider. Making this value too
11938 large results in extra work for the scheduler. */
11939
11940 static int
11941 ia32_multipass_dfa_lookahead ()
11942 {
11943 if (ix86_cpu == PROCESSOR_PENTIUM)
11944 return 2;
11945 else
11946 return 0;
11947 }
11948
11949
11950 /* Walk through INSNS and look for MEM references whose address is DSTREG or
11951 SRCREG and set the memory attribute to those of DSTREF and SRCREF, as
11952 appropriate. */
11953
11954 void
11955 ix86_set_move_mem_attrs (insns, dstref, srcref, dstreg, srcreg)
11956 rtx insns;
11957 rtx dstref, srcref, dstreg, srcreg;
11958 {
11959 rtx insn;
11960
11961 for (insn = insns; insn != 0 ; insn = NEXT_INSN (insn))
11962 if (INSN_P (insn))
11963 ix86_set_move_mem_attrs_1 (PATTERN (insn), dstref, srcref,
11964 dstreg, srcreg);
11965 }
11966
11967 /* Subroutine of above to actually do the updating by recursively walking
11968 the rtx. */
11969
11970 static void
11971 ix86_set_move_mem_attrs_1 (x, dstref, srcref, dstreg, srcreg)
11972 rtx x;
11973 rtx dstref, srcref, dstreg, srcreg;
11974 {
11975 enum rtx_code code = GET_CODE (x);
11976 const char *format_ptr = GET_RTX_FORMAT (code);
11977 int i, j;
11978
11979 if (code == MEM && XEXP (x, 0) == dstreg)
11980 MEM_COPY_ATTRIBUTES (x, dstref);
11981 else if (code == MEM && XEXP (x, 0) == srcreg)
11982 MEM_COPY_ATTRIBUTES (x, srcref);
11983
11984 for (i = 0; i < GET_RTX_LENGTH (code); i++, format_ptr++)
11985 {
11986 if (*format_ptr == 'e')
11987 ix86_set_move_mem_attrs_1 (XEXP (x, i), dstref, srcref,
11988 dstreg, srcreg);
11989 else if (*format_ptr == 'E')
11990 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
11991 ix86_set_move_mem_attrs_1 (XVECEXP (x, i, j), dstref, srcref,
11992 dstreg, srcreg);
11993 }
11994 }
11995
11996 /* Compute the alignment given to a constant that is being placed in memory.
11997 EXP is the constant and ALIGN is the alignment that the object would
11998 ordinarily have.
11999 The value of this function is used instead of that alignment to align
12000 the object. */
12001
12002 int
12003 ix86_constant_alignment (exp, align)
12004 tree exp;
12005 int align;
12006 {
12007 if (TREE_CODE (exp) == REAL_CST)
12008 {
12009 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
12010 return 64;
12011 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
12012 return 128;
12013 }
12014 else if (TREE_CODE (exp) == STRING_CST && TREE_STRING_LENGTH (exp) >= 31
12015 && align < 256)
12016 return 256;
12017
12018 return align;
12019 }
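
/* For example, a DFmode floating-point constant that would ordinarily get
   32-bit alignment is bumped to 64 bits, and a string constant whose
   TREE_STRING_LENGTH is at least 31 is given 256-bit (32-byte) alignment.  */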
12020
12021 /* Compute the alignment for a static variable.
12022 TYPE is the data type, and ALIGN is the alignment that
12023 the object would ordinarily have. The value of this function is used
12024 instead of that alignment to align the object. */
12025
12026 int
12027 ix86_data_alignment (type, align)
12028 tree type;
12029 int align;
12030 {
12031 if (AGGREGATE_TYPE_P (type)
12032 && TYPE_SIZE (type)
12033 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12034 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
12035 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
12036 return 256;
12037
12038 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
12039 to a 16-byte boundary. */
12040 if (TARGET_64BIT)
12041 {
12042 if (AGGREGATE_TYPE_P (type)
12043 && TYPE_SIZE (type)
12044 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12045 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
12046 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
12047 return 128;
12048 }
12049
12050 if (TREE_CODE (type) == ARRAY_TYPE)
12051 {
12052 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
12053 return 64;
12054 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
12055 return 128;
12056 }
12057 else if (TREE_CODE (type) == COMPLEX_TYPE)
12058 {
12059
12060 if (TYPE_MODE (type) == DCmode && align < 64)
12061 return 64;
12062 if (TYPE_MODE (type) == XCmode && align < 128)
12063 return 128;
12064 }
12065 else if ((TREE_CODE (type) == RECORD_TYPE
12066 || TREE_CODE (type) == UNION_TYPE
12067 || TREE_CODE (type) == QUAL_UNION_TYPE)
12068 && TYPE_FIELDS (type))
12069 {
12070 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
12071 return 64;
12072 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
12073 return 128;
12074 }
12075 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
12076 || TREE_CODE (type) == INTEGER_TYPE)
12077 {
12078 if (TYPE_MODE (type) == DFmode && align < 64)
12079 return 64;
12080 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
12081 return 128;
12082 }
12083
12084 return align;
12085 }
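
/* For example, a 40-byte global array is raised to 256-bit (32-byte)
   alignment, and on x86-64 any aggregate of at least 16 bytes gets at
   least a 16-byte boundary, as the ABI requires for such arrays.  */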
12086
12087 /* Compute the alignment for a local variable.
12088 TYPE is the data type, and ALIGN is the alignment that
12089 the object would ordinarily have. The value of this macro is used
12090 instead of that alignment to align the object. */
12091
12092 int
12093 ix86_local_alignment (type, align)
12094 tree type;
12095 int align;
12096 {
12097 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
12098 to a 16-byte boundary. */
12099 if (TARGET_64BIT)
12100 {
12101 if (AGGREGATE_TYPE_P (type)
12102 && TYPE_SIZE (type)
12103 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12104 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
12105 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
12106 return 128;
12107 }
12108 if (TREE_CODE (type) == ARRAY_TYPE)
12109 {
12110 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
12111 return 64;
12112 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
12113 return 128;
12114 }
12115 else if (TREE_CODE (type) == COMPLEX_TYPE)
12116 {
12117 if (TYPE_MODE (type) == DCmode && align < 64)
12118 return 64;
12119 if (TYPE_MODE (type) == XCmode && align < 128)
12120 return 128;
12121 }
12122 else if ((TREE_CODE (type) == RECORD_TYPE
12123 || TREE_CODE (type) == UNION_TYPE
12124 || TREE_CODE (type) == QUAL_UNION_TYPE)
12125 && TYPE_FIELDS (type))
12126 {
12127 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
12128 return 64;
12129 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
12130 return 128;
12131 }
12132 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
12133 || TREE_CODE (type) == INTEGER_TYPE)
12134 {
12135
12136 if (TYPE_MODE (type) == DFmode && align < 64)
12137 return 64;
12138 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
12139 return 128;
12140 }
12141 return align;
12142 }
12143
12144 /* Emit RTL insns to initialize the variable parts of a trampoline.
12145 FNADDR is an RTX for the address of the function's pure code.
12146 CXT is an RTX for the static chain value for the function. */
12147 void
12148 x86_initialize_trampoline (tramp, fnaddr, cxt)
12149 rtx tramp, fnaddr, cxt;
12150 {
12151 if (!TARGET_64BIT)
12152 {
12153 /* Compute offset from the end of the jmp to the target function. */
12154 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
12155 plus_constant (tramp, 10),
12156 NULL_RTX, 1, OPTAB_DIRECT);
12157 emit_move_insn (gen_rtx_MEM (QImode, tramp),
12158 gen_int_mode (0xb9, QImode));
12159 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
12160 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
12161 gen_int_mode (0xe9, QImode));
12162 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
12163 }
12164 else
12165 {
12166 int offset = 0;
12167 /* Try to load the address using the shorter movl instead of movabs.
12168 We may want to support movq for kernel mode, but the kernel does not
12169 use trampolines at the moment. */
12170 if (x86_64_zero_extended_value (fnaddr))
12171 {
12172 fnaddr = copy_to_mode_reg (DImode, fnaddr);
12173 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12174 gen_int_mode (0xbb41, HImode));
12175 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
12176 gen_lowpart (SImode, fnaddr));
12177 offset += 6;
12178 }
12179 else
12180 {
12181 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12182 gen_int_mode (0xbb49, HImode));
12183 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
12184 fnaddr);
12185 offset += 10;
12186 }
12187 /* Load static chain using movabs to r10. */
12188 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12189 gen_int_mode (0xba49, HImode));
12190 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
12191 cxt);
12192 offset += 10;
12193 /* Jump to r11. */
12194 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12195 gen_int_mode (0xff49, HImode));
12196 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
12197 gen_int_mode (0xe3, QImode));
12198 offset += 3;
12199 if (offset > TRAMPOLINE_SIZE)
12200 abort ();
12201 }
12202
12203 #ifdef TRANSFER_FROM_TRAMPOLINE
12204 emit_library_call (gen_rtx (SYMBOL_REF, Pmode, "__enable_execute_stack"),
12205 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
12206 #endif
12207 }
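
/* The trampolines built above look like this (byte values in hex).  32-bit:

	b9 <cxt:4>		movl   $cxt, %ecx
	e9 <disp:4>		jmp    fnaddr	 (disp = fnaddr - (tramp + 10))

   64-bit, when fnaddr fits a zero-extended 32-bit value:

	41 bb <fnaddr:4>	movl   $fnaddr, %r11d
	49 ba <cxt:8>		movabs $cxt, %r10
	49 ff e3		jmp    *%r11

   otherwise the first insn is "49 bb <fnaddr:8>" (movabs $fnaddr, %r11).  */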
12208
12209 #define def_builtin(MASK, NAME, TYPE, CODE) \
12210 do { \
12211 if ((MASK) & target_flags \
12212 && (!((MASK) & MASK_64BIT) || TARGET_64BIT)) \
12213 builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, \
12214 NULL, NULL_TREE); \
12215 } while (0)
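
/* For instance, an SSE comparison builtin might be registered as

	def_builtin (MASK_SSE, "__builtin_ia32_comieq",
		     int_ftype_v4sf_v4sf, IX86_BUILTIN_COMIEQSS);

   and the MASK_64BIT test above keeps 64-bit-only builtins from being
   created on 32-bit targets even when their other mask bits are set.  */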
12216
12217 struct builtin_description
12218 {
12219 const unsigned int mask;
12220 const enum insn_code icode;
12221 const char *const name;
12222 const enum ix86_builtins code;
12223 const enum rtx_code comparison;
12224 const unsigned int flag;
12225 };
12226
12227 static const struct builtin_description bdesc_comi[] =
12228 {
12229 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
12230 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
12231 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
12232 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
12233 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
12234 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
12235 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
12236 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
12237 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
12238 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
12239 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
12240 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
12241 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
12242 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
12243 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
12244 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
12245 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
12246 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
12247 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
12248 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
12249 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
12250 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
12251 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
12252 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
12253 };
12254
12255 static const struct builtin_description bdesc_2arg[] =
12256 {
12257 /* SSE */
12258 { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
12259 { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
12260 { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
12261 { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
12262 { MASK_SSE, CODE_FOR_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
12263 { MASK_SSE, CODE_FOR_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
12264 { MASK_SSE, CODE_FOR_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
12265 { MASK_SSE, CODE_FOR_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
12266
12267 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
12268 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
12269 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
12270 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 },
12271 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 },
12272 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
12273 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 },
12274 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 },
12275 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 },
12276 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 },
12277 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 },
12278 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 },
12279 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
12280 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
12281 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
12282 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
12283 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 },
12284 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 },
12285 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 },
12286 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
12287
12288 { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
12289 { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
12290 { MASK_SSE, CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
12291 { MASK_SSE, CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
12292
12293 { MASK_SSE, CODE_FOR_sse_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
12294 { MASK_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
12295 { MASK_SSE, CODE_FOR_sse_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
12296 { MASK_SSE, CODE_FOR_sse_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
12297
12298 { MASK_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
12299 { MASK_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
12300 { MASK_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
12301 { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
12302 { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
12303
12304 /* MMX */
12305 { MASK_MMX, CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
12306 { MASK_MMX, CODE_FOR_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
12307 { MASK_MMX, CODE_FOR_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
12308 { MASK_MMX, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, 0, 0 },
12309 { MASK_MMX, CODE_FOR_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
12310 { MASK_MMX, CODE_FOR_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
12311 { MASK_MMX, CODE_FOR_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
12312 { MASK_MMX, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, 0, 0 },
12313
12314 { MASK_MMX, CODE_FOR_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
12315 { MASK_MMX, CODE_FOR_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
12316 { MASK_MMX, CODE_FOR_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
12317 { MASK_MMX, CODE_FOR_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
12318 { MASK_MMX, CODE_FOR_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
12319 { MASK_MMX, CODE_FOR_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
12320 { MASK_MMX, CODE_FOR_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
12321 { MASK_MMX, CODE_FOR_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
12322
12323 { MASK_MMX, CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
12324 { MASK_MMX, CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
12325 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
12326
12327 { MASK_MMX, CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
12328 { MASK_MMX, CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
12329 { MASK_MMX, CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
12330 { MASK_MMX, CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
12331
12332 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
12333 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
12334
12335 { MASK_MMX, CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
12336 { MASK_MMX, CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
12337 { MASK_MMX, CODE_FOR_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
12338 { MASK_MMX, CODE_FOR_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
12339 { MASK_MMX, CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
12340 { MASK_MMX, CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
12341
12342 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
12343 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
12344 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
12345 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
12346
12347 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
12348 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
12349 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
12350 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
12351 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
12352 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
12353
12354 /* Special. */
12355 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
12356 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
12357 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
12358
12359 { MASK_SSE, CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
12360 { MASK_SSE, CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
12361 { MASK_SSE | MASK_64BIT, CODE_FOR_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 },
12362
12363 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
12364 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
12365 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
12366 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
12367 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
12368 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
12369
12370 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
12371 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
12372 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
12373 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
12374 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
12375 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
12376
12377 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
12378 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
12379 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
12380 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
12381
12382 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
12383 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
12384
12385 /* SSE2 */
12386 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
12387 { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
12388 { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
12389 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
12390 { MASK_SSE2, CODE_FOR_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
12391 { MASK_SSE2, CODE_FOR_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
12392 { MASK_SSE2, CODE_FOR_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
12393 { MASK_SSE2, CODE_FOR_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
12394
12395 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
12396 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
12397 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
12398 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, 1 },
12399 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, 1 },
12400 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
12401 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, EQ, 0 },
12402 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, LT, 0 },
12403 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, LE, 0 },
12404 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, LT, 1 },
12405 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, LE, 1 },
12406 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, UNORDERED, 0 },
12407 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
12408 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
12409 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
12410 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
12411 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, EQ, 0 },
12412 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, LT, 0 },
12413 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, LE, 0 },
12414 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, UNORDERED, 0 },
12415
12416 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
12417 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
12418 { MASK_SSE2, CODE_FOR_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
12419 { MASK_SSE2, CODE_FOR_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
12420
12421 { MASK_SSE2, CODE_FOR_sse2_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
12422 { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
12423 { MASK_SSE2, CODE_FOR_sse2_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
12424 { MASK_SSE2, CODE_FOR_sse2_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
12425
12426 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
12427 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
12428 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
12429
12430 /* SSE2 MMX */
12431 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
12432 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
12433 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
12434 { MASK_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
12435 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
12436 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
12437 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
12438 { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
12439
12440 { MASK_MMX, CODE_FOR_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
12441 { MASK_MMX, CODE_FOR_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
12442 { MASK_MMX, CODE_FOR_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
12443 { MASK_MMX, CODE_FOR_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
12444 { MASK_MMX, CODE_FOR_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
12445 { MASK_MMX, CODE_FOR_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
12446 { MASK_MMX, CODE_FOR_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
12447 { MASK_MMX, CODE_FOR_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
12448
12449 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
12450 { MASK_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
12451 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, 0, 0 },
12452 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, 0, 0 },
12453
12454 { MASK_SSE2, CODE_FOR_sse2_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
12455 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
12456 { MASK_SSE2, CODE_FOR_sse2_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
12457 { MASK_SSE2, CODE_FOR_sse2_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
12458
12459 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
12460 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
12461
12462 { MASK_SSE2, CODE_FOR_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
12463 { MASK_SSE2, CODE_FOR_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
12464 { MASK_SSE2, CODE_FOR_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
12465 { MASK_SSE2, CODE_FOR_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
12466 { MASK_SSE2, CODE_FOR_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
12467 { MASK_SSE2, CODE_FOR_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
12468
12469 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
12470 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
12471 { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
12472 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
12473
12474 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
12475 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
12476 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
12477 { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
12478 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
12479 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
12480 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
12481 { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },
12482
12483 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
12484 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
12485 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
12486
12487 { MASK_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
12488 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
12489
12490 { MASK_SSE2, CODE_FOR_ashlv8hi3_ti, 0, IX86_BUILTIN_PSLLW128, 0, 0 },
12491 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
12492 { MASK_SSE2, CODE_FOR_ashlv4si3_ti, 0, IX86_BUILTIN_PSLLD128, 0, 0 },
12493 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
12494 { MASK_SSE2, CODE_FOR_ashlv2di3_ti, 0, IX86_BUILTIN_PSLLQ128, 0, 0 },
12495 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
12496
12497 { MASK_SSE2, CODE_FOR_lshrv8hi3_ti, 0, IX86_BUILTIN_PSRLW128, 0, 0 },
12498 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
12499 { MASK_SSE2, CODE_FOR_lshrv4si3_ti, 0, IX86_BUILTIN_PSRLD128, 0, 0 },
12500 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
12501 { MASK_SSE2, CODE_FOR_lshrv2di3_ti, 0, IX86_BUILTIN_PSRLQ128, 0, 0 },
12502 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
12503
12504 { MASK_SSE2, CODE_FOR_ashrv8hi3_ti, 0, IX86_BUILTIN_PSRAW128, 0, 0 },
12505 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
12506 { MASK_SSE2, CODE_FOR_ashrv4si3_ti, 0, IX86_BUILTIN_PSRAD128, 0, 0 },
12507 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
12508
12509 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
12510
12511 { MASK_SSE2, CODE_FOR_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
12512 { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 },
12513 { MASK_SSE2, CODE_FOR_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
12514 { MASK_SSE2, CODE_FOR_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 },
12515
12516 /* SSE3 MMX */
12517 { MASK_SSE3, CODE_FOR_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, 0, 0 },
12518 { MASK_SSE3, CODE_FOR_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, 0, 0 },
12519 { MASK_SSE3, CODE_FOR_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, 0, 0 },
12520 { MASK_SSE3, CODE_FOR_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, 0, 0 },
12521 { MASK_SSE3, CODE_FOR_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, 0, 0 },
12522 { MASK_SSE3, CODE_FOR_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, 0, 0 }
12523 };
12524
12525 static const struct builtin_description bdesc_1arg[] =
12526 {
12527 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
12528 { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
12529
12530 { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
12531 { MASK_SSE, CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
12532 { MASK_SSE, CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
12533
12534 { MASK_SSE, CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
12535 { MASK_SSE, CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
12536 { MASK_SSE | MASK_64BIT, CODE_FOR_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 },
12537 { MASK_SSE, CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
12538 { MASK_SSE, CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
12539 { MASK_SSE | MASK_64BIT, CODE_FOR_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 },
12540
12541 { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
12542 { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
12543 { MASK_SSE2, CODE_FOR_sse2_movq2dq, 0, IX86_BUILTIN_MOVQ2DQ, 0, 0 },
12544 { MASK_SSE2, CODE_FOR_sse2_movdq2q, 0, IX86_BUILTIN_MOVDQ2Q, 0, 0 },
12545
12546 { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
12547
12548 { MASK_SSE2, CODE_FOR_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
12549 { MASK_SSE2, CODE_FOR_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
12550
12551 { MASK_SSE2, CODE_FOR_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
12552 { MASK_SSE2, CODE_FOR_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
12553 { MASK_SSE2, CODE_FOR_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
12554 { MASK_SSE2, CODE_FOR_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
12555 { MASK_SSE2, CODE_FOR_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
12556
12557 { MASK_SSE2, CODE_FOR_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
12558
12559 { MASK_SSE2, CODE_FOR_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
12560 { MASK_SSE2, CODE_FOR_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
12561 { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 },
12562 { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 },
12563
12564 { MASK_SSE2, CODE_FOR_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
12565 { MASK_SSE2, CODE_FOR_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
12566 { MASK_SSE2, CODE_FOR_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },
12567
12568 { MASK_SSE2, CODE_FOR_sse2_movq, 0, IX86_BUILTIN_MOVQ, 0, 0 },
12569
12570 /* SSE3 */
12571 { MASK_SSE3, CODE_FOR_movshdup, 0, IX86_BUILTIN_MOVSHDUP, 0, 0 },
12572 { MASK_SSE3, CODE_FOR_movsldup, 0, IX86_BUILTIN_MOVSLDUP, 0, 0 },
12573 { MASK_SSE3, CODE_FOR_movddup, 0, IX86_BUILTIN_MOVDDUP, 0, 0 }
12574 };
12575
12576 void
12577 ix86_init_builtins ()
12578 {
12579 if (TARGET_MMX)
12580 ix86_init_mmx_sse_builtins ();
12581 }
12582
12583 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
12584 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
12585 builtins. */
12586 static void
12587 ix86_init_mmx_sse_builtins ()
12588 {
12589 const struct builtin_description * d;
12590 size_t i;
12591
12592 tree pchar_type_node = build_pointer_type (char_type_node);
12593 tree pcchar_type_node = build_pointer_type (
12594 build_type_variant (char_type_node, 1, 0));
12595 tree pfloat_type_node = build_pointer_type (float_type_node);
12596 tree pcfloat_type_node = build_pointer_type (
12597 build_type_variant (float_type_node, 1, 0));
12598 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
12599 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
12600 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
12601
12602 /* Comparisons. */
12603 tree int_ftype_v4sf_v4sf
12604 = build_function_type_list (integer_type_node,
12605 V4SF_type_node, V4SF_type_node, NULL_TREE);
12606 tree v4si_ftype_v4sf_v4sf
12607 = build_function_type_list (V4SI_type_node,
12608 V4SF_type_node, V4SF_type_node, NULL_TREE);
12609 /* MMX/SSE/integer conversions. */
12610 tree int_ftype_v4sf
12611 = build_function_type_list (integer_type_node,
12612 V4SF_type_node, NULL_TREE);
12613 tree int64_ftype_v4sf
12614 = build_function_type_list (long_long_integer_type_node,
12615 V4SF_type_node, NULL_TREE);
12616 tree int_ftype_v8qi
12617 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
12618 tree v4sf_ftype_v4sf_int
12619 = build_function_type_list (V4SF_type_node,
12620 V4SF_type_node, integer_type_node, NULL_TREE);
12621 tree v4sf_ftype_v4sf_int64
12622 = build_function_type_list (V4SF_type_node,
12623 V4SF_type_node, long_long_integer_type_node,
12624 NULL_TREE);
12625 tree v4sf_ftype_v4sf_v2si
12626 = build_function_type_list (V4SF_type_node,
12627 V4SF_type_node, V2SI_type_node, NULL_TREE);
12628 tree int_ftype_v4hi_int
12629 = build_function_type_list (integer_type_node,
12630 V4HI_type_node, integer_type_node, NULL_TREE);
12631 tree v4hi_ftype_v4hi_int_int
12632 = build_function_type_list (V4HI_type_node, V4HI_type_node,
12633 integer_type_node, integer_type_node,
12634 NULL_TREE);
12635 /* Miscellaneous. */
12636 tree v8qi_ftype_v4hi_v4hi
12637 = build_function_type_list (V8QI_type_node,
12638 V4HI_type_node, V4HI_type_node, NULL_TREE);
12639 tree v4hi_ftype_v2si_v2si
12640 = build_function_type_list (V4HI_type_node,
12641 V2SI_type_node, V2SI_type_node, NULL_TREE);
12642 tree v4sf_ftype_v4sf_v4sf_int
12643 = build_function_type_list (V4SF_type_node,
12644 V4SF_type_node, V4SF_type_node,
12645 integer_type_node, NULL_TREE);
12646 tree v2si_ftype_v4hi_v4hi
12647 = build_function_type_list (V2SI_type_node,
12648 V4HI_type_node, V4HI_type_node, NULL_TREE);
12649 tree v4hi_ftype_v4hi_int
12650 = build_function_type_list (V4HI_type_node,
12651 V4HI_type_node, integer_type_node, NULL_TREE);
12652 tree v4hi_ftype_v4hi_di
12653 = build_function_type_list (V4HI_type_node,
12654 V4HI_type_node, long_long_unsigned_type_node,
12655 NULL_TREE);
12656 tree v2si_ftype_v2si_di
12657 = build_function_type_list (V2SI_type_node,
12658 V2SI_type_node, long_long_unsigned_type_node,
12659 NULL_TREE);
12660 tree void_ftype_void
12661 = build_function_type (void_type_node, void_list_node);
12662 tree void_ftype_unsigned
12663 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
12664 tree void_ftype_unsigned_unsigned
12665 = build_function_type_list (void_type_node, unsigned_type_node,
12666 unsigned_type_node, NULL_TREE);
12667 tree void_ftype_pcvoid_unsigned_unsigned
12668 = build_function_type_list (void_type_node, const_ptr_type_node,
12669 unsigned_type_node, unsigned_type_node,
12670 NULL_TREE);
12671 tree unsigned_ftype_void
12672 = build_function_type (unsigned_type_node, void_list_node);
12673 tree di_ftype_void
12674 = build_function_type (long_long_unsigned_type_node, void_list_node);
12675 tree v4sf_ftype_void
12676 = build_function_type (V4SF_type_node, void_list_node);
12677 tree v2si_ftype_v4sf
12678 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
12679 /* Loads/stores. */
12680 tree void_ftype_v8qi_v8qi_pchar
12681 = build_function_type_list (void_type_node,
12682 V8QI_type_node, V8QI_type_node,
12683 pchar_type_node, NULL_TREE);
12684 tree v4sf_ftype_pcfloat
12685 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
12686 /* @@@ the type is bogus */
12687 tree v4sf_ftype_v4sf_pv2si
12688 = build_function_type_list (V4SF_type_node,
12689 V4SF_type_node, pv2si_type_node, NULL_TREE);
12690 tree void_ftype_pv2si_v4sf
12691 = build_function_type_list (void_type_node,
12692 pv2si_type_node, V4SF_type_node, NULL_TREE);
12693 tree void_ftype_pfloat_v4sf
12694 = build_function_type_list (void_type_node,
12695 pfloat_type_node, V4SF_type_node, NULL_TREE);
12696 tree void_ftype_pdi_di
12697 = build_function_type_list (void_type_node,
12698 pdi_type_node, long_long_unsigned_type_node,
12699 NULL_TREE);
12700 tree void_ftype_pv2di_v2di
12701 = build_function_type_list (void_type_node,
12702 pv2di_type_node, V2DI_type_node, NULL_TREE);
12703 /* Normal vector unops. */
12704 tree v4sf_ftype_v4sf
12705 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
12706
12707 /* Normal vector binops. */
12708 tree v4sf_ftype_v4sf_v4sf
12709 = build_function_type_list (V4SF_type_node,
12710 V4SF_type_node, V4SF_type_node, NULL_TREE);
12711 tree v8qi_ftype_v8qi_v8qi
12712 = build_function_type_list (V8QI_type_node,
12713 V8QI_type_node, V8QI_type_node, NULL_TREE);
12714 tree v4hi_ftype_v4hi_v4hi
12715 = build_function_type_list (V4HI_type_node,
12716 V4HI_type_node, V4HI_type_node, NULL_TREE);
12717 tree v2si_ftype_v2si_v2si
12718 = build_function_type_list (V2SI_type_node,
12719 V2SI_type_node, V2SI_type_node, NULL_TREE);
12720 tree di_ftype_di_di
12721 = build_function_type_list (long_long_unsigned_type_node,
12722 long_long_unsigned_type_node,
12723 long_long_unsigned_type_node, NULL_TREE);
12724
12725 tree v2si_ftype_v2sf
12726 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
12727 tree v2sf_ftype_v2si
12728 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
12729 tree v2si_ftype_v2si
12730 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
12731 tree v2sf_ftype_v2sf
12732 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
12733 tree v2sf_ftype_v2sf_v2sf
12734 = build_function_type_list (V2SF_type_node,
12735 V2SF_type_node, V2SF_type_node, NULL_TREE);
12736 tree v2si_ftype_v2sf_v2sf
12737 = build_function_type_list (V2SI_type_node,
12738 V2SF_type_node, V2SF_type_node, NULL_TREE);
12739 tree pint_type_node = build_pointer_type (integer_type_node);
12740 tree pcint_type_node = build_pointer_type (
12741 build_type_variant (integer_type_node, 1, 0));
12742 tree pdouble_type_node = build_pointer_type (double_type_node);
12743 tree pcdouble_type_node = build_pointer_type (
12744 build_type_variant (double_type_node, 1, 0));
12745 tree int_ftype_v2df_v2df
12746 = build_function_type_list (integer_type_node,
12747 V2DF_type_node, V2DF_type_node, NULL_TREE);
12748
12749 tree ti_ftype_void
12750 = build_function_type (intTI_type_node, void_list_node);
12751 tree v2di_ftype_void
12752 = build_function_type (V2DI_type_node, void_list_node);
12753 tree ti_ftype_ti_ti
12754 = build_function_type_list (intTI_type_node,
12755 intTI_type_node, intTI_type_node, NULL_TREE);
12756 tree void_ftype_pcvoid
12757 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
12758 tree v2di_ftype_di
12759 = build_function_type_list (V2DI_type_node,
12760 long_long_unsigned_type_node, NULL_TREE);
12761 tree di_ftype_v2di
12762 = build_function_type_list (long_long_unsigned_type_node,
12763 V2DI_type_node, NULL_TREE);
12764 tree v4sf_ftype_v4si
12765 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
12766 tree v4si_ftype_v4sf
12767 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
12768 tree v2df_ftype_v4si
12769 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
12770 tree v4si_ftype_v2df
12771 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
12772 tree v2si_ftype_v2df
12773 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
12774 tree v4sf_ftype_v2df
12775 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
12776 tree v2df_ftype_v2si
12777 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
12778 tree v2df_ftype_v4sf
12779 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
12780 tree int_ftype_v2df
12781 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
12782 tree int64_ftype_v2df
12783 = build_function_type_list (long_long_integer_type_node,
12784 V2DF_type_node, NULL_TREE);
12785 tree v2df_ftype_v2df_int
12786 = build_function_type_list (V2DF_type_node,
12787 V2DF_type_node, integer_type_node, NULL_TREE);
12788 tree v2df_ftype_v2df_int64
12789 = build_function_type_list (V2DF_type_node,
12790 V2DF_type_node, long_long_integer_type_node,
12791 NULL_TREE);
12792 tree v4sf_ftype_v4sf_v2df
12793 = build_function_type_list (V4SF_type_node,
12794 V4SF_type_node, V2DF_type_node, NULL_TREE);
12795 tree v2df_ftype_v2df_v4sf
12796 = build_function_type_list (V2DF_type_node,
12797 V2DF_type_node, V4SF_type_node, NULL_TREE);
12798 tree v2df_ftype_v2df_v2df_int
12799 = build_function_type_list (V2DF_type_node,
12800 V2DF_type_node, V2DF_type_node,
12801 integer_type_node,
12802 NULL_TREE);
12803 tree v2df_ftype_v2df_pv2si
12804 = build_function_type_list (V2DF_type_node,
12805 V2DF_type_node, pv2si_type_node, NULL_TREE);
12806 tree void_ftype_pv2si_v2df
12807 = build_function_type_list (void_type_node,
12808 pv2si_type_node, V2DF_type_node, NULL_TREE);
12809 tree void_ftype_pdouble_v2df
12810 = build_function_type_list (void_type_node,
12811 pdouble_type_node, V2DF_type_node, NULL_TREE);
12812 tree void_ftype_pint_int
12813 = build_function_type_list (void_type_node,
12814 pint_type_node, integer_type_node, NULL_TREE);
12815 tree void_ftype_v16qi_v16qi_pchar
12816 = build_function_type_list (void_type_node,
12817 V16QI_type_node, V16QI_type_node,
12818 pchar_type_node, NULL_TREE);
12819 tree v2df_ftype_pcdouble
12820 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
12821 tree v2df_ftype_v2df_v2df
12822 = build_function_type_list (V2DF_type_node,
12823 V2DF_type_node, V2DF_type_node, NULL_TREE);
12824 tree v16qi_ftype_v16qi_v16qi
12825 = build_function_type_list (V16QI_type_node,
12826 V16QI_type_node, V16QI_type_node, NULL_TREE);
12827 tree v8hi_ftype_v8hi_v8hi
12828 = build_function_type_list (V8HI_type_node,
12829 V8HI_type_node, V8HI_type_node, NULL_TREE);
12830 tree v4si_ftype_v4si_v4si
12831 = build_function_type_list (V4SI_type_node,
12832 V4SI_type_node, V4SI_type_node, NULL_TREE);
12833 tree v2di_ftype_v2di_v2di
12834 = build_function_type_list (V2DI_type_node,
12835 V2DI_type_node, V2DI_type_node, NULL_TREE);
12836 tree v2di_ftype_v2df_v2df
12837 = build_function_type_list (V2DI_type_node,
12838 V2DF_type_node, V2DF_type_node, NULL_TREE);
12839 tree v2df_ftype_v2df
12840 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
12841 tree v2df_ftype_double
12842 = build_function_type_list (V2DF_type_node, double_type_node, NULL_TREE);
12843 tree v2df_ftype_double_double
12844 = build_function_type_list (V2DF_type_node,
12845 double_type_node, double_type_node, NULL_TREE);
12846 tree int_ftype_v8hi_int
12847 = build_function_type_list (integer_type_node,
12848 V8HI_type_node, integer_type_node, NULL_TREE);
12849 tree v8hi_ftype_v8hi_int_int
12850 = build_function_type_list (V8HI_type_node,
12851 V8HI_type_node, integer_type_node,
12852 integer_type_node, NULL_TREE);
12853 tree v2di_ftype_v2di_int
12854 = build_function_type_list (V2DI_type_node,
12855 V2DI_type_node, integer_type_node, NULL_TREE);
12856 tree v4si_ftype_v4si_int
12857 = build_function_type_list (V4SI_type_node,
12858 V4SI_type_node, integer_type_node, NULL_TREE);
12859 tree v8hi_ftype_v8hi_int
12860 = build_function_type_list (V8HI_type_node,
12861 V8HI_type_node, integer_type_node, NULL_TREE);
12862 tree v8hi_ftype_v8hi_v2di
12863 = build_function_type_list (V8HI_type_node,
12864 V8HI_type_node, V2DI_type_node, NULL_TREE);
12865 tree v4si_ftype_v4si_v2di
12866 = build_function_type_list (V4SI_type_node,
12867 V4SI_type_node, V2DI_type_node, NULL_TREE);
12868 tree v4si_ftype_v8hi_v8hi
12869 = build_function_type_list (V4SI_type_node,
12870 V8HI_type_node, V8HI_type_node, NULL_TREE);
12871 tree di_ftype_v8qi_v8qi
12872 = build_function_type_list (long_long_unsigned_type_node,
12873 V8QI_type_node, V8QI_type_node, NULL_TREE);
12874 tree v2di_ftype_v16qi_v16qi
12875 = build_function_type_list (V2DI_type_node,
12876 V16QI_type_node, V16QI_type_node, NULL_TREE);
12877 tree int_ftype_v16qi
12878 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
12879 tree v16qi_ftype_pcchar
12880 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
12881 tree void_ftype_pchar_v16qi
12882 = build_function_type_list (void_type_node,
12883 pchar_type_node, V16QI_type_node, NULL_TREE);
12884 tree v4si_ftype_pcint
12885 = build_function_type_list (V4SI_type_node, pcint_type_node, NULL_TREE);
12886 tree void_ftype_pcint_v4si
12887 = build_function_type_list (void_type_node,
12888 pcint_type_node, V4SI_type_node, NULL_TREE);
12889 tree v2di_ftype_v2di
12890 = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
12891
12892 /* Add all builtins that are more or less simple operations on two
12893 operands. */
12894 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
12895 {
12896       /* Pick the prototype from the mode of an input operand; the
12897 	 destination can have a different mode for the mask-generating compares.  */
12898 enum machine_mode mode;
12899 tree type;
12900
12901 if (d->name == 0)
12902 continue;
12903 mode = insn_data[d->icode].operand[1].mode;
12904
12905 switch (mode)
12906 {
12907 case V16QImode:
12908 type = v16qi_ftype_v16qi_v16qi;
12909 break;
12910 case V8HImode:
12911 type = v8hi_ftype_v8hi_v8hi;
12912 break;
12913 case V4SImode:
12914 type = v4si_ftype_v4si_v4si;
12915 break;
12916 case V2DImode:
12917 type = v2di_ftype_v2di_v2di;
12918 break;
12919 case V2DFmode:
12920 type = v2df_ftype_v2df_v2df;
12921 break;
12922 case TImode:
12923 type = ti_ftype_ti_ti;
12924 break;
12925 case V4SFmode:
12926 type = v4sf_ftype_v4sf_v4sf;
12927 break;
12928 case V8QImode:
12929 type = v8qi_ftype_v8qi_v8qi;
12930 break;
12931 case V4HImode:
12932 type = v4hi_ftype_v4hi_v4hi;
12933 break;
12934 case V2SImode:
12935 type = v2si_ftype_v2si_v2si;
12936 break;
12937 case DImode:
12938 type = di_ftype_di_di;
12939 break;
12940
12941 default:
12942 abort ();
12943 }
12944
12945 /* Override for comparisons. */
12946 if (d->icode == CODE_FOR_maskcmpv4sf3
12947 || d->icode == CODE_FOR_maskncmpv4sf3
12948 || d->icode == CODE_FOR_vmmaskcmpv4sf3
12949 || d->icode == CODE_FOR_vmmaskncmpv4sf3)
12950 type = v4si_ftype_v4sf_v4sf;
12951
12952 if (d->icode == CODE_FOR_maskcmpv2df3
12953 || d->icode == CODE_FOR_maskncmpv2df3
12954 || d->icode == CODE_FOR_vmmaskcmpv2df3
12955 || d->icode == CODE_FOR_vmmaskncmpv2df3)
12956 type = v2di_ftype_v2df_v2df;
12957
12958 def_builtin (d->mask, d->name, type, d->code);
12959 }
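
  /* For example, an entry whose insn pattern takes V4SFmode inputs (the
     addps-style builtins) receives the v4sf_ftype_v4sf_v4sf prototype
     built above, while the mask-generating compares are redirected to
     prototypes that return integer vectors of the same width.  */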
12960
12961 /* Add the remaining MMX insns with somewhat more complicated types. */
12962 def_builtin (MASK_MMX, "__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO);
12963 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
12964 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
12965 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
12966 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
12967
12968 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
12969 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
12970 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
12971
12972 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
12973 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
12974
12975 def_builtin (MASK_MMX, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
12976 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
12977
12978 /* comi/ucomi insns. */
12979 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
12980 if (d->mask == MASK_SSE2)
12981 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
12982 else
12983 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
12984
12985 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
12986 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
12987 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
12988
12989 def_builtin (MASK_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
12990 def_builtin (MASK_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
12991 def_builtin (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
12992 def_builtin (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
12993 def_builtin (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
12994 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
12995 def_builtin (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
12996 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
12997 def_builtin (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
12998 def_builtin (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
12999 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);
13000
13001 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
13002 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);
13003
13004 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
13005
13006 def_builtin (MASK_SSE, "__builtin_ia32_loadaps", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADAPS);
13007 def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
13008 def_builtin (MASK_SSE, "__builtin_ia32_loadss", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADSS);
13009 def_builtin (MASK_SSE, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS);
13010 def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
13011 def_builtin (MASK_SSE, "__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS);
13012
13013 def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
13014 def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
13015 def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
13016 def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
13017
13018 def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
13019 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
13020 def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
13021 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
13022
13023 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
13024
13025 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
13026
13027 def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
13028 def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
13029 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
13030 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
13031 def_builtin (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
13032 def_builtin (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
13033
13034 def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
13035
13036 /* Original 3DNow! */
13037 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
13038 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
13039 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
13040 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
13041 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
13042 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
13043 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
13044 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
13045 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
13046 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
13047 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
13048 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
13049 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
13050 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
13051 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
13052 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
13053 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
13054 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
13055 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
13056 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
13057
13058 /* 3DNow! extension as used in the Athlon CPU. */
13059 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
13060 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
13061 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
13062 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
13063 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
13064 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
13065
13066 def_builtin (MASK_SSE, "__builtin_ia32_setzerops", v4sf_ftype_void, IX86_BUILTIN_SSE_ZERO);
13067
13068 /* SSE2 */
13069 def_builtin (MASK_SSE2, "__builtin_ia32_pextrw128", int_ftype_v8hi_int, IX86_BUILTIN_PEXTRW128);
13070 def_builtin (MASK_SSE2, "__builtin_ia32_pinsrw128", v8hi_ftype_v8hi_int_int, IX86_BUILTIN_PINSRW128);
13071
13072 def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
13073 def_builtin (MASK_SSE2, "__builtin_ia32_movq2dq", v2di_ftype_di, IX86_BUILTIN_MOVQ2DQ);
13074 def_builtin (MASK_SSE2, "__builtin_ia32_movdq2q", di_ftype_v2di, IX86_BUILTIN_MOVDQ2Q);
13075
13076 def_builtin (MASK_SSE2, "__builtin_ia32_loadapd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADAPD);
13077 def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
13078 def_builtin (MASK_SSE2, "__builtin_ia32_loadsd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADSD);
13079 def_builtin (MASK_SSE2, "__builtin_ia32_storeapd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREAPD);
13080 def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
13081 def_builtin (MASK_SSE2, "__builtin_ia32_storesd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORESD);
13082
13083 def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADHPD);
13084 def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADLPD);
13085 def_builtin (MASK_SSE2, "__builtin_ia32_storehpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STOREHPD);
13086 def_builtin (MASK_SSE2, "__builtin_ia32_storelpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STORELPD);
13087
13088 def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
13089 def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
13090 def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
13091 def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
13092 def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
13093
13094 def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
13095 def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
13096 def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
13097 def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
13098
13099 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
13100 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
13101
13102 def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
13103
13104 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
13105 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
13106
13107 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
13108 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
13109 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
13110 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
13111 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
13112
13113 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
13114
13115 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
13116 def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
13117 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
13118 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);
13119
13120 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
13121 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
13122 def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
13123
13124 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
13125 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
13126 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
13127 def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
13128
13129 def_builtin (MASK_SSE2, "__builtin_ia32_setpd1", v2df_ftype_double, IX86_BUILTIN_SETPD1);
13130 def_builtin (MASK_SSE2, "__builtin_ia32_setpd", v2df_ftype_double_double, IX86_BUILTIN_SETPD);
13131 def_builtin (MASK_SSE2, "__builtin_ia32_setzeropd", ti_ftype_void, IX86_BUILTIN_CLRPD);
13132 def_builtin (MASK_SSE2, "__builtin_ia32_loadpd1", v2df_ftype_pcdouble, IX86_BUILTIN_LOADPD1);
13133 def_builtin (MASK_SSE2, "__builtin_ia32_loadrpd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADRPD);
13134 def_builtin (MASK_SSE2, "__builtin_ia32_storepd1", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREPD1);
13135 def_builtin (MASK_SSE2, "__builtin_ia32_storerpd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORERPD);
13136
13137 def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
13138 def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
13139 def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
13140
13141 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqa", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQA);
13142 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
13143 def_builtin (MASK_SSE2, "__builtin_ia32_loadd", v4si_ftype_pcint, IX86_BUILTIN_LOADD);
13144 def_builtin (MASK_SSE2, "__builtin_ia32_storedqa", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQA);
13145 def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
13146 def_builtin (MASK_SSE2, "__builtin_ia32_stored", void_ftype_pcint_v4si, IX86_BUILTIN_STORED);
13147 def_builtin (MASK_SSE2, "__builtin_ia32_movq", v2di_ftype_v2di, IX86_BUILTIN_MOVQ);
13148
13149 def_builtin (MASK_SSE, "__builtin_ia32_setzero128", v2di_ftype_void, IX86_BUILTIN_CLRTI);
13150
13151 def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
13152 def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
13153 def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
13154
13155 def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
13156 def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
13157 def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
13158
13159 def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
13160 def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);
13161
13162 def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
13163 def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
13164 def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
13165 def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
13166
13167 def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
13168 def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
13169 def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
13170 def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
13171
13172 def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
13173 def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
13174
13175 def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
13176
13177 /* Prescott New Instructions. */
13178 def_builtin (MASK_SSE3, "__builtin_ia32_monitor",
13179 void_ftype_pcvoid_unsigned_unsigned,
13180 IX86_BUILTIN_MONITOR);
13181 def_builtin (MASK_SSE3, "__builtin_ia32_mwait",
13182 void_ftype_unsigned_unsigned,
13183 IX86_BUILTIN_MWAIT);
13184 def_builtin (MASK_SSE3, "__builtin_ia32_movshdup",
13185 v4sf_ftype_v4sf,
13186 IX86_BUILTIN_MOVSHDUP);
13187 def_builtin (MASK_SSE3, "__builtin_ia32_movsldup",
13188 v4sf_ftype_v4sf,
13189 IX86_BUILTIN_MOVSLDUP);
13190 def_builtin (MASK_SSE3, "__builtin_ia32_lddqu",
13191 v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);
13192 def_builtin (MASK_SSE3, "__builtin_ia32_loadddup",
13193 v2df_ftype_pcdouble, IX86_BUILTIN_LOADDDUP);
13194 def_builtin (MASK_SSE3, "__builtin_ia32_movddup",
13195 v2df_ftype_v2df, IX86_BUILTIN_MOVDDUP);
13196 }
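
/* For illustration only: the names registered above are the raw builtins
   that the <mmintrin.h>/<xmmintrin.h>/<emmintrin.h> intrinsics are
   expected to wrap; user code (or those headers) may contain e.g.

	__v4sf sum = __builtin_ia32_addps (a, b);
	int mask = __builtin_ia32_movmskps (sum);

   and ix86_expand_builtin below turns each such call into the matching
   machine insn.  */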
13197
13198 /* Errors in the source file can cause expand_expr to return const0_rtx
13199 where we expect a vector. To avoid crashing, use one of the vector
13200 clear instructions. */
13201 static rtx
13202 safe_vector_operand (x, mode)
13203 rtx x;
13204 enum machine_mode mode;
13205 {
13206 if (x != const0_rtx)
13207 return x;
13208 x = gen_reg_rtx (mode);
13209
13210 if (VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode))
13211 emit_insn (gen_mmx_clrdi (mode == DImode ? x
13212 : gen_rtx_SUBREG (DImode, x, 0)));
13213 else
13214 emit_insn (gen_sse_clrv4sf (mode == V4SFmode ? x
13215 : gen_rtx_SUBREG (V4SFmode, x, 0)));
13216 return x;
13217 }
13218
13219 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
13220
13221 static rtx
13222 ix86_expand_binop_builtin (icode, arglist, target)
13223 enum insn_code icode;
13224 tree arglist;
13225 rtx target;
13226 {
13227 rtx pat;
13228 tree arg0 = TREE_VALUE (arglist);
13229 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13230 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13231 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13232 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13233 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13234 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
13235
13236 if (VECTOR_MODE_P (mode0))
13237 op0 = safe_vector_operand (op0, mode0);
13238 if (VECTOR_MODE_P (mode1))
13239 op1 = safe_vector_operand (op1, mode1);
13240
13241 if (! target
13242 || GET_MODE (target) != tmode
13243 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13244 target = gen_reg_rtx (tmode);
13245
13246 if (GET_MODE (op1) == SImode && mode1 == TImode)
13247 {
13248 rtx x = gen_reg_rtx (V4SImode);
13249 emit_insn (gen_sse2_loadd (x, op1));
13250 op1 = gen_lowpart (TImode, x);
13251 }
13252
13253 /* In case the insn wants input operands in modes different from
13254 the result, abort. */
13255 if (GET_MODE (op0) != mode0 || GET_MODE (op1) != mode1)
13256 abort ();
13257
13258 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13259 op0 = copy_to_mode_reg (mode0, op0);
13260 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13261 op1 = copy_to_mode_reg (mode1, op1);
13262
13263 /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
13264 yet one of the two must not be a memory. This is normally enforced
13265 by expanders, but we didn't bother to create one here. */
13266 if (GET_CODE (op0) == MEM && GET_CODE (op1) == MEM)
13267 op0 = copy_to_mode_reg (mode0, op0);
13268
13269 pat = GEN_FCN (icode) (target, op0, op1);
13270 if (! pat)
13271 return 0;
13272 emit_insn (pat);
13273 return target;
13274 }
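
/* Illustration: for a two-operand builtin such as the paddw one, the
   description table supplies the V4HImode add pattern as ICODE; both
   arguments are forced into registers of the pattern's operand modes and
   a single insn computing TARGET from them is emitted.  */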
13275
13276 /* Subroutine of ix86_expand_builtin to take care of stores. */
13277
13278 static rtx
13279 ix86_expand_store_builtin (icode, arglist)
13280 enum insn_code icode;
13281 tree arglist;
13282 {
13283 rtx pat;
13284 tree arg0 = TREE_VALUE (arglist);
13285 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13286 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13287 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13288 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
13289 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
13290
13291 if (VECTOR_MODE_P (mode1))
13292 op1 = safe_vector_operand (op1, mode1);
13293
13294 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13295 op1 = copy_to_mode_reg (mode1, op1);
13296
13297 pat = GEN_FCN (icode) (op0, op1);
13298 if (pat)
13299 emit_insn (pat);
13300 return 0;
13301 }
13302
13303 /* Subroutine of ix86_expand_builtin to take care of unop insns.  If
   DO_LOAD is nonzero, the single argument is a pointer and the operand is
   loaded from the memory it points to.  */
13304
13305 static rtx
13306 ix86_expand_unop_builtin (icode, arglist, target, do_load)
13307 enum insn_code icode;
13308 tree arglist;
13309 rtx target;
13310 int do_load;
13311 {
13312 rtx pat;
13313 tree arg0 = TREE_VALUE (arglist);
13314 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13315 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13316 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13317
13318 if (! target
13319 || GET_MODE (target) != tmode
13320 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13321 target = gen_reg_rtx (tmode);
13322 if (do_load)
13323 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13324 else
13325 {
13326 if (VECTOR_MODE_P (mode0))
13327 op0 = safe_vector_operand (op0, mode0);
13328
13329 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13330 op0 = copy_to_mode_reg (mode0, op0);
13331 }
13332
13333 pat = GEN_FCN (icode) (target, op0);
13334 if (! pat)
13335 return 0;
13336 emit_insn (pat);
13337 return target;
13338 }
13339
13340 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
13341 sqrtss, rsqrtss, rcpss. */
13342
13343 static rtx
13344 ix86_expand_unop1_builtin (icode, arglist, target)
13345 enum insn_code icode;
13346 tree arglist;
13347 rtx target;
13348 {
13349 rtx pat;
13350 tree arg0 = TREE_VALUE (arglist);
13351 rtx op1, op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13352 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13353 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13354
13355 if (! target
13356 || GET_MODE (target) != tmode
13357 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13358 target = gen_reg_rtx (tmode);
13359
13360 if (VECTOR_MODE_P (mode0))
13361 op0 = safe_vector_operand (op0, mode0);
13362
13363 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13364 op0 = copy_to_mode_reg (mode0, op0);
13365
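  /* The scalar sqrtss/rsqrtss/rcpss patterns take two vector inputs: the
     operation is applied to the low element of the first while the upper
     elements are copied from the second, so pass the same register for
     both operands.  */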
13366 op1 = op0;
13367 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
13368 op1 = copy_to_mode_reg (mode0, op1);
13369
13370 pat = GEN_FCN (icode) (target, op0, op1);
13371 if (! pat)
13372 return 0;
13373 emit_insn (pat);
13374 return target;
13375 }
13376
13377 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
13378
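/* Only the EQ/LT/LE/UNORD compares (and their negations) exist as SSE
   instructions; a "greater than" style builtin is marked in the
   description tables so that its operands are swapped here.  */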
13379 static rtx
13380 ix86_expand_sse_compare (d, arglist, target)
13381 const struct builtin_description *d;
13382 tree arglist;
13383 rtx target;
13384 {
13385 rtx pat;
13386 tree arg0 = TREE_VALUE (arglist);
13387 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13388 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13389 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13390 rtx op2;
13391 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
13392 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
13393 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
13394 enum rtx_code comparison = d->comparison;
13395
13396 if (VECTOR_MODE_P (mode0))
13397 op0 = safe_vector_operand (op0, mode0);
13398 if (VECTOR_MODE_P (mode1))
13399 op1 = safe_vector_operand (op1, mode1);
13400
13401 /* Swap operands if we have a comparison that isn't available in
13402 hardware. */
13403 if (d->flag)
13404 {
13405 rtx tmp = gen_reg_rtx (mode1);
13406 emit_move_insn (tmp, op1);
13407 op1 = op0;
13408 op0 = tmp;
13409 }
13410
13411 if (! target
13412 || GET_MODE (target) != tmode
13413 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
13414 target = gen_reg_rtx (tmode);
13415
13416 if (! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
13417 op0 = copy_to_mode_reg (mode0, op0);
13418 if (! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
13419 op1 = copy_to_mode_reg (mode1, op1);
13420
13421 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
13422 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
13423 if (! pat)
13424 return 0;
13425 emit_insn (pat);
13426 return target;
13427 }
13428
13429 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
13430
13431 static rtx
13432 ix86_expand_sse_comi (d, arglist, target)
13433 const struct builtin_description *d;
13434 tree arglist;
13435 rtx target;
13436 {
13437 rtx pat;
13438 tree arg0 = TREE_VALUE (arglist);
13439 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13440 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13441 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13442 rtx op2;
13443 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
13444 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
13445 enum rtx_code comparison = d->comparison;
13446
13447 if (VECTOR_MODE_P (mode0))
13448 op0 = safe_vector_operand (op0, mode0);
13449 if (VECTOR_MODE_P (mode1))
13450 op1 = safe_vector_operand (op1, mode1);
13451
13452 /* Swap operands if we have a comparison that isn't available in
13453 hardware. */
13454 if (d->flag)
13455 {
13456 rtx tmp = op1;
13457 op1 = op0;
13458 op0 = tmp;
13459 }
13460
13461 target = gen_reg_rtx (SImode);
13462 emit_move_insn (target, const0_rtx);
13463 target = gen_rtx_SUBREG (QImode, target, 0);
13464
13465 if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
13466 op0 = copy_to_mode_reg (mode0, op0);
13467 if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
13468 op1 = copy_to_mode_reg (mode1, op1);
13469
13470 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
13471 pat = GEN_FCN (d->icode) (op0, op1);
13472 if (! pat)
13473 return 0;
13474 emit_insn (pat);
13475 emit_insn (gen_rtx_SET (VOIDmode,
13476 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
13477 gen_rtx_fmt_ee (comparison, QImode,
13478 SET_DEST (pat),
13479 const0_rtx)));
13480
13481 return SUBREG_REG (target);
13482 }
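
/* Illustration: a comieq-style builtin expands (above) to the comi/ucomi
   flag-setting pattern followed by a store of the flag condition into the
   low byte of a zeroed SImode register via STRICT_LOW_PART; that SImode
   register is what the caller receives.  */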
13483
13484 /* Expand an expression EXP that calls a built-in function,
13485 with result going to TARGET if that's convenient
13486 (and in mode MODE if that's convenient).
13487 SUBTARGET may be used as the target for computing one of EXP's operands.
13488 IGNORE is nonzero if the value is to be ignored. */
13489
13490 rtx
13491 ix86_expand_builtin (exp, target, subtarget, mode, ignore)
13492 tree exp;
13493 rtx target;
13494 rtx subtarget ATTRIBUTE_UNUSED;
13495 enum machine_mode mode ATTRIBUTE_UNUSED;
13496 int ignore ATTRIBUTE_UNUSED;
13497 {
13498 const struct builtin_description *d;
13499 size_t i;
13500 enum insn_code icode;
13501 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
13502 tree arglist = TREE_OPERAND (exp, 1);
13503 tree arg0, arg1, arg2;
13504 rtx op0, op1, op2, pat;
13505 enum machine_mode tmode, mode0, mode1, mode2;
13506 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
13507
13508 switch (fcode)
13509 {
13510 case IX86_BUILTIN_EMMS:
13511 emit_insn (gen_emms ());
13512 return 0;
13513
13514 case IX86_BUILTIN_SFENCE:
13515 emit_insn (gen_sfence ());
13516 return 0;
13517
13518 case IX86_BUILTIN_PEXTRW:
13519 case IX86_BUILTIN_PEXTRW128:
13520 icode = (fcode == IX86_BUILTIN_PEXTRW
13521 ? CODE_FOR_mmx_pextrw
13522 : CODE_FOR_sse2_pextrw);
13523 arg0 = TREE_VALUE (arglist);
13524 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13525 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13526 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13527 tmode = insn_data[icode].operand[0].mode;
13528 mode0 = insn_data[icode].operand[1].mode;
13529 mode1 = insn_data[icode].operand[2].mode;
13530
13531 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13532 op0 = copy_to_mode_reg (mode0, op0);
13533 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13534 {
13535 /* @@@ better error message */
13536 error ("selector must be an immediate");
13537 return gen_reg_rtx (tmode);
13538 }
13539 if (target == 0
13540 || GET_MODE (target) != tmode
13541 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13542 target = gen_reg_rtx (tmode);
13543 pat = GEN_FCN (icode) (target, op0, op1);
13544 if (! pat)
13545 return 0;
13546 emit_insn (pat);
13547 return target;
13548
13549 case IX86_BUILTIN_PINSRW:
13550 case IX86_BUILTIN_PINSRW128:
13551 icode = (fcode == IX86_BUILTIN_PINSRW
13552 ? CODE_FOR_mmx_pinsrw
13553 : CODE_FOR_sse2_pinsrw);
13554 arg0 = TREE_VALUE (arglist);
13555 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13556 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
13557 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13558 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13559 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
13560 tmode = insn_data[icode].operand[0].mode;
13561 mode0 = insn_data[icode].operand[1].mode;
13562 mode1 = insn_data[icode].operand[2].mode;
13563 mode2 = insn_data[icode].operand[3].mode;
13564
13565 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13566 op0 = copy_to_mode_reg (mode0, op0);
13567 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13568 op1 = copy_to_mode_reg (mode1, op1);
13569 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
13570 {
13571 /* @@@ better error message */
13572 error ("selector must be an immediate");
13573 return const0_rtx;
13574 }
13575 if (target == 0
13576 || GET_MODE (target) != tmode
13577 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13578 target = gen_reg_rtx (tmode);
13579 pat = GEN_FCN (icode) (target, op0, op1, op2);
13580 if (! pat)
13581 return 0;
13582 emit_insn (pat);
13583 return target;
13584
13585 case IX86_BUILTIN_MASKMOVQ:
13586 case IX86_BUILTIN_MASKMOVDQU:
13587 icode = (fcode == IX86_BUILTIN_MASKMOVQ
13588 ? (TARGET_64BIT ? CODE_FOR_mmx_maskmovq_rex : CODE_FOR_mmx_maskmovq)
13589 : (TARGET_64BIT ? CODE_FOR_sse2_maskmovdqu_rex64
13590 : CODE_FOR_sse2_maskmovdqu));
13591 /* Note the arg order is different from the operand order. */
13592 arg1 = TREE_VALUE (arglist);
13593 arg2 = TREE_VALUE (TREE_CHAIN (arglist));
13594 arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
13595 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13596 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13597 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
13598 mode0 = insn_data[icode].operand[0].mode;
13599 mode1 = insn_data[icode].operand[1].mode;
13600 mode2 = insn_data[icode].operand[2].mode;
13601
13602 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
13603 op0 = copy_to_mode_reg (mode0, op0);
13604 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
13605 op1 = copy_to_mode_reg (mode1, op1);
13606 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
13607 op2 = copy_to_mode_reg (mode2, op2);
13608 pat = GEN_FCN (icode) (op0, op1, op2);
13609 if (! pat)
13610 return 0;
13611 emit_insn (pat);
13612 return 0;
13613
13614 case IX86_BUILTIN_SQRTSS:
13615 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2, arglist, target);
13616 case IX86_BUILTIN_RSQRTSS:
13617 return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2, arglist, target);
13618 case IX86_BUILTIN_RCPSS:
13619 return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target);
13620
13621 case IX86_BUILTIN_LOADAPS:
13622 return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);
13623
13624 case IX86_BUILTIN_LOADUPS:
13625 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
13626
13627 case IX86_BUILTIN_STOREAPS:
13628 return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist);
13629
13630 case IX86_BUILTIN_STOREUPS:
13631 return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
13632
13633 case IX86_BUILTIN_LOADSS:
13634 return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1);
13635
13636 case IX86_BUILTIN_STORESS:
13637 return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist);
13638
13639 case IX86_BUILTIN_LOADHPS:
13640 case IX86_BUILTIN_LOADLPS:
13641 case IX86_BUILTIN_LOADHPD:
13642 case IX86_BUILTIN_LOADLPD:
13643 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_movhps
13644 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_movlps
13645 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_movhpd
13646 : CODE_FOR_sse2_movlpd);
13647 arg0 = TREE_VALUE (arglist);
13648 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13649 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13650 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13651 tmode = insn_data[icode].operand[0].mode;
13652 mode0 = insn_data[icode].operand[1].mode;
13653 mode1 = insn_data[icode].operand[2].mode;
13654
13655 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13656 op0 = copy_to_mode_reg (mode0, op0);
13657 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
13658 if (target == 0
13659 || GET_MODE (target) != tmode
13660 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13661 target = gen_reg_rtx (tmode);
13662 pat = GEN_FCN (icode) (target, op0, op1);
13663 if (! pat)
13664 return 0;
13665 emit_insn (pat);
13666 return target;
13667
13668 case IX86_BUILTIN_STOREHPS:
13669 case IX86_BUILTIN_STORELPS:
13670 case IX86_BUILTIN_STOREHPD:
13671 case IX86_BUILTIN_STORELPD:
13672 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_movhps
13673 : fcode == IX86_BUILTIN_STORELPS ? CODE_FOR_sse_movlps
13674 : fcode == IX86_BUILTIN_STOREHPD ? CODE_FOR_sse2_movhpd
13675 : CODE_FOR_sse2_movlpd);
13676 arg0 = TREE_VALUE (arglist);
13677 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13678 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13679 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13680 mode0 = insn_data[icode].operand[1].mode;
13681 mode1 = insn_data[icode].operand[2].mode;
13682
13683 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13684 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13685 op1 = copy_to_mode_reg (mode1, op1);
13686
13687 pat = GEN_FCN (icode) (op0, op0, op1);
13688 if (! pat)
13689 return 0;
13690 emit_insn (pat);
13691 return 0;
13692
13693 case IX86_BUILTIN_MOVNTPS:
13694 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
13695 case IX86_BUILTIN_MOVNTQ:
13696 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);
13697
13698 case IX86_BUILTIN_LDMXCSR:
13699 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
13700 target = assign_386_stack_local (SImode, 0);
13701 emit_move_insn (target, op0);
13702 emit_insn (gen_ldmxcsr (target));
13703 return 0;
13704
13705 case IX86_BUILTIN_STMXCSR:
13706 target = assign_386_stack_local (SImode, 0);
13707 emit_insn (gen_stmxcsr (target));
13708 return copy_to_mode_reg (SImode, target);
13709
13710 case IX86_BUILTIN_SHUFPS:
13711 case IX86_BUILTIN_SHUFPD:
13712 icode = (fcode == IX86_BUILTIN_SHUFPS
13713 ? CODE_FOR_sse_shufps
13714 : CODE_FOR_sse2_shufpd);
13715 arg0 = TREE_VALUE (arglist);
13716 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13717 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
13718 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13719 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13720 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
13721 tmode = insn_data[icode].operand[0].mode;
13722 mode0 = insn_data[icode].operand[1].mode;
13723 mode1 = insn_data[icode].operand[2].mode;
13724 mode2 = insn_data[icode].operand[3].mode;
13725
13726 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13727 op0 = copy_to_mode_reg (mode0, op0);
13728 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13729 op1 = copy_to_mode_reg (mode1, op1);
13730 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
13731 {
13732 /* @@@ better error message */
13733 error ("mask must be an immediate");
13734 return gen_reg_rtx (tmode);
13735 }
13736 if (target == 0
13737 || GET_MODE (target) != tmode
13738 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13739 target = gen_reg_rtx (tmode);
13740 pat = GEN_FCN (icode) (target, op0, op1, op2);
13741 if (! pat)
13742 return 0;
13743 emit_insn (pat);
13744 return target;
13745
13746 case IX86_BUILTIN_PSHUFW:
13747 case IX86_BUILTIN_PSHUFD:
13748 case IX86_BUILTIN_PSHUFHW:
13749 case IX86_BUILTIN_PSHUFLW:
13750 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
13751 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
13752 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
13753 : CODE_FOR_mmx_pshufw);
13754 arg0 = TREE_VALUE (arglist);
13755 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13756 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13757 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13758 tmode = insn_data[icode].operand[0].mode;
13759 mode1 = insn_data[icode].operand[1].mode;
13760 mode2 = insn_data[icode].operand[2].mode;
13761
13762 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
13763 op0 = copy_to_mode_reg (mode1, op0);
13764 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
13765 {
13766 /* @@@ better error message */
13767 error ("mask must be an immediate");
13768 return const0_rtx;
13769 }
13770 if (target == 0
13771 || GET_MODE (target) != tmode
13772 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13773 target = gen_reg_rtx (tmode);
13774 pat = GEN_FCN (icode) (target, op0, op1);
13775 if (! pat)
13776 return 0;
13777 emit_insn (pat);
13778 return target;
13779
13780 case IX86_BUILTIN_PSLLDQI128:
13781 case IX86_BUILTIN_PSRLDQI128:
13782 icode = ( fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
13783 : CODE_FOR_sse2_lshrti3);
13784 arg0 = TREE_VALUE (arglist);
13785 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13786 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13787 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13788 tmode = insn_data[icode].operand[0].mode;
13789 mode1 = insn_data[icode].operand[1].mode;
13790 mode2 = insn_data[icode].operand[2].mode;
13791
13792 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
13793 {
13794 op0 = copy_to_reg (op0);
13795 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
13796 }
13797 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
13798 {
13799 error ("shift must be an immediate");
13800 return const0_rtx;
13801 }
13802 target = gen_reg_rtx (V2DImode);
13803 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0), op0, op1);
13804 if (! pat)
13805 return 0;
13806 emit_insn (pat);
13807 return target;
13808
13809 case IX86_BUILTIN_FEMMS:
13810 emit_insn (gen_femms ());
13811 return NULL_RTX;
13812
13813 case IX86_BUILTIN_PAVGUSB:
13814 return ix86_expand_binop_builtin (CODE_FOR_pavgusb, arglist, target);
13815
13816 case IX86_BUILTIN_PF2ID:
13817 return ix86_expand_unop_builtin (CODE_FOR_pf2id, arglist, target, 0);
13818
13819 case IX86_BUILTIN_PFACC:
13820 return ix86_expand_binop_builtin (CODE_FOR_pfacc, arglist, target);
13821
13822 case IX86_BUILTIN_PFADD:
13823 return ix86_expand_binop_builtin (CODE_FOR_addv2sf3, arglist, target);
13824
13825 case IX86_BUILTIN_PFCMPEQ:
13826 return ix86_expand_binop_builtin (CODE_FOR_eqv2sf3, arglist, target);
13827
13828 case IX86_BUILTIN_PFCMPGE:
13829 return ix86_expand_binop_builtin (CODE_FOR_gev2sf3, arglist, target);
13830
13831 case IX86_BUILTIN_PFCMPGT:
13832 return ix86_expand_binop_builtin (CODE_FOR_gtv2sf3, arglist, target);
13833
13834 case IX86_BUILTIN_PFMAX:
13835 return ix86_expand_binop_builtin (CODE_FOR_pfmaxv2sf3, arglist, target);
13836
13837 case IX86_BUILTIN_PFMIN:
13838 return ix86_expand_binop_builtin (CODE_FOR_pfminv2sf3, arglist, target);
13839
13840 case IX86_BUILTIN_PFMUL:
13841 return ix86_expand_binop_builtin (CODE_FOR_mulv2sf3, arglist, target);
13842
13843 case IX86_BUILTIN_PFRCP:
13844 return ix86_expand_unop_builtin (CODE_FOR_pfrcpv2sf2, arglist, target, 0);
13845
13846 case IX86_BUILTIN_PFRCPIT1:
13847 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit1v2sf3, arglist, target);
13848
13849 case IX86_BUILTIN_PFRCPIT2:
13850 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit2v2sf3, arglist, target);
13851
13852 case IX86_BUILTIN_PFRSQIT1:
13853 return ix86_expand_binop_builtin (CODE_FOR_pfrsqit1v2sf3, arglist, target);
13854
13855 case IX86_BUILTIN_PFRSQRT:
13856 return ix86_expand_unop_builtin (CODE_FOR_pfrsqrtv2sf2, arglist, target, 0);
13857
13858 case IX86_BUILTIN_PFSUB:
13859 return ix86_expand_binop_builtin (CODE_FOR_subv2sf3, arglist, target);
13860
13861 case IX86_BUILTIN_PFSUBR:
13862 return ix86_expand_binop_builtin (CODE_FOR_subrv2sf3, arglist, target);
13863
13864 case IX86_BUILTIN_PI2FD:
13865 return ix86_expand_unop_builtin (CODE_FOR_floatv2si2, arglist, target, 0);
13866
13867 case IX86_BUILTIN_PMULHRW:
13868 return ix86_expand_binop_builtin (CODE_FOR_pmulhrwv4hi3, arglist, target);
13869
13870 case IX86_BUILTIN_PF2IW:
13871 return ix86_expand_unop_builtin (CODE_FOR_pf2iw, arglist, target, 0);
13872
13873 case IX86_BUILTIN_PFNACC:
13874 return ix86_expand_binop_builtin (CODE_FOR_pfnacc, arglist, target);
13875
13876 case IX86_BUILTIN_PFPNACC:
13877 return ix86_expand_binop_builtin (CODE_FOR_pfpnacc, arglist, target);
13878
13879 case IX86_BUILTIN_PI2FW:
13880 return ix86_expand_unop_builtin (CODE_FOR_pi2fw, arglist, target, 0);
13881
13882 case IX86_BUILTIN_PSWAPDSI:
13883 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2si2, arglist, target, 0);
13884
13885 case IX86_BUILTIN_PSWAPDSF:
13886 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2sf2, arglist, target, 0);
13887
13888 case IX86_BUILTIN_SSE_ZERO:
13889 target = gen_reg_rtx (V4SFmode);
13890 emit_insn (gen_sse_clrv4sf (target));
13891 return target;
13892
13893 case IX86_BUILTIN_MMX_ZERO:
13894 target = gen_reg_rtx (DImode);
13895 emit_insn (gen_mmx_clrdi (target));
13896 return target;
13897
13898 case IX86_BUILTIN_CLRTI:
13899 target = gen_reg_rtx (V2DImode);
13900 emit_insn (gen_sse2_clrti (simplify_gen_subreg (TImode, target, V2DImode, 0)));
13901 return target;
13902
13903
13904 case IX86_BUILTIN_SQRTSD:
13905 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv2df2, arglist, target);
13906 case IX86_BUILTIN_LOADAPD:
13907 return ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist, target, 1);
13908 case IX86_BUILTIN_LOADUPD:
13909 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);
13910
13911 case IX86_BUILTIN_STOREAPD:
13912 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
13913 case IX86_BUILTIN_STOREUPD:
13914 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);
13915
13916 case IX86_BUILTIN_LOADSD:
13917 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist, target, 1);
13918
13919 case IX86_BUILTIN_STORESD:
13920 return ix86_expand_store_builtin (CODE_FOR_sse2_storesd, arglist);
13921
13922 case IX86_BUILTIN_SETPD1:
13923 target = assign_386_stack_local (DFmode, 0);
13924 arg0 = TREE_VALUE (arglist);
13925 emit_move_insn (adjust_address (target, DFmode, 0),
13926 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
13927 op0 = gen_reg_rtx (V2DFmode);
13928 emit_insn (gen_sse2_loadsd (op0, adjust_address (target, V2DFmode, 0)));
13929 emit_insn (gen_sse2_shufpd (op0, op0, op0, GEN_INT (0)));
13930 return op0;
13931
13932 case IX86_BUILTIN_SETPD:
13933 target = assign_386_stack_local (V2DFmode, 0);
13934 arg0 = TREE_VALUE (arglist);
13935 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13936 emit_move_insn (adjust_address (target, DFmode, 0),
13937 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
13938 emit_move_insn (adjust_address (target, DFmode, 8),
13939 expand_expr (arg1, NULL_RTX, VOIDmode, 0));
13940 op0 = gen_reg_rtx (V2DFmode);
13941 emit_insn (gen_sse2_movapd (op0, target));
13942 return op0;
13943
13944 case IX86_BUILTIN_LOADRPD:
13945 target = ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist,
13946 gen_reg_rtx (V2DFmode), 1);
13947 emit_insn (gen_sse2_shufpd (target, target, target, GEN_INT (1)));
13948 return target;
13949
13950 case IX86_BUILTIN_LOADPD1:
13951 target = ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist,
13952 gen_reg_rtx (V2DFmode), 1);
13953 emit_insn (gen_sse2_shufpd (target, target, target, const0_rtx));
13954 return target;
13955
13956 case IX86_BUILTIN_STOREPD1:
13957 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
13958 case IX86_BUILTIN_STORERPD:
13959 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
13960
13961 case IX86_BUILTIN_CLRPD:
13962 target = gen_reg_rtx (V2DFmode);
13963 emit_insn (gen_sse_clrv2df (target));
13964 return target;
13965
13966 case IX86_BUILTIN_MFENCE:
13967 emit_insn (gen_sse2_mfence ());
13968 return 0;
13969 case IX86_BUILTIN_LFENCE:
13970 emit_insn (gen_sse2_lfence ());
13971 return 0;
13972
13973 case IX86_BUILTIN_CLFLUSH:
13974 arg0 = TREE_VALUE (arglist);
13975 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13976 icode = CODE_FOR_sse2_clflush;
13977 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
13978 op0 = copy_to_mode_reg (Pmode, op0);
13979
13980 emit_insn (gen_sse2_clflush (op0));
13981 return 0;
13982
13983 case IX86_BUILTIN_MOVNTPD:
13984 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
13985 case IX86_BUILTIN_MOVNTDQ:
13986 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
13987 case IX86_BUILTIN_MOVNTI:
13988 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);
13989
13990 case IX86_BUILTIN_LOADDQA:
13991 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqa, arglist, target, 1);
13992 case IX86_BUILTIN_LOADDQU:
13993 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, arglist, target, 1);
13994 case IX86_BUILTIN_LOADD:
13995 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadd, arglist, target, 1);
13996
13997 case IX86_BUILTIN_STOREDQA:
13998 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqa, arglist);
13999 case IX86_BUILTIN_STOREDQU:
14000 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, arglist);
14001 case IX86_BUILTIN_STORED:
14002 return ix86_expand_store_builtin (CODE_FOR_sse2_stored, arglist);
14003
14004 case IX86_BUILTIN_MONITOR:
14005 arg0 = TREE_VALUE (arglist);
14006 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14007 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14008 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14009 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14010 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14011 if (!REG_P (op0))
14012 op0 = copy_to_mode_reg (SImode, op0);
14013 if (!REG_P (op1))
14014 op1 = copy_to_mode_reg (SImode, op1);
14015 if (!REG_P (op2))
14016 op2 = copy_to_mode_reg (SImode, op2);
14017 emit_insn (gen_monitor (op0, op1, op2));
14018 return 0;
14019
14020 case IX86_BUILTIN_MWAIT:
14021 arg0 = TREE_VALUE (arglist);
14022 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14023 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14024 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14025 if (!REG_P (op0))
14026 op0 = copy_to_mode_reg (SImode, op0);
14027 if (!REG_P (op1))
14028 op1 = copy_to_mode_reg (SImode, op1);
14029 emit_insn (gen_mwait (op0, op1));
14030 return 0;
14031
14032 case IX86_BUILTIN_LOADDDUP:
14033 return ix86_expand_unop_builtin (CODE_FOR_loadddup, arglist, target, 1);
14034
14035 case IX86_BUILTIN_LDDQU:
14036 return ix86_expand_unop_builtin (CODE_FOR_lddqu, arglist, target,
14037 1);
14038
14039 default:
14040 break;
14041 }
14042
14043 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
14044 if (d->code == fcode)
14045 {
14046 /* Compares are treated specially. */
14047 if (d->icode == CODE_FOR_maskcmpv4sf3
14048 || d->icode == CODE_FOR_vmmaskcmpv4sf3
14049 || d->icode == CODE_FOR_maskncmpv4sf3
14050 || d->icode == CODE_FOR_vmmaskncmpv4sf3
14051 || d->icode == CODE_FOR_maskcmpv2df3
14052 || d->icode == CODE_FOR_vmmaskcmpv2df3
14053 || d->icode == CODE_FOR_maskncmpv2df3
14054 || d->icode == CODE_FOR_vmmaskncmpv2df3)
14055 return ix86_expand_sse_compare (d, arglist, target);
14056
14057 return ix86_expand_binop_builtin (d->icode, arglist, target);
14058 }
14059
14060 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
14061 if (d->code == fcode)
14062 return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
14063
14064 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
14065 if (d->code == fcode)
14066 return ix86_expand_sse_comi (d, arglist, target);
14067
14068 /* @@@ Should really do something sensible here. */
14069 return 0;
14070 }
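
/* Illustrative note, not part of the compiler: the PSLLDQI128 / PSRLDQI128
   cases above require a compile-time constant shift count (expressed in
   bits) and otherwise report "shift must be an immediate".  A hedged
   source-level sketch, assuming the <emmintrin.h> wrappers of this era map
   _mm_slli_si128 onto __builtin_ia32_pslldqi128 with the byte count scaled
   to bits:  */
#if 0
#include <emmintrin.h>

__m128i
example_byte_shift (__m128i x)
{
  /* OK: literal byte count; a variable count would trigger the error.  */
  return _mm_slli_si128 (x, 4);
}
#endif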
14071
14072 /* Store OPERAND to the memory after reload is completed. This means
14073 that we can't easily use assign_stack_local. */
14074 rtx
14075 ix86_force_to_memory (mode, operand)
14076 enum machine_mode mode;
14077 rtx operand;
14078 {
14079 rtx result;
14080 if (!reload_completed)
14081 abort ();
14082 if (TARGET_64BIT && TARGET_RED_ZONE)
14083 {
14084 result = gen_rtx_MEM (mode,
14085 gen_rtx_PLUS (Pmode,
14086 stack_pointer_rtx,
14087 GEN_INT (-RED_ZONE_SIZE)));
14088 emit_move_insn (result, operand);
14089 }
14090 else if (TARGET_64BIT && !TARGET_RED_ZONE)
14091 {
14092 switch (mode)
14093 {
14094 case HImode:
14095 case SImode:
14096 operand = gen_lowpart (DImode, operand);
14097 /* FALLTHRU */
14098 case DImode:
14099 emit_insn (
14100 gen_rtx_SET (VOIDmode,
14101 gen_rtx_MEM (DImode,
14102 gen_rtx_PRE_DEC (DImode,
14103 stack_pointer_rtx)),
14104 operand));
14105 break;
14106 default:
14107 abort ();
14108 }
14109 result = gen_rtx_MEM (mode, stack_pointer_rtx);
14110 }
14111 else
14112 {
14113 switch (mode)
14114 {
14115 case DImode:
14116 {
14117 rtx operands[2];
14118 split_di (&operand, 1, operands, operands + 1);
14119 emit_insn (
14120 gen_rtx_SET (VOIDmode,
14121 gen_rtx_MEM (SImode,
14122 gen_rtx_PRE_DEC (Pmode,
14123 stack_pointer_rtx)),
14124 operands[1]));
14125 emit_insn (
14126 gen_rtx_SET (VOIDmode,
14127 gen_rtx_MEM (SImode,
14128 gen_rtx_PRE_DEC (Pmode,
14129 stack_pointer_rtx)),
14130 operands[0]));
14131 }
14132 break;
14133 case HImode:
14134 /* It is better to store HImodes as SImodes. */
14135 if (!TARGET_PARTIAL_REG_STALL)
14136 operand = gen_lowpart (SImode, operand);
14137 /* FALLTHRU */
14138 case SImode:
14139 emit_insn (
14140 gen_rtx_SET (VOIDmode,
14141 gen_rtx_MEM (GET_MODE (operand),
14142 gen_rtx_PRE_DEC (SImode,
14143 stack_pointer_rtx)),
14144 operand));
14145 break;
14146 default:
14147 abort ();
14148 }
14149 result = gen_rtx_MEM (mode, stack_pointer_rtx);
14150 }
14151 return result;
14152 }
14153
14154 /* Free operand from the memory. */
14155 void
14156 ix86_free_from_memory (mode)
14157 enum machine_mode mode;
14158 {
14159 if (!TARGET_64BIT || !TARGET_RED_ZONE)
14160 {
14161 int size;
14162
14163 if (mode == DImode || TARGET_64BIT)
14164 size = 8;
14165 else if (mode == HImode && TARGET_PARTIAL_REG_STALL)
14166 size = 2;
14167 else
14168 size = 4;
14169 /* Use LEA to deallocate stack space.  In peephole2 it will be converted
14170 to a pop or add instruction if registers are available.  */
14171 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
14172 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
14173 GEN_INT (size))));
14174 }
14175 }
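
/* A minimal hedged sketch (not compiled in) of how the two helpers above
   are meant to be paired after reload: ix86_force_to_memory pushes OPERAND
   (or writes it into the red zone on 64-bit) and returns a MEM the caller
   can use, and ix86_free_from_memory releases the slot again.
   example_emit_use is a hypothetical consumer of the memory operand.  */
#if 0
static void
example_force_free_pair (rtx value)
{
  rtx slot = ix86_force_to_memory (SImode, value);

  example_emit_use (slot);		/* hypothetical: emit insns that read SLOT */

  ix86_free_from_memory (SImode);	/* deallocate the temporary slot */
}
#endif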
14176
14177 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
14178 QImode must go into class Q_REGS.
14179 Narrow ALL_REGS to GENERAL_REGS.  This lets movsf and
14180 movdf do mem-to-mem moves through integer regs.  */
14181 enum reg_class
14182 ix86_preferred_reload_class (x, class)
14183 rtx x;
14184 enum reg_class class;
14185 {
14186 if (GET_CODE (x) == CONST_VECTOR && x != CONST0_RTX (GET_MODE (x)))
14187 return NO_REGS;
14188 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
14189 {
14190 /* SSE can't load any constant directly yet. */
14191 if (SSE_CLASS_P (class))
14192 return NO_REGS;
14193 /* Floats can load 0 and 1. */
14194 if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x))
14195 {
14196 /* Limit class to non-SSE. Use GENERAL_REGS if possible. */
14197 if (MAYBE_SSE_CLASS_P (class))
14198 return (reg_class_subset_p (class, GENERAL_REGS)
14199 ? GENERAL_REGS : FLOAT_REGS);
14200 else
14201 return class;
14202 }
14203 /* General regs can load everything. */
14204 if (reg_class_subset_p (class, GENERAL_REGS))
14205 return GENERAL_REGS;
14206 /* In case we haven't resolved FLOAT or SSE yet, give up. */
14207 if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
14208 return NO_REGS;
14209 }
14210 if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
14211 return NO_REGS;
14212 if (GET_MODE (x) == QImode && ! reg_class_subset_p (class, Q_REGS))
14213 return Q_REGS;
14214 return class;
14215 }
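
/* Hedged illustration (not compiled in) of the rules above for a few
   representative inputs; CONST1_RTX (DFmode) is the CONST_DOUBLE 1.0,
   which standard_80387_constant_p accepts (fld1).  */
#if 0
static void
example_preferred_reload_class (void)
{
  /* 1.0 can be loaded by the 387, so FLOAT_REGS is kept.  */
  if (ix86_preferred_reload_class (CONST1_RTX (DFmode), FLOAT_REGS)
      != FLOAT_REGS)
    abort ();

  /* SSE can't load any constant directly yet.  */
  if (ix86_preferred_reload_class (CONST1_RTX (DFmode), SSE_REGS) != NO_REGS)
    abort ();

  /* QImode values are narrowed to Q_REGS when CLASS is not a subset.  */
  if (ix86_preferred_reload_class (gen_rtx_REG (QImode, 0), GENERAL_REGS)
      != Q_REGS)
    abort ();
}
#endif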
14216
14217 /* If we are copying between general and FP registers, we need a memory
14218 location. The same is true for SSE and MMX registers.
14219
14220 The macro can't work reliably when one of the CLASSES is a class containing
14221 registers from multiple units (SSE, MMX, integer).  We avoid this by never
14222 combining those units in a single alternative in the machine description.
14223 Ensure that this constraint holds to avoid unexpected surprises.
14224
14225 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
14226 enforce these sanity checks. */
14227 int
14228 ix86_secondary_memory_needed (class1, class2, mode, strict)
14229 enum reg_class class1, class2;
14230 enum machine_mode mode;
14231 int strict;
14232 {
14233 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
14234 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
14235 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
14236 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
14237 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
14238 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
14239 {
14240 if (strict)
14241 abort ();
14242 else
14243 return 1;
14244 }
14245 return (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)
14246 || (SSE_CLASS_P (class1) != SSE_CLASS_P (class2)
14247 && (mode) != SImode)
14248 || (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
14249 && (mode) != SImode));
14250 }
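
/* Hedged illustration (not compiled in): DFmode moves between the SSE and
   x87 units must go through memory, whereas an SImode move between SSE and
   integer registers can be done directly (movd).  */
#if 0
static void
example_secondary_memory_needed (void)
{
  if (!ix86_secondary_memory_needed (SSE_REGS, FLOAT_REGS, DFmode, 1))
    abort ();
  if (ix86_secondary_memory_needed (SSE_REGS, GENERAL_REGS, SImode, 1))
    abort ();
}
#endif
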
14251 /* Return the cost of moving data from a register in class CLASS1 to
14252 one in class CLASS2.
14253
14254 It is not required that the cost always equal 2 when FROM is the same as TO;
14255 on some machines it is expensive to move between registers if they are not
14256 general registers. */
14257 int
14258 ix86_register_move_cost (mode, class1, class2)
14259 enum machine_mode mode;
14260 enum reg_class class1, class2;
14261 {
14262 /* If secondary memory is required, compute the cost of the store followed
14263 by the load.  To avoid bad register allocation choices, we need this
14264 to be *at least* as high as the symmetric MEMORY_MOVE_COST.  */
14265
14266 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
14267 {
14268 int cost = 1;
14269
14270 cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
14271 MEMORY_MOVE_COST (mode, class1, 1));
14272 cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
14273 MEMORY_MOVE_COST (mode, class2, 1));
14274
14275 /* When copying from a general purpose register we may emit multiple
14276 stores followed by a single load, causing a memory size mismatch
14277 stall.  Count this as an arbitrarily high cost of 20.  */
14278 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
14279 cost += 20;
14280
14281 /* In the case of FP/MMX moves, the registers actually overlap, and we
14282 have to switch modes in order to treat them differently. */
14283 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
14284 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
14285 cost += 20;
14286
14287 return cost;
14288 }
14289
14290 /* Moves between SSE/MMX and integer unit are expensive. */
14291 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
14292 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
14293 return ix86_cost->mmxsse_to_integer;
14294 if (MAYBE_FLOAT_CLASS_P (class1))
14295 return ix86_cost->fp_move;
14296 if (MAYBE_SSE_CLASS_P (class1))
14297 return ix86_cost->sse_move;
14298 if (MAYBE_MMX_CLASS_P (class1))
14299 return ix86_cost->mmx_move;
14300 return 2;
14301 }
14302
14303 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
14304 int
14305 ix86_hard_regno_mode_ok (regno, mode)
14306 int regno;
14307 enum machine_mode mode;
14308 {
14309 /* Flags and only flags can only hold CCmode values. */
14310 if (CC_REGNO_P (regno))
14311 return GET_MODE_CLASS (mode) == MODE_CC;
14312 if (GET_MODE_CLASS (mode) == MODE_CC
14313 || GET_MODE_CLASS (mode) == MODE_RANDOM
14314 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
14315 return 0;
14316 if (FP_REGNO_P (regno))
14317 return VALID_FP_MODE_P (mode);
14318 if (SSE_REGNO_P (regno))
14319 return (TARGET_SSE ? VALID_SSE_REG_MODE (mode) : 0);
14320 if (MMX_REGNO_P (regno))
14321 return (TARGET_MMX
14322 ? VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode) : 0);
14323 /* We handle both integers and floats in the general purpose registers.
14324 In the future we should be able to handle vector modes as well.  */
14325 if (!VALID_INT_MODE_P (mode) && !VALID_FP_MODE_P (mode))
14326 return 0;
14327 /* Take care with QImode values - they can be in non-QI regs, but then
14328 they do cause partial register stalls.  */
14329 if (regno < 4 || mode != QImode || TARGET_64BIT)
14330 return 1;
14331 return reload_in_progress || reload_completed || !TARGET_PARTIAL_REG_STALL;
14332 }
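
/* Hedged illustration (not compiled in): the flags register holds only
   CCmode values, and an x87 stack register can hold FP modes but not
   SImode.  FLAGS_REG and FIRST_STACK_REG come from i386.h.  */
#if 0
static void
example_hard_regno_mode_ok (void)
{
  if (!ix86_hard_regno_mode_ok (FLAGS_REG, CCmode))
    abort ();
  if (ix86_hard_regno_mode_ok (FIRST_STACK_REG, SImode))
    abort ();
}
#endif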
14333
14334 /* Return the cost of moving data of mode M between a
14335 register and memory. A value of 2 is the default; this cost is
14336 relative to those in `REGISTER_MOVE_COST'.
14337
14338 If moving between registers and memory is more expensive than
14339 between two registers, you should define this macro to express the
14340 relative cost.
14341
14342 Also model the increased cost of moving QImode registers in
14343 non-Q_REGS classes.
14344 */
14345 int
14346 ix86_memory_move_cost (mode, class, in)
14347 enum machine_mode mode;
14348 enum reg_class class;
14349 int in;
14350 {
14351 if (FLOAT_CLASS_P (class))
14352 {
14353 int index;
14354 switch (mode)
14355 {
14356 case SFmode:
14357 index = 0;
14358 break;
14359 case DFmode:
14360 index = 1;
14361 break;
14362 case XFmode:
14363 case TFmode:
14364 index = 2;
14365 break;
14366 default:
14367 return 100;
14368 }
14369 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
14370 }
14371 if (SSE_CLASS_P (class))
14372 {
14373 int index;
14374 switch (GET_MODE_SIZE (mode))
14375 {
14376 case 4:
14377 index = 0;
14378 break;
14379 case 8:
14380 index = 1;
14381 break;
14382 case 16:
14383 index = 2;
14384 break;
14385 default:
14386 return 100;
14387 }
14388 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
14389 }
14390 if (MMX_CLASS_P (class))
14391 {
14392 int index;
14393 switch (GET_MODE_SIZE (mode))
14394 {
14395 case 4:
14396 index = 0;
14397 break;
14398 case 8:
14399 index = 1;
14400 break;
14401 default:
14402 return 100;
14403 }
14404 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
14405 }
14406 switch (GET_MODE_SIZE (mode))
14407 {
14408 case 1:
14409 if (in)
14410 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
14411 : ix86_cost->movzbl_load);
14412 else
14413 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
14414 : ix86_cost->int_store[0] + 4);
14415 break;
14416 case 2:
14417 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
14418 default:
14419 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
14420 if (mode == TFmode)
14421 mode = XFmode;
14422 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
14423 * ((int) GET_MODE_SIZE (mode)
14424 + UNITS_PER_WORD -1 ) / UNITS_PER_WORD);
14425 }
14426 }
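
/* Hedged illustration (not compiled in) of how the cost tables at the top
   of this file are indexed: a DFmode load into FLOAT_REGS uses fp_load[1],
   a 16-byte store from SSE_REGS uses sse_store[2], and a QImode load into
   a non-Q class pays the movzbl_load cost.  */
#if 0
static void
example_memory_move_cost (void)
{
  if (ix86_memory_move_cost (DFmode, FLOAT_REGS, 1) != ix86_cost->fp_load[1]
      || ix86_memory_move_cost (V2DFmode, SSE_REGS, 0) != ix86_cost->sse_store[2]
      || ix86_memory_move_cost (QImode, NON_Q_REGS, 1) != ix86_cost->movzbl_load)
    abort ();
}
#endif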
14427
14428 #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
14429 static void
14430 ix86_svr3_asm_out_constructor (symbol, priority)
14431 rtx symbol;
14432 int priority ATTRIBUTE_UNUSED;
14433 {
14434 init_section ();
14435 fputs ("\tpushl $", asm_out_file);
14436 assemble_name (asm_out_file, XSTR (symbol, 0));
14437 fputc ('\n', asm_out_file);
14438 }
14439 #endif
14440
14441 #if TARGET_MACHO
14442
14443 static int current_machopic_label_num;
14444
14445 /* Given a symbol name and its associated stub, write out the
14446 definition of the stub. */
14447
14448 void
14449 machopic_output_stub (file, symb, stub)
14450 FILE *file;
14451 const char *symb, *stub;
14452 {
14453 unsigned int length;
14454 char *binder_name, *symbol_name, lazy_ptr_name[32];
14455 int label = ++current_machopic_label_num;
14456
14457 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
14458 symb = (*targetm.strip_name_encoding) (symb);
14459
14460 length = strlen (stub);
14461 binder_name = alloca (length + 32);
14462 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
14463
14464 length = strlen (symb);
14465 symbol_name = alloca (length + 32);
14466 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
14467
14468 sprintf (lazy_ptr_name, "L%d$lz", label);
14469
14470 if (MACHOPIC_PURE)
14471 machopic_picsymbol_stub_section ();
14472 else
14473 machopic_symbol_stub_section ();
14474
14475 fprintf (file, "%s:\n", stub);
14476 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
14477
14478 if (MACHOPIC_PURE)
14479 {
14480 fprintf (file, "\tcall LPC$%d\nLPC$%d:\tpopl %%eax\n", label, label);
14481 fprintf (file, "\tmovl %s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
14482 fprintf (file, "\tjmp %%edx\n");
14483 }
14484 else
14485 fprintf (file, "\tjmp *%s\n", lazy_ptr_name);
14486
14487 fprintf (file, "%s:\n", binder_name);
14488
14489 if (MACHOPIC_PURE)
14490 {
14491 fprintf (file, "\tlea %s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
14492 fprintf (file, "\tpushl %%eax\n");
14493 }
14494 else
14495 fprintf (file, "\t pushl $%s\n", lazy_ptr_name);
14496
14497 fprintf (file, "\tjmp dyld_stub_binding_helper\n");
14498
14499 machopic_lazy_symbol_ptr_section ();
14500 fprintf (file, "%s:\n", lazy_ptr_name);
14501 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
14502 fprintf (file, "\t.long %s\n", binder_name);
14503 }
14504 #endif /* TARGET_MACHO */
14505
14506 /* Order the registers for register allocator. */
14507
14508 void
14509 x86_order_regs_for_local_alloc ()
14510 {
14511 int pos = 0;
14512 int i;
14513
14514 /* First allocate the local general purpose registers. */
14515 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
14516 if (GENERAL_REGNO_P (i) && call_used_regs[i])
14517 reg_alloc_order [pos++] = i;
14518
14519 /* Global general purpose registers. */
14520 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
14521 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
14522 reg_alloc_order [pos++] = i;
14523
14524 /* x87 registers come first in case we are doing FP math
14525 using them. */
14526 if (!TARGET_SSE_MATH)
14527 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
14528 reg_alloc_order [pos++] = i;
14529
14530 /* SSE registers. */
14531 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
14532 reg_alloc_order [pos++] = i;
14533 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
14534 reg_alloc_order [pos++] = i;
14535
14536 /* x87 registers.  */
14537 if (TARGET_SSE_MATH)
14538 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
14539 reg_alloc_order [pos++] = i;
14540
14541 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
14542 reg_alloc_order [pos++] = i;
14543
14544 /* Initialize the rest of the array, as we do not allocate some registers
14545 at all.  */
14546 while (pos < FIRST_PSEUDO_REGISTER)
14547 reg_alloc_order [pos++] = 0;
14548 }
14549
14550 /* Returns an expression indicating where the this parameter is
14551 located on entry to the FUNCTION. */
14552
14553 static rtx
14554 x86_this_parameter (function)
14555 tree function;
14556 {
14557 tree type = TREE_TYPE (function);
14558
14559 if (TARGET_64BIT)
14560 {
14561 int n = aggregate_value_p (TREE_TYPE (type)) != 0;
14562 return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
14563 }
14564
14565 if (ix86_fntype_regparm (type) > 0)
14566 {
14567 tree parm;
14568
14569 parm = TYPE_ARG_TYPES (type);
14570 /* Figure out whether or not the function has a variable number of
14571 arguments. */
14572 for (; parm; parm = TREE_CHAIN (parm))
14573 if (TREE_VALUE (parm) == void_type_node)
14574 break;
14575 /* If not, the this parameter is in %eax. */
14576 if (parm)
14577 return gen_rtx_REG (SImode, 0);
14578 }
14579
14580 if (aggregate_value_p (TREE_TYPE (type)))
14581 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
14582 else
14583 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
14584 }
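
/* A hedged summary of the cases above: on 64-bit, `this' arrives in the
   first (or, when the return value is passed in memory, the second)
   integer parameter register; on 32-bit it is in %eax for fixed-argument
   regparm functions, otherwise at 4(%esp), or at 8(%esp) when a hidden
   aggregate-return pointer is pushed first.  */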
14585
14586 /* Determine whether x86_output_mi_thunk can succeed. */
14587
14588 static bool
14589 x86_can_output_mi_thunk (thunk, delta, vcall_offset, function)
14590 tree thunk ATTRIBUTE_UNUSED;
14591 HOST_WIDE_INT delta ATTRIBUTE_UNUSED;
14592 HOST_WIDE_INT vcall_offset;
14593 tree function;
14594 {
14595 /* 64-bit can handle anything. */
14596 if (TARGET_64BIT)
14597 return true;
14598
14599 /* For 32-bit, everything's fine if we have one free register. */
14600 if (ix86_fntype_regparm (TREE_TYPE (function)) < 3)
14601 return true;
14602
14603 /* Need a free register for vcall_offset. */
14604 if (vcall_offset)
14605 return false;
14606
14607 /* Need a free register for GOT references. */
14608 if (flag_pic && !(*targetm.binds_local_p) (function))
14609 return false;
14610
14611 /* Otherwise ok. */
14612 return true;
14613 }
14614
14615 /* Output the assembler code for a thunk function. THUNK_DECL is the
14616 declaration for the thunk function itself, FUNCTION is the decl for
14617 the target function. DELTA is an immediate constant offset to be
14618 added to THIS. If VCALL_OFFSET is non-zero, the word at
14619 *(*this + vcall_offset) should be added to THIS. */
14620
14621 static void
14622 x86_output_mi_thunk (file, thunk, delta, vcall_offset, function)
14623 FILE *file ATTRIBUTE_UNUSED;
14624 tree thunk ATTRIBUTE_UNUSED;
14625 HOST_WIDE_INT delta;
14626 HOST_WIDE_INT vcall_offset;
14627 tree function;
14628 {
14629 rtx xops[3];
14630 rtx this = x86_this_parameter (function);
14631 rtx this_reg, tmp;
14632
14633 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
14634 pull it in now and let DELTA benefit. */
14635 if (REG_P (this))
14636 this_reg = this;
14637 else if (vcall_offset)
14638 {
14639 /* Put the this parameter into %eax. */
14640 xops[0] = this;
14641 xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
14642 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
14643 }
14644 else
14645 this_reg = NULL_RTX;
14646
14647 /* Adjust the this parameter by a fixed constant. */
14648 if (delta)
14649 {
14650 xops[0] = GEN_INT (delta);
14651 xops[1] = this_reg ? this_reg : this;
14652 if (TARGET_64BIT)
14653 {
14654 if (!x86_64_general_operand (xops[0], DImode))
14655 {
14656 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
14657 xops[1] = tmp;
14658 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
14659 xops[0] = tmp;
14660 xops[1] = this;
14661 }
14662 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
14663 }
14664 else
14665 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
14666 }
14667
14668 /* Adjust the this parameter by a value stored in the vtable. */
14669 if (vcall_offset)
14670 {
14671 if (TARGET_64BIT)
14672 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
14673 else
14674 tmp = gen_rtx_REG (SImode, 2 /* ECX */);
14675
14676 xops[0] = gen_rtx_MEM (Pmode, this_reg);
14677 xops[1] = tmp;
14678 if (TARGET_64BIT)
14679 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
14680 else
14681 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
14682
14683 /* Adjust the this parameter. */
14684 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
14685 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
14686 {
14687 rtx tmp2 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
14688 xops[0] = GEN_INT (vcall_offset);
14689 xops[1] = tmp2;
14690 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
14691 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
14692 }
14693 xops[1] = this_reg;
14694 if (TARGET_64BIT)
14695 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
14696 else
14697 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
14698 }
14699
14700 /* If necessary, drop THIS back to its stack slot. */
14701 if (this_reg && this_reg != this)
14702 {
14703 xops[0] = this_reg;
14704 xops[1] = this;
14705 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
14706 }
14707
14708 xops[0] = DECL_RTL (function);
14709 if (TARGET_64BIT)
14710 {
14711 if (!flag_pic || (*targetm.binds_local_p) (function))
14712 output_asm_insn ("jmp\t%P0", xops);
14713 else
14714 {
14715 tmp = XEXP (xops[0], 0);
14716 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, tmp), UNSPEC_GOTPCREL);
14717 tmp = gen_rtx_CONST (Pmode, tmp);
14718 tmp = gen_rtx_MEM (QImode, tmp);
14719 xops[0] = tmp;
14720 output_asm_insn ("jmp\t%A0", xops);
14721 }
14722 }
14723 else
14724 {
14725 if (!flag_pic || (*targetm.binds_local_p) (function))
14726 output_asm_insn ("jmp\t%P0", xops);
14727 else
14728 {
14729 tmp = gen_rtx_REG (SImode, 2 /* ECX */);
14730 output_set_got (tmp);
14731
14732 xops[1] = tmp;
14733 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
14734 output_asm_insn ("jmp\t{*}%1", xops);
14735 }
14736 }
14737 }
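
/* A hedged, purely illustrative C rendering (not compiled in) of what the
   emitted thunk computes before tail-calling FUNCTION: adjust `this' by
   DELTA and then, if VCALL_OFFSET is non-zero, by the word found in the
   vtable at that offset.  */
#if 0
static void *
example_thunk_semantics (void *this_ptr, long delta, long vcall_offset)
{
  char *p = (char *) this_ptr + delta;

  if (vcall_offset)
    p += *(long *) (*(char **) p + vcall_offset);

  return p;	/* the real thunk then jumps to FUNCTION with P as `this' */
}
#endif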
14738
14739 int
14740 x86_field_alignment (field, computed)
14741 tree field;
14742 int computed;
14743 {
14744 enum machine_mode mode;
14745 tree type = TREE_TYPE (field);
14746
14747 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
14748 return computed;
14749 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
14750 ? get_inner_array_type (type) : type);
14751 if (mode == DFmode || mode == DCmode
14752 || GET_MODE_CLASS (mode) == MODE_INT
14753 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
14754 return MIN (32, computed);
14755 return computed;
14756 }
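
/* Hedged illustration: without -malign-double, a double (or long long)
   field is capped at 4-byte alignment on 32-bit targets, matching the
   traditional System V i386 struct layout.  */
#if 0
struct example_layout
{
  char c;
  double d;	/* offset 4, not 8, so sizeof (struct example_layout) == 12 */
};
#endif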
14757
14758 /* Output assembler code to FILE to increment profiler label # LABELNO
14759 for profiling a function entry. */
14760 void
14761 x86_function_profiler (file, labelno)
14762 FILE *file;
14763 int labelno;
14764 {
14765 if (TARGET_64BIT)
14766 if (flag_pic)
14767 {
14768 #ifndef NO_PROFILE_COUNTERS
14769 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
14770 #endif
14771 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
14772 }
14773 else
14774 {
14775 #ifndef NO_PROFILE_COUNTERS
14776 fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno);
14777 #endif
14778 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
14779 }
14780 else if (flag_pic)
14781 {
14782 #ifndef NO_PROFILE_COUNTERS
14783 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
14784 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
14785 #endif
14786 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
14787 }
14788 else
14789 {
14790 #ifndef NO_PROFILE_COUNTERS
14791 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
14792 PROFILE_COUNT_REGISTER);
14793 #endif
14794 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
14795 }
14796 }
14797
14798 /* Implement machine specific optimizations.
14799 At the moment we implement a single transformation: AMD Athlon works faster
14800 when RET is not the destination of a conditional jump or directly preceded
14801 by another jump instruction.  We avoid the penalty by inserting a NOP just
14802 before the RET instruction in such cases.  */
14803 void
14804 x86_machine_dependent_reorg (first)
14805 rtx first ATTRIBUTE_UNUSED;
14806 {
14807 edge e;
14808
14809 if (!TARGET_ATHLON || !optimize || optimize_size)
14810 return;
14811 for (e = EXIT_BLOCK_PTR->pred; e; e = e->pred_next)
14812 {
14813 basic_block bb = e->src;
14814 rtx ret = bb->end;
14815 rtx prev;
14816 bool insert = false;
14817
14818 if (!returnjump_p (ret) || !maybe_hot_bb_p (bb))
14819 continue;
14820 prev = prev_nonnote_insn (ret);
14821 if (prev && GET_CODE (prev) == CODE_LABEL)
14822 {
14823 edge e;
14824 for (e = bb->pred; e; e = e->pred_next)
14825 if (EDGE_FREQUENCY (e) && e->src->index > 0
14826 && !(e->flags & EDGE_FALLTHRU))
14827 insert = 1;
14828 }
14829 if (!insert)
14830 {
14831 prev = prev_real_insn (ret);
14832 if (prev && GET_CODE (prev) == JUMP_INSN
14833 && any_condjump_p (prev))
14834 insert = 1;
14835 }
14836 if (insert)
14837 emit_insn_before (gen_nop (), ret);
14838 }
14839 }
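
/* Hedged illustration of the transformation: a conditional jump that falls
   straight into a RET gets a NOP inserted ahead of the RET, e.g.

	jne	.L2			jne	.L2
	ret		becomes		nop
					ret				*/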
14840
14841 /* Return true if we do not know how to pass TYPE solely in registers.  */
14842 bool
14843 ix86_must_pass_in_stack (mode, type)
14844 enum machine_mode mode;
14845 tree type;
14846 {
14847 if (default_must_pass_in_stack (mode, type))
14848 return true;
14849 return (!TARGET_64BIT && type && mode == TImode);
14850 }
14851
14852 #include "gt-i386.h"
14853